File size: 2,801 Bytes
59b7172
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# start by importing the necessary packages
#standard
import numpy as np
import pandas as pd

#plt packages
import seaborn as sns
import altair as alt
import matplotlib.pyplot as plt
#streamlit
import streamlit as st

# Load seaborn's "tips" example dataset.
tips = sns.load_dataset("tips")
# Tip as a percentage of the total bill. Scaled by 100 so the stored value is
# a real percentage (a 16% tip is 16.0, not 0.16), which is what the
# "Tip percentage" label shown to the user promises.
tips['percentage'] = tips['tip'] / tips['total_bill'] * 100


# Page header: the title and the question this dashboard explores.
st.title("Tips Dataset Analysis")
st.subheader("What is the effect of the group size on the tip in percentage and as amount and on total bill amount?")

# Column name -> label shown to the user in the feature selector.
feature_options = {
    'total_bill': 'Total bill',
    'tip': 'Tip',
    'percentage': 'Tip percentage',
}

# Sidebar controls, written with st.sidebar object notation.
st.sidebar.subheader("Filters")

# Distinct group sizes found in the data, in ascending order.
grp_size = tips['size'].unique().tolist()
grp_size.sort()
selected_size = st.sidebar.multiselect(
    "Select group size(s):",
    options=grp_size,
    default=None,
)

# The selector works on column names and displays their labels.
selected_feature = st.sidebar.selectbox(
    "Select feature to analyze:",
    options=list(feature_options),
    format_func=feature_options.get,
)

# Restrict the data to the group sizes picked in the sidebar. An empty
# selection means "no filter", so the full dataset is used instead.
to_app = tips[tips['size'].isin(selected_size)] if selected_size else tips

# Look up the label and compute each average once; both are reused by the
# summary sentence and by the KPI tiles below.
feature_label = feature_options[selected_feature]
selected_mean = to_app[selected_feature].mean()
overall_mean = tips[selected_feature].mean()

# Dynamic text. The second figure is always computed on the full dataset, so
# it does not react to the group-size filter (the app has no sliders).
st.write(f"Average {feature_label} by Group Size is {selected_mean:.2f} compared to {overall_mean:.2f} which is the full dataset, at all times (independent of the group-size filter).")

# KPIs: filtered average next to the full-dataset average.
# The metric() results are not bound to names because nothing reads them.
k1, k2 = st.columns(2)
k1.metric(f"Average {feature_label} per grp size(s)", f"{selected_mean:.2f}")
k2.metric(f"Average {feature_label} with all data", f"{overall_mean:.2f}")

# Charts
# Average of the selected feature for each group size left after filtering.
summary = to_app.groupby('size', as_index=False)[selected_feature].mean()

# The bars and their labels share the same x/y encoding, so it is declared
# once on a base chart and both layers are derived from it.
base = alt.Chart(summary).encode(
    x=alt.X('size:O', title='Group Size'),
    y=alt.Y(f'{selected_feature}:Q', title=f'Average {feature_options[selected_feature]}'),
)

# --- Visualization: bar chart of the average selected feature by group size ---
chart = base.mark_bar().properties(
    width=400,
    height=300,
    title=f"Average {feature_options[selected_feature]} by Group Size",
)

# Add data labels
text = base.mark_text(
    align='center',
    baseline='bottom',
    dy=-2,  # Nudges text above bar
    fontSize=13,
).encode(
    # Two decimals, like the other figures on the page. One decimal is too
    # coarse for small values (0.16 would be printed as 0.2).
    text=alt.Text(f'{selected_feature}:Q', format='.2f'),
)

# Layer the labels on top of the bars and render the result.
final_chart = chart + text
st.altair_chart(final_chart, use_container_width=True)


#st.write(f"Average {feature_options[selected_feature]} by Group Size is {to_app[selected_feature].mean():.2f} compared to {tips[selected_feature].mean():.2f} which is the full dataset, at all times (independent of sliders).")