| import numpy as np |
| import pandas as pd |
| import streamlit as st |
| from streamlit_option_menu import option_menu |
| import pickle |
| import catboost |
| import requests |
|
|
| |
# Page-wide CSS for elements carrying the "top-bar" class: orange background,
# white centred text, and white undecorated links.  The stylesheet is raw
# HTML, which is why it is rendered with unsafe_allow_html=True.
_TOP_BAR_CSS = """
<style>
.top-bar {
background-color: #FF4C1B;
color: white;
padding: 1rem;
text-align: center;
}
.top-bar a {
text-decoration: none;
color: white;
margin: 10px;
}
</style>
"""

st.markdown(_TOP_BAR_CSS, unsafe_allow_html=True)
|
|
def home_page():
    """Render the landing page.

    Shows the app title and banner image, a short introduction, and a
    "The Problem" section listing six challenges of income inequality.
    Takes no arguments and returns nothing; all output goes through
    Streamlit.
    """
    # NOTE(review): several emoji in the texts below look mis-encoded
    # (e.g. "πΈ"); they are reproduced as found -- confirm the file encoding.
    intro_text = """
    This application is a machine learning project that aims to predict whether an individual's income falls above or below a specific income threshold. This information can be used to monitor income inequality and inform policy decisions.
    """

    problem_text = """
    Income inequality, a pervasive challenge that hinders economic progress and social well-being, demands innovative solutions. This app tackles this issue head-on, harnessing the power of machine learning to predict individual income levels.

    **Key Challenges of Income Inequality:** β

    1. **Limited Economic Mobility:** π

    Individuals from lower-income households often face barriers to education and professional growth, perpetuating income disparities.

    2. **Healthcare Disparities:** π©Ί

    Income inequality often translates into unequal access to quality healthcare, leading to adverse health outcomes for lower-income individuals.

    3. **Education Gaps:** π

    Children from low-income households may have limited access to quality education, hindering their future opportunities.

    4. **Social Unrest:** π’

    Extreme income inequality can fuel social unrest as individuals feel disenfranchised and discouraged.

    5. **Economic Impact:** π

    Income inequality impedes economic growth by reducing aggregate demand and creating economic instability.

    6. **Policymaking Challenges:** π§©

    Policymakers require accurate data and insights to formulate effective strategies for reducing income inequality.
    """

    # Title, banner and introduction.
    st.title("Income Prediction App")
    st.image("https://i.ytimg.com/vi/WULwst0vW8g/maxresdefault.jpg")
    st.write(intro_text)

    # Problem statement.
    st.header("The Problem: Income Inequality πΈ")
    st.write(problem_text)
|
|
| |
def solution():
    """Render the "Solution" page.

    The page is purely static: a title and image followed by four sections
    (solution overview, objectives, model description, impact and benefits).
    Takes no arguments and returns nothing; all output goes through
    Streamlit.
    """
    # NOTE(review): several emoji in the texts below look mis-encoded
    # (e.g. "π‘"); they are reproduced as found -- confirm the file encoding.
    st.title("Income Prediction Solution")
    st.image("https://d2gg9evh47fn9z.cloudfront.net/1600px_COLOURBOX15103453.jpg")

    # --- Solution overview -------------------------------------------------
    st.header("Solution π‘: Combating Income Inequality with Data-Driven Solutions π")
    st.write("""

    The app utilizes machine learning to predict individual income levels, providing valuable data to policymakers for informed action. This data-driven approach offers several advantages:

    * **Cost-Effectiveness:** π°

    Machine learning models are more cost-effective than traditional census methods.

    * **Timeliness:** β±οΈ

    Income predictions can be generated frequently, enabling timely interventions.

    * **Scalability:** π

    Machine learning models can be scaled to predict incomes for large populations, making them applicable to a wide range of scenarios.
    """)

    # --- Objectives --------------------------------------------------------
    st.header("Objectives: π―")
    st.write("""
    1. **Income Prediction Model:** Develop a robust machine learning model to accurately predict individual income levels.

    2. **Economic Inequality Mitigation:** Empower policymakers with data-driven insights to effectively address income inequality.

    3. **Cost and Accuracy Improvement:** Enhance income-level monitoring through a cost-effective and accurate method compared to traditional census methods.

    Join us in tackling income inequality with data-driven solutions!
    """)

    # --- Model description -------------------------------------------------
    # The first bullet previously read "*Trained on ..." with no space after
    # the marker, so Markdown did not treat it as a list item like its
    # siblings; the space is restored here.
    st.header("Model Description")
    st.write("""
    **Model Training:**
    * Trained on a dataset of demographic and socioeconomic factors influencing income levels π

    * A [CatBoost Classifier](https://catboost.ai/en/docs/concepts/python-reference_catboostclassifier) supervised learning algorithm used for model development βοΈ

    **Model Evaluation:**
    * Performance assessed using metrics like accuracy, precision, recall, and F1 score ππ

    * Metrics evaluate the model's ability to classify individual income levels correctly βοΈ
    """)

    # --- Impact and benefits -----------------------------------------------
    st.header("Impact and Benefits π")
    st.write("""

    **Empowering Policymakers and Promoting Equitable Growth π**

    By providing accurate and timely insights into income distribution, we can empower policymakers to make informed decisions that:

    * Enhance understanding of income patterns π

    * Identify areas with high income inequality π

    * Target interventions to address income gaps π―

    * Effectively allocate resources to poverty reduction π°

    * Promote economic mobility for individuals from low-income backgrounds β¬οΈ

    Overall, this tool has the potential to make a meaningful contribution to the fight against income inequality and promote a more just and equitable society. βοΈ
    """)
|
|
def perform_eda():
    """Render the "Data Insights and Recommendations" landing section.

    Shows a title and a welcome text, then a button; when the button is
    pressed the detailed insights are rendered by
    display_insights_and_recommendations().
    """
    welcome_text = """
    ππ Welcome to the Exploratory Data Analysis for the Income Prediction Project! ππ
    Gain insights into income distribution and factors influencing individuals' income levels based on census data.
    Dive into the data to understand income patterns and key contributing factors. Let's start our data-driven journey! π°π
    """

    st.title("Data Insights and Recommendations")
    st.write(welcome_text)

    # The details are only drawn on the run in which the button is clicked.
    show_details = st.button("Show Insights and Recommendations")
    if show_details:
        display_insights_and_recommendations()
|
|
def display_insights_and_recommendations():
    """Render the dashboard image, a Power BI link and the insights table.

    Each table row pairs an observed insight (first cell) with a
    recommendation (second cell).  Takes no arguments and returns nothing.
    """
    dashboard_url = "https://app.powerbi.com/view?r=eyJrIjoiZDNjMmExZjYtMWU2NS00NTBjLTk4Y2EtYmQ2MWU2OWMwODMyIiwidCI6IjQ0ODdiNTJmLWYxMTgtNDgzMC1iNDlkLTNjMjk4Y2I3MTA3NSJ9"

    dashboard_summary = """
    From the dashboard, you can now appreciate the serious income inequality problem. Explore key insights and actionable recommendations for stakeholders to fight income inequality.
    """

    # One [insight, recommendation] pair per row.
    insight_rows = [
        ["π Higher education levels positively correlate with higher income.", "Invest in accessible and quality education, including scholarships and vocational training, for lower-income communities."],
        ["π©βπ Women are more likely below the income threshold than men.", "Support gender equality programs addressing wage disparities and encouraging women in STEM fields."],
        ["π₯ Income inequality exists across all employment statuses.", "Implement policies and programs supporting stable employment, job training, and entrepreneurship."],
        ["π Racial income disparities: Foster diversity and inclusion in workplaces.", "Promote equal opportunities, diversity training, and an inclusive work environment."],
        ["π Foreigners concentrated below the income threshold.", "Review immigration policies to ensure fair treatment and integration into the workforce."],
        ["π’ Majority below threshold in 'Unknown' occupations.", "Research challenges in different occupations and implement targeted support programs."],
        ["πΈ Nonfilers have higher representation below the threshold.", "Evaluate tax policies for fairness and consider incentives for low-income individuals."],
        ["π Data-driven insights are crucial for addressing income inequality.", "Continue investing in data collection and analysis to inform evolving policies."],
    ]

    # Static snapshot of the dashboard; "default.jpg" is a relative path.
    st.subheader("Exploring Income Data")
    st.write("Let's dive deeper into the data to understand income distribution and relationships between variables.")
    st.image("default.jpg", use_column_width=True)

    st.write(dashboard_summary)

    # Link to the interactive version of the dashboard.
    st.write(f"Explore the interactive Power BI dashboard [here]({dashboard_url}).")

    st.subheader("Insights and Recommendations")
    st.table(insight_rows)
|
|
# Pickled bundle holding the trained model and the selectbox choices.
_MODEL_PATH = "model_and_key_components.pkl"

# Sidebar help: (field label, description) pairs, rendered in this order.
_FIELD_DESCRIPTIONS = [
    ("Age", "Enter the age of the individual (e.g., 25, 42, 57)."),
    ("Gender", "Select the gender of the individual (e.g., Male, Female)."),
    ("Education", "Choose the highest education level of the individual (e.g., Bachelors Degree, High School Graduate, Masters Degree)."),
    ("Worker Class", "Select the class of worker for the individual (e.g., Private, Government, Self-employed)."),
    ("Marital Status", "Choose the marital status of the individual (e.g., Married, Never married, Divorced)."),
    ("Race", "Select the race of the individual (e.g., White, Black, Asian-Pac-Islander)."),
    ("Hispanic Origin", "Choose the Hispanic origin of the individual (e.g., Mexican, Puerto Rican, Cuban)."),
    ("Full/Part-Time Employment", "Select the employment status as full-time or part-time (e.g., Full-time schedules, Part-time schedules)."),
    ("Wage Per Hour", "Enter the wage per hour of the individual (numeric value, e.g., 20.50)."),
    ("Weeks Worked Per Year", "Specify the number of weeks the individual worked in a year (numeric value, e.g., 45)."),
    ("Industry Code", "Choose the category code of the industry where the individual works (e.g., Category 1, Category 2)."),
    ("Major Industry Code", "Select the major industry code of the individual's work (e.g., Industry A, Industry B)."),
    ("Occupation Code", "Choose the category code of the occupation of the individual (e.g., Category X, Category Y)."),
    ("Major Occupation Code", "Select the major occupation code of the individual (e.g., Occupation 1, Occupation 2)."),
    ("Total Employed", "Specify the number of persons worked for the employer (numeric value, e.g., 3, 5)."),
    ("Household Summary", "Select the detailed household summary (e.g., Child under 18 never married, Spouse of householder)."),
    ("Veteran Benefits", "Choose whether the individual receives veteran benefits (Yes or No)."),
    ("Tax Filer Status", "Select the tax filer status of the individual (e.g., Single, Joint both 65+)."),
    ("Gains", "Specify any gains the individual has (numeric value, e.g., 1500.0)."),
    ("Losses", "Specify any losses the individual has (numeric value, e.g., 300.0)."),
    ("Dividends from Stocks", "Specify any dividends from stocks for the individual (numeric value, e.g., 120.5)."),
    ("Citizenship", "Select the citizenship status of the individual (e.g., Native, Foreign Born- Not a citizen of U S)."),
    ("Importance of Record", "Enter the weight of the instance (numeric value, e.g., 0.9)."),
]


def _render_field_descriptions():
    """Write the description of every input field to the sidebar."""
    st.sidebar.header("Description of the Required Input Fields")
    for label, description in _FIELD_DESCRIPTIONS:
        st.sidebar.markdown(f"**{label}**: {description}")


def _collect_inputs(unique_values):
    """Render the input widgets in three columns and return their values.

    Args:
        unique_values: mapping from feature name to the choices offered by
            that feature's selectbox.

    Returns:
        dict mapping feature name to the value currently entered.  Keys are
        inserted in the order the widgets are read; that order becomes the
        column order of the one-row DataFrame handed to the model, so it is
        kept exactly as before.
    """
    input_data = {}

    # Every widget uses the feature name as dict key AND as Streamlit key.
    def select(label, field):
        input_data[field] = st.selectbox(label, unique_values[field], key=field)

    def number(label, field, min_value=0, max_value=None):
        input_data[field] = st.number_input(
            label, min_value=min_value, max_value=max_value, key=field
        )

    col1, col2, col3 = st.columns(3)

    with col1:
        number("Age", 'age')
        select("Gender", 'gender')
        select("Education", 'education')
        select("Class of Worker", 'worker_class')
        select("Marital Status", 'marital_status')
        select("Race", 'race')
        select("Hispanic Origin", 'is_hispanic')
        select("Full/Part-Time Employment", 'employment_commitment')
        select("Has Own Business Or Is Self Employed", 'employment_stat')
        # NOTE(review): the sidebar examples for wage, gains, losses and
        # dividends show decimals, but these widgets are integer-typed
        # (integer min_value).  Left unchanged -- confirm against the
        # training data before switching them to float.
        number("Wage Per Hour", 'wage_per_hour')

    with col2:
        # A year has at most 52 full weeks; reject larger values at the widget.
        number("Weeks Worked Per Year", 'working_week_per_year', max_value=52)
        select("Category Code of Industry", 'industry_code')
        select("Major Industry Code", 'industry_code_main')
        select("Category Code of Occupation", 'occupation_code')
        select("Major Occupation Code", 'occupation_code_main')
        number("Number of Persons Worked for Employer", 'total_employed')
        select("Detailed Household Summary", 'household_summary')
        select("Veteran Benefits", 'vet_benefit')

    with col3:
        select("Tax Filer Status", 'tax_status')
        number("Gains", 'gains')
        number("Losses", 'losses')
        number("Dividends from Stocks", 'stocks_status')
        select("Citizenship", 'citizenship')
        # The record weight is fractional (sidebar example: 0.9).  A float
        # min_value makes the widget float-typed; with an integer min_value
        # decimals could not be entered at all.
        number("Importance of Record", 'importance_of_record', min_value=0.0)

    return input_data


def prediction():
    """Render the "Predict Income" page and run the model on demand.

    Loads the pickled bundle (keys "model" and "unique_values"), shows the
    field descriptions in the sidebar and the input widgets on the page, and
    when "Predict" is pressed displays the predicted class and the
    probability of the positive class.

    If the model file is missing, an error message is shown and the page
    stops rendering instead of raising.
    """
    try:
        with open(_MODEL_PATH, "rb") as model_file:
            # SECURITY: unpickling can execute arbitrary code.  Only load a
            # model file that ships with the app and comes from a trusted
            # source.
            components = pickle.load(model_file)
    except FileNotFoundError:
        st.error(
            f"Model file '{_MODEL_PATH}' was not found in the app's working "
            "directory, so predictions are unavailable."
        )
        return

    model = components["model"]
    unique_values = components["unique_values"]

    st.image("https://i.ytimg.com/vi/WULwst0vW8g/maxresdefault.jpg")
    st.title("Income Prediction App")

    _render_field_descriptions()
    input_data = _collect_inputs(unique_values)

    if st.button("Predict"):
        # One-row frame; columns follow the insertion order of input_data.
        input_df = pd.DataFrame([input_data])

        # Named so the results do not shadow this function's own name.
        predicted_label = model.predict(input_df)
        predicted_proba = model.predict_proba(input_df)

        st.subheader("Prediction")
        if predicted_label[0] == 1:
            st.success("This individual is predicted to have an income of over $50K.")
        else:
            st.error("This individual is predicted to have an income of under $50K")

        # Column 1 of predict_proba is reported as the over-$50K probability.
        st.subheader("Prediction Probability")
        st.write(f"The probability of the individual having an income over $50K is: {predicted_proba[0][1]:.2f}")
| |
|
|
| |
# Page selector shown at the top of the app.
selected_page = st.selectbox(
    "Select a page",
    ["Home", "Solution", "Data Insights and Recommendations", "Predict Income"],
)

# Map each page label to the function that renders it.  Any label without an
# entry here (i.e. "Predict Income") falls through to the prediction form.
_PAGE_RENDERERS = {
    "Home": home_page,
    "Solution": solution,
    "Data Insights and Recommendations": perform_eda,
}
_PAGE_RENDERERS.get(selected_page, prediction)()