| """ |
| Batch Analysis page for Smartwatch Normative Z-Score Calculator. |
| |
| Upload multiple patient records for bulk z-score analysis. |
| """ |
| import streamlit as st |
| import pandas as pd |
| import sys |
| import os |
| from io import BytesIO |
|
|
| |
| sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) |
| from batch_utils import get_batch_template_df, process_batch_data, BIOMARKER_LABELS, AVAILABLE_BIOMARKERS |
| import normalizer_model |
|
|
# Page-level settings for this Streamlit page (title, icon, wide layout).
# This is the first Streamlit call made in this file.
_PAGE_SETTINGS = {
    "page_title": "Batch Analysis - Smartwatch Z-Score Calculator",
    "page_icon": "📊",
    "layout": "wide",
}
st.set_page_config(**_PAGE_SETTINGS)

# The normative table is looked up two directory levels above this file's
# absolute path (the same directory that is pushed onto sys.path above).
_THIS_FILE = os.path.abspath(__file__)
_PARENT_OF_PAGE_DIR = os.path.dirname(os.path.dirname(_THIS_FILE))
DATA_PATH = os.path.join(_PARENT_OF_PAGE_DIR, "Table_1_summary_measure.csv")
|
|
@st.cache_data
def _load_normative_table(path):
    """Load the normative table from *path* via ``normalizer_model``.

    Only this raw loader is cached. Because it lets exceptions propagate
    instead of returning a sentinel, a failed load is never stored in the
    cache and the next script run retries the load.
    """
    return normalizer_model.load_normative_table(path)


def get_normative_data():
    """Return the normative table loaded from ``DATA_PATH``, or ``None`` on failure.

    On failure an error box is shown on the page and ``None`` is returned so
    callers can skip the analysis section. The error handling deliberately
    lives outside the cached loader: catching the exception inside the cached
    function would cache the ``None`` result and keep the page broken even
    after the underlying problem is fixed.
    """
    try:
        return _load_normative_table(DATA_PATH)
    except Exception as e:
        # Page-level boundary: report any load problem to the user rather than
        # letting the whole page crash.
        st.error(f"Could not load normative data: {e}")
        return None


# Keep the cache-clearing handle reachable under the public name, as it was
# when the public function itself carried the cache decorator.
get_normative_data.clear = _load_normative_table.clear
|
|
# Normative reference table; None means loading failed and an error was shown.
normative_df = get_normative_data()

# Page heading followed by a short description of the workflow.
st.title("📊 Batch Analysis")
st.markdown(
    "**Upload multiple patient records for bulk smartwatch biomarker analysis**"
)

_intro_text = (
    "Upload an Excel or CSV file with patient data. Each row will be analyzed and "
    "z-scores will be calculated for all available biomarkers."
)
st.info(_intro_text)

# Two equal columns: col1 hosts the template download, col2 the file upload.
col1, col2 = st.columns(2)
|
|
with col1:
    st.subheader("📥 Download Template")
    st.markdown("Use this template to prepare your data in the correct format.")

    template_df = get_batch_template_df()

    # Serialise the template into an in-memory .xlsx workbook.
    template_buffer = BytesIO()
    with pd.ExcelWriter(template_buffer, engine='xlsxwriter') as xlsx:
        sheet_name = 'Patient Data'
        template_df.to_excel(xlsx, index=False, sheet_name=sheet_name)
        sheet = xlsx.sheets[sheet_name]

        header_style = xlsx.book.add_format({
            'bold': True,
            'bg_color': '#e67e22',
            'font_color': 'white',
            'border': 1
        })
        # Re-write row 0 with the header style and set every column's width to 18.
        for position, heading in enumerate(template_df.columns.values):
            sheet.write(0, position, heading, header_style)
            sheet.set_column(position, position, 18)

    # The buffer is read only after the writer context has been closed.
    st.download_button(
        label="⬇️ Download Excel Template",
        data=template_buffer.getvalue(),
        file_name="smartwatch_zscore_template.xlsx",
        mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
    )

    # Static reference table for the demographic columns.
    st.markdown("#### Required Columns:")
    st.markdown("""
    | Column | Description | Example |
    |--------|-------------|---------|
    | patient_id | Unique identifier | P001 |
    | age | Age in years | 45 |
    | gender | Man/Woman | Man |
    | region | Geographic region | Western Europe |
    | bmi | Body Mass Index | 24.5 |
    """)

    # One markdown table row per biomarker code; the code itself is the
    # fallback description when no label is registered for it.
    st.markdown("#### Biomarker Columns (optional):")
    biomarker_rows = [
        f"| {code} | {BIOMARKER_LABELS.get(code, code)} |\n"
        for code in AVAILABLE_BIOMARKERS
    ]
    biomarker_table = (
        "| Column | Description |\n|--------|-------------|\n"
        + "".join(biomarker_rows)
    )
    st.markdown(biomarker_table)

    st.markdown("*Note: Include only the biomarkers you have data for. Leave cells blank if not measured.*")
|
|
with col2:
    st.subheader("📤 Upload Data")

    uploaded_file = st.file_uploader(
        "Choose an Excel or CSV file",
        type=['xlsx', 'xls', 'csv'],
        help="Upload a file with patient data following the template format"
    )

    # Bind `df` on every run so later code can test it without risking a
    # NameError when nothing has been uploaded yet.
    df = None

    if uploaded_file is not None:
        try:
            # Compare the extension case-insensitively: a file named
            # "DATA.CSV" must be parsed as CSV, not handed to the Excel reader.
            if uploaded_file.name.lower().endswith('.csv'):
                df = pd.read_csv(uploaded_file)
            else:
                df = pd.read_excel(uploaded_file)

            st.success(f"✅ Loaded {len(df)} patient records")

            # Only columns whose names exactly match a known biomarker code
            # are treated as biomarker data.
            detected_biomarkers = [col for col in df.columns if col in AVAILABLE_BIOMARKERS]

            if detected_biomarkers:
                detected_labels = ', '.join(
                    BIOMARKER_LABELS.get(b, b) for b in detected_biomarkers
                )
                st.markdown(f"**Detected biomarkers:** {detected_labels}")
            else:
                st.warning("No recognized biomarker columns found. Please check your column names.")

            with st.expander("Preview uploaded data"):
                st.dataframe(df, use_container_width=True)

        except Exception as e:
            # Page-level boundary: any parse or preview failure is reported to
            # the user, and the data is discarded so analysis is skipped.
            st.error(f"Error reading file: {str(e)}")
            df = None
|
|
def highlight_interpretation(val):
    """Return the CSS background style for a single interpretation cell.

    Matching is case-insensitive and substring-based:

    * missing values, ``'N/A'`` and ``'No data'`` -> no styling
    * "average" without "below"/"above"          -> ``#90EE90``
    * contains "below"                            -> ``#87CEEB``
    * contains "above"                            -> ``#FFD700``
    * contains "very low"                         -> ``#ADD8E6``
    * contains "very high"                        -> ``#FF6B6B``
    * anything else                               -> no styling
    """
    if pd.isna(val) or val == 'N/A' or val == 'No data':
        return ''
    val_str = str(val).lower()
    has_below = 'below' in val_str
    has_above = 'above' in val_str
    # "below average"/"above average" also contain "average", so the plain
    # "average" case must exclude them explicitly.
    if 'average' in val_str and not has_below and not has_above:
        return 'background-color: #90EE90'
    if has_below:
        return 'background-color: #87CEEB'
    if has_above:
        return 'background-color: #FFD700'
    if 'very low' in val_str:
        return 'background-color: #ADD8E6'
    if 'very high' in val_str:
        return 'background-color: #FF6B6B'
    return ''


def _frame_to_xlsx_bytes(frame, sheet_name):
    """Serialise *frame* to .xlsx bytes with a styled header row and width-18 columns."""
    buffer = BytesIO()
    with pd.ExcelWriter(buffer, engine='xlsxwriter') as writer:
        frame.to_excel(writer, index=False, sheet_name=sheet_name)
        worksheet = writer.sheets[sheet_name]
        header_format = writer.book.add_format({
            'bold': True,
            'bg_color': '#e67e22',
            'font_color': 'white',
            'border': 1
        })
        for col_num, value in enumerate(frame.columns.values):
            worksheet.write(0, col_num, value, header_format)
            worksheet.set_column(col_num, col_num, 18)
    # Read the buffer only after the writer context has closed.
    return buffer.getvalue()


st.markdown("---")

# The `'df' in dir()` test guards against `df` never having been assigned
# (nothing uploaded); the analysis also needs the normative table to be loaded.
if uploaded_file is not None and 'df' in dir() and df is not None and normative_df is not None:

    st.subheader("Select Biomarkers to Analyze")
    detected_biomarkers = [col for col in df.columns if col in AVAILABLE_BIOMARKERS]

    if detected_biomarkers:
        selected_biomarkers = st.multiselect(
            "Choose biomarkers to include in analysis",
            options=detected_biomarkers,
            default=detected_biomarkers,
            format_func=lambda x: BIOMARKER_LABELS.get(x, x)
        )

        # NOTE(review): st.button() is True only on the run triggered by the
        # click. Clicking the download button further down triggers another
        # run in which this is False, so the results section disappears after
        # a download. Persisting results in st.session_state would avoid that.
        if st.button("🔬 Process Batch Data", type="primary"):
            if not selected_biomarkers:
                st.error("Please select at least one biomarker to analyze.")
            else:
                with st.spinner("Processing patient data..."):
                    results_df = process_batch_data(df, normative_df, selected_biomarkers)

                st.success("✅ Processing complete!")

                st.subheader("Results")

                # Demographic columns first, then for each biomarker its raw
                # value followed by whichever derived columns are present.
                base_cols = ['patient_id', 'age', 'gender', 'region', 'bmi']
                display_cols = [c for c in base_cols if c in results_df.columns]
                for bm in selected_biomarkers:
                    candidates = (
                        bm,
                        f'{bm}_z',
                        f'{bm}_percentile',
                        f'{bm}_interpretation',
                    )
                    display_cols.extend(c for c in candidates if c in results_df.columns)

                interp_cols = [c for c in display_cols if 'interpretation' in c]
                if interp_cols:
                    styler = results_df[display_cols].style
                    # Styler.applymap was renamed to Styler.map in pandas 2.1
                    # and the old name is deprecated; use the new name when
                    # it exists, the old one otherwise.
                    cellwise = styler.map if hasattr(styler, 'map') else styler.applymap
                    styled_df = cellwise(highlight_interpretation, subset=interp_cols)
                    st.dataframe(styled_df, use_container_width=True)
                else:
                    st.dataframe(results_df[display_cols], use_container_width=True)

                st.subheader("Summary Statistics")

                # Up to three summaries per row. Every selected biomarker gets
                # a summary; further rows are added as needed rather than
                # dropping everything after the third biomarker.
                per_row = 3
                n_cols = min(len(selected_biomarkers), per_row)
                for start in range(0, len(selected_biomarkers), per_row):
                    row_cols = st.columns(n_cols)
                    row_biomarkers = selected_biomarkers[start:start + per_row]
                    for cell, bm in zip(row_cols, row_biomarkers):
                        with cell:
                            st.markdown(f"**{BIOMARKER_LABELS.get(bm, bm)}**")
                            z_col = f'{bm}_z'
                            if z_col in results_df.columns:
                                # Non-numeric entries become NaN and are
                                # dropped before computing the statistics.
                                z_values = pd.to_numeric(results_df[z_col], errors='coerce').dropna()
                                if len(z_values) > 0:
                                    st.metric("Mean Z-Score", f"{z_values.mean():.2f}")
                                    st.metric("Patients Analyzed", len(z_values))

                                    # Count of rows per interpretation label.
                                    interp_col = f'{bm}_interpretation'
                                    if interp_col in results_df.columns:
                                        interp_counts = results_df[interp_col].value_counts()
                                        st.bar_chart(interp_counts)

                st.subheader("📥 Export Results")

                # The export contains every column of the results frame, not
                # only the subset displayed above.
                st.download_button(
                    label="⬇️ Download Results as Excel",
                    data=_frame_to_xlsx_bytes(results_df, 'Results'),
                    file_name="smartwatch_zscore_results.xlsx",
                    mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
                )
    else:
        st.warning(
            "No recognized biomarker columns found in your data. "
            "Please ensure your columns match the template format."
        )
|
|
| |
st.markdown("---")

# Collapsible reference table mapping z-score ranges to classification labels
# and percentile ranges.
guide_panel = st.expander("📊 Z-Score Classification Guide")
guide_panel.markdown("""
**How to interpret Z-Scores:**

| Z-Score Range | Classification | Percentile Range |
|:-------------:|:--------------:|:----------------:|
| z < -2.0 | Very Low | < 2.3% |
| -2.0 ≤ z < -0.5 | Below Average | 2.3% - 30.9% |
| **-0.5 ≤ z < 0.5** | **Average** | **30.9% - 69.1%** |
| 0.5 ≤ z < 2.0 | Above Average | 69.1% - 97.7% |
| z ≥ 2.0 | Very High | > 97.7% |

**Context matters:**
- For **steps, sleep duration, and active minutes**: Higher values are generally better ✓
- For **heart rate**: Lower resting values are generally better ✓

*A z-score of 0 means you are exactly at the population average for your demographic group.*
""")

# Page footer: a divider, the methodology note, then the attribution line.
st.markdown("---")
_footer_lines = (
    "*Batch analysis calculates z-scores relative to the Withings normative population, "
    "stratified by region, gender, age group, and BMI category.*",
    "Built with ❤️ in Düsseldorf. © Lars Masanneck 2026.",
)
for _footer_line in _footer_lines:
    st.markdown(_footer_line)
|
|
|
|
|
|