Spaces:

MasanneckLab
/

Withings_Normalization_App

Sleeping

Withings_Normalization_App / pages /1_Batch_Analysis.py

Lars Masanneck

Adding explanatory notes to app and exports

a3309b8 3 months ago

12 kB

	"""
	Batch Analysis page for Smartwatch Normative Z-Score Calculator.

	Upload multiple patient records for bulk z-score analysis.
	"""
	import streamlit as st
	import pandas as pd
	import sys
	import os
	from io import BytesIO

	# Add parent directory to path for imports
	sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
	from batch_utils import get_batch_template_df, process_batch_data, BIOMARKER_LABELS, AVAILABLE_BIOMARKERS
	import normalizer_model

	# Configure the browser-tab title, icon and wide layout for this page.
	st.set_page_config(
	    layout="wide",
	    page_icon="📊",
	    page_title="Batch Analysis - Smartwatch Z-Score Calculator",
	)

	# Load normative data: the CSV is looked up one directory above this file's folder.
	_APP_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
	DATA_PATH = os.path.join(_APP_ROOT, "Table_1_summary_measure.csv")

	@st.cache_data
	def _load_normative_table_cached(path):
	    """Load the normative table from *path*; only successful loads are cached.

	    Any exception raised by ``normalizer_model.load_normative_table`` propagates
	    to the caller, so a failed attempt leaves no cache entry behind.
	    """
	    return normalizer_model.load_normative_table(path)


	def get_normative_data():
	    """Return the normative reference table, or ``None`` if it cannot be loaded.

	    The error handling lives outside the cached loader on purpose: when the
	    ``try``/``except`` sat inside the cached function, a failed load returned
	    ``None`` as a regular value, which was cached and served on every later
	    rerun without retrying the load.

	    Returns:
	        Whatever ``normalizer_model.load_normative_table(DATA_PATH)`` returns,
	        or ``None`` after showing an ``st.error`` message when loading fails.
	    """
	    try:
	        return _load_normative_table_cached(DATA_PATH)
	    except Exception as e:  # UI boundary: report any failure instead of crashing the page
	        st.error(f"Could not load normative data: {e}")
	        return None

	# Reference table used for all z-score calculations (None if loading failed).
	normative_df = get_normative_data()

	# Page heading and short usage hint.
	st.title("📊 Batch Analysis")
	st.markdown("Upload multiple patient records for bulk smartwatch biomarker analysis")

	_UPLOAD_HINT = (
	    "Upload an Excel or CSV file with patient data. Each row will be analyzed and "
	    "z-scores will be calculated for all available biomarkers."
	)
	st.info(_UPLOAD_HINT)

	# Two side-by-side columns: template download on the left, upload on the right.
	col1, col2 = st.columns(2)

	# Left column: downloadable Excel template plus a description of the expected columns.
	with col1:
	    st.subheader("📥 Download Template")
	    st.markdown("Use this template to prepare your data in the correct format.")

	    template_df = get_batch_template_df()

	    # Create downloadable Excel template (built in memory, nothing is written to disk)
	    output = BytesIO()
	    with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
	        template_df.to_excel(writer, index=False, sheet_name='Patient Data')
	        workbook = writer.book
	        worksheet = writer.sheets['Patient Data']

	        # Orange-themed header format
	        header_format = workbook.add_format({
	            'bold': True,
	            'bg_color': '#e67e22',
	            'font_color': 'white',
	            'border': 1,
	        })
	        # Re-write the header row with the custom format and widen every column.
	        for col_num, value in enumerate(template_df.columns.values):
	            worksheet.write(0, col_num, value, header_format)
	            worksheet.set_column(col_num, col_num, 18)

	    st.download_button(
	        label="⬇️ Download Excel Template",
	        data=output.getvalue(),
	        file_name="smartwatch_zscore_template.xlsx",
	        mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
	    )

	    st.markdown("#### Required Columns:")
	    # Plain "|" delimiters: the previous "\|" was an invalid Python escape
	    # sequence and kept the pipes escaped, so Markdown never rendered a table.
	    st.markdown("\n".join([
	        "| Column | Description | Example |",
	        "|--------|-------------|---------|",
	        "| patient_id | Unique identifier | P001 |",
	        "| age | Age in years | 45 |",
	        "| gender | Man/Woman | Man |",
	        "| region | Geographic region | Western Europe |",
	        "| bmi | Body Mass Index | 24.5 |",
	    ]))

	    st.markdown("#### Biomarker Columns (optional):")
	    # One table row per supported biomarker code, labelled via BIOMARKER_LABELS
	    # (falling back to the raw code when no label is registered).
	    biomarker_rows = ["| Column | Description |", "|--------|-------------|"]
	    biomarker_rows.extend(
	        f"| {code} | {BIOMARKER_LABELS.get(code, code)} |" for code in AVAILABLE_BIOMARKERS
	    )
	    biomarker_table = "\n".join(biomarker_rows) + "\n"
	    st.markdown(biomarker_table)

	    st.markdown("Note: Include only the biomarkers you have data for. Leave cells blank if not measured.")

	# Right column: file upload, parsing and a quick preview of the uploaded table.
	with col2:
	    st.subheader("📤 Upload Data")

	    uploaded_file = st.file_uploader(
	        "Choose an Excel or CSV file",
	        type=['xlsx', 'xls', 'csv'],
	        help="Upload a file with patient data following the template format"
	    )

	    # Bind ``df`` unconditionally so later code can test ``df is not None``
	    # even when nothing has been uploaded yet.
	    df = None

	    if uploaded_file is not None:
	        try:
	            # Compare the extension case-insensitively so e.g. "DATA.CSV" is
	            # parsed as CSV instead of being handed to the Excel reader.
	            if uploaded_file.name.lower().endswith('.csv'):
	                df = pd.read_csv(uploaded_file)
	            else:
	                df = pd.read_excel(uploaded_file)

	            st.success(f"✅ Loaded {len(df)} patient records")

	            # Detect available biomarkers in the uploaded data
	            detected_biomarkers = [col for col in df.columns if col in AVAILABLE_BIOMARKERS]

	            if detected_biomarkers:
	                st.markdown(f"Detected biomarkers: {', '.join([BIOMARKER_LABELS.get(b, b) for b in detected_biomarkers])}")
	            else:
	                st.warning("No recognized biomarker columns found. Please check your column names.")

	            with st.expander("Preview uploaded data"):
	                st.dataframe(df, use_container_width=True)

	        except Exception as e:
	            # Any failure while reading or previewing invalidates the upload:
	            # report it and make sure no half-loaded frame reaches processing.
	            st.error(f"Error reading file: {str(e)}")
	            df = None

	# Style function for interpretation columns
	def highlight_interpretation(val):
	    """Return the CSS background rule for one interpretation cell.

	    Args:
	        val: Cell value taken from an ``*_interpretation`` column.

	    Returns:
	        A ``background-color: ...`` declaration chosen by keyword match on the
	        lower-cased value, or ``''`` when the value is missing, equals
	        ``'N/A'`` / ``'No data'``, or matches none of the keywords.
	    """
	    if pd.isna(val) or val == 'N/A' or val == 'No data':
	        return ''
	    val_str = str(val).lower()
	    # Plain "average" only; "below ..." / "above ..." are handled by the next branches.
	    if 'average' in val_str and 'below' not in val_str and 'above' not in val_str:
	        return 'background-color: #90EE90'  # Green
	    elif 'below' in val_str:
	        return 'background-color: #87CEEB'  # Sky blue
	    elif 'above' in val_str:
	        return 'background-color: #FFD700'  # Gold
	    elif 'very low' in val_str:
	        return 'background-color: #ADD8E6'  # Light blue
	    elif 'very high' in val_str:
	        return 'background-color: #FF6B6B'  # Red
	    return ''


	def _results_to_excel_bytes(frame, sheet_name):
	    """Serialize *frame* to an in-memory .xlsx file with an orange header row.

	    Args:
	        frame: DataFrame to export (written without its index).
	        sheet_name: Name of the single worksheet.

	    Returns:
	        The workbook content as ``bytes``.
	    """
	    buffer = BytesIO()
	    with pd.ExcelWriter(buffer, engine='xlsxwriter') as writer:
	        frame.to_excel(writer, index=False, sheet_name=sheet_name)
	        workbook = writer.book
	        worksheet = writer.sheets[sheet_name]

	        # Orange-themed header
	        header_format = workbook.add_format({
	            'bold': True,
	            'bg_color': '#e67e22',
	            'font_color': 'white',
	            'border': 1,
	        })
	        # Re-write the header row with the custom format and widen every column.
	        for col_num, value in enumerate(frame.columns.values):
	            worksheet.write(0, col_num, value, header_format)
	            worksheet.set_column(col_num, col_num, 18)
	    return buffer.getvalue()


	st.markdown("---")

	# Processing section
	# ``'df' in dir()`` guards against ``df`` never having been bound, which is the
	# case when the upload code did not assign it (e.g. no file uploaded).
	if uploaded_file is not None and 'df' in dir() and df is not None and normative_df is not None:

	    # Biomarker selection
	    st.subheader("Select Biomarkers to Analyze")
	    detected_biomarkers = [col for col in df.columns if col in AVAILABLE_BIOMARKERS]

	    if detected_biomarkers:
	        selected_biomarkers = st.multiselect(
	            "Choose biomarkers to include in analysis",
	            options=detected_biomarkers,
	            default=detected_biomarkers,
	            format_func=lambda x: BIOMARKER_LABELS.get(x, x)
	        )

	        if st.button("🔬 Process Batch Data", type="primary"):
	            if not selected_biomarkers:
	                st.error("Please select at least one biomarker to analyze.")
	            else:
	                with st.spinner("Processing patient data..."):
	                    results_df = process_batch_data(df, normative_df, selected_biomarkers)

	                st.success("✅ Processing complete!")

	                # Results section
	                st.subheader("Results")

	                # Build display columns dynamically: demographics first, then for
	                # each biomarker its raw value and derived columns, keeping only
	                # columns that actually exist in the results.
	                base_cols = ['patient_id', 'age', 'gender', 'region', 'bmi']
	                display_cols = [c for c in base_cols if c in results_df.columns]
	                for bm in selected_biomarkers:
	                    for candidate in (bm, f'{bm}_z', f'{bm}_percentile', f'{bm}_interpretation'):
	                        if candidate in results_df.columns:
	                            display_cols.append(candidate)

	                # Apply styling to interpretation columns
	                interp_cols = [c for c in display_cols if 'interpretation' in c]
	                if interp_cols:
	                    styler = results_df[display_cols].style
	                    # ``Styler.applymap`` is deprecated in favour of ``Styler.map``;
	                    # prefer the new name and fall back on older pandas versions.
	                    apply_cellwise = getattr(styler, 'map', None) or styler.applymap
	                    styled_df = apply_cellwise(highlight_interpretation, subset=interp_cols)
	                    st.dataframe(styled_df, use_container_width=True)
	                else:
	                    st.dataframe(results_df[display_cols], use_container_width=True)

	                # Summary Statistics
	                st.subheader("Summary Statistics")

	                # At most three columns side by side; additional biomarkers wrap
	                # around into the same columns instead of being silently dropped.
	                n_cols = min(len(selected_biomarkers), 3)
	                cols = st.columns(n_cols)

	                for idx, bm in enumerate(selected_biomarkers):
	                    with cols[idx % n_cols]:
	                        st.markdown(f"{BIOMARKER_LABELS.get(bm, bm)}")
	                        z_col = f'{bm}_z'
	                        if z_col in results_df.columns:
	                            # Filter out non-numeric values
	                            z_values = pd.to_numeric(results_df[z_col], errors='coerce').dropna()
	                            if len(z_values) > 0:
	                                st.metric("Mean Z-Score", f"{z_values.mean():.2f}")
	                                st.metric("Patients Analyzed", len(z_values))

	                        # Distribution of interpretations
	                        interp_col = f'{bm}_interpretation'
	                        if interp_col in results_df.columns:
	                            interp_counts = results_df[interp_col].value_counts()
	                            st.bar_chart(interp_counts)

	                # Export Results
	                st.subheader("📥 Export Results")

	                st.download_button(
	                    label="⬇️ Download Results as Excel",
	                    data=_results_to_excel_bytes(results_df, 'Results'),
	                    file_name="smartwatch_zscore_results.xlsx",
	                    mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
	                )
	    else:
	        st.warning(
	            "No recognized biomarker columns found in your data. "
	            "Please ensure your columns match the template format."
	        )

	# Z-Score Classification Guide
	st.markdown("---")
	with st.expander("📊 Z-Score Classification Guide"):
	    # Assembled line by line with plain "|" delimiters: the previous "\|" was
	    # an invalid Python escape sequence and kept the pipes escaped, so Markdown
	    # never rendered the classification table.
	    st.markdown("\n".join([
	        "How to interpret Z-Scores:",
	        "",
	        "| Z-Score Range | Classification | Percentile Range |",
	        "|:-------------:|:--------------:|:----------------:|",
	        "| z < -2.0 | Very Low | < 2.3% |",
	        "| -2.0 ≤ z < -0.5 | Below Average | 2.3% - 30.9% |",
	        "| -0.5 ≤ z < 0.5 | Average | 30.9% - 69.1% |",
	        "| 0.5 ≤ z < 2.0 | Above Average | 69.1% - 97.7% |",
	        "| z ≥ 2.0 | Very High | > 97.7% |",
	        "",
	        "Context matters:",
	        "- For steps, sleep duration, and active minutes: Higher values are generally better ✓",
	        "- For heart rate: Lower resting values are generally better ✓",
	        "",
	        "A z-score of 0 means you are exactly at the population average for your demographic group.",
	    ]))

	# Footer
	st.markdown("---")
	st.markdown(
	    "*Batch analysis calculates z-scores relative to the Withings normative population, "
	    "stratified by region, gender, age group, and BMI category.*"
	)
	st.markdown(
	    "Built with ❤️ in Düsseldorf. © Lars Masanneck 2026."
	)