# Streamlit app setup: page config, artifact loading and feature engineering
# for the rainfall probability predictor.
from pathlib import Path

import joblib
import numpy as np
import pandas as pd
import streamlit as st

# -------------------------------
# Page config
# -------------------------------
st.set_page_config(
    page_title='Rainfall Probability Predictor (LogReg)',
    page_icon='🌧️',
    layout='centered'
)

st.title('🌧️ Rainfall Probability Predictor')
st.write('Predicts the probability of rainfall (0–1) using a Logistic Regression model trained on weather features.')

# Artifacts are resolved relative to this script so the app works regardless
# of the current working directory.
BASE_DIR = Path(__file__).resolve().parent
MODEL_PATH = BASE_DIR / 'lr_final_model.pkl'
FEATURE_NAMES_PATH = BASE_DIR / 'feature_names.pkl'


@st.cache_resource
def load_artifacts():
    """Load the pickled model and its ordered list of feature names.

    Returns:
        A ``(model, feature_names)`` tuple as loaded from ``MODEL_PATH`` and
        ``FEATURE_NAMES_PATH``.

    Raises:
        FileNotFoundError: If either artifact file does not exist.
        ValueError: If the feature-names artifact is not a non-empty list.
    """
    if not MODEL_PATH.exists():
        raise FileNotFoundError(
            f'Model not found: {MODEL_PATH.name}. Put it in the repo root (same folder as app.py).'
        )
    if not FEATURE_NAMES_PATH.exists():
        raise FileNotFoundError(
            f'Feature names not found: {FEATURE_NAMES_PATH.name}. Put it in the repo root (same folder as app.py).'
        )

    # SECURITY: joblib.load unpickles arbitrary objects; only ship artifacts
    # from a trusted source alongside this script.
    model = joblib.load(MODEL_PATH)
    feature_names = joblib.load(FEATURE_NAMES_PATH)

    if not isinstance(feature_names, list) or not feature_names:
        raise ValueError('feature_names.pkl must contain a non-empty list of column names.')

    return model, feature_names


def add_features(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *df* with the engineered feature columns added.

    Reads the columns ``maxtemp``, ``mintemp``, ``humidity``, ``dewpoint``,
    ``sunshine``, ``cloud``, ``windspeed``, ``winddirection`` and ``day``;
    a missing column raises ``KeyError``. The input frame is not modified.
    """
    df = df.copy()

    # Temperature variability
    df['temp_range'] = df['maxtemp'] - df['mintemp']

    # Air saturation level
    df['humidity_gap'] = df['humidity'] - df['dewpoint']

    # Sunshine vs clouds (the +1 avoids dividing by zero when cloud == 0)
    df['sunshine_ratio'] = df['sunshine'] / (df['cloud'] + 1)

    # Wind intensity (simple interaction)
    df['wind_energy'] = df['windspeed'] * df['winddirection']

    # Seasonal pattern: day of year mapped onto a 365-day cycle
    df['sin_day'] = np.sin(2 * np.pi * df['day'] / 365)
    df['cos_day'] = np.cos(2 * np.pi * df['day'] / 365)

    return df


# Show the documented load failures as a readable message in the page instead
# of a raw traceback; any other exception still propagates unchanged.
try:
    model, feature_names = load_artifacts()
except (FileNotFoundError, ValueError) as err:
    st.error(str(err))
    st.stop()

st.subheader('Input features')

# Note: the input ranges are generic. If you want, you can set them based on
# df1.describe().
# Collect the base weather inputs in two side-by-side columns.
col1, col2 = st.columns(2)

with col1:
    day = st.number_input('day (1–365)', min_value=1, max_value=365, value=100, step=1)
    pressure = st.number_input('pressure', value=1013.0, step=0.1)
    maxtemp = st.number_input('maxtemp', value=20.0, step=0.1)
    temperature = st.number_input('temperature', value=15.0, step=0.1)
    mintemp = st.number_input('mintemp', value=10.0, step=0.1)

with col2:
    dewpoint = st.number_input('dewpoint', value=8.0, step=0.1)
    humidity = st.number_input('humidity', value=70.0, step=0.1)
    cloud = st.number_input('cloud', value=50.0, step=1.0)
    sunshine = st.number_input('sunshine', value=5.0, step=0.1)
    windspeed = st.number_input('windspeed', value=10.0, step=0.1)
    winddirection = st.number_input('winddirection', value=180.0, step=1.0)

# Build one-row dataframe with the ORIGINAL base features.
# NOTE(review): the 'temparature' key is spelled this way on purpose here;
# presumably it matches the training column name. Confirm against
# feature_names before "correcting" it.
input_df = pd.DataFrame([{
    'day': float(day),
    'pressure': float(pressure),
    'maxtemp': float(maxtemp),
    'temparature': float(temperature),
    'mintemp': float(mintemp),
    'dewpoint': float(dewpoint),
    'humidity': float(humidity),
    'cloud': float(cloud),
    'sunshine': float(sunshine),
    'windspeed': float(windspeed),
    'winddirection': float(winddirection)
}])

# Add engineered features (must match training)
input_df = add_features(input_df)

# Ensure correct feature order and missing columns safety
missing_cols = [c for c in feature_names if c not in input_df.columns]
extra_cols = [c for c in input_df.columns if c not in feature_names]

if missing_cols:
    st.error(f'Missing required feature columns: {missing_cols}')
    st.stop()

# Keep only the expected columns in the correct order
X = input_df[feature_names].copy()

st.divider()

if st.button('Predict rainfall probability'):
    # Only the model call is guarded: this is the UI boundary, so any model
    # failure is reported in the page rather than crashing the script run.
    try:
        # Column 1 of predict_proba is read as the rainfall probability.
        proba = float(model.predict_proba(X)[:, 1][0])
    except Exception as e:
        st.error(f'Prediction failed: {e}')
    else:
        st.metric('Rainfall probability', f'{proba:.3f}', delta=None)
        # Clamp to [0, 1] before passing the value to the progress bar.
        st.progress(min(max(proba, 0.0), 1.0))

        if proba >= 0.7:
            st.success('High chance of rainfall.')
        elif proba >= 0.4:
            st.warning('Medium chance of rainfall.')
        else:
            st.info('Low chance of rainfall.')

        with st.expander('Show model input (debug)'):
            st.write('Used feature columns (ordered):')
            st.write(feature_names)
            st.dataframe(X)
            if extra_cols:
                st.caption(f'Note: These columns were ignored (not in feature_names): {extra_cols}')