|
|
from pathlib import Path

import joblib
import numpy as np
import pandas as pd
import streamlit as st
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Configure the browser tab (title, icon) and the page layout.
st.set_page_config(
    page_title='Rainfall Probability Predictor (LogReg)',
    page_icon='🌧️',
    layout='centered',
)

# Page header and a one-line description of what the app does.
st.title('🌧️ Rainfall Probability Predictor')
st.write('Predicts the probability of rainfall (0–1) using a Logistic Regression model trained on weather features.')

# Artifact paths are resolved relative to this file rather than the current
# working directory, so the app finds them no matter where it is launched from.
BASE_DIR = Path(__file__).resolve().parent
MODEL_PATH = BASE_DIR / 'lr_final_model.pkl'
FEATURE_NAMES_PATH = BASE_DIR / 'feature_names.pkl'
|
|
|
|
|
|
|
|
@st.cache_resource
def load_artifacts():
    """Load the trained model and the ordered list of feature names.

    Wrapped in ``st.cache_resource`` so Streamlit can reuse the loaded
    objects instead of reading the files again on every script rerun.

    Returns:
        A ``(model, feature_names)`` tuple: the object stored in
        ``MODEL_PATH`` and the non-empty list stored in
        ``FEATURE_NAMES_PATH``.

    Raises:
        FileNotFoundError: If either artifact file does not exist.
        ValueError: If the feature-names artifact is not a non-empty list.
    """
    # Verify both files exist before loading anything, model first, so the
    # user gets a message naming the exact file that is missing.
    required_files = (
        ('Model', MODEL_PATH),
        ('Feature names', FEATURE_NAMES_PATH),
    )
    for label, path in required_files:
        if not path.exists():
            raise FileNotFoundError(
                f'{label} not found: {path.name}. Put it in the repo root (same folder as app.py).'
            )

    # NOTE: joblib.load unpickles arbitrary Python objects; only ship
    # artifacts that come from a trusted source.
    model = joblib.load(MODEL_PATH)
    feature_names = joblib.load(FEATURE_NAMES_PATH)

    # The caller indexes a DataFrame with this value, so insist on a real,
    # non-empty list of column names.
    if not isinstance(feature_names, list) or not feature_names:
        raise ValueError('feature_names.pkl must contain a non-empty list of column names.')

    return model, feature_names
|
|
|
|
|
|
|
|
def add_features(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *df* with the engineered weather features appended.

    The input frame is not modified. Six columns are added (or overwritten
    if already present): ``temp_range``, ``humidity_gap``,
    ``sunshine_ratio``, ``wind_energy``, ``sin_day`` and ``cos_day``.

    Args:
        df: Frame containing at least the columns ``maxtemp``, ``mintemp``,
            ``humidity``, ``dewpoint``, ``sunshine``, ``cloud``,
            ``windspeed``, ``winddirection`` and ``day``.

    Returns:
        A new DataFrame with the original columns plus the engineered ones.

    Raises:
        KeyError: If any required input column is missing.
    """
    required = (
        'maxtemp', 'mintemp', 'humidity', 'dewpoint', 'sunshine',
        'cloud', 'windspeed', 'winddirection', 'day',
    )
    # Fail once, naming every missing column, instead of on the first lookup.
    missing = [col for col in required if col not in df.columns]
    if missing:
        raise KeyError(f'add_features: missing input columns: {missing}')

    out = df.copy()

    # Spread between the daily maximum and minimum temperature.
    out['temp_range'] = out['maxtemp'] - out['mintemp']

    # Difference between the humidity and dew point readings.
    out['humidity_gap'] = out['humidity'] - out['dewpoint']

    # The +1 keeps the denominator non-zero when cloud is 0.
    out['sunshine_ratio'] = out['sunshine'] / (out['cloud'] + 1)

    # Product of wind speed and wind direction.
    out['wind_energy'] = out['windspeed'] * out['winddirection']

    # Encode the day on a 365-step circle as a sine/cosine pair.
    day_angle = 2 * np.pi * out['day'] / 365
    out['sin_day'] = np.sin(day_angle)
    out['cos_day'] = np.cos(day_angle)

    return out
|
|
|
|
|
|
|
|
# Load the model and its feature list. A missing or malformed artifact is
# shown as a readable error in the page instead of a raw traceback, and the
# script stops because nothing below can work without the model.
try:
    model, feature_names = load_artifacts()
except (FileNotFoundError, ValueError) as exc:
    st.error(str(exc))
    st.stop()

st.subheader('Input features')

# Raw weather inputs, laid out in two side-by-side columns.
col1, col2 = st.columns(2)

with col1:
    day = st.number_input('day (1–365)', min_value=1, max_value=365, value=100, step=1)
    pressure = st.number_input('pressure', value=1013.0, step=0.1)
    maxtemp = st.number_input('maxtemp', value=20.0, step=0.1)
    temperature = st.number_input('temperature', value=15.0, step=0.1)
    mintemp = st.number_input('mintemp', value=10.0, step=0.1)

with col2:
    dewpoint = st.number_input('dewpoint', value=8.0, step=0.1)
    humidity = st.number_input('humidity', value=70.0, step=0.1)
    cloud = st.number_input('cloud', value=50.0, step=1.0)
    sunshine = st.number_input('sunshine', value=5.0, step=0.1)
    windspeed = st.number_input('windspeed', value=10.0, step=0.1)
    winddirection = st.number_input('winddirection', value=180.0, step=1.0)
|
|
|
|
|
|
|
|
# Assemble the widget values into a single-row frame. Everything is cast to
# float so the integer `day` widget matches the dtype of the other inputs.
input_df = pd.DataFrame([{
    'day': float(day),
    'pressure': float(pressure),
    'maxtemp': float(maxtemp),
    # NOTE(review): the key is spelled 'temparature' (not 'temperature').
    # Presumably this mirrors the column name stored in feature_names.pkl;
    # confirm against the training data before "correcting" it.
    'temparature': float(temperature),
    'mintemp': float(mintemp),
    'dewpoint': float(dewpoint),
    'humidity': float(humidity),
    'cloud': float(cloud),
    'sunshine': float(sunshine),
    'windspeed': float(windspeed),
    'winddirection': float(winddirection),
}])

# Append the engineered columns to the raw inputs.
input_df = add_features(input_df)

# Compare what we built against what the model expects. Missing columns are
# fatal; extra columns are only reported later in the debug expander.
missing_cols = [c for c in feature_names if c not in input_df.columns]
extra_cols = [c for c in input_df.columns if c not in feature_names]

if missing_cols:
    st.error(f'Missing required feature columns: {missing_cols}')
    st.stop()

# Select exactly the expected columns, in the order given by feature_names.
X = input_df[feature_names].copy()
|
|
|
|
|
st.divider()

# Run the model only when the user clicks the button.
if st.button('Predict rainfall probability'):
    # UI boundary: any failure while predicting or rendering is reported in
    # the page rather than surfacing as a traceback.
    try:
        # Take column 1 of predict_proba for the single input row.
        # NOTE(review): assumes column 1 is the "rain" class; confirm
        # against the fitted model's classes_.
        proba = float(model.predict_proba(X)[:, 1][0])
        st.metric('Rainfall probability', f'{proba:.3f}', delta=None)
        # Clamp to [0, 1] before passing the value to the progress bar.
        st.progress(min(max(proba, 0.0), 1.0))

        # Bucket the probability into a coarse high / medium / low message.
        if proba >= 0.7:
            st.success('High chance of rainfall.')
        elif proba >= 0.4:
            st.warning('Medium chance of rainfall.')
        else:
            st.info('Low chance of rainfall.')

        with st.expander('Show model input (debug)'):
            st.write('Used feature columns (ordered):')
            st.write(feature_names)
            st.dataframe(X)

            if extra_cols:
                st.caption(f'Note: These columns were ignored (not in feature_names): {extra_cols}')

    except Exception as e:
        st.error(f'Prediction failed: {e}')