Spaces:
Sleeping
Sleeping
| # app.py (Hugging Face Space friendly) | |
| import os, warnings | |
| warnings.filterwarnings("ignore") | |
| import numpy as np | |
| import pandas as pd | |
| import yfinance as yf | |
| from datetime import datetime, timedelta | |
| import joblib | |
| from sklearn.ensemble import RandomForestClassifier | |
| from sklearn.model_selection import train_test_split | |
| from sklearn.metrics import roc_auc_score | |
| import plotly.graph_objects as go | |
| import gradio as gr | |
| # ----- Utilities ----- | |
def download_data(ticker, period='6y', interval='1d'):
    """Download price history for *ticker* via yfinance.

    Args:
        ticker: Symbol passed straight to ``yf.download`` (e.g. "AAPL").
        period: Look-back window string understood by yfinance.
        interval: Bar size string understood by yfinance.

    Returns:
        A DataFrame with a datetime index, flat column names (including
        'Adj Close') and rows containing NaN removed.

    Raises:
        ValueError: If yfinance returns nothing for the ticker.
    """
    # The rest of this file reads the 'Adj Close' column. Recent yfinance
    # releases default to auto_adjust=True, which drops that column, so ask
    # for unadjusted bars explicitly.
    df = yf.download(ticker, period=period, interval=interval,
                     progress=False, auto_adjust=False)
    if df is None or df.empty:
        raise ValueError(f"No data for {ticker}")
    # Recent yfinance releases return (field, ticker) MultiIndex columns even
    # for a single ticker; keep only the field level so df['Adj Close'] is a
    # Series again.
    if isinstance(df.columns, pd.MultiIndex):
        df.columns = df.columns.get_level_values(0)
    df.index = pd.to_datetime(df.index)
    return df.dropna()
def add_features(df):
    """Return a copy of *df* extended with return, trend and RSI features.

    Reads the 'Adj Close' column and adds: 'AdjClose' (alias), 'ret',
    'logret', 'ma5', 'ma20', 'vol20', 'rsi14' and 'mom5'. Rows containing
    any NaN (the warm-up rows of the rolling windows) are dropped. The
    input frame is not modified.
    """
    out = df.copy()
    out['AdjClose'] = out['Adj Close']
    price = out['AdjClose']

    # Simple and log daily returns.
    out['ret'] = price.pct_change()
    out['logret'] = np.log(price).diff()

    # Short/long moving averages and 20-day volatility of log returns.
    out['ma5'] = price.rolling(5).mean()
    out['ma20'] = price.rolling(20).mean()
    out['vol20'] = out['logret'].rolling(20).std()

    # 14-day RSI from simple averages of gains and losses; the 1e-9 keeps
    # the ratio finite when there were no losses in the window.
    change = price.diff()
    avg_gain = change.clip(lower=0).rolling(14).mean()
    avg_loss = (-1 * change.clip(upper=0)).rolling(14).mean()
    rel_strength = avg_gain / (avg_loss + 1e-9)
    out['rsi14'] = 100 - (100 / (1 + rel_strength))

    # 5-day momentum.
    out['mom5'] = price.pct_change(5)
    return out.dropna()
def make_label(df, threshold_pct=-0.10, horizon=30):
    """Add a binary 'label' column marking upcoming drawdowns.

    Row i is labelled 1 when the minimum 'AdjClose' over the next
    *horizon* rows is at or below ``close[i] * (1 + threshold_pct)``,
    otherwise 0. Rows with no following rows get 0. The column is written
    onto *df* itself, and the same object is returned.
    """
    prices = df['AdjClose'].values
    total = len(prices)

    def _breaches(pos):
        # Forward window, clipped at the end of the series.
        window = prices[pos + 1:min(total, pos + horizon + 1)]
        if window.size == 0:
            return 0
        worst_move = (np.min(window) - prices[pos]) / prices[pos]
        return 1 if worst_move <= threshold_pct else 0

    df['label'] = np.array([_breaches(pos) for pos in range(total)], dtype=int)
    return df
| # ----- Training (light) ----- | |
def train_if_missing(ticker, threshold_pct=-0.10, horizon=30):
    """Train and cache a drawdown classifier unless one is already on disk.

    Args:
        ticker: Symbol to download and train on.
        threshold_pct: Negative fractional drop that defines a positive
            label (e.g. -0.10 for a 10% drop).
        horizon: Number of forward rows inspected when labelling.

    Returns:
        Path of the joblib file holding ``{'model': clf, 'features': [...]}``.

    Raises:
        ValueError: If no data is available or there are no rows to train on.
    """
    # The ticker originates from user input; keep only filename-safe
    # characters so it cannot point outside the models/ directory.
    safe_ticker = "".join(
        ch if ch.isalnum() or ch in "._-^=" else "_" for ch in str(ticker)
    )
    # Labels depend on threshold and horizon, so both belong in the cache
    # key; otherwise a model trained for one setting would be silently
    # reused for every other setting of the same ticker.
    settings_tag = f"t{threshold_pct * 100:g}_h{int(horizon)}"
    model_path = os.path.join("models", f"{safe_ticker}_{settings_tag}_rf.pkl")
    os.makedirs("models", exist_ok=True)
    if os.path.exists(model_path):
        return model_path

    df = download_data(ticker, period='6y')
    df = add_features(df)
    df = make_label(df, threshold_pct=threshold_pct, horizon=horizon)
    features = ['ret', 'logret', 'ma5', 'ma20', 'vol20', 'rsi14', 'mom5']
    df = df.dropna(subset=features + ['label'])
    X = df[features].values
    y = df['label'].values

    # Time-ordered split (no shuffle): fit on the oldest 80% of rows. The
    # newest 20% are left out of training and are not evaluated here.
    split = int(len(X) * 0.8)
    if split == 0:
        raise ValueError(f"Not enough data to train a model for {ticker}")

    # Lighter model for Spaces: fewer trees.
    clf = RandomForestClassifier(n_estimators=50, random_state=42,
                                 n_jobs=-1, class_weight='balanced')
    clf.fit(X[:split], y[:split])
    joblib.dump({'model': clf, 'features': features}, model_path)
    return model_path
| # ----- Predict probability ----- | |
def predict_prob(ticker, threshold_pct_pos, horizon):
    """Estimate the probability of a drawdown from the latest feature row.

    Args:
        ticker: Symbol; surrounding whitespace is stripped and it is
            upper-cased before use.
        threshold_pct_pos: Drop size in percent (sign is ignored), e.g. 10.
        horizon: Number of forward rows used when labelling.

    Returns:
        ``(prob, df)`` where *prob* is the model's probability for class 1
        and *df* is the feature frame produced by ``add_features``.

    Raises:
        ValueError: If the ticker is blank or no feature rows are available.
    """
    ticker = ticker.strip().upper()
    if not ticker:
        raise ValueError("Ticker must not be empty")
    threshold = -abs(threshold_pct_pos) / 100.0
    model_path = train_if_missing(ticker, threshold_pct=threshold, horizon=horizon)
    # NOTE: joblib.load unpickles the file. That is acceptable only because
    # the file is one this app wrote itself via train_if_missing; never point
    # this at an untrusted path.
    saved = joblib.load(model_path)
    clf = saved['model']
    features = saved['features']

    df = download_data(ticker, period='6y')
    df = add_features(df)
    if df.empty:
        raise ValueError(f"Not enough data to build features for {ticker}")
    X_latest = df[features].iloc[-1].values.reshape(1, -1)

    # A model fitted on a single class exposes only one probability column,
    # so indexing column 1 directly would raise IndexError. Look the
    # positive class up by label instead.
    proba = clf.predict_proba(X_latest)[0]
    classes = list(clf.classes_)
    prob = float(proba[classes.index(1)]) if 1 in classes else 0.0
    return prob, df
| # ----- GBM Monte Carlo (smaller sims default) ----- | |
def simulate_gbm(S0, mu, sigma, days=252, n_sims=500, seed=0):
    """Simulate geometric Brownian motion price paths.

    Seeds NumPy's global random generator with *seed*, then steps
    *n_sims* paths forward *days* times using a 1/252 time step.

    Returns:
        Array of shape ``(days + 1, n_sims)``; row 0 is *S0* everywhere.
    """
    np.random.seed(seed)
    step = 1 / 252
    # Per-step deterministic drift and shock scale, constant across the loop.
    drift = (mu - 0.5 * sigma ** 2) * step
    diffusion = sigma * np.sqrt(step)

    paths = np.zeros((days + 1, n_sims))
    paths[0] = S0
    for day in range(days):
        shocks = np.random.normal(size=n_sims)
        paths[day + 1] = paths[day] * np.exp(drift + diffusion * shocks)
    return paths
def build_candles_from_paths(paths, start_date):
    """Collapse simulated paths into one synthetic OHLC candle per step.

    Args:
        paths: 2-D array indexed ``[step, simulation]``.
        start_date: First date of the business-day index of the result.

    Returns:
        DataFrame with 'Open', 'High', 'Low', 'Close' columns and one row
        per step after the first. Open/Close are the cross-simulation
        median at the previous/current step; High/Low come from the
        90th/10th percentile at the current step, widened so they always
        enclose both Open and Close.
    """
    q10, median, q90 = np.percentile(paths, [10, 50, 90], axis=1)
    o = median[:-1]
    c = median[1:]
    # A valid candle needs High >= max(Open, Close) and Low <= min(Open,
    # Close). The percentile band at the current step does not guarantee
    # that relative to the previous step's median (the Open), so include
    # the candle body explicitly.
    h = np.maximum(np.maximum(o, c), q90[1:])
    l = np.minimum(np.minimum(o, c), q10[1:])
    dates = pd.bdate_range(start=start_date, periods=len(c))
    return pd.DataFrame({'Open': o, 'High': h, 'Low': l, 'Close': c}, index=dates)
def plot_candles(df):
    """Build a Plotly candlestick figure from an OHLC DataFrame.

    Reads the 'Open', 'High', 'Low' and 'Close' columns and uses the
    frame's index as the x axis. The range slider is hidden and the figure
    height is fixed at 600.
    """
    candles = go.Candlestick(
        x=df.index,
        open=df['Open'],
        high=df['High'],
        low=df['Low'],
        close=df['Close'],
    )
    fig = go.Figure(data=[candles])
    fig.update_layout(xaxis_rangeslider_visible=False, height=600)
    return fig
| # ----- Main function used by Gradio ----- | |
def run(ticker="RELIANCE.NS", threshold=10.0, horizon=30, sims=500):
    """Gradio entry point: predict drawdown risk and simulate a year of candles.

    Args:
        ticker: Symbol to analyse.
        threshold: Drop size in percent that defines the risk event.
        horizon: Number of forward rows used for the risk label.
        sims: Requested number of Monte Carlo paths; clamped to 100-2000.

    Returns:
        ``(figure, summary_text)`` on success, or ``(None, "Error: ...")``
        when inputs are invalid or the prediction step fails.
    """
    # UI number fields can arrive as None or as floats; normalise them up
    # front so bad input yields an error message instead of a traceback.
    try:
        threshold_value = float(threshold)
        horizon = int(horizon)
        sims = int(sims)
    except (TypeError, ValueError):
        return None, "Error: threshold, horizon and sims must be numbers"
    if horizon < 1:
        return None, "Error: horizon must be at least 1 day"

    try:
        prob, df = predict_prob(ticker, threshold_value, horizon)
    except Exception as e:  # UI boundary: report any failure as text
        return None, f"Error: {e}"

    adj_close = df['Adj Close']
    returns = adj_close.pct_change().dropna().values
    logrets = np.log(adj_close).diff().dropna()
    # Fewer than two returns leaves nothing to sort and an undefined
    # standard deviation.
    if len(returns) < 2:
        return None, f"Error: not enough price history for {ticker}"

    # VaR/CVaR simple (historical daily): the 5% worst return, and the
    # mean of the returns at or below it.
    sorted_ret = np.sort(returns)
    idx = max(0, int(0.05 * len(sorted_ret)) - 1)
    var = -sorted_ret[idx]
    cvar = -sorted_ret[:idx + 1].mean()

    # GBM simulate with annualised drift/volatility from daily log returns.
    mu = float(logrets.mean() * 252)
    sigma = float(logrets.std() * np.sqrt(252))
    S0 = float(adj_close.iloc[-1])
    sims = int(max(100, min(2000, sims)))
    model_paths = simulate_gbm(S0, mu, sigma, days=252, n_sims=sims, seed=1)
    start_date = (df.index[-1] + pd.Timedelta(days=1)).normalize()
    df_candles = build_candles_from_paths(model_paths, start_date)
    fig = plot_candles(df_candles)

    summary = (f"Ticker: {ticker}\nThreshold: {threshold}% drop within {horizon} days\n"
               f"Predicted prob: {prob*100:.2f}%\nHistorical VaR(5%): {var:.4f}, CVaR: {cvar:.4f}\n"
               f"Annual mu: {mu:.4f}, sigma: {sigma:.4f}")
    return fig, summary
| # ----- Gradio UI ----- | |
# Text shown at the top of the Gradio page.
title = "Stock Risk Predictor + 1Y Candle Simulator (Hugging Face Space)"
desc = "Enter ticker (eg RELIANCE.NS). Threshold (percent), horizon days, sims (keep small for hosted Space)."

# Four inputs map positionally onto run(ticker, threshold, horizon, sims);
# the two outputs receive run's (figure, summary) return value.
iface = gr.Interface(
    fn=run,
    inputs=[
        gr.Textbox(label="Ticker", value="RELIANCE.NS"),
        gr.Number(label="Threshold percent (drop)", value=10.0),
        gr.Number(label="Horizon days", value=30, precision=0),
        gr.Number(label="Monte Carlo sims (100-2000)", value=500, precision=0),
    ],
    outputs=[
        gr.Plot(label="Simulated 1Y Candles"),
        gr.Textbox(label="Summary"),
    ],
    title=title,
    description=desc,
    allow_flagging="never",
    examples=[
        ["RELIANCE.NS", 10, 30, 500],
        ["AAPL", 15, 30, 500],
    ],
)

# Start the web server only when executed as a script.
if __name__ == "__main__":
    iface.launch()