Spaces:
Sleeping
Sleeping
File size: 6,476 Bytes
c6937ca 63b1da3 c6937ca 63b1da3 c6937ca 63b1da3 c6937ca 63b1da3 c6937ca 63b1da3 c6937ca 63b1da3 c6937ca 63b1da3 c6937ca 63b1da3 c6937ca 63b1da3 c6937ca 63b1da3 c6937ca |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 |
# app.py (Hugging Face Space friendly)
import os, warnings
warnings.filterwarnings("ignore")
import numpy as np
import pandas as pd
import yfinance as yf
from datetime import datetime, timedelta
import joblib
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
import plotly.graph_objects as go
import gradio as gr
# ----- Utilities -----
def download_data(ticker, period='6y', interval='1d'):
    """Download price history for one ticker through yfinance.

    Args:
        ticker: Yahoo Finance symbol, e.g. ``"RELIANCE.NS"``.
        period: Look-back window forwarded to ``yf.download``.
        interval: Bar size forwarded to ``yf.download``.

    Returns:
        DataFrame indexed by datetime with flat (single-level) column
        names; rows containing any NaN are dropped.

    Raises:
        ValueError: If no usable rows are available for ``ticker``.
    """
    # Ask for unadjusted bars explicitly: the rest of this file reads the
    # 'Adj Close' column, which newer yfinance releases omit when prices are
    # auto-adjusted (their default).
    df = yf.download(ticker, period=period, interval=interval,
                     progress=False, auto_adjust=False)
    if df is None or df.empty:
        raise ValueError(f"No data for {ticker}")

    # Newer yfinance releases may return (field, ticker) MultiIndex columns
    # even for a single symbol. Keep only the field level so lookups such as
    # df['Adj Close'] yield a Series.
    # NOTE(review): assumes the field name is level 0, which is the layout
    # when group_by is left at its default - confirm for the pinned version.
    if isinstance(df.columns, pd.MultiIndex):
        df.columns = df.columns.get_level_values(0)

    df.index = pd.to_datetime(df.index)
    df = df.dropna()
    if df.empty:
        raise ValueError(f"No data for {ticker}")
    return df
def add_features(df):
    """Derive the model's technical-indicator features from a price frame.

    Works on a copy, so the caller's frame is left untouched.

    Args:
        df: DataFrame with an ``'Adj Close'`` column. If that column is
            absent, ``'Close'`` is used instead.

    Returns:
        A new DataFrame with these columns added, and every row that has a
        NaN in any column dropped (this removes the rolling warm-up rows):

        * ``AdjClose`` - the price series the features are built from
        * ``ret`` / ``logret`` - one-bar simple and log returns
        * ``ma5`` / ``ma20`` - 5- and 20-bar rolling means of the price
        * ``vol20`` - 20-bar rolling standard deviation of ``logret``
        * ``rsi14`` - 14-bar RSI built from simple rolling means
        * ``mom5`` - 5-bar percentage change

    Raises:
        KeyError: If neither ``'Adj Close'`` nor ``'Close'`` is present.
    """
    df = df.copy()

    if 'Adj Close' in df.columns:
        price = df['Adj Close']
    elif 'Close' in df.columns:
        # Frames downloaded with auto-adjusted prices carry only 'Close'.
        price = df['Close']
    else:
        raise KeyError("Expected an 'Adj Close' or 'Close' column")

    df['AdjClose'] = price
    df['ret'] = df['AdjClose'].pct_change()
    df['logret'] = np.log(df['AdjClose']).diff()
    df['ma5'] = df['AdjClose'].rolling(5).mean()
    df['ma20'] = df['AdjClose'].rolling(20).mean()
    df['vol20'] = df['logret'].rolling(20).std()

    # RSI: ratio of average gains to average losses over 14 bars.
    delta = df['AdjClose'].diff()
    gains = delta.clip(lower=0)
    losses = -1 * delta.clip(upper=0)
    avg_gain = gains.rolling(14).mean()
    avg_loss = losses.rolling(14).mean()
    # The small epsilon avoids division by zero when a window has no losses.
    rs = avg_gain / (avg_loss + 1e-9)
    df['rsi14'] = 100 - (100 / (1 + rs))

    df['mom5'] = df['AdjClose'].pct_change(5)
    return df.dropna()
def make_label(df, threshold_pct=-0.10, horizon=30):
    """Attach a binary "drawdown ahead" label to each row of ``df``.

    Row ``i`` is labelled 1 when the lowest ``AdjClose`` among the next
    ``horizon`` rows is at least ``|threshold_pct|`` below row ``i``'s own
    ``AdjClose``; otherwise 0.

    Args:
        df: DataFrame with an ``'AdjClose'`` column. It is modified in place:
            a ``'label'`` column is added or overwritten.
        threshold_pct: Fractional move that counts as a drop, e.g. ``-0.10``
            for a 10% fall.
        horizon: Number of following rows to look ahead. Coerced to ``int``;
            values below 1 give an all-zero label.

    Returns:
        The same ``df`` object, now carrying the integer ``'label'`` column.
    """
    # Coerce and clamp: a float horizon would break slicing, and a negative
    # one would turn into negative slice indices that select bogus windows.
    horizon = max(int(horizon), 0)

    closes = df['AdjClose'].to_numpy(dtype=float)
    n = closes.size
    labels = np.zeros(n, dtype=int)

    if horizon > 0:
        # The final row has no future bars, so it always keeps label 0.
        # NOTE: rows within `horizon` of the end are judged on a shortened
        # look-ahead window, exactly as many future bars as exist.
        for i in range(n - 1):
            future_min = closes[i + 1:i + 1 + horizon].min()
            drop = (future_min - closes[i]) / closes[i]
            if drop <= threshold_pct:
                labels[i] = 1

    df['label'] = labels
    return df
# ----- Training (light) -----
def train_if_missing(ticker, threshold_pct=-0.10, horizon=30):
    """Train and cache a random-forest drawdown classifier if none exists.

    The cache file is keyed by ticker, threshold and horizon, because the
    training labels depend on all three. A model trained for one setting is
    never reused for another.

    Args:
        ticker: Yahoo Finance symbol to train on.
        threshold_pct: Fractional drop that defines the positive class
            (forwarded to ``make_label``).
        horizon: Look-ahead window in rows (forwarded to ``make_label``).

    Returns:
        Path of the joblib file holding ``{'model': clf, 'features': [...]}``.

    Raises:
        ValueError: If there is no data, or too little to form a training set.
    """
    horizon = int(horizon)

    # The ticker is user-supplied text: keep only filename-safe characters so
    # it cannot introduce path separators.
    safe_ticker = "".join(
        ch if ch.isalnum() or ch in "._-^=" else "_" for ch in str(ticker)
    )
    threshold_tag = f"{threshold_pct * 100:g}"
    model_path = os.path.join(
        "models", f"{safe_ticker}_t{threshold_tag}_h{horizon}_rf.pkl"
    )

    os.makedirs("models", exist_ok=True)
    if os.path.exists(model_path):
        return model_path

    df = download_data(ticker, period='6y')
    df = add_features(df)
    df = make_label(df, threshold_pct=threshold_pct, horizon=horizon)

    features = ['ret', 'logret', 'ma5', 'ma20', 'vol20', 'rsi14', 'mom5']
    df = df.dropna(subset=features + ['label'])
    X = df[features].values
    y = df['label'].values

    # Time-ordered split (no shuffle): fit on the oldest 80% of rows. The
    # newest 20% is held out but not evaluated here.
    split = int(len(X) * 0.8)
    if split == 0:
        raise ValueError(f"Not enough history to train a model for {ticker}")

    # Deliberately light model (few trees) to suit a small hosted Space.
    clf = RandomForestClassifier(n_estimators=50, random_state=42,
                                 n_jobs=-1, class_weight='balanced')
    clf.fit(X[:split], y[:split])

    joblib.dump({'model': clf, 'features': features}, model_path)
    return model_path
# ----- Predict probability -----
def predict_prob(ticker, threshold_pct_pos, horizon):
    """Estimate the probability of a drawdown from the latest bar.

    Trains a model on first use (via ``train_if_missing``), then scores the
    most recent feature row.

    Args:
        ticker: Yahoo Finance symbol; surrounding whitespace is stripped and
            the symbol is upper-cased.
        threshold_pct_pos: Drop size in percent (``10`` means a 10% fall).
            The sign is ignored.
        horizon: Look-ahead window in rows.

    Returns:
        ``(prob, df)``: the positive-class probability as a float, and the
        feature DataFrame produced by ``add_features``.

    Raises:
        ValueError: If the ticker is blank or no usable history exists.
    """
    ticker = ticker.strip().upper()
    if not ticker:
        raise ValueError("Ticker must not be empty")

    threshold = -abs(threshold_pct_pos) / 100.0
    model_path = train_if_missing(ticker, threshold_pct=threshold, horizon=horizon)

    # NOTE: joblib.load unpickles; only files written by train_if_missing
    # should ever live in the models directory.
    saved = joblib.load(model_path)
    clf = saved['model']
    features = saved['features']

    df = download_data(ticker, period='6y')
    df = add_features(df)
    if df.empty:
        raise ValueError(f"Not enough history to build features for {ticker}")

    X_latest = df[features].iloc[-1].values.reshape(1, -1)
    proba = clf.predict_proba(X_latest)[0]

    # predict_proba has one column per class seen in training. If training
    # saw only one class there is no fixed "column 1", so locate the positive
    # class explicitly and report 0.0 when the model never saw it.
    positive = np.flatnonzero(clf.classes_ == 1)
    prob = float(proba[positive[0]]) if positive.size else 0.0
    return prob, df
# ----- GBM Monte Carlo (smaller sims default) -----
def simulate_gbm(S0, mu, sigma, days=252, n_sims=500, seed=0):
    """Simulate geometric-Brownian-motion price paths.

    Each step multiplies the previous price by
    ``exp((mu - sigma**2 / 2) * dt + sigma * sqrt(dt) * z)`` with
    ``dt = 1/252`` and ``z`` standard normal.

    Args:
        S0: Starting price.
        mu: Annualised drift.
        sigma: Annualised volatility.
        days: Number of daily steps to simulate.
        n_sims: Number of independent paths.
        seed: Seed for the random stream (``None`` for a fresh one).

    Returns:
        Array of shape ``(days + 1, n_sims)``; row 0 is ``S0`` for every path.
    """
    # Use a private generator rather than np.random.seed, so the process-wide
    # NumPy RNG is not reseeded as a side effect of every simulation.
    rng = np.random.RandomState(seed)
    dt = 1 / 252

    shocks = rng.normal(size=(days, n_sims))
    log_steps = (mu - 0.5 * sigma ** 2) * dt + sigma * np.sqrt(dt) * shocks

    paths = np.empty((days + 1, n_sims))
    paths[0] = S0
    # Cumulative sum of log-returns is the vectorised form of the
    # step-by-step multiplication.
    paths[1:] = S0 * np.exp(np.cumsum(log_steps, axis=0))
    return paths
def build_candles_from_paths(paths, start_date):
    """Summarise simulated paths as one OHLC candle per step.

    Open and Close follow the cross-path median of consecutive steps. High
    and Low extend to the 90th and 10th percentile of the closing step, and
    always enclose both Open and Close so every candle is well-formed.

    Args:
        paths: Array of shape ``(steps + 1, n_sims)`` such as the output of
            ``simulate_gbm``.
        start_date: First candidate date. Candles are placed on consecutive
            business days from here (a weekend start rolls forward).

    Returns:
        DataFrame with ``Open``, ``High``, ``Low``, ``Close`` columns and a
        business-day DatetimeIndex of length ``steps``.
    """
    # One pass over the data for all three quantiles.
    q10, median, q90 = np.percentile(paths, [10, 50, 90], axis=1)

    opens = median[:-1]
    closes = median[1:]
    # Include the open: the previous step's median can sit outside the
    # current step's 10-90 band, which would otherwise give High < Open or
    # Low > Open.
    highs = np.maximum.reduce([opens, closes, q90[1:]])
    lows = np.minimum.reduce([opens, closes, q10[1:]])

    dates = pd.bdate_range(start=start_date, periods=len(closes))
    return pd.DataFrame(
        {'Open': opens, 'High': highs, 'Low': lows, 'Close': closes},
        index=dates,
    )
def plot_candles(df):
    """Render an OHLC frame as a Plotly candlestick figure.

    Args:
        df: DataFrame with ``Open``, ``High``, ``Low``, ``Close`` columns;
            its index supplies the x-axis values.

    Returns:
        A ``plotly.graph_objects.Figure`` 600 px tall with the range slider
        hidden.
    """
    candles = go.Candlestick(
        x=df.index,
        open=df['Open'],
        high=df['High'],
        low=df['Low'],
        close=df['Close'],
    )
    figure = go.Figure(data=[candles])
    figure.update_layout(xaxis_rangeslider_visible=False, height=600)
    return figure
# ----- Main function used by Gradio -----
def run(ticker="RELIANCE.NS", threshold=10.0, horizon=30, sims=500):
    """Gradio callback: score drawdown risk and simulate a year of candles.

    Args:
        ticker: Yahoo Finance symbol.
        threshold: Drop size in percent that counts as the risk event.
        horizon: Look-ahead window in rows for the risk event.
        sims: Requested number of Monte Carlo paths; clamped to 100-2000.

    Returns:
        ``(figure, summary)`` on success. On any failure returns
        ``(None, "Error: ...")`` so the UI shows a message instead of
        crashing.
    """
    # This is the UI boundary: every failure, including bad widget values
    # such as a cleared number field, is reported as text.
    try:
        horizon = int(horizon)
        sims = int(max(100, min(2000, sims)))

        prob, df = predict_prob(ticker, threshold, horizon)

        # 'AdjClose' is the price column add_features guarantees on the frame
        # returned by predict_prob.
        prices = df['AdjClose']

        # Historical daily VaR / CVaR at the 5% level.
        returns = prices.pct_change().dropna().values
        if returns.size < 2:
            raise ValueError("Not enough price history to compute risk metrics")
        sorted_ret = np.sort(returns)
        idx = max(0, int(0.05 * len(sorted_ret)) - 1)
        var = -sorted_ret[idx]
        cvar = -sorted_ret[:idx + 1].mean()

        # Calibrate the GBM on annualised log-return moments.
        logrets = np.log(prices).diff().dropna()
        mu = float(logrets.mean() * 252)
        sigma = float(logrets.std() * np.sqrt(252))
        S0 = float(prices.iloc[-1])

        model_paths = simulate_gbm(S0, mu, sigma, days=252, n_sims=sims, seed=1)
        start_date = (df.index[-1] + pd.Timedelta(days=1)).normalize()
        df_candles = build_candles_from_paths(model_paths, start_date)
        fig = plot_candles(df_candles)
    except Exception as e:
        return None, f"Error: {e}"

    summary = (f"Ticker: {ticker}\nThreshold: {threshold}% drop within {horizon} days\n"
               f"Predicted prob: {prob*100:.2f}%\nHistorical VaR(5%): {var:.4f}, CVaR: {cvar:.4f}\n"
               f"Annual mu: {mu:.4f}, sigma: {sigma:.4f}")
    return fig, summary
# ----- Gradio UI -----
# Page heading and helper text shown above the form.
title = "Stock Risk Predictor + 1Y Candle Simulator (Hugging Face Space)"
desc = "Enter ticker (eg RELIANCE.NS). Threshold (percent), horizon days, sims (keep small for hosted Space)."

# Input widgets, in the same order as run()'s parameters.
_input_widgets = [
    gr.Textbox(label="Ticker", value="RELIANCE.NS"),
    gr.Number(label="Threshold percent (drop)", value=10.0),
    gr.Number(label="Horizon days", value=30, precision=0),
    gr.Number(label="Monte Carlo sims (100-2000)", value=500, precision=0),
]

# Output widgets, matching run()'s (figure, summary) return value.
_output_widgets = [
    gr.Plot(label="Simulated 1Y Candles"),
    gr.Textbox(label="Summary"),
]

iface = gr.Interface(
    fn=run,
    inputs=_input_widgets,
    outputs=_output_widgets,
    title=title,
    description=desc,
    allow_flagging="never",
    examples=[
        ["RELIANCE.NS", 10, 30, 500],
        ["AAPL", 15, 30, 500],
    ],
)

# Start the web server only when executed as a script.
if __name__ == "__main__":
    iface.launch()
|