# app.py (Hugging Face Space friendly)
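# Predicts the probability of a >= threshold% drawdown within a chosen horizon using a
# RandomForest on simple technical features, then simulates one year of daily candles
# with a geometric Brownian motion (GBM) Monte Carlo and serves both through a Gradio UI.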
import os, warnings
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd
import yfinance as yf
import joblib
from sklearn.ensemble import RandomForestClassifier
import plotly.graph_objects as go
import gradio as gr

# ----- Utilities -----
def download_data(ticker, period='6y', interval='1d'):
    # auto_adjust=False keeps the 'Adj Close' column the features rely on
    # (newer yfinance releases adjust prices by default and drop it).
    df = yf.download(ticker, period=period, interval=interval, progress=False, auto_adjust=False)
    if df is None or df.empty:
        raise ValueError(f"No data for {ticker}")
    # Some yfinance versions return MultiIndex columns even for one ticker; flatten them.
    if isinstance(df.columns, pd.MultiIndex):
        df.columns = df.columns.get_level_values(0)
    df.index = pd.to_datetime(df.index)
    return df.dropna()

def add_features(df):
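    # Price-derived features: simple and log returns, 5/20-day moving averages,
    # 20-day realized volatility of log returns, a 14-day RSI, and 5-day momentum.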
    df = df.copy()
    df['AdjClose'] = df['Adj Close']
    df['ret'] = df['AdjClose'].pct_change()
    df['logret'] = np.log(df['AdjClose']).diff()
    df['ma5'] = df['AdjClose'].rolling(5).mean()
    df['ma20'] = df['AdjClose'].rolling(20).mean()
    df['vol20'] = df['logret'].rolling(20).std()
    delta = df['AdjClose'].diff()
    up = delta.clip(lower=0); down = -1*delta.clip(upper=0)
    ma_up = up.rolling(14).mean(); ma_down = down.rolling(14).mean()
    rs = ma_up / (ma_down + 1e-9)
    df['rsi14'] = 100 - (100 / (1 + rs))
    df['mom5'] = df['AdjClose'].pct_change(5)
    return df.dropna()

def make_label(df, threshold_pct=-0.10, horizon=30):
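    # Label a day 1 if the minimum close over the next `horizon` trading days sits at or
    # below `threshold_pct` (e.g. -0.10 = a 10% drawdown) relative to that day's close.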
    closes = df['AdjClose'].values
    n = len(closes)
    label = np.zeros(n, dtype=int)
    for i in range(n):
        end = min(n, i + horizon + 1)
        future = closes[i+1:end]
        if future.size==0:
            label[i]=0; continue
        minf = np.min(future)
        drop = (minf - closes[i]) / closes[i]
        if drop <= threshold_pct:
            label[i]=1
    df['label']=label
    return df

# ----- Training (light) -----
def train_if_missing(ticker, threshold_pct=-0.10, horizon=30):
    # Cache one model per (ticker, threshold, horizon) so changing the inputs triggers a retrain.
    model_path = f"models/{ticker}_rf_{abs(threshold_pct):.2f}_{horizon}.pkl"
    os.makedirs("models", exist_ok=True)
    if os.path.exists(model_path):
        return model_path
    df = download_data(ticker, period='6y')
    df = add_features(df)
    df = make_label(df, threshold_pct=threshold_pct, horizon=horizon)
    features = ['ret','logret','ma5','ma20','vol20','rsi14','mom5']
    df = df.dropna(subset=features+['label'])
    X = df[features].values; y = df['label'].values
    if len(y) < 250:
        # Still train, but surface the warning instead of silently passing.
        print(f"Warning: only {len(y)} samples for {ticker}; model may be unreliable.")
    # LIGHTER model for Spaces: fewer trees
    clf = RandomForestClassifier(n_estimators=50, random_state=42, n_jobs=-1, class_weight='balanced')
    # Use time-ordered split (no shuffle)
    split = int(len(X)*0.8)
    X_train, y_train = X[:split], y[:split]
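    # The trailing 20% is left out of training; it is not scored here to keep the Space light.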
    clf.fit(X_train, y_train)
    joblib.dump({'model':clf, 'features':features}, model_path)
    return model_path

# ----- Predict probability -----
def predict_prob(ticker, threshold_pct_pos, horizon):
    ticker = ticker.strip().upper()
    threshold = -abs(threshold_pct_pos)/100.0
    model_path = train_if_missing(ticker, threshold_pct=threshold, horizon=horizon)
    saved = joblib.load(model_path)
    clf = saved['model']; features = saved['features']
    df = download_data(ticker, period='6y')
    df = add_features(df)
    X_latest = df[features].iloc[-1].values.reshape(1, -1)
    proba = clf.predict_proba(X_latest)[0]
    # Guard against a model trained on a single class (no crash events in the sample).
    prob = float(proba[list(clf.classes_).index(1)]) if 1 in clf.classes_ else 0.0
    return prob, df

# ----- GBM Monte Carlo (smaller sims default) -----
def simulate_gbm(S0, mu, sigma, days=252, n_sims=500, seed=0):
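    # Exact GBM discretization: S_t = S_{t-1} * exp((mu - sigma^2/2)*dt + sigma*sqrt(dt)*Z),
    # with dt = 1/252 (one trading day) and Z ~ N(0, 1) drawn independently per path.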
    np.random.seed(seed)
    dt = 1/252
    paths = np.zeros((days+1, n_sims)); paths[0]=S0
    for t in range(1, days+1):
        z = np.random.normal(size=n_sims)
        paths[t] = paths[t-1] * np.exp((mu - 0.5*sigma**2)*dt + sigma*np.sqrt(dt)*z)
    return paths

def build_candles_from_paths(paths, start_date):
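    # Build synthetic daily candles from the simulated paths: open/close track the median
    # path, while high/low are padded out to the 90th/10th percentile bands across sims.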
    median = np.percentile(paths,50,axis=1)
    q10 = np.percentile(paths,10,axis=1)
    q90 = np.percentile(paths,90,axis=1)
    o = median[:-1]; c = median[1:]
    # Include the open as well so every candle satisfies low <= open, close <= high.
    h = np.maximum(np.maximum(o, c), q90[1:])
    l = np.minimum(np.minimum(o, c), q10[1:])
    dates = pd.bdate_range(start=start_date, periods=len(c))
    df = pd.DataFrame({'Open':o, 'High':h, 'Low':l, 'Close':c}, index=dates)
    return df

def plot_candles(df):
    fig = go.Figure(data=[go.Candlestick(x=df.index, open=df['Open'], high=df['High'],
                                         low=df['Low'], close=df['Close'])])
    fig.update_layout(xaxis_rangeslider_visible=False, height=600)
    return fig

# ----- Main function used by Gradio -----
def run(ticker="RELIANCE.NS", threshold=10.0, horizon=30, sims=500):
    try:
        prob, df = predict_prob(ticker, threshold, horizon)
    except Exception as e:
        return None, f"Error: {e}"
    # VaR/CVaR simple (historical daily)
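    # Historical 5% VaR is the loss at the 5th percentile of daily returns;
    # CVaR is the average loss over that worst-5% tail.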
    returns = df['Adj Close'].pct_change().dropna().values
    sorted_ret = np.sort(returns)
    idx = max(0, int(0.05*len(sorted_ret)) - 1)
    var = -sorted_ret[idx]
    cvar = -sorted_ret[:idx+1].mean()  # idx is always >= 0, so no fallback branch is needed
    # GBM simulate
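    # Annualize drift and volatility from daily log returns (mean * 252, std * sqrt(252)).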
    logrets = np.log(df['Adj Close']).diff().dropna()
    mu = float(logrets.mean()*252); sigma = float(logrets.std()*np.sqrt(252))
    S0 = float(df['Adj Close'].iloc[-1])
    sims = int(max(100, min(2000, sims)))
    model_paths = simulate_gbm(S0, mu, sigma, days=252, n_sims=sims, seed=1)
    start_date = (df.index[-1] + pd.Timedelta(days=1)).normalize()
    df_candles = build_candles_from_paths(model_paths, start_date)
    fig = plot_candles(df_candles)
    summary = (f"Ticker: {ticker}\nThreshold: {threshold}% drop within {horizon} days\n"
               f"Predicted prob: {prob*100:.2f}%\nHistorical VaR(5%): {var:.4f}, CVaR: {cvar:.4f}\n"
               f"Annual mu: {mu:.4f}, sigma: {sigma:.4f}")
    return fig, summary

# ----- Gradio UI -----
title = "Stock Risk Predictor + 1Y Candle Simulator (Hugging Face Space)"
desc = "Enter a ticker (e.g. RELIANCE.NS), the drop threshold in percent, the horizon in days, and the number of Monte Carlo sims (keep it small on a hosted Space)."

iface = gr.Interface(
    fn=run,
    inputs=[gr.Textbox(label="Ticker", value="RELIANCE.NS"),
            gr.Number(label="Threshold percent (drop)", value=10.0),
            gr.Number(label="Horizon days", value=30, precision=0),
            gr.Number(label="Monte Carlo sims (100-2000)", value=500, precision=0)],
    outputs=[gr.Plot(label="Simulated 1Y Candles"), gr.Textbox(label="Summary")],
    title=title, description=desc, allow_flagging="never",
    examples=[["RELIANCE.NS",10,30,500], ["AAPL",15,30,500]]
)

if __name__ == "__main__":
    iface.launch()