# app.py - PART 1/5
# ========================================================================
# Imports and OKX REST API Client
# ========================================================================

import os
import numpy as np
import pandas as pd
import gradio as gr
import requests
import json
from datetime import datetime, timedelta
import warnings
warnings.filterwarnings('ignore')

# Machine Learning
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor
from sklearn.linear_model import Ridge, Lasso, ElasticNet
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Visualization
import plotly.graph_objects as go
from plotly.subplots import make_subplots


# ================================
# OKX REST API CLIENT
# ================================

class OKXClient:
    """OKX REST API (v5) client for public BTC/USDT market data.

    All public methods are best-effort: any failure (network error, non-200
    status, API error code, empty payload, malformed row) is reported with
    ``print`` and signalled to the caller by returning ``None``.
    """

    # Field order of each row returned by the candles endpoint.
    _CANDLE_COLUMNS = [
        'timestamp', 'open', 'high', 'low', 'close',
        'volume', 'volCcy', 'volCcyQuote', 'confirm'
    ]
    # Subset of the candle fields handed back to callers.
    _OHLCV_COLUMNS = ['timestamp', 'open', 'high', 'low', 'close', 'volume']

    def __init__(self):
        """Create a reusable HTTP session with JSON / browser-like headers."""
        self.base_url = "https://www.okx.com"
        self.session = requests.Session()
        self.session.headers.update({
            'Content-Type': 'application/json',
            'User-Agent': 'Mozilla/5.0'
        })

    def _get_data(self, path, params):
        """GET ``path`` and return the non-empty ``data`` list of the reply.

        Returns ``None`` (after printing the reason) on a non-200 status, an
        API ``code`` other than '0', or an empty ``data`` list. Network and
        JSON-decoding errors are not handled here; they propagate to the
        public methods, which wrap this call in ``try``.
        """
        response = self.session.get(
            f"{self.base_url}{path}", params=params, timeout=10
        )
        if response.status_code != 200:
            print(f"HTTP Error: {response.status_code}")
            return None

        payload = response.json()
        if payload.get('code') != '0':
            print(f"API Error: {payload.get('msg')}")
            return None

        rows = payload.get('data')
        if not rows:
            print("API Error: empty data")
            return None
        return rows

    def get_candlesticks(self, instId='BTC-USDT', bar='1H', limit=300):
        """
        Get candlestick data from OKX.

        Args:
            instId: Instrument ID (default: BTC-USDT)
            bar: Bar size (1m, 5m, 15m, 1H, 4H, 1D)
            limit: Number of candles (max 300)

        Returns:
            DataFrame with columns timestamp, open, high, low, close, volume,
            sorted by ascending timestamp, or None on any failure (an empty
            result counts as a failure).
        """
        try:
            rows = self._get_data('/api/v5/market/candles', {
                'instId': instId,
                'bar': bar,
                'limit': str(limit)
            })
            if rows is None:
                return None

            df = pd.DataFrame(rows, columns=self._CANDLE_COLUMNS)

            # Timestamps arrive as epoch milliseconds.
            df['timestamp'] = pd.to_datetime(df['timestamp'].astype(float), unit='ms')

            for col in ['open', 'high', 'low', 'close', 'volume']:
                df[col] = df[col].astype(float)

            # Sort oldest -> newest so rolling/shift based features line up.
            df = df.sort_values('timestamp').reset_index(drop=True)

            return df[self._OHLCV_COLUMNS]

        except Exception as e:
            print(f"Error fetching data: {str(e)}")
            return None

    def get_ticker(self, instId='BTC-USDT'):
        """Get current ticker data.

        Args:
            instId: Instrument ID (default: BTC-USDT)

        Returns:
            Dict with float ``last``, ``bid``, ``ask``, ``volume_24h`` and a
            ``timestamp`` taken from ``datetime.now()`` at call time, or None
            on any failure.
        """
        try:
            rows = self._get_data('/api/v5/market/ticker', {'instId': instId})
            if rows is None:
                return None

            ticker = rows[0]
            return {
                'last': float(ticker['last']),
                'bid': float(ticker['bidPx']),
                'ask': float(ticker['askPx']),
                'volume_24h': float(ticker['vol24h']),
                'timestamp': datetime.now()
            }

        except Exception as e:
            print(f"Error fetching ticker: {str(e)}")
            return None
# app.py - PART 2/5
# ========================================================================
# Feature Engineering Module
# ========================================================================

class FeatureEngineer:
    """Feature engineering helpers for crypto price prediction.

    Every method is a static, non-mutating transform: it copies the input
    frame, appends new columns and returns the copy. Rolling-window columns
    are NaN until their window is full, and ratio columns can become inf/NaN
    when their denominator is zero; cleaning those values is left to the
    caller.
    """

    @staticmethod
    def add_technical_indicators(df):
        """Add price, trend, momentum, volatility and volume indicators.

        Args:
            df: Frame with ``open``, ``high``, ``low``, ``close`` and
                ``volume`` columns. Rows are assumed to be consecutive
                candles in chronological order, because the shift/rolling
                operations below look at preceding rows.

        Returns:
            A copy of ``df`` with the indicator columns appended.
        """
        df = df.copy()

        # Basic candle-shape features
        df['returns'] = df['close'].pct_change()
        df['log_returns'] = np.log(df['close'] / df['close'].shift(1))
        df['price_range'] = df['high'] - df['low']
        df['price_change'] = df['close'] - df['open']
        df['body'] = abs(df['close'] - df['open'])
        df['upper_shadow'] = df['high'] - df[['open', 'close']].max(axis=1)
        df['lower_shadow'] = df[['open', 'close']].min(axis=1) - df['low']

        # Simple / exponential moving averages and price relative to the SMA
        for window in [5, 10, 20, 50, 100]:
            df[f'sma_{window}'] = df['close'].rolling(window=window).mean()
            df[f'ema_{window}'] = df['close'].ewm(span=window, adjust=False).mean()
            df[f'price_to_sma_{window}'] = df['close'] / df[f'sma_{window}']

        # MACD (12/26 EMA difference) with a 9-period signal line
        exp1 = df['close'].ewm(span=12, adjust=False).mean()
        exp2 = df['close'].ewm(span=26, adjust=False).mean()
        df['macd'] = exp1 - exp2
        df['macd_signal'] = df['macd'].ewm(span=9, adjust=False).mean()
        df['macd_diff'] = df['macd'] - df['macd_signal']

        # RSI from simple rolling means of gains and losses.
        # The price delta does not depend on the period, so compute it once.
        delta = df['close'].diff()
        gains = delta.where(delta > 0, 0)
        losses = -delta.where(delta < 0, 0)
        for period in [14, 28]:
            gain = gains.rolling(window=period).mean()
            loss = losses.rolling(window=period).mean()
            rs = gain / loss
            df[f'rsi_{period}'] = 100 - (100 / (1 + rs))

        # Bollinger Bands (mean +/- 2 std) plus width and relative position
        for window in [20, 50]:
            rolling_mean = df['close'].rolling(window=window).mean()
            rolling_std = df['close'].rolling(window=window).std()
            df[f'bb_upper_{window}'] = rolling_mean + (rolling_std * 2)
            df[f'bb_lower_{window}'] = rolling_mean - (rolling_std * 2)
            df[f'bb_width_{window}'] = df[f'bb_upper_{window}'] - df[f'bb_lower_{window}']
            df[f'bb_position_{window}'] = (df['close'] - df[f'bb_lower_{window}']) / df[f'bb_width_{window}']

        # ATR: 14-period mean of the true range (largest of the three spans)
        high_low = df['high'] - df['low']
        high_close = np.abs(df['high'] - df['close'].shift())
        low_close = np.abs(df['low'] - df['close'].shift())
        ranges = pd.concat([high_low, high_close, low_close], axis=1)
        true_range = ranges.max(axis=1)
        df['atr_14'] = true_range.rolling(14).mean()

        # Stochastic Oscillator (%K over 14 periods, %D = 3-period mean of %K)
        low_14 = df['low'].rolling(window=14).min()
        high_14 = df['high'].rolling(window=14).max()
        df['stoch_k'] = 100 * ((df['close'] - low_14) / (high_14 - low_14))
        df['stoch_d'] = df['stoch_k'].rolling(window=3).mean()

        # Volume features
        df['volume_sma_20'] = df['volume'].rolling(window=20).mean()
        df['volume_ratio'] = df['volume'] / df['volume_sma_20']
        df['volume_price_trend'] = df['volume'] * df['returns']

        # OBV: running sum of volume signed by the direction of the close
        df['obv'] = (np.sign(df['close'].diff()) * df['volume']).fillna(0).cumsum()

        # Momentum (absolute) and rate of change (relative)
        for period in [5, 10, 20]:
            df[f'momentum_{period}'] = df['close'].diff(period)
            df[f'roc_{period}'] = df['close'].pct_change(period)

        # Volatility: rolling standard deviation of returns
        for window in [5, 10, 20, 30]:
            df[f'volatility_{window}'] = df['returns'].rolling(window=window).std()

        # Higher moments of the return distribution
        for window in [10, 20]:
            df[f'skew_{window}'] = df['returns'].rolling(window=window).skew()
            df[f'kurt_{window}'] = df['returns'].rolling(window=window).kurt()

        return df

    @staticmethod
    def add_lag_features(df, n_lags=5):
        """Add lagged close, volume and returns columns.

        Args:
            df: Frame with ``close`` and ``volume`` columns. When a
                ``returns`` column is present it is lagged as-is; otherwise
                returns are derived from ``close`` so this method does not
                depend on ``add_technical_indicators`` having run first.
            n_lags: Number of lags; columns are created for 1..n_lags.

        Returns:
            A copy of ``df`` with ``close_lag_k``, ``volume_lag_k`` and
            ``returns_lag_k`` columns appended.
        """
        df = df.copy()

        if 'returns' in df.columns:
            returns = df['returns']
        else:
            # Same definition as add_technical_indicators; not stored as a
            # column so the output only gains the lag columns.
            returns = df['close'].pct_change()

        for lag in range(1, n_lags + 1):
            df[f'close_lag_{lag}'] = df['close'].shift(lag)
            df[f'volume_lag_{lag}'] = df['volume'].shift(lag)
            df[f'returns_lag_{lag}'] = returns.shift(lag)

        return df

    @staticmethod
    def add_time_features(df):
        """Add calendar features derived from the ``timestamp`` column.

        Args:
            df: Frame with a datetime-typed ``timestamp`` column.

        Returns:
            A copy of ``df`` with hour, day-of-week, day-of-month and month
            columns plus sine/cosine encodings of hour and day-of-week.
        """
        df = df.copy()

        df['hour'] = df['timestamp'].dt.hour
        df['day_of_week'] = df['timestamp'].dt.dayofweek
        df['day_of_month'] = df['timestamp'].dt.day
        df['month'] = df['timestamp'].dt.month

        # Cyclical encoding so that hour 23 and hour 0 (or Sunday and
        # Monday) end up close together in feature space.
        df['hour_sin'] = np.sin(2 * np.pi * df['hour'] / 24)
        df['hour_cos'] = np.cos(2 * np.pi * df['hour'] / 24)
        df['day_sin'] = np.sin(2 * np.pi * df['day_of_week'] / 7)
        df['day_cos'] = np.cos(2 * np.pi * df['day_of_week'] / 7)

        return df

        
# app.py - PART 3/5
# ========================================================================
# Ensemble Model
# ========================================================================

class EnsemblePredictor:
    """Weighted ensemble of scikit-learn regressors for BTC/USDT prediction.

    Tree-based models are fed StandardScaler output and linear models
    RobustScaler output. After training, each model's weight is proportional
    to the inverse of its validation MSE, and the ensemble prediction is the
    weighted sum of the individual predictions.
    """

    # (model key, label printed while training, scaler key), in training
    # order. train() and predict() both iterate this table so a model can
    # never be scored with a different scaler than it was fitted with.
    _MODEL_SPECS = (
        ('random_forest', 'Random Forest', 'standard'),
        ('gradient_boosting', 'Gradient Boosting', 'standard'),
        ('adaboost', 'AdaBoost', 'standard'),
        ('ridge', 'Ridge', 'robust'),
        ('lasso', 'Lasso', 'robust'),
        ('elastic_net', 'Elastic Net', 'robust'),
    )
    _SCALER_KEYS = ('standard', 'robust')

    def __init__(self):
        self.models = {}             # model key -> estimator
        self.weights = {}            # model key -> ensemble weight
        self.scalers = {}            # scaler key -> fitted scaler
        self.feature_columns = None  # column names used by the last prepare_data call
        self.is_trained = False

    def initialize_models(self):
        """Create fresh, unfitted estimators and give them equal weights."""

        self.models['random_forest'] = RandomForestRegressor(
            n_estimators=200,
            max_depth=15,
            min_samples_split=5,
            random_state=42,
            n_jobs=-1
        )

        self.models['gradient_boosting'] = GradientBoostingRegressor(
            n_estimators=200,
            learning_rate=0.05,
            max_depth=5,
            random_state=42
        )

        self.models['adaboost'] = AdaBoostRegressor(
            n_estimators=100,
            learning_rate=0.1,
            random_state=42
        )

        self.models['ridge'] = Ridge(alpha=1.0)
        self.models['lasso'] = Lasso(alpha=0.1, max_iter=2000)
        self.models['elastic_net'] = ElasticNet(alpha=0.1, l1_ratio=0.5, max_iter=2000)

        for model_name in self.models:
            self.weights[model_name] = 1.0 / len(self.models)

    def prepare_data(self, df, target_col='close'):
        """Split a feature frame into a feature matrix and a target vector.

        Every column except ``timestamp`` and ``target_col`` becomes a
        feature. Infinite values are treated as missing; missing values are
        forward-filled, then back-filled, then set to 0.

        NOTE(review): the target is taken from the same row as the features.
        If the frame carries same-candle columns such as ``open`` and
        ``price_change`` (close - open), the target is directly recoverable
        from the features - confirm this is the intended learning task.

        Args:
            df: Frame holding the features and the target column.
            target_col: Name of the column to predict.

        Returns:
            Tuple ``(X, y)`` of numpy arrays. As a side effect the feature
            column names are stored in ``self.feature_columns``.
        """

        exclude_cols = ['timestamp', target_col]
        feature_cols = [col for col in df.columns if col not in exclude_cols]

        df = df.replace([np.inf, -np.inf], np.nan)
        # fillna(method=...) is deprecated in recent pandas; the dedicated
        # ffill()/bfill() methods are the supported spelling.
        df = df.ffill().bfill().fillna(0)

        X = df[feature_cols].values
        y = df[target_col].values

        self.feature_columns = feature_cols

        return X, y

    def train(self, X_train, y_train, X_val, y_val):
        """Fit the scalers and every model, then derive the ensemble weights.

        Args:
            X_train, y_train: Training features and targets.
            X_val, y_val: Validation data, used only to score each model
                for weighting.

        Returns:
            Dict mapping model key to its validation predictions.
        """

        self.initialize_models()

        self.scalers['standard'] = StandardScaler()
        self.scalers['robust'] = RobustScaler()

        # Scalers are fitted on the training split only and then applied to
        # the validation split.
        scaled_train = {}
        scaled_val = {}
        for scaler_key in self._SCALER_KEYS:
            scaled_train[scaler_key] = self.scalers[scaler_key].fit_transform(X_train)
            scaled_val[scaler_key] = self.scalers[scaler_key].transform(X_val)

        predictions_val = {}
        for model_name, label, scaler_key in self._MODEL_SPECS:
            print(f"Training {label}...")
            model = self.models[model_name]
            model.fit(scaled_train[scaler_key], y_train)
            predictions_val[model_name] = model.predict(scaled_val[scaler_key])

        self.optimize_weights(predictions_val, y_val)
        self.is_trained = True

        return predictions_val

    def optimize_weights(self, predictions_val, y_val):
        """Set each model's weight proportional to 1 / validation MSE.

        Args:
            predictions_val: Dict mapping model key to validation predictions.
            y_val: True validation targets.
        """

        performances = {}
        for model_name, preds in predictions_val.items():
            mse = mean_squared_error(y_val, preds)
            # Small epsilon avoids division by zero on a perfect fit.
            performances[model_name] = 1.0 / (mse + 1e-10)

        total_performance = sum(performances.values())
        for model_name in performances:
            self.weights[model_name] = performances[model_name] / total_performance

        print("\n=== Optimized Weights ===")
        for model_name, weight in self.weights.items():
            print(f"{model_name}: {weight:.4f}")

    def predict(self, X):
        """Make ensemble predictions.

        Args:
            X: Feature matrix with the same column layout used in training.

        Returns:
            Tuple ``(ensemble_pred, predictions)``: the weighted ensemble
            prediction array and a dict of per-model prediction arrays.

        Raises:
            ValueError: If ``train`` has not completed yet.
        """

        if not self.is_trained:
            raise ValueError("Model must be trained first")

        scaled = {
            scaler_key: self.scalers[scaler_key].transform(X)
            for scaler_key in self._SCALER_KEYS
        }

        predictions = {
            model_name: self.models[model_name].predict(scaled[scaler_key])
            for model_name, _label, scaler_key in self._MODEL_SPECS
        }

        ensemble_pred = np.zeros(len(X))
        for model_name, preds in predictions.items():
            ensemble_pred += self.weights[model_name] * preds

        return ensemble_pred, predictions

    def evaluate(self, X_test, y_test):
        """Score the ensemble and each individual model on a test set.

        Args:
            X_test: Test features.
            y_test: True test targets.

        Returns:
            Tuple ``(metrics, ensemble_pred)``. ``metrics`` maps
            ``'ensemble'`` and each model key to a dict with MSE, RMSE, MAE
            and R2; the ensemble entry additionally has MAPE (in percent).
        """

        ensemble_pred, individual_preds = self.predict(X_test)

        mse = mean_squared_error(y_test, ensemble_pred)
        mae = mean_absolute_error(y_test, ensemble_pred)
        rmse = np.sqrt(mse)
        r2 = r2_score(y_test, ensemble_pred)
        # Divides by y_test, so a zero target would yield inf/NaN here.
        mape = np.mean(np.abs((y_test - ensemble_pred) / y_test)) * 100

        metrics = {
            'ensemble': {
                'MSE': mse,
                'RMSE': rmse,
                'MAE': mae,
                'R2': r2,
                'MAPE': mape
            }
        }

        for model_name, preds in individual_preds.items():
            mse_ind = mean_squared_error(y_test, preds)
            rmse_ind = np.sqrt(mse_ind)
            mae_ind = mean_absolute_error(y_test, preds)
            r2_ind = r2_score(y_test, preds)

            metrics[model_name] = {
                'MSE': mse_ind,
                'RMSE': rmse_ind,
                'MAE': mae_ind,
                'R2': r2_ind
            }

        return metrics, ensemble_pred
        
# app.py - PART 4/5
# ========================================================================
# Visualization and Main Pipeline
# ========================================================================

class Visualizer:
    """Static builders for the Plotly figures shown in the UI."""

    @staticmethod
    def plot_predictions(y_true, y_pred, timestamps=None, title="BTC/USDT Predictions"):
        """Overlay predicted values on the actual series.

        Args:
            y_true: Actual values.
            y_pred: Predicted values.
            timestamps: X-axis values; when omitted, positions 0..len-1 of
                ``y_true`` are used.
            title: Figure title.

        Returns:
            A dark-themed Plotly figure with one line per series.
        """
        x_axis = list(range(len(y_true))) if timestamps is None else timestamps

        actual_trace = go.Scatter(
            x=x_axis,
            y=y_true,
            mode='lines',
            name='Actual',
            line=dict(color='cyan', width=2)
        )
        predicted_trace = go.Scatter(
            x=x_axis,
            y=y_pred,
            mode='lines',
            name='Predicted',
            line=dict(color='magenta', width=2, dash='dash')
        )

        fig = go.Figure()
        for trace in (actual_trace, predicted_trace):
            fig.add_trace(trace)

        fig.update_layout(
            title=title,
            xaxis_title='Time',
            yaxis_title='Price (USDT)',
            template='plotly_dark',
            hovermode='x unified',
            height=500
        )
        return fig

    @staticmethod
    def plot_candlestick(df, n_candles=100):
        """Draw the most recent candles with a volume panel underneath.

        Args:
            df: Frame with timestamp, open, high, low, close and volume.
            n_candles: How many trailing rows to show.

        Returns:
            A two-row Plotly figure (price on top, volume below).
        """
        recent = df.tail(n_candles).copy()

        fig = make_subplots(
            rows=2, cols=1,
            shared_xaxes=True,
            vertical_spacing=0.05,
            subplot_titles=('Price', 'Volume'),
            row_heights=[0.7, 0.3]
        )

        candles = go.Candlestick(
            x=recent['timestamp'],
            open=recent['open'],
            high=recent['high'],
            low=recent['low'],
            close=recent['close'],
            name='OHLC'
        )
        fig.add_trace(candles, row=1, col=1)

        # Volume bars are red when the candle closed below its open.
        bar_colors = [
            'red' if close_px < open_px else 'green'
            for open_px, close_px in zip(recent['open'], recent['close'])
        ]
        volume_bars = go.Bar(
            x=recent['timestamp'],
            y=recent['volume'],
            name='Volume',
            marker_color=bar_colors
        )
        fig.add_trace(volume_bars, row=2, col=1)

        fig.update_layout(
            title='BTC/USDT Chart',
            template='plotly_dark',
            xaxis_rangeslider_visible=False,
            height=700
        )
        return fig

    @staticmethod
    def plot_feature_importance(model, feature_names, top_n=20):
        """Draw a horizontal bar chart of the largest feature importances.

        Args:
            model: Estimator; must expose ``feature_importances_``.
            feature_names: Names indexed like the importance array.
            top_n: Maximum number of features to show.

        Returns:
            A Plotly figure, or None when the model has no
            ``feature_importances_`` attribute.
        """
        if not hasattr(model, 'feature_importances_'):
            return None

        scores = model.feature_importances_
        # argsort is ascending, so the tail holds the largest importances.
        top = np.argsort(scores)[-top_n:]
        labels = [feature_names[i] for i in top]

        bars = go.Bar(
            x=scores[top],
            y=labels,
            orientation='h',
            marker_color='lightblue'
        )
        fig = go.Figure(bars)
        fig.update_layout(
            title=f'Top {top_n} Feature Importances',
            xaxis_title='Importance',
            yaxis_title='Features',
            template='plotly_dark',
            height=600
        )
        return fig


# ================================
# MAIN PIPELINE
# ================================

class BTCPredictionPipeline:
    """Main prediction pipeline: fetch -> features -> train -> predict.

    Holds the OKX client, the feature engineer, the ensemble model and the
    visualizer, plus the raw and processed data frames produced on the way.
    """

    def __init__(self):
        self.okx_client = OKXClient()
        self.feature_engineer = FeatureEngineer()
        self.ensemble_model = EnsemblePredictor()
        self.visualizer = Visualizer()
        self.raw_data = None        # set by fetch_data
        self.processed_data = None  # set by prepare_features

    def fetch_data(self, bar='1H', limit=300):
        """Fetch BTC-USDT candles from OKX and remember them as raw data.

        Args:
            bar: Bar size passed through to the client.
            limit: Number of candles to request.

        Returns:
            The fetched DataFrame, or None when the client reported a
            failure (in which case ``raw_data`` is left untouched).
        """

        print(f"Fetching {limit} candles from OKX...")
        df = self.okx_client.get_candlesticks(instId='BTC-USDT', bar=bar, limit=limit)

        if df is not None:
            self.raw_data = df
            print(f"Fetched {len(df)} candles")
            return df
        else:
            print("Failed to fetch data")
            return None

    def prepare_features(self):
        """Build the feature frame from the raw candles.

        Returns:
            The processed DataFrame (also stored in ``processed_data``).

        Raises:
            ValueError: If no raw data has been fetched, or if every row
                contains a missing feature value so nothing is left to
                train on.
        """

        if self.raw_data is None:
            raise ValueError("No data available")

        print("Engineering features...")
        df = self.feature_engineer.add_technical_indicators(self.raw_data)
        df = self.feature_engineer.add_lag_features(df, n_lags=5)
        df = self.feature_engineer.add_time_features(df)

        # Rolling/lag features are NaN until their window is filled.
        df = df.dropna()
        if df.empty:
            # Fail here with a clear message instead of deep inside the
            # train/test split.
            raise ValueError(
                "Not enough data: no rows left after dropping rows with "
                "missing feature values; fetch more candles"
            )
        self.processed_data = df

        print(f"Features: {len(df.columns)}, Samples: {len(df)}")

        return df

    def train_model(self, test_size=0.2, val_size=0.1):
        """Train the ensemble on a chronological train/val/test split.

        Args:
            test_size: Fraction of all samples held out for testing.
            val_size: Fraction of all samples used for validation.

        Returns:
            Tuple ``(metrics, predictions, y_test)`` from evaluating the
            ensemble on the test split.

        Raises:
            ValueError: If ``prepare_features`` has not been run.
        """

        if self.processed_data is None:
            raise ValueError("Features not prepared")

        X, y = self.ensemble_model.prepare_data(self.processed_data)

        # shuffle=False keeps time order: the test set is the most recent
        # slice and the validation set directly precedes it.
        X_temp, X_test, y_temp, y_test = train_test_split(
            X, y, test_size=test_size, shuffle=False
        )

        # Rescale val_size so it stays a fraction of the full data set.
        X_train, X_val, y_train, y_val = train_test_split(
            X_temp, y_temp, test_size=val_size/(1-test_size), shuffle=False
        )

        print(f"\nTrain: {len(X_train)}, Val: {len(X_val)}, Test: {len(X_test)}")

        print("\nTraining ensemble...")
        self.ensemble_model.train(X_train, y_train, X_val, y_val)

        print("\nEvaluating...")
        metrics, predictions = self.ensemble_model.evaluate(X_test, y_test)

        print("\n=== Ensemble Performance ===")
        for metric_name, value in metrics['ensemble'].items():
            print(f"{metric_name}: {value:.4f}")

        return metrics, predictions, y_test

    @staticmethod
    def _infer_bar_interval(timestamps):
        """Return the typical spacing between candles (default: one hour)."""
        step = timestamps.diff().median()
        if pd.isna(step) or step <= pd.Timedelta(0):
            return pd.Timedelta(hours=1)
        return step

    def predict_future(self, n_steps=24):
        """Produce ``n_steps`` future timestamps and price values.

        NOTE(review): this is not an iterated forecast. The model is run
        once on the last processed row, and every returned value is that
        single prediction multiplied by (1 + N(0, 0.005)) random noise, so
        results differ between calls.

        Args:
            n_steps: Number of future bars to generate.

        Returns:
            Tuple ``(future_times, predictions)`` of equal-length lists.
            Timestamps advance by the spacing observed in the processed
            data, so they stay correct for bar sizes other than 1H.

        Raises:
            ValueError: If the model has not been trained.
        """

        if not self.ensemble_model.is_trained:
            raise ValueError("Model not trained")

        last_data = self.processed_data.iloc[-1:].copy()
        X_last, _ = self.ensemble_model.prepare_data(last_data)

        pred, _ = self.ensemble_model.predict(X_last)

        timestamps = self.processed_data['timestamp']
        last_time = timestamps.iloc[-1]
        step = self._infer_bar_interval(timestamps)
        future_times = [last_time + step * (i + 1) for i in range(n_steps)]

        predictions = [pred[0] * (1 + np.random.normal(0, 0.005)) for _ in range(n_steps)]

        return future_times, predictions
        
# app.py - PART 5/5
# ========================================================================
# Gradio Interface
# ========================================================================

# Global pipeline instance shared by the UI callback functions in this module
pipeline = BTCPredictionPipeline()
# Set to True by train_model_ui after a successful training run; the
# prediction and feature-importance callbacks check it before doing any work.
training_complete = False


def fetch_data_ui(bar_size, num_candles):
    """UI callback: download candles and summarise them.

    Args:
        bar_size: Bar size forwarded to the pipeline.
        num_candles: Number of candles to request (converted to int).

    Returns:
        Tuple ``(info_text, candlestick_figure, last_10_rows)``; the last
        two items are None when fetching fails or an error occurs.
    """
    try:
        candles = pipeline.fetch_data(bar=bar_size, limit=int(num_candles))
        if candles is None:
            return "❌ Failed to fetch data", None, None

        stamps = candles['timestamp']
        closes = candles['close']
        info = "\n".join([
            f"✅ Successfully fetched {len(candles)} candles",
            "",
            f"Time range: {stamps.min()} to {stamps.max()}",
            f"Price range: ${closes.min():.2f} - ${closes.max():.2f}",
            f"Current price: ${closes.iloc[-1]:.2f}",
        ])

        fig = pipeline.visualizer.plot_candlestick(candles)

        # Table of the ten most recent candles with readable timestamps.
        table_cols = ['timestamp', 'open', 'high', 'low', 'close', 'volume']
        summary = candles.tail(10)[table_cols].copy()
        summary['timestamp'] = summary['timestamp'].dt.strftime('%Y-%m-%d %H:%M')

        return info, fig, summary

    except Exception as e:
        return f"❌ Error: {str(e)}", None, None


def train_model_ui(test_size, val_size):
    """UI callback: build features, train the ensemble and report metrics.

    Args:
        test_size: Fraction of samples held out for testing.
        val_size: Fraction of samples used for validation.

    Returns:
        Tuple ``(metrics_text, predictions_figure, status_message)``. On
        failure the first item carries the error text and the figure is None.
    """
    global training_complete

    try:
        pipeline.prepare_features()
        metrics, predictions, y_test = pipeline.train_model(
            test_size=test_size,
            val_size=val_size
        )
        training_complete = True

        # Assemble the report piece by piece, then join once.
        report = ["=== ENSEMBLE MODEL PERFORMANCE ===\n\n"]
        report.extend(
            f"{metric_name}: {value:.4f}\n"
            for metric_name, value in metrics['ensemble'].items()
        )
        report.append("\n\n=== INDIVIDUAL MODELS ===\n\n")
        for model_name, model_metrics in metrics.items():
            if model_name == 'ensemble':
                continue
            report.append(f"\n{model_name.upper()}:\n")
            report.extend(
                f"  {metric_name}: {value:.4f}\n"
                for metric_name, value in model_metrics.items()
            )
        metrics_text = "".join(report)

        # The test split is the tail of the processed data, so its
        # timestamps are the last len(y_test) entries.
        processed = pipeline.processed_data
        first_test_row = len(processed) - len(y_test)
        test_timestamps = processed['timestamp'].iloc[first_test_row:].values

        fig = pipeline.visualizer.plot_predictions(
            y_test,
            predictions,
            test_timestamps,
            "Test Set Predictions"
        )

        return metrics_text, fig, "✅ Training complete!"

    except Exception as e:
        return f"❌ Error: {str(e)}", None, "Training failed"


def predict_future_ui(n_hours):
    """UI callback: generate future price values and chart them.

    Args:
        n_hours: Number of future steps to generate (converted to int).

    Returns:
        Tuple ``(prediction_table, figure, status_message)``. The table and
        figure are None whenever no prediction could be produced; the status
        message is always the third item, so every return path lines up with
        the same three outputs.
    """

    if not training_complete:
        # Status goes in the third slot, matching the success and error
        # paths below (it used to be returned in the table slot).
        return None, None, "⚠️ Please train model first"

    try:
        future_times, predictions = pipeline.predict_future(n_steps=int(n_hours))

        pred_df = pd.DataFrame({
            'Timestamp': [t.strftime('%Y-%m-%d %H:%M') for t in future_times],
            'Predicted Price (USDT)': [f"${p:,.2f}" for p in predictions]
        })

        fig = go.Figure()
        fig.add_trace(go.Scatter(
            x=future_times,
            y=predictions,
            mode='lines+markers',
            name='Predicted Price',
            line=dict(color='green', width=3),
            marker=dict(size=8)
        ))

        fig.update_layout(
            title=f'BTC/USDT Price Prediction - Next {n_hours} Hours',
            xaxis_title='Time',
            yaxis_title='Price (USDT)',
            template='plotly_dark',
            hovermode='x unified',
            height=500
        )

        return pred_df, fig, f"✅ Predicted next {n_hours} hours"

    except Exception as e:
        return None, None, f"❌ Error: {str(e)}"


def get_current_price_ui():
    """UI callback: fetch the live BTC-USDT ticker and format it as text.

    Returns:
        A multi-line summary string, or an error message string when the
        ticker could not be fetched.
    """
    try:
        ticker = pipeline.okx_client.get_ticker('BTC-USDT')
        if not ticker:
            return "❌ Failed to fetch current price"

        updated_at = ticker['timestamp'].strftime('%Y-%m-%d %H:%M:%S')
        lines = [
            "🔴 LIVE BTC/USDT PRICE",
            "",
            f"Last Price: ${ticker['last']:,.2f}",
            f"Bid: ${ticker['bid']:,.2f}",
            f"Ask: ${ticker['ask']:,.2f}",
            f"24h Volume: {ticker['volume_24h']:,.2f} BTC",
            f"Updated: {updated_at}",
        ]
        return "\n".join(lines)

    except Exception as e:
        return f"❌ Error: {str(e)}"


def show_feature_importance_ui():
    """UI callback: chart and list the random forest's top features.

    Returns:
        Tuple ``(figure, text)``. The figure is None when the model has not
        been trained yet or an error occurs; the text then holds the
        warning or error message.
    """
    if not training_complete:
        return None, "⚠️ Please train model first"

    try:
        forest = pipeline.ensemble_model.models['random_forest']
        names = pipeline.ensemble_model.feature_columns

        fig = pipeline.visualizer.plot_feature_importance(forest, names, top_n=30)

        scores = forest.feature_importances_
        # argsort is ascending; take the 30 largest and flip to descending.
        ranked = np.argsort(scores)[-30:][::-1]

        entries = [
            f"{rank}. {names[idx]}: {scores[idx]:.6f}\n"
            for rank, idx in enumerate(ranked, 1)
        ]
        importance_text = "=== TOP 30 FEATURES ===\n\n" + "".join(entries)

        return fig, importance_text

    except Exception as e:
        return None, f"❌ Error: {str(e)}"


def _build_market_figure(df):
    """Build the four-row technical-analysis chart for the candles in ``df``.

    Rows: close with SMA 20/50, RSI with 70/30 guide lines, MACD with its
    signal line and histogram, and volume bars coloured by candle direction.
    """
    fig = make_subplots(
        rows=4, cols=1,
        shared_xaxes=True,
        vertical_spacing=0.05,
        subplot_titles=('Price & MA', 'RSI', 'MACD', 'Volume'),
        row_heights=[0.4, 0.2, 0.2, 0.2]
    )
    timestamps = df['timestamp']

    # Price and Moving Averages
    fig.add_trace(
        go.Scatter(x=timestamps, y=df['close'],
                   name='Close', line=dict(color='white', width=2)),
        row=1, col=1
    )
    fig.add_trace(
        go.Scatter(x=timestamps, y=df['sma_20'],
                   name='SMA 20', line=dict(color='orange', width=1)),
        row=1, col=1
    )
    fig.add_trace(
        go.Scatter(x=timestamps, y=df['sma_50'],
                   name='SMA 50', line=dict(color='blue', width=1)),
        row=1, col=1
    )

    # RSI with the overbought (70) / oversold (30) thresholds used in the summary
    fig.add_trace(
        go.Scatter(x=timestamps, y=df['rsi_14'],
                   name='RSI', line=dict(color='purple', width=2)),
        row=2, col=1
    )
    fig.add_hline(y=70, line_dash="dash", line_color="red", row=2, col=1)
    fig.add_hline(y=30, line_dash="dash", line_color="green", row=2, col=1)

    # MACD
    fig.add_trace(
        go.Scatter(x=timestamps, y=df['macd'],
                   name='MACD', line=dict(color='blue', width=1)),
        row=3, col=1
    )
    fig.add_trace(
        go.Scatter(x=timestamps, y=df['macd_signal'],
                   name='Signal', line=dict(color='red', width=1)),
        row=3, col=1
    )
    fig.add_trace(
        go.Bar(x=timestamps, y=df['macd_diff'],
               name='Histogram', marker_color='gray'),
        row=3, col=1
    )

    # Volume: red when the candle closed below its open, green otherwise.
    # Vectorised comparison instead of a per-row .iloc lookup.
    colors = np.where(df['close'] < df['open'], 'red', 'green').tolist()
    fig.add_trace(
        go.Bar(x=timestamps, y=df['volume'],
               name='Volume', marker_color=colors),
        row=4, col=1
    )

    fig.update_layout(
        title='Market Technical Analysis',
        template='plotly_dark',
        height=900,
        showlegend=True,
        hovermode='x unified'
    )
    return fig


def _build_market_summary(df):
    """Format the text summary from the most recent candle in ``df``."""
    closes = df['close']
    current_price = closes.iloc[-1]
    rsi = df['rsi_14'].iloc[-1]
    macd_signal = "Bullish" if df['macd_diff'].iloc[-1] > 0 else "Bearish"

    if rsi > 70:
        rsi_label = "Overbought ⚠️"
    elif rsi < 30:
        rsi_label = "Oversold ⚠️"
    else:
        rsi_label = "Neutral ✅"

    # The change line needs 24 candles; with fewer, report N/A instead of
    # letting an IndexError discard the whole analysis.
    # NOTE(review): the "24h" label only holds for hourly candles, and
    # iloc[-24] is 23 candles before the latest one - confirm the intent.
    lookback = 24
    if len(closes) >= lookback:
        change = (current_price / closes.iloc[-lookback] - 1) * 100
        change_line = f"24h Change: {change:.2f}%\n"
    else:
        change_line = f"24h Change: N/A (fewer than {lookback} candles loaded)\n"

    return "".join([
        "=== MARKET ANALYSIS ===\n\n",
        f"Current Price: ${current_price:,.2f}\n",
        f"RSI (14): {rsi:.2f} - {rsi_label}\n",
        f"MACD Signal: {macd_signal}\n",
        f"SMA 20: ${df['sma_20'].iloc[-1]:,.2f}\n",
        f"SMA 50: ${df['sma_50'].iloc[-1]:,.2f}\n",
        change_line,
        f"Volatility (20): {df['volatility_20'].iloc[-1]:.6f}\n",
    ])


def analyze_market_ui():
    """Market analysis interface.

    Charts the last 200 rows of the pipeline's processed data and summarises
    the latest candle (price, RSI state, MACD direction, SMAs, change over
    the last 24 candles, volatility).

    Returns:
        tuple: (figure, summary text) on success; (None, message) when no
        data is loaded, the data is empty, or an exception is raised.
    """
    if pipeline.processed_data is None:
        return None, "⚠️ Please load data first"

    try:
        df = pipeline.processed_data.tail(200)

        # An empty frame would otherwise fail with an opaque indexing error.
        if df.empty:
            return None, "⚠️ No processed data available - please load data first"

        fig = _build_market_figure(df)
        summary = _build_market_summary(df)

        return fig, summary

    except Exception as e:
        # UI boundary: report the failure in the summary box.
        return None, f"❌ Error: {str(e)}"


# ================================
# GRADIO APP
# ================================

# Build the Gradio Blocks UI: an intro banner followed by seven tabs.
# Each interactive tab declares its components and binds one button to one
# of the *_ui callbacks. The values a callback returns are assigned to the
# components in its `outputs` list in order, so that order must match the
# callback's return order.
with gr.Blocks(title="OKX BTC/USDT Ensemble Predictor", theme=gr.themes.Soft()) as demo:
    
    # Static introduction shown above all tabs.
    gr.Markdown("""
    # 🚀 OKX BTC/USDT Ensemble Price Predictor
    
    **Advanced Machine Learning System for Bitcoin Price Prediction**
    
    This application uses an ensemble of 6 machine learning models:
    - Random Forest
    - Gradient Boosting
    - AdaBoost
    - Ridge Regression
    - Lasso Regression
    - Elastic Net
    
    **Features:**
    - Real-time data from OKX API
    - 100+ technical indicators
    - Weighted ensemble predictions
    - Advanced visualization
    """)
    
    # TAB 1: DATA FETCHING
    # Inputs: candle timeframe and candle count, passed to fetch_data_ui.
    with gr.Tab("📊 Data Fetching"):
        gr.Markdown("### Fetch Historical BTC/USDT Data from OKX")
        
        with gr.Row():
            bar_size = gr.Dropdown(
                choices=['1m', '5m', '15m', '30m', '1H', '2H', '4H', '1D'],
                value='1H',
                label="Timeframe"
            )
            # Upper bound of 300 matches the "max 300" limit documented on
            # OKXClient.get_candlesticks.
            num_candles = gr.Slider(
                minimum=100,
                maximum=300,
                value=300,
                step=10,
                label="Number of Candles"
            )
        
        fetch_btn = gr.Button("🔄 Fetch Data", variant="primary", size="lg")
        
        with gr.Row():
            data_info = gr.Textbox(label="Data Info", lines=6)
        
        data_chart = gr.Plot(label="Price Chart")
        data_table = gr.Dataframe(label="Latest Data (Last 10 Candles)")
        
        # fetch_data_ui is defined outside this section; presumably it returns
        # (info text, figure, table) in this order - verify against its body.
        fetch_btn.click(
            fn=fetch_data_ui,
            inputs=[bar_size, num_candles],
            outputs=[data_info, data_chart, data_table]
        )
    
    # TAB 2: MODEL TRAINING
    # Inputs: test and validation split fractions, passed to train_model_ui.
    with gr.Tab("🤖 Model Training"):
        gr.Markdown("### Train Ensemble Model")
        
        with gr.Row():
            test_size_slider = gr.Slider(
                minimum=0.1,
                maximum=0.3,
                value=0.2,
                step=0.05,
                label="Test Set Size"
            )
            val_size_slider = gr.Slider(
                minimum=0.05,
                maximum=0.2,
                value=0.1,
                step=0.05,
                label="Validation Set Size"
            )
        
        train_btn = gr.Button("🚀 Train Model", variant="primary", size="lg")
        
        train_status = gr.Textbox(label="Training Status", lines=1)
        train_metrics = gr.Textbox(label="Model Performance Metrics", lines=20)
        train_plot = gr.Plot(label="Predictions vs Actual")
        
        # Note the output order (metrics, plot, status) differs from the
        # on-screen order (status, metrics, plot). train_model_ui is defined
        # outside this section - verify it returns values in this order.
        train_btn.click(
            fn=train_model_ui,
            inputs=[test_size_slider, val_size_slider],
            outputs=[train_metrics, train_plot, train_status]
        )
    
    # TAB 3: PREDICTIONS
    # Input: number of hours to forecast (1-72), passed to predict_future_ui.
    with gr.Tab("🔮 Future Predictions"):
        gr.Markdown("### Predict Future BTC/USDT Prices")
        
        n_hours_slider = gr.Slider(
            minimum=1,
            maximum=72,
            value=24,
            step=1,
            label="Prediction Horizon (Hours)"
        )
        
        predict_btn = gr.Button("🔮 Predict Future", variant="primary", size="lg")
        
        predict_status = gr.Textbox(label="Prediction Status", lines=1)
        predict_table = gr.Dataframe(label="Predicted Prices")
        predict_plot = gr.Plot(label="Future Price Prediction")
        
        # Output order is (table, plot, status); predict_future_ui presumably
        # returns (dataframe, figure, status message) - verify against its body.
        predict_btn.click(
            fn=predict_future_ui,
            inputs=[n_hours_slider],
            outputs=[predict_table, predict_plot, predict_status]
        )
    
    # TAB 4: LIVE PRICE
    # No inputs; get_current_price_ui returns a single text report.
    with gr.Tab("💰 Live Price"):
        gr.Markdown("### Real-time BTC/USDT Price from OKX")
        
        refresh_btn = gr.Button("🔄 Refresh Price", variant="primary", size="lg")
        
        live_price_info = gr.Textbox(label="Current Market Data", lines=8)
        
        refresh_btn.click(
            fn=get_current_price_ui,
            inputs=[],
            outputs=[live_price_info]
        )
    
    # TAB 5: FEATURE IMPORTANCE
    # No inputs; show_feature_importance_ui returns (figure, text), or
    # (None, message) when the model is not trained or an error occurs.
    with gr.Tab("📈 Feature Importance"):
        gr.Markdown("### Top Features Contributing to Predictions")
        
        feature_btn = gr.Button("📊 Show Feature Importance", variant="primary", size="lg")
        
        feature_plot = gr.Plot(label="Feature Importance Chart")
        feature_text = gr.Textbox(label="Top 30 Features", lines=35)
        
        feature_btn.click(
            fn=show_feature_importance_ui,
            inputs=[],
            outputs=[feature_plot, feature_text]
        )
    
    # TAB 6: MARKET ANALYSIS
    # No inputs; analyze_market_ui returns (figure, summary text), or
    # (None, message) when no data is loaded or an error occurs.
    with gr.Tab("📉 Market Analysis"):
        gr.Markdown("### Technical Analysis Dashboard")
        
        analyze_btn = gr.Button("📊 Analyze Market", variant="primary", size="lg")
        
        analysis_plot = gr.Plot(label="Technical Indicators")
        analysis_summary = gr.Textbox(label="Market Summary", lines=12)
        
        analyze_btn.click(
            fn=analyze_market_ui,
            inputs=[],
            outputs=[analysis_plot, analysis_summary]
        )
    
    # TAB 7: ABOUT
    # Static documentation only; no components are wired to callbacks here.
    with gr.Tab("ℹ️ About"):
        gr.Markdown("""
        ## About This Application
        
        ### Ensemble Model Architecture
        
        This application uses a sophisticated ensemble learning approach combining:
        
        1. **Random Forest** - Handles non-linear relationships and feature interactions
        2. **Gradient Boosting** - Sequential learning for complex patterns
        3. **AdaBoost** - Adaptive boosting for improved accuracy
        4. **Ridge Regression** - Linear model with L2 regularization
        5. **Lasso Regression** - Linear model with L1 regularization and feature selection
        6. **Elastic Net** - Combines L1 and L2 regularization
        
        ### Feature Engineering (100+ Features)
        
        - **Price Features**: Returns, log returns, price ranges, candlestick patterns
        - **Moving Averages**: SMA and EMA (5, 10, 20, 50, 100 periods)
        - **Momentum Indicators**: MACD, RSI, ROC, Stochastic Oscillator
        - **Volatility Indicators**: ATR, Bollinger Bands, rolling volatility
        - **Volume Indicators**: OBV, volume ratios, volume-price trends
        - **Statistical Features**: Skewness, kurtosis, quantiles
        - **Lag Features**: Historical prices and volumes (1-5 periods)
        - **Time Features**: Hour, day, month with cyclical encoding
        
        ### Data Source
        
        Real-time and historical data fetched from **OKX Exchange** via REST API:
        - Endpoint: `https://www.okx.com/api/v5/market/candles`
        - Instrument: BTC-USDT
        - Supported timeframes: 1m, 5m, 15m, 30m, 1H, 2H, 4H, 1D
        
        ### Model Training Process
        
        1. **Data Collection**: Fetch historical OHLCV data from OKX
        2. **Feature Engineering**: Generate 100+ technical indicators
        3. **Data Preprocessing**: Handle missing values, normalize features
        4. **Train/Val/Test Split**: Time-series aware splitting
        5. **Model Training**: Train 6 models independently
        6. **Weight Optimization**: Calculate optimal ensemble weights based on validation performance
        7. **Evaluation**: Test on unseen data with multiple metrics
        
        ### Performance Metrics
        
        - **MSE** (Mean Squared Error): Average squared prediction error
        - **RMSE** (Root Mean Squared Error): Square root of MSE, in price units
        - **MAE** (Mean Absolute Error): Average absolute prediction error
        - **R²** (R-squared): Proportion of variance explained
        - **MAPE** (Mean Absolute Percentage Error): Average percentage error
        
        ### Usage Instructions
        
        1. **Fetch Data**: Go to "Data Fetching" tab and load historical data
        2. **Train Model**: Navigate to "Model Training" and train the ensemble
        3. **Make Predictions**: Use "Future Predictions" to forecast prices
        4. **Monitor Live**: Check "Live Price" for real-time market data
        5. **Analyze**: Explore "Feature Importance" and "Market Analysis"
        
        ### Limitations & Disclaimer
        
        ⚠️ **Important**: This tool is for educational and research purposes only.
        
        - Cryptocurrency markets are highly volatile and unpredictable
        - Past performance does not guarantee future results
        - Model predictions should NOT be used as sole basis for trading decisions
        - Always conduct your own research and consult financial advisors
        - The authors are not responsible for any financial losses
        
        ### Technical Stack
        
        - **Python 3.10+**
        - **Gradio**: Web interface
        - **Scikit-learn**: Machine learning models
        - **Pandas & NumPy**: Data manipulation
        - **Plotly**: Interactive visualizations
        - **Requests**: API communication
        
        ### Version
        
        **v1.0.0** - Initial Release
        
        ---
        
        Made with ❤️ for the crypto community
        
        **GitHub**: [Your Repository Link]
        **Documentation**: [Your Docs Link]
        **Contact**: [Your Contact Info]
        """)

# ================================
# LAUNCH APP
# ================================

if __name__ == "__main__":
    # Start the Gradio server only when this file is run as a script.
    # Options: bind address 0.0.0.0, port 7860, no share link, and errors
    # shown in the UI.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "show_error": True,
    }
    demo.launch(**launch_options)