AshmithaIRRI committed on
Commit 30aae02 · verified · 1 Parent(s): 5214b25

Update app.py

Files changed (1)
  1. app.py +162 -67
app.py CHANGED
@@ -1,9 +1,10 @@
  # -*- coding: utf-8 -*-
  """
- Created on Fri Jan 31 14:12:26 2025

  @author: Ashmitha
  """

  #-------------------------------------Libraries-------------------------
  import pandas as pd
  import numpy as np
@@ -29,7 +30,22 @@ from sklearn.feature_selection import SelectFromModel
  import tempfile
  import matplotlib.pyplot as plt
  import seaborn as sns
- #--------------------------------------------------------------------FNNModel----------------------------------------------------
  def FNNModel(trainX, trainy, testX=None, testy=None, epochs=1000, batch_size=64, learning_rate=0.0001,
               l1_reg=0.001, l2_reg=0.001, dropout_rate=0.2):
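The FNNModel body is collapsed in this view; only the signature is visible. A minimal sketch consistent with that signature, assuming TensorFlow/Keras (which the regularization and dropout arguments suggest) and purely illustrative layer sizes, would be:

import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l1_l2

def fnn_sketch(trainX, trainy, testX=None, epochs=1000, batch_size=64,
               learning_rate=0.0001, l1_reg=0.001, l2_reg=0.001, dropout_rate=0.2):
    # Hypothetical body: two hidden layers with L1/L2 penalties and dropout,
    # matching the signature above; layer widths are illustrative.
    model = Sequential([
        Dense(64, activation='relu', input_shape=(trainX.shape[1],),
              kernel_regularizer=l1_l2(l1=l1_reg, l2=l2_reg)),
        Dropout(dropout_rate),
        Dense(32, activation='relu',
              kernel_regularizer=l1_l2(l1=l1_reg, l2=l2_reg)),
        Dense(1)  # single continuous phenotype output
    ])
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss='mse')
    history = model.fit(trainX, trainy, epochs=epochs, batch_size=batch_size, verbose=0)
    predicted_train = model.predict(trainX).flatten()
    predicted_test = model.predict(testX).flatten() if testX is not None else None
    return predicted_train, predicted_test, history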
@@ -102,7 +118,6 @@ def FNNModel(trainX, trainy, testX=None, testy=None, epochs=1000, batch_size=64,
-
  #--------------------------------------------------CNNModel-------------------------------------------
  def CNNModel(trainX, trainy, testX, testy, epochs=1000, batch_size=64, learning_rate=0.0001, l1_reg=0.0001, l2_reg=0.0001, dropout_rate=0.3, feature_selection=True):
@@ -157,6 +172,64 @@ def CNNModel(trainX, trainy, testX, testy, epochs=1000, batch_size=64, learning_
      predicted_test = model.predict(testX).flatten() if testX is not None else None
      return predicted_train, predicted_test, history
  #------------------------------------------RFModel---------------------------------------------------
  def RFModel(trainX, trainy, testX, testy, n_estimators=100, max_depth=None, feature_selection=True):
@@ -179,34 +252,41 @@ def RFModel(trainX, trainy, testX, testy, n_estimators=100, max_depth=None,featu
      predicted_test = rf_model.predict(testX_scaled) if testX is not None else None
      return predicted_train, predicted_test, history
- #-------------------------------------------------XGBoost--------------------------------------------
- def XGBoostModel(trainX, trainy, testX, testy, learning_rate, min_child_weight, feature_selection=True, n_estimators=100, max_depth=None):
-     # Scale the features
-     scaler = MinMaxScaler()
-     trainX_scaled = scaler.fit_transform(trainX)
-     if testX is not None:
-         testX_scaled = scaler.transform(testX)
-     xgb_model = XGBRegressor(objective="reg:squarederror", random_state=42)
-     history = xgb_model.fit(trainX, trainy)
-     #param_grid = {
-     #    "learning_rate": 0.01,
-     #    "max_depth": 10,
-     #    "n_estimators": 100,
-     #    "min_child_weight": 10
-     #}
-     # Predictions
-     predicted_train = xgb_model.predict(trainX_scaled)
-     predicted_test = xgb_model.predict(testX_scaled) if testX is not None else None
-     return predicted_train, predicted_test, history
  #------------------------------------------------------------------File--------------------------------------------
  def read_csv_file(uploaded_file):
      if uploaded_file is not None:
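Note that the removed XGBoostModel fit on the raw trainX but predicted on trainX_scaled, so training and prediction saw different representations. A scale-consistent version (a sketch, not the committed code; tree ensembles are largely scale-invariant, so the scaler could also be dropped entirely) would be:

from sklearn.preprocessing import MinMaxScaler
from xgboost import XGBRegressor

def xgboost_sketch(trainX, trainy, testX=None):
    # Fit and predict on the same (scaled) representation
    scaler = MinMaxScaler()
    trainX_scaled = scaler.fit_transform(trainX)
    xgb_model = XGBRegressor(objective="reg:squarederror", random_state=42)
    xgb_model.fit(trainX_scaled, trainy)
    predicted_train = xgb_model.predict(trainX_scaled)
    predicted_test = xgb_model.predict(scaler.transform(testX)) if testX is not None else None
    return predicted_train, predicted_test, xgb_model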
@@ -237,12 +317,15 @@ def calculate_topsis_score(df):
      df['TOPSIS_Score'] = topsis_score
      return df
- #_-------------------------------------------------------------NestedKFold Cross Validation---------------------
  def NestedKFoldCrossValidation(training_data, training_additive, testing_data, testing_additive,
                                 training_dominance, testing_dominance, epochs, learning_rate, min_child_weight, batch_size=64,
-                                outer_n_splits=2, output_file='cross_validation_results.csv',
                                 predicted_phenotype_file='predicted_phenotype.csv', feature_selection=True):

      # Define calculate_topsis_score before using it
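Most of calculate_topsis_score is collapsed above; only the final assignment is visible. A standard TOPSIS closeness computation over a metrics table, assuming R²/correlation are benefit criteria and MSE/RMSE cost criteria (the column names here are illustrative, not necessarily the committed ones), looks roughly like:

import numpy as np
import pandas as pd

def topsis_sketch(df, benefit=('R2_test', 'Corr_test'), cost=('MSE_test', 'RMSE_test')):
    cols = list(benefit) + list(cost)
    M = df[cols].to_numpy(dtype=float)
    M = M / np.sqrt((M ** 2).sum(axis=0))            # vector-normalize each criterion
    is_benefit = np.array([c in benefit for c in cols])
    ideal = np.where(is_benefit, M.max(axis=0), M.min(axis=0))
    anti = np.where(is_benefit, M.min(axis=0), M.max(axis=0))
    d_pos = np.sqrt(((M - ideal) ** 2).sum(axis=1))  # distance to the ideal point
    d_neg = np.sqrt(((M - anti) ** 2).sum(axis=1))   # distance to the anti-ideal point
    return d_neg / (d_pos + d_neg)                   # TOPSIS closeness in [0, 1]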
@@ -255,68 +338,71 @@ def NestedKFoldCrossValidation(training_data, training_additive, testing_data, t
      testing_additive = testing_additive.iloc[:, 1:]
      training_dominance = training_dominance.iloc[:, 1:]
      testing_dominance = testing_dominance.iloc[:, 1:]
-
-     # Merge training and testing data with additive and dominance components
-     training_data_merged = pd.concat([training_data, training_additive, training_dominance], axis=1)
-     testing_data_merged = pd.concat([testing_data, testing_additive, testing_dominance], axis=1)
-
-     phenotypic_info = training_data['phenotypes'].values
-     phenotypic_test_info = testing_data['phenotypes'].values if 'phenotypes' in testing_data.columns else None
-     sample_ids = testing_data.iloc[:, 0].values
-
-     training_genotypic_data_merged = training_data_merged.iloc[:, 2:].values
-     testing_genotypic_data_merged = testing_data_merged.iloc[:, 1:].values
-
-     outer_kf = KFold(n_splits=outer_n_splits)
-
-     results = []
-     all_predicted_phenotypes = []
-
-     def calculate_metrics(true_values, predicted_values):
-         mse = mean_squared_error(true_values, predicted_values)
-         rmse = np.sqrt(mse)
-         r2 = r2_score(true_values, predicted_values)
-         corr = pearsonr(true_values, predicted_values)[0]
-         return mse, rmse, r2, corr
-
-     models = [
-         ('FNNModel', FNNModel),
          ('CNNModel', CNNModel),
-         ('RFModel', RFModel),
-         ('XGBoostModel', XGBoostModel)
      ]
-
      for outer_fold, (outer_train_index, outer_test_index) in enumerate(outer_kf.split(phenotypic_info), 1):
          outer_trainX = training_genotypic_data_merged[outer_train_index]
          outer_trainy = phenotypic_info[outer_train_index]

-         # Feature selection (inside the outer loop to prevent data leakage)
          if feature_selection:
              rf = RandomForestRegressor(n_estimators=100, random_state=42)
-             rf.fit(outer_trainX, outer_trainy)  # Fit only on outer_trainX
              selector = SelectFromModel(rf, threshold="mean", prefit=True)
              outer_trainX = selector.transform(outer_trainX)
              testing_genotypic_data_merged_fold = selector.transform(testing_genotypic_data_merged)  # Transform testing data
          else:
              testing_genotypic_data_merged_fold = testing_genotypic_data_merged

-         # Standardization (inside the outer loop to prevent data leakage)
          scaler = StandardScaler()
          outer_trainX = scaler.fit_transform(outer_trainX)  # Fit and transform on outer_trainX
          testing_genotypic_data_merged_fold = scaler.transform(testing_genotypic_data_merged_fold)  # Transform testing data
-
          outer_testX = testing_genotypic_data_merged_fold
          outer_testy = phenotypic_test_info
-
          for model_name, model_func in models:
              print(f"Running model: {model_name} for fold {outer_fold}")
              if model_name in ['FNNModel', 'CNNModel']:
                  predicted_train, predicted_test, history = model_func(outer_trainX, outer_trainy, outer_testX, outer_testy, epochs=epochs, batch_size=batch_size)
              elif model_name in ['RFModel']:
                  predicted_train, predicted_test, history = model_func(outer_trainX, outer_trainy, outer_testX, outer_testy)
              else:
-                 predicted_train, predicted_test, history = model_func(outer_trainX, outer_trainy, outer_testX, outer_testy, learning_rate, min_child_weight)
-
              # Calculate metrics
              mse_train, rmse_train, r2_train, corr_train = calculate_metrics(outer_trainy, predicted_train)
              mse_test, rmse_test, r2_test, corr_test = calculate_metrics(outer_testy, predicted_test) if outer_testy is not None else (None, None, None, None)
@@ -396,9 +482,8 @@ def visualize_topsis_scores(results_df):
      # Save the figure
      plt.savefig("topsis_scores.png")
      return "topsis_scores.png"
-
  def run_cross_validation(training_file, training_additive_file, testing_file, testing_additive_file,
-                          training_dominance_file, testing_dominance_file, feature_selection, learning_rate, min_child_weight):

      # Default parameters
      epochs = 1000
@@ -426,7 +511,12 @@ def run_cross_validation(training_file, training_additive_file, testing_file, te
      outer_n_splits=outer_n_splits,
      learning_rate=learning_rate,
      min_child_weight=min_child_weight,
-     feature_selection=feature_selection
  )

  # Save outputs
@@ -474,4 +564,9 @@ with gr.Blocks() as interface:
  )

  # Launch the interface
- interface.launch()
  # -*- coding: utf-8 -*-
  """
+ Created on Tue Feb 4 14:44:33 2025

  @author: Ashmitha
  """
+
  #-------------------------------------Libraries-------------------------
  import pandas as pd
  import numpy as np
 
  import tempfile
  import matplotlib.pyplot as plt
  import seaborn as sns
+ import lightgbm as lgb
+ from lightgbm import LGBMRegressor
+ from sklearn.model_selection import KFold
+ from sklearn.preprocessing import StandardScaler
+ from sklearn.pipeline import Pipeline
+ from sklearn.svm import SVR as SVR_Model  # aliased: a local SVR wrapper is defined below
+
+ #--------------------------------------------------FNNModel-----------------------------------
  def FNNModel(trainX, trainy, testX=None, testy=None, epochs=1000, batch_size=64, learning_rate=0.0001,
               l1_reg=0.001, l2_reg=0.001, dropout_rate=0.2):

  #--------------------------------------------------CNNModel-------------------------------------------
  def CNNModel(trainX, trainy, testX, testy, epochs=1000, batch_size=64, learning_rate=0.0001, l1_reg=0.0001, l2_reg=0.0001, dropout_rate=0.3, feature_selection=True):
 
      predicted_test = model.predict(testX).flatten() if testX is not None else None
      return predicted_train, predicted_test, history
+ #-------------------------------------------LGBoost-----------------------------------------------
+ # Earlier commented-out draft used learning_rate=0.05, num_leaves=31, max_depth=-1,
+ # min_child_samples=20, n_estimators=500, reg_alpha=0.1, reg_lambda=0.1.
+ def LGBoostModel(trainX, trainy, testX, testy, learning_rate=0.05, num_leaves=15, max_depth=5, min_child_samples=10, n_estimators=1000):
+     """
+     Train a LightGBM regressor with the given data and parameters.
+     """
+     print(f"Training LightGBM Model with n_estimators={n_estimators}, learning_rate={learning_rate}, num_leaves={num_leaves}, max_depth={max_depth}")
+
+     # Standardize the features (fit on train, transform test)
+     scaler = StandardScaler()
+     trainX_scaled = scaler.fit_transform(trainX)
+     testX_scaled = scaler.transform(testX)
+
+     # Create and train the model
+     lgbm_model = LGBMRegressor(
+         n_estimators=n_estimators,
+         learning_rate=learning_rate,
+         num_leaves=num_leaves,
+         max_depth=max_depth,
+         min_child_samples=min_child_samples,
+         reg_alpha=0.01,   # reduced L1 regularization
+         reg_lambda=0.01,  # reduced L2 regularization
+         verbose=-1        # silence LightGBM logging
+     )
+     lgbm_model.fit(trainX_scaled, trainy)
+
+     # Predicting the values
+     predicted_train = lgbm_model.predict(trainX_scaled)
+     predicted_test = lgbm_model.predict(testX_scaled)
+
+     return predicted_train, predicted_test, lgbm_model
+
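A quick smoke test for the new LGBoostModel on random data (shapes and values are purely illustrative):

import numpy as np

rng = np.random.default_rng(42)
X_train, y_train = rng.normal(size=(80, 20)), rng.normal(size=80)
X_test, y_test = rng.normal(size=(20, 20)), rng.normal(size=20)

# Uses the function's own defaults (num_leaves=15, max_depth=5, n_estimators=1000)
pred_train, pred_test, model = LGBoostModel(X_train, y_train, X_test, y_test)
print(pred_train.shape, pred_test.shape)  # (80,) (20,)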
  #------------------------------------------RFModel---------------------------------------------------
  def RFModel(trainX, trainy, testX, testy, n_estimators=100, max_depth=None, feature_selection=True):

      predicted_test = rf_model.predict(testX_scaled) if testX is not None else None
      return predicted_train, predicted_test, history
+
+ #--------------------------------------SVR-------------------------------------
+ # sklearn's SVR is imported as SVR_Model above to avoid a name conflict with this wrapper
+ def SVR(trainX, trainy, testX, testy, kernel='rbf', C=1.0, epsilon=0.1, gamma='scale'):
+     """
+     Train a Support Vector Regression (SVR) model with the given data and parameters.
+
+     Parameters:
+         trainX, trainy: training data (features & target)
+         testX, testy: testing data (features & target)
+         kernel: 'linear', 'poly', or 'rbf' (default 'rbf')
+         C: regularization parameter
+         epsilon: margin of tolerance within which predictions are not penalized
+         gamma: kernel coefficient (used for 'rbf' and 'poly')
+     """
+     print(f"Training SVR Model with kernel={kernel}, C={C}, epsilon={epsilon}, gamma={gamma}")
+
+     # Create a pipeline with scaling and SVR
+     svr_model = Pipeline([
+         ('scaler', StandardScaler()),
+         ('svr', SVR_Model(kernel=kernel, C=C, epsilon=epsilon, gamma=gamma))
+     ])
+
+     # Train the model
+     svr_model.fit(trainX, trainy)
+
+     # Predict values
+     predicted_train = svr_model.predict(trainX)
+     predicted_test = svr_model.predict(testX)
+
+     return predicted_train, predicted_test, svr_model
+
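Bundling the scaler and the regressor in a Pipeline is a sound design choice here: the scaler is fit only on whatever data .fit() sees, which matters once this is called per cross-validation fold. An illustrative call (random data, not from the repository):

import numpy as np

rng = np.random.default_rng(0)
X_train, y_train = rng.normal(size=(80, 20)), rng.normal(size=80)
X_test, y_test = rng.normal(size=(20, 20)), rng.normal(size=20)

pred_train, pred_test, pipe = SVR(X_train, y_train, X_test, y_test,
                                  kernel='rbf', C=1.0, epsilon=0.1, gamma='scale')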
  #------------------------------------------------------------------File--------------------------------------------
  def read_csv_file(uploaded_file):
      if uploaded_file is not None:

      df['TOPSIS_Score'] = topsis_score
      return df
+ #----------------------------------------------------------NestedKFoldCrossValidation------------
  def NestedKFoldCrossValidation(training_data, training_additive, testing_data, testing_additive,
                                 training_dominance, testing_dominance, epochs, learning_rate, min_child_weight, batch_size=64,
+                                outer_n_splits=2, kernel='rbf', C=1.0, epsilon=0.1, gamma='scale', output_file='cross_validation_results.csv',
                                 predicted_phenotype_file='predicted_phenotype.csv', feature_selection=True):

      # Define calculate_topsis_score before using it
      testing_additive = testing_additive.iloc[:, 1:]
      training_dominance = training_dominance.iloc[:, 1:]
      testing_dominance = testing_dominance.iloc[:, 1:]
+     # Epistatic feature expansion: A, D, A^2, D^2 and the A*D interaction
+     A_square_training = training_additive ** 2
+     D_square_training = training_dominance ** 2
+     A_square_testing = testing_additive ** 2
+     D_square_testing = testing_dominance ** 2
+     additive_dominance_training = training_additive * training_dominance
+     additive_dominance_testing = testing_additive * testing_dominance
+     training_data_merged = np.concatenate([training_additive, training_dominance, A_square_training, D_square_training, additive_dominance_training], axis=1)
+     testing_data_merged = np.concatenate([testing_additive, testing_dominance, A_square_testing, D_square_testing, additive_dominance_testing], axis=1)
+     phenotypic_info = training_data['phenotypes'].values
+     phenotypic_test_info = testing_data['phenotypes'].values if 'phenotypes' in testing_data.columns else None
+     sample_ids = testing_data.iloc[:, 0].values
+     training_data_merged = pd.DataFrame(training_data_merged)
+     testing_data_merged = pd.DataFrame(testing_data_merged)
+     training_genotypic_data_merged = training_data_merged.iloc[:, 1:].values
+     testing_genotypic_data_merged = testing_data_merged.iloc[:, 1:].values
+     print(training_genotypic_data_merged)
+     print(testing_genotypic_data_merged)
+     outer_kf = KFold(n_splits=outer_n_splits)
+     results = []
+     all_predicted_phenotypes = []
+     def calculate_metrics(true_values, predicted_values):
+         mse = mean_squared_error(true_values, predicted_values)
+         rmse = np.sqrt(mse)
+         r2 = r2_score(true_values, predicted_values)
+         corr = pearsonr(true_values, predicted_values)[0]
+         return mse, rmse, r2, corr  # order must match the unpacking below
+     models = [
+         ('FNNModel', FNNModel),
          ('CNNModel', CNNModel),
+         ('RFModel', RFModel),
+         ('LGBoostModel', LGBoostModel),
+         ('SVR', SVR)
      ]
      for outer_fold, (outer_train_index, outer_test_index) in enumerate(outer_kf.split(phenotypic_info), 1):
          outer_trainX = training_genotypic_data_merged[outer_train_index]
          outer_trainy = phenotypic_info[outer_train_index]
+
          if feature_selection:
              rf = RandomForestRegressor(n_estimators=100, random_state=42)
+             rf.fit(outer_trainX, outer_trainy)
              selector = SelectFromModel(rf, threshold="mean", prefit=True)
              outer_trainX = selector.transform(outer_trainX)
              testing_genotypic_data_merged_fold = selector.transform(testing_genotypic_data_merged)  # Transform testing data
          else:
              testing_genotypic_data_merged_fold = testing_genotypic_data_merged
+
          scaler = StandardScaler()
          outer_trainX = scaler.fit_transform(outer_trainX)  # Fit and transform on outer_trainX
          testing_genotypic_data_merged_fold = scaler.transform(testing_genotypic_data_merged_fold)  # Transform testing data
          outer_testX = testing_genotypic_data_merged_fold
          outer_testy = phenotypic_test_info
          for model_name, model_func in models:
              print(f"Running model: {model_name} for fold {outer_fold}")
              if model_name in ['FNNModel', 'CNNModel']:
                  predicted_train, predicted_test, history = model_func(outer_trainX, outer_trainy, outer_testX, outer_testy, epochs=epochs, batch_size=batch_size)
              elif model_name in ['RFModel']:
                  predicted_train, predicted_test, history = model_func(outer_trainX, outer_trainy, outer_testX, outer_testy)
+             elif model_name in ['LGBoostModel']:
+                 predicted_train, predicted_test, history = model_func(outer_trainX, outer_trainy, outer_testX, outer_testy, learning_rate=0.05, num_leaves=31, max_depth=-1, min_child_samples=20, n_estimators=500)
              else:
+                 predicted_train, predicted_test, svr_model = model_func(outer_trainX, outer_trainy, outer_testX, outer_testy, kernel=kernel, C=C, epsilon=epsilon, gamma=gamma)
+
              # Calculate metrics
              mse_train, rmse_train, r2_train, corr_train = calculate_metrics(outer_trainy, predicted_train)
              mse_test, rmse_test, r2_test, corr_test = calculate_metrics(outer_testy, predicted_test) if outer_testy is not None else (None, None, None, None)
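The new feature construction replaces the old raw concatenation with a five-block epistatic expansion (A, D, A², D², A∘D), so the marker dimension grows fivefold; the committed code then drops the first expanded column via iloc[:, 1:] before modeling. A small shape check (arrays illustrative):

import numpy as np

A = np.random.rand(5, 3)  # 5 samples x 3 additive marker effects
D = np.random.rand(5, 3)  # matching dominance effects
merged = np.concatenate([A, D, A ** 2, D ** 2, A * D], axis=1)
print(merged.shape)       # (5, 15): five blocks of the original 3 columns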
      # Save the figure
      plt.savefig("topsis_scores.png")
      return "topsis_scores.png"

  def run_cross_validation(training_file, training_additive_file, testing_file, testing_additive_file,
+                          training_dominance_file, testing_dominance_file, feature_selection, learning_rate, min_child_weight, kernel, C, epsilon, gamma):

      # Default parameters
      epochs = 1000

      outer_n_splits=outer_n_splits,
      learning_rate=learning_rate,
      min_child_weight=min_child_weight,
+     feature_selection=feature_selection,
+     kernel=kernel,      # pass the accepted arguments through instead of hard-coding 'rbf'/1.0/0.1/'scale'
+     C=C,
+     epsilon=epsilon,
+     gamma=gamma
  )

  # Save outputs
 
  )

  # Launch the interface
+ interface.launch()
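The gr.Blocks layout itself is collapsed in this diff. A hypothetical wiring of run_cross_validation into the interface, with every component name and label invented for illustration, would follow the usual Gradio pattern:

import gradio as gr

with gr.Blocks() as demo:
    # Illustrative components; the committed labels and layout are not shown in the diff
    files = [gr.File(label=lbl) for lbl in (
        "training", "training_additive", "testing", "testing_additive",
        "training_dominance", "testing_dominance")]
    feature_selection = gr.Checkbox(value=True, label="Feature selection")
    learning_rate = gr.Number(value=0.0001, label="learning_rate")
    min_child_weight = gr.Number(value=10, label="min_child_weight")
    kernel = gr.Dropdown(["linear", "poly", "rbf"], value="rbf", label="kernel")
    C = gr.Number(value=1.0, label="C")
    epsilon = gr.Number(value=0.1, label="epsilon")
    gamma = gr.Textbox(value="scale", label="gamma")
    out = gr.File(label="Results CSV")
    # Inputs are ordered to match run_cross_validation's signature
    gr.Button("Run").click(
        run_cross_validation,
        inputs=files + [feature_selection, learning_rate, min_child_weight, kernel, C, epsilon, gamma],
        outputs=out)
demo.launch()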