Spaces:

AshmithaIRRI
/

DeepMap_GUI

Runtime error

App Files Files Community

AshmithaIRRI committed on Jan 30, 2025

Commit

e8be176

verified ·

1 Parent(s): f3793d3

Update app.py

Browse files

Files changed (1) hide show

app.py +88 -50

app.py CHANGED Viewed

@@ -1,8 +1,4 @@
-"""
-Created on Tue Jan 28 13:43:25 2025
-@author: Ashmitha
-"""
 #---------------------------------------------Libraries--------------------------
 import pandas as pd
@@ -27,21 +23,34 @@ from xgboost import XGBRegressor
 import io
 from sklearn.feature_selection import SelectFromModel
 import tempfile
 #------------------------------------------GRUModel-------------------------------------
-def GRUModel(trainX, trainy, testX, testy, epochs=1000, batch_size=64, learning_rate=0.0001, l1_reg=0.001, l2_reg=0.001, dropout_rate=0.2):
-    # Reshape trainX and testX to be 3D: (samples, timesteps, features)
-    trainX = trainX.reshape((trainX.shape[0], 1, trainX.shape[1]))  # Adjusted for general feature count
     if testX is not None:
-        testX = testX.reshape((testX.shape[0], 1, testX.shape[1]))  # Reshape testX if it exists
-    # Define the GRU model
     model = Sequential()
     # GRU Layer
-    model.add(GRU(512, input_shape=(trainX.shape[1], trainX.shape[2]), return_sequences=False,
-                  kernel_regularizer=regularizers.l1_l2(l1=l1_reg, l2=l2_reg)))
     # Dense Layers with Batch Normalization, Dropout, LeakyReLU
     model.add(Dense(256, kernel_initializer='he_normal', kernel_regularizer=regularizers.l1_l2(l1=l1_reg, l2=l2_reg)))
     model.add(BatchNormalization())
@@ -52,31 +61,31 @@ def GRUModel(trainX, trainy, testX, testy, epochs=1000, batch_size=64, learning_
     model.add(BatchNormalization())
     model.add(Dropout(dropout_rate))
     model.add(LeakyReLU(alpha=0.1))
     model.add(Dense(64, kernel_initializer='he_normal', kernel_regularizer=regularizers.l1_l2(l1=l1_reg, l2=l2_reg)))
     model.add(BatchNormalization())
     model.add(Dropout(dropout_rate))
     model.add(LeakyReLU(alpha=0.1))
     model.add(Dense(32, kernel_initializer='he_normal', kernel_regularizer=regularizers.l1_l2(l1=l1_reg, l2=l2_reg)))
     model.add(BatchNormalization())
     model.add(Dropout(dropout_rate))
     model.add(LeakyReLU(alpha=0.1))
     # Output Layer with ReLU activation to prevent negative predictions
     model.add(Dense(1, activation="relu"))
     # Compile the model
     model.compile(loss='mse', optimizer=Adam(learning_rate=learning_rate), metrics=['mse'])
     # Callbacks for learning rate reduction and early stopping
     learning_rate_reduction = ReduceLROnPlateau(monitor='val_loss', patience=10, verbose=1, factor=0.5, min_lr=1e-6)
     early_stopping = EarlyStopping(monitor='val_loss', verbose=1, restore_best_weights=True, patience=10)
     # Train the model
-    history = model.fit(trainX, trainy, epochs=epochs, batch_size=batch_size, validation_split=0.1, verbose=1,
                         callbacks=[learning_rate_reduction, early_stopping])
     # Predict train and test
     predicted_train = model.predict(trainX)
     predicted_test = model.predict(testX) if testX is not None else None
@@ -85,9 +94,18 @@ def GRUModel(trainX, trainy, testX, testy, epochs=1000, batch_size=64, learning_
     predicted_train = predicted_train.flatten()
     if predicted_test is not None:
         predicted_test = predicted_test.flatten()
     return predicted_train, predicted_test, history
 #--------------------------------------------------CNNModel-------------------------------------------
 def CNNModel(trainX, trainy, testX, testy, epochs=1000, batch_size=64, learning_rate=0.0001, l1_reg=0.0001, l2_reg=0.0001, dropout_rate=0.3,feature_selection=True):
@@ -178,12 +196,12 @@ def XGBoostModel(trainX, trainy, testX, testy,learning_rate,min_child_weight,fea
     xgb_model=XGBRegressor(objective="reg:squarederror",random_state=42)
     history=xgb_model.fit(trainX, trainy)
-    param_grid={
-        "learning_rate":0.01,
-        "max_depth" : 10,
-         "n_estimators": 100,
-         "min_child_weight": 10
-        }
     # Predictions
@@ -200,7 +218,9 @@ def read_csv_file(uploaded_file):
         elif hasattr(uploaded_file, 'name'):  # For NamedString
             return pd.read_csv(uploaded_file.name)
     return None
-#------------------------------------------------------------Calculating TOPSIS score---------------------------
 def calculate_topsis_score(df):
     # Normalize the data
     norm_df = (df.iloc[:, 1:] - df.iloc[:, 1:].min()) / (df.iloc[:, 1:].max() - df.iloc[:, 1:].min())
@@ -331,24 +351,25 @@ def NestedKFoldCrossValidation(training_data, training_additive, testing_data, t
     # Calculate the average metrics for each model
     if 'phenotypes' in testing_data.columns:
         avg_results_df = results_df.groupby('Model').agg({
-            'Train_MSE': 'mean',
-            'Train_RMSE': 'mean',
             'Train_R2': 'mean',
             'Train_Corr': 'mean',
-            'Test_MSE': 'mean',
-            'Test_RMSE': 'mean',
             'Test_R2': 'mean',
             'Test_Corr': 'mean'
         }).reset_index()
     else:
         avg_results_df = results_df.groupby('Model').agg({
-            'Train_MSE': 'mean',
-            'Train_RMSE': 'mean',
             'Train_R2': 'mean',
             'Train_Corr': 'mean'
         }).reset_index()
     avg_results_df = calculate_topsis_score(avg_results_df)
     # Save the results with TOPSIS scores to the file
     avg_results_df.to_csv(output_file, index=False)
@@ -359,19 +380,33 @@ def NestedKFoldCrossValidation(training_data, training_additive, testing_data, t
         predicted_all_df.to_csv(predicted_phenotype_file, index=False)
     return avg_results_df, predicted_all_df if all_predicted_phenotypes else None
-#-------------------------------------------------------------------Gradio Interface----------------------------------
 def run_cross_validation(training_file, training_additive_file, testing_file, testing_additive_file,
-                         training_dominance_file, testing_dominance_file,feature_selection,learning_rate,min_child_weight):
     # Default parameters
     epochs = 1000
     batch_size = 64
     outer_n_splits = 2
-    #inner_n_splits = 2
-    min_child_weight=5
-    learning_rate=0.001
-    #learning_rate=learning_rate
-   # min_child_weight=min_child_weight
     # Load datasets
     training_data = pd.read_csv(training_file.name)
@@ -392,19 +427,22 @@ def run_cross_validation(training_file, training_additive_file, testing_file, te
         epochs=epochs,
         batch_size=batch_size,
         outer_n_splits=outer_n_splits,
-        #inner_n_splits=inner_n_splits,
         learning_rate=learning_rate,
         min_child_weight=min_child_weight,
         feature_selection=feature_selection
     )
     # Save outputs
-    results_file = "cross_validation_results.csv"
     predicted_file = "predicted_phenotype.csv"
-    results.to_csv(results_file, index=False)
-    predicted_phenotypes.to_csv(predicted_file, index=False)
-    return results_file, predicted_file
 # Gradio interface
 with gr.Blocks() as interface:
@@ -423,21 +461,21 @@ with gr.Blocks() as interface:
     with gr.Row():
         feature_selection = gr.Checkbox(label="Enable Feature Selection", value=True)
-    output1 = gr.File(label="Cross-Validation Results (CSV)")
     output2 = gr.File(label="Predicted Phenotypes (CSV)")
     submit_btn = gr.Button("Run DeepMap")
     submit_btn.click(
         run_cross_validation,
         inputs=[
             training_file, training_additive_file, testing_file,
-            testing_additive_file, training_dominance_file,testing_dominance_file,
             feature_selection
         ],
-        outputs=[output1, output2]
     )
 # Launch the interface
 interface.launch()

 #---------------------------------------------Libraries--------------------------
 import pandas as pd
 import io
 from sklearn.feature_selection import SelectFromModel
 import tempfile
+import matplotlib.pyplot as plt
+import seaborn as sns
 #------------------------------------------GRUModel-------------------------------------
+def GRUModel(trainX, trainy, testX, testy, epochs=1000, batch_size=64, learning_rate=0.0001, l1_reg=0.001, l2_reg=0.001, dropout_rate=0.2, feature_selection=True):
+    # Apply feature selection using Random Forest Regressor
+    # Scale the input data using MinMaxScaler to normalize the feature range
+    scaler = MinMaxScaler()
+    trainX_scaled = scaler.fit_transform(trainX)
     if testX is not None:
+        testX_scaled = scaler.transform(testX)
+    # Scale the target variable using MinMaxScaler
+    target_scaler = MinMaxScaler()
+    trainy_scaled = target_scaler.fit_transform(trainy.reshape(-1, 1))  # Reshape to 2D for scaler
+    # Reshape trainX and testX to be 3D: (samples, timesteps, features)
+    trainX = trainX_scaled.reshape((trainX.shape[0], 1, trainX.shape[1]))  # Adjusted for general feature count
+    if testX is not None:
+        testX = testX_scaled.reshape((testX.shape[0], 1, testX.shape[1]))  # Reshape testX if it exists
     model = Sequential()
     # GRU Layer
+    model.add(GRU(512, input_shape=(trainX.shape[1], trainX.shape[2]), return_sequences=False, kernel_regularizer=regularizers.l1_l2(l1=l1_reg, l2=l2_reg)))
     # Dense Layers with Batch Normalization, Dropout, LeakyReLU
     model.add(Dense(256, kernel_initializer='he_normal', kernel_regularizer=regularizers.l1_l2(l1=l1_reg, l2=l2_reg)))
     model.add(BatchNormalization())
     model.add(BatchNormalization())
     model.add(Dropout(dropout_rate))
     model.add(LeakyReLU(alpha=0.1))
     model.add(Dense(64, kernel_initializer='he_normal', kernel_regularizer=regularizers.l1_l2(l1=l1_reg, l2=l2_reg)))
     model.add(BatchNormalization())
     model.add(Dropout(dropout_rate))
     model.add(LeakyReLU(alpha=0.1))
     model.add(Dense(32, kernel_initializer='he_normal', kernel_regularizer=regularizers.l1_l2(l1=l1_reg, l2=l2_reg)))
     model.add(BatchNormalization())
     model.add(Dropout(dropout_rate))
     model.add(LeakyReLU(alpha=0.1))
     # Output Layer with ReLU activation to prevent negative predictions
     model.add(Dense(1, activation="relu"))
     # Compile the model
     model.compile(loss='mse', optimizer=Adam(learning_rate=learning_rate), metrics=['mse'])
     # Callbacks for learning rate reduction and early stopping
     learning_rate_reduction = ReduceLROnPlateau(monitor='val_loss', patience=10, verbose=1, factor=0.5, min_lr=1e-6)
     early_stopping = EarlyStopping(monitor='val_loss', verbose=1, restore_best_weights=True, patience=10)
     # Train the model
+    history = model.fit(trainX, trainy_scaled, epochs=epochs, batch_size=batch_size, validation_split=0.1, verbose=1,
                         callbacks=[learning_rate_reduction, early_stopping])
     # Predict train and test
     predicted_train = model.predict(trainX)
     predicted_test = model.predict(testX) if testX is not None else None
     predicted_train = predicted_train.flatten()
     if predicted_test is not None:
         predicted_test = predicted_test.flatten()
+    else:
+        predicted_test = np.zeros_like(predicted_train)
+    # Inverse scale the predictions to get them back to original range
+    predicted_train = target_scaler.inverse_transform(predicted_train.reshape(-1, 1)).flatten()
+    if predicted_test is not None:
+        predicted_test = target_scaler.inverse_transform(predicted_test.reshape(-1, 1)).flatten()
     return predicted_train, predicted_test, history
 #--------------------------------------------------CNNModel-------------------------------------------
 def CNNModel(trainX, trainy, testX, testy, epochs=1000, batch_size=64, learning_rate=0.0001, l1_reg=0.0001, l2_reg=0.0001, dropout_rate=0.3,feature_selection=True):
     xgb_model=XGBRegressor(objective="reg:squarederror",random_state=42)
     history=xgb_model.fit(trainX, trainy)
+    #param_grid={
+        #"learning_rate":0.01,
+        #"max_depth" : 10,
+         #"n_estimators": 100,
+         #"min_child_weight": 10
+       # }
     # Predictions
         elif hasattr(uploaded_file, 'name'):  # For NamedString
             return pd.read_csv(uploaded_file.name)
     return None
+#-------------------------------------------------------------TOPSIS Score and NestedKFold Cross Validation---------------------
 def calculate_topsis_score(df):
     # Normalize the data
     norm_df = (df.iloc[:, 1:] - df.iloc[:, 1:].min()) / (df.iloc[:, 1:].max() - df.iloc[:, 1:].min())
     # Calculate the average metrics for each model
     if 'phenotypes' in testing_data.columns:
         avg_results_df = results_df.groupby('Model').agg({
+           # 'Train_MSE': 'mean',
+           # 'Train_RMSE': 'mean',
             'Train_R2': 'mean',
             'Train_Corr': 'mean',
+            #'Test_MSE': 'mean',
+            #'Test_RMSE': 'mean',
             'Test_R2': 'mean',
             'Test_Corr': 'mean'
         }).reset_index()
     else:
         avg_results_df = results_df.groupby('Model').agg({
+            #'Train_MSE': 'mean',
+           # 'Train_RMSE': 'mean',
             'Train_R2': 'mean',
             'Train_Corr': 'mean'
         }).reset_index()
     avg_results_df = calculate_topsis_score(avg_results_df)
+    print(avg_results_df)
     # Save the results with TOPSIS scores to the file
     avg_results_df.to_csv(output_file, index=False)
         predicted_all_df.to_csv(predicted_phenotype_file, index=False)
     return avg_results_df, predicted_all_df if all_predicted_phenotypes else None
def visualize_topsis_scores(results_df):
    """Draw the per-model TOPSIS scores as a bar chart and save it as a PNG.

    Args:
        results_df: DataFrame whose ``Model`` column is plotted on the x-axis
            and whose ``TOPSIS_Score`` column is plotted on the y-axis.

    Returns:
        The path of the written image (``"topsis_scores.png"``), or ``None``
        when ``results_df`` has no ``TOPSIS_Score`` column.
    """
    if 'TOPSIS_Score' not in results_df.columns:
        print("TOPSIS scores are missing in the DataFrame!")
        return None

    output_path = "topsis_scores.png"
    fig = plt.figure(figsize=(10, 6))
    try:
        # seaborn draws onto the current axes, i.e. the figure created above.
        sns.barplot(x='Model', y='TOPSIS_Score', data=results_df, palette="viridis")
        plt.xlabel("Models", fontsize=12)
        plt.ylabel("TOPSIS Score", fontsize=12)
        plt.title("Model Performance - TOPSIS Score", fontsize=14)
        plt.xticks(rotation=45)
        plt.tight_layout()
        # Save the figure
        fig.savefig(output_path)
    finally:
        # pyplot keeps every figure registered until it is closed. This
        # function runs once per button click, so an unclosed figure would
        # accumulate for the lifetime of the app process.
        plt.close(fig)
    return output_path
 def run_cross_validation(training_file, training_additive_file, testing_file, testing_additive_file,
+                         training_dominance_file, testing_dominance_file, feature_selection, learning_rate, min_child_weight):
     # Default parameters
     epochs = 1000
     batch_size = 64
     outer_n_splits = 2
     # Load datasets
     training_data = pd.read_csv(training_file.name)
         epochs=epochs,
         batch_size=batch_size,
         outer_n_splits=outer_n_splits,
         learning_rate=learning_rate,
         min_child_weight=min_child_weight,
         feature_selection=feature_selection
     )
     # Save outputs
+    #results_file = "cross_validation_results.csv"
     predicted_file = "predicted_phenotype.csv"
+    #results.to_csv(results_file, index=False)
+    if predicted_phenotypes is not None:
+        predicted_phenotypes.to_csv(predicted_file, index=False)
+    # Generate visualization of TOPSIS scores
+    topsis_plot = visualize_topsis_scores(results)
+    return  predicted_file, topsis_plot
 # Gradio interface
 with gr.Blocks() as interface:
     with gr.Row():
         feature_selection = gr.Checkbox(label="Enable Feature Selection", value=True)
+    #output1 = gr.File(label="Cross-Validation Results (CSV)")
     output2 = gr.File(label="Predicted Phenotypes (CSV)")
+    output3 = gr.Image(label="TOPSIS Score Visualization")
     submit_btn = gr.Button("Run DeepMap")
     submit_btn.click(
         run_cross_validation,
         inputs=[
             training_file, training_additive_file, testing_file,
+            testing_additive_file, training_dominance_file, testing_dominance_file,
             feature_selection
         ],
+        outputs=[output2, output3]
     )
 # Launch the interface
 interface.launch()