AshmithaIRRI committed
Commit e8be176 · verified · 1 Parent(s): f3793d3

Update app.py

Files changed (1)
  1. app.py +88 -50
app.py CHANGED
@@ -1,8 +1,4 @@
-"""
-Created on Tue Jan 28 13:43:25 2025

-@author: Ashmitha
-"""

 #---------------------------------------------Libraries--------------------------
 import pandas as pd
@@ -27,21 +23,34 @@ from xgboost import XGBRegressor
 import io
 from sklearn.feature_selection import SelectFromModel
 import tempfile
+import matplotlib.pyplot as plt
+import seaborn as sns
 #------------------------------------------GRUModel-------------------------------------
-def GRUModel(trainX, trainy, testX, testy, epochs=1000, batch_size=64, learning_rate=0.0001, l1_reg=0.001, l2_reg=0.001, dropout_rate=0.2):
+def GRUModel(trainX, trainy, testX, testy, epochs=1000, batch_size=64, learning_rate=0.0001, l1_reg=0.001, l2_reg=0.001, dropout_rate=0.2, feature_selection=True):

-    # Reshape trainX and testX to be 3D: (samples, timesteps, features)
-    trainX = trainX.reshape((trainX.shape[0], 1, trainX.shape[1]))  # Adjusted for general feature count
+    # Apply feature selection using Random Forest Regressor
+
+
+    # Scale the input data using MinMaxScaler to normalize the feature range
+    scaler = MinMaxScaler()
+    trainX_scaled = scaler.fit_transform(trainX)
     if testX is not None:
-        testX = testX.reshape((testX.shape[0], 1, testX.shape[1]))  # Reshape testX if it exists
+        testX_scaled = scaler.transform(testX)
+
+    # Scale the target variable using MinMaxScaler
+    target_scaler = MinMaxScaler()
+    trainy_scaled = target_scaler.fit_transform(trainy.reshape(-1, 1))  # Reshape to 2D for scaler

-    # Define the GRU model
+    # Reshape trainX and testX to be 3D: (samples, timesteps, features)
+    trainX = trainX_scaled.reshape((trainX.shape[0], 1, trainX.shape[1]))  # Adjusted for general feature count
+    if testX is not None:
+        testX = testX_scaled.reshape((testX.shape[0], 1, testX.shape[1]))  # Reshape testX if it exists
+
     model = Sequential()
-
+
     # GRU Layer
-    model.add(GRU(512, input_shape=(trainX.shape[1], trainX.shape[2]), return_sequences=False,
-              kernel_regularizer=regularizers.l1_l2(l1=l1_reg, l2=l2_reg)))
-
+    model.add(GRU(512, input_shape=(trainX.shape[1], trainX.shape[2]), return_sequences=False, kernel_regularizer=regularizers.l1_l2(l1=l1_reg, l2=l2_reg)))
+
     # Dense Layers with Batch Normalization, Dropout, LeakyReLU
     model.add(Dense(256, kernel_initializer='he_normal', kernel_regularizer=regularizers.l1_l2(l1=l1_reg, l2=l2_reg)))
     model.add(BatchNormalization())
@@ -52,31 +61,31 @@ def GRUModel(trainX, trainy, testX, testy, epochs=1000, batch_size=64, learning_
     model.add(BatchNormalization())
     model.add(Dropout(dropout_rate))
     model.add(LeakyReLU(alpha=0.1))
-
+
     model.add(Dense(64, kernel_initializer='he_normal', kernel_regularizer=regularizers.l1_l2(l1=l1_reg, l2=l2_reg)))
     model.add(BatchNormalization())
     model.add(Dropout(dropout_rate))
     model.add(LeakyReLU(alpha=0.1))
-
+
     model.add(Dense(32, kernel_initializer='he_normal', kernel_regularizer=regularizers.l1_l2(l1=l1_reg, l2=l2_reg)))
     model.add(BatchNormalization())
     model.add(Dropout(dropout_rate))
     model.add(LeakyReLU(alpha=0.1))
-
+
     # Output Layer with ReLU activation to prevent negative predictions
     model.add(Dense(1, activation="relu"))
-
+
     # Compile the model
     model.compile(loss='mse', optimizer=Adam(learning_rate=learning_rate), metrics=['mse'])
-
+
     # Callbacks for learning rate reduction and early stopping
     learning_rate_reduction = ReduceLROnPlateau(monitor='val_loss', patience=10, verbose=1, factor=0.5, min_lr=1e-6)
     early_stopping = EarlyStopping(monitor='val_loss', verbose=1, restore_best_weights=True, patience=10)
-
+
     # Train the model
-    history = model.fit(trainX, trainy, epochs=epochs, batch_size=batch_size, validation_split=0.1, verbose=1,
+    history = model.fit(trainX, trainy_scaled, epochs=epochs, batch_size=batch_size, validation_split=0.1, verbose=1,
                         callbacks=[learning_rate_reduction, early_stopping])
-
+
     # Predict train and test
     predicted_train = model.predict(trainX)
     predicted_test = model.predict(testX) if testX is not None else None
@@ -85,9 +94,18 @@ def GRUModel(trainX, trainy, testX, testy, epochs=1000, batch_size=64, learning_
     predicted_train = predicted_train.flatten()
     if predicted_test is not None:
         predicted_test = predicted_test.flatten()
+    else:
+        predicted_test = np.zeros_like(predicted_train)
+
+    # Inverse scale the predictions to get them back to original range
+    predicted_train = target_scaler.inverse_transform(predicted_train.reshape(-1, 1)).flatten()
+    if predicted_test is not None:
+        predicted_test = target_scaler.inverse_transform(predicted_test.reshape(-1, 1)).flatten()

     return predicted_train, predicted_test, history

+
+
 #--------------------------------------------------CNNModel-------------------------------------------
 def CNNModel(trainX, trainy, testX, testy, epochs=1000, batch_size=64, learning_rate=0.0001, l1_reg=0.0001, l2_reg=0.0001, dropout_rate=0.3,feature_selection=True):

@@ -178,12 +196,12 @@ def XGBoostModel(trainX, trainy, testX, testy,learning_rate,min_child_weight,fea

     xgb_model=XGBRegressor(objective="reg:squarederror",random_state=42)
     history=xgb_model.fit(trainX, trainy)
-    param_grid={
-        "learning_rate":0.01,
-        "max_depth" : 10,
-        "n_estimators": 100,
-        "min_child_weight": 10
-    }
+    #param_grid={
+    #"learning_rate":0.01,
+    #"max_depth" : 10,
+    #"n_estimators": 100,
+    #"min_child_weight": 10
+    # }


     # Predictions
@@ -200,7 +218,9 @@ def read_csv_file(uploaded_file):
     elif hasattr(uploaded_file, 'name'):  # For NamedString
         return pd.read_csv(uploaded_file.name)
     return None
-#------------------------------------------------------------Calculating TOPSIS score---------------------------
+
+
+#_-------------------------------------------------------------NestedKFold Cross Validation---------------------
 def calculate_topsis_score(df):
     # Normalize the data
     norm_df = (df.iloc[:, 1:] - df.iloc[:, 1:].min()) / (df.iloc[:, 1:].max() - df.iloc[:, 1:].min())
@@ -331,24 +351,25 @@ def NestedKFoldCrossValidation(training_data, training_additive, testing_data, t
     # Calculate the average metrics for each model
     if 'phenotypes' in testing_data.columns:
         avg_results_df = results_df.groupby('Model').agg({
-            'Train_MSE': 'mean',
-            'Train_RMSE': 'mean',
+            # 'Train_MSE': 'mean',
+            # 'Train_RMSE': 'mean',
             'Train_R2': 'mean',
             'Train_Corr': 'mean',
-            'Test_MSE': 'mean',
-            'Test_RMSE': 'mean',
+            #'Test_MSE': 'mean',
+            #'Test_RMSE': 'mean',
             'Test_R2': 'mean',
             'Test_Corr': 'mean'
         }).reset_index()
     else:
         avg_results_df = results_df.groupby('Model').agg({
-            'Train_MSE': 'mean',
-            'Train_RMSE': 'mean',
+            #'Train_MSE': 'mean',
+            # 'Train_RMSE': 'mean',
             'Train_R2': 'mean',
             'Train_Corr': 'mean'
         }).reset_index()

     avg_results_df = calculate_topsis_score(avg_results_df)
+    print(avg_results_df)

     # Save the results with TOPSIS scores to the file
     avg_results_df.to_csv(output_file, index=False)
@@ -359,19 +380,33 @@ def NestedKFoldCrossValidation(training_data, training_additive, testing_data, t
         predicted_all_df.to_csv(predicted_phenotype_file, index=False)

     return avg_results_df, predicted_all_df if all_predicted_phenotypes else None
-#-------------------------------------------------------------------Gradio Interface----------------------------------
+def visualize_topsis_scores(results_df):
+    """
+    Function to visualize the TOPSIS scores as a bar chart.
+    """
+    if 'TOPSIS_Score' not in results_df.columns:
+        print("TOPSIS scores are missing in the DataFrame!")
+        return None
+
+    plt.figure(figsize=(10, 6))
+    sns.barplot(x='Model', y='TOPSIS_Score', data=results_df, palette="viridis")
+    plt.xlabel("Models", fontsize=12)
+    plt.ylabel("TOPSIS Score", fontsize=12)
+    plt.title("Model Performance - TOPSIS Score", fontsize=14)
+    plt.xticks(rotation=45)
+    plt.tight_layout()
+
+    # Save the figure
+    plt.savefig("topsis_scores.png")
+    return "topsis_scores.png"
+
 def run_cross_validation(training_file, training_additive_file, testing_file, testing_additive_file,
-                         training_dominance_file, testing_dominance_file,feature_selection,learning_rate,min_child_weight):
+                         training_dominance_file, testing_dominance_file, feature_selection, learning_rate, min_child_weight):

     # Default parameters
     epochs = 1000
     batch_size = 64
     outer_n_splits = 2
-    #inner_n_splits = 2
-    min_child_weight=5
-    learning_rate=0.001
-    #learning_rate=learning_rate
-    # min_child_weight=min_child_weight

     # Load datasets
     training_data = pd.read_csv(training_file.name)
@@ -392,19 +427,22 @@ def run_cross_validation(training_file, training_additive_file, testing_file, te
         epochs=epochs,
         batch_size=batch_size,
         outer_n_splits=outer_n_splits,
-        #inner_n_splits=inner_n_splits,
         learning_rate=learning_rate,
         min_child_weight=min_child_weight,
         feature_selection=feature_selection
     )

     # Save outputs
-    results_file = "cross_validation_results.csv"
+    #results_file = "cross_validation_results.csv"
     predicted_file = "predicted_phenotype.csv"
-    results.to_csv(results_file, index=False)
-    predicted_phenotypes.to_csv(predicted_file, index=False)
+    #results.to_csv(results_file, index=False)
+    if predicted_phenotypes is not None:
+        predicted_phenotypes.to_csv(predicted_file, index=False)

-    return results_file, predicted_file
+    # Generate visualization of TOPSIS scores
+    topsis_plot = visualize_topsis_scores(results)
+
+    return predicted_file, topsis_plot

 # Gradio interface
 with gr.Blocks() as interface:
@@ -423,21 +461,21 @@ with gr.Blocks() as interface:
     with gr.Row():
         feature_selection = gr.Checkbox(label="Enable Feature Selection", value=True)

-    output1 = gr.File(label="Cross-Validation Results (CSV)")
+    #output1 = gr.File(label="Cross-Validation Results (CSV)")
     output2 = gr.File(label="Predicted Phenotypes (CSV)")
+    output3 = gr.Image(label="TOPSIS Score Visualization")

     submit_btn = gr.Button("Run DeepMap")
     submit_btn.click(
         run_cross_validation,
         inputs=[
            training_file, training_additive_file, testing_file,
-            testing_additive_file, training_dominance_file,testing_dominance_file,
+            testing_additive_file, training_dominance_file, testing_dominance_file,
            feature_selection
         ],
-        outputs=[output1, output2]
+        outputs=[output2, output3]
     )

 # Launch the interface
 interface.launch()

-
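
The core change in this commit is that GRUModel now scales the training phenotypes with MinMaxScaler before fitting and inverse-transforms the network's predictions afterwards. A minimal sketch of that scale/inverse-transform round-trip, using a hypothetical target vector in place of trainy (illustrative only, not code from app.py):

import numpy as np
from sklearn.preprocessing import MinMaxScaler

# Hypothetical phenotype values standing in for trainy
trainy = np.array([2.5, 3.1, 4.7, 5.0, 3.8])

# MinMaxScaler expects 2-D input, so reshape the 1-D target to one column
target_scaler = MinMaxScaler()
trainy_scaled = target_scaler.fit_transform(trainy.reshape(-1, 1))

# A trained model would predict in the scaled [0, 1] range; the scaled
# targets stand in for those predictions here
predicted_scaled = trainy_scaled.flatten()

# Map predictions back to the original phenotype scale
predicted = target_scaler.inverse_transform(predicted_scaled.reshape(-1, 1)).flatten()
print(predicted)  # recovers the original trainy values up to floating-point rounding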