AshmithaIRRI committed
Commit 122d978 · verified · 1 Parent(s): bc01275

Delete app_recovery.py

Files changed (1): app_recovery.py (+0, −594)
app_recovery.py DELETED
@@ -1,594 +0,0 @@
# -*- coding: utf-8 -*-
"""
Created on Sun Nov 24 12:47:37 2024

@author: Ashmitha
"""

import io
import os
import tempfile

import numpy as np
import pandas as pd
import gradio as gr
import tensorflow as tf
from scipy.stats import pearsonr
from sklearn.ensemble import RandomForestRegressor
from sklearn.feature_selection import SelectFromModel
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from tensorflow.keras import regularizers
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.layers import (BatchNormalization, Conv1D, Dense, Dropout,
                                     Flatten, GRU, LeakyReLU, MaxPooling1D)
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from xgboost import XGBRegressor

#-------------------------------------Feature selection---------------------------------------------------------------------------------------------

def RandomForestFeatureSelection(trainX, trainy, num_features=60):
    rf = RandomForestRegressor(n_estimators=1000, random_state=50)
    rf.fit(trainX, trainy)

    # Get feature importances
    importances = rf.feature_importances_

    # Select the indices of the top num_features most important features
    indices = np.argsort(importances)[-num_features:]
    return indices
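
# A minimal usage sketch (this helper is not called elsewhere in the file;
# the variable names below are illustrative only):
# top_idx = RandomForestFeatureSelection(trainX, trainy, num_features=60)
# trainX_top, testX_top = trainX[:, top_idx], testX[:, top_idx]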
#----------------------------------------------------------GRU Model---------------------------------------------------------------------

def GRUModel(trainX, trainy, testX, testy, epochs=1000, batch_size=64, learning_rate=0.0001,
             l1_reg=0.001, l2_reg=0.001, dropout_rate=0.2, feature_selection=True):

    # Optional feature selection: rank features with a Random Forest and keep
    # those whose importance exceeds the mean importance
    if feature_selection:
        rf = RandomForestRegressor(n_estimators=100, random_state=42)
        rf.fit(trainX, trainy)
        selector = SelectFromModel(rf, threshold="mean", prefit=True)
        trainX = selector.transform(trainX)
        if testX is not None:
            testX = selector.transform(testX)
        print(f"Selected {trainX.shape[1]} features based on feature importance.")

    # Scale the input data to [0, 1] with MinMaxScaler
    scaler = MinMaxScaler()
    trainX_scaled = scaler.fit_transform(trainX)
    if testX is not None:
        testX_scaled = scaler.transform(testX)

    # Scale the target variable as well (reshape to 2D for the scaler)
    target_scaler = MinMaxScaler()
    trainy_scaled = target_scaler.fit_transform(trainy.reshape(-1, 1))

    # Reshape to 3D for the GRU: (samples, timesteps=1, features)
    trainX = trainX_scaled.reshape((trainX.shape[0], 1, trainX.shape[1]))
    if testX is not None:
        testX = testX_scaled.reshape((testX.shape[0], 1, testX.shape[1]))

    model = Sequential()

    # GRU layer
    model.add(GRU(512, input_shape=(trainX.shape[1], trainX.shape[2]), return_sequences=False,
                  kernel_regularizer=regularizers.l1_l2(l1=l1_reg, l2=l2_reg)))

    # Dense blocks with Batch Normalization, Dropout and LeakyReLU
    for units in (256, 128, 64, 32):
        model.add(Dense(units, kernel_initializer='he_normal',
                        kernel_regularizer=regularizers.l1_l2(l1=l1_reg, l2=l2_reg)))
        model.add(BatchNormalization())
        model.add(Dropout(dropout_rate))
        model.add(LeakyReLU(alpha=0.1))

    # Output layer with ReLU activation to prevent negative predictions
    model.add(Dense(1, activation="relu"))

    # Compile the model
    model.compile(loss='mse', optimizer=Adam(learning_rate=learning_rate), metrics=['mse'])

    # Callbacks for learning-rate reduction and early stopping
    learning_rate_reduction = ReduceLROnPlateau(monitor='val_loss', patience=10, verbose=1, factor=0.5, min_lr=1e-6)
    early_stopping = EarlyStopping(monitor='val_loss', verbose=1, restore_best_weights=True, patience=10)

    # Train the model
    history = model.fit(trainX, trainy_scaled, epochs=epochs, batch_size=batch_size,
                        validation_split=0.1, verbose=1,
                        callbacks=[learning_rate_reduction, early_stopping])

    # Predict, then invert the target scaling to return to the original range
    predicted_train = model.predict(trainX).flatten()
    predicted_train = target_scaler.inverse_transform(predicted_train.reshape(-1, 1)).flatten()

    predicted_test = None
    if testX is not None:
        predicted_test = model.predict(testX).flatten()
        predicted_test = target_scaler.inverse_transform(predicted_test.reshape(-1, 1)).flatten()

    return predicted_train, predicted_test, history
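
# Design note: the target is min-max scaled to [0, 1] before fitting, so the
# ReLU output layer plus inverse_transform keep predictions from falling below
# the minimum phenotype seen in training (ReLU is unbounded above, so larger
# values remain possible).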

#-----------------------------------------------------------DeepMap-------------------------------------------------------------------------------
def CNNModel(trainX, trainy, testX, testy, epochs=1000, batch_size=64, learning_rate=0.0001,
             l1_reg=0.0001, l2_reg=0.0001, dropout_rate=0.3, feature_selection=True):
    # Optional Random Forest feature selection (same scheme as GRUModel)
    if feature_selection:
        rf = RandomForestRegressor(n_estimators=100, random_state=42)
        rf.fit(trainX, trainy)
        selector = SelectFromModel(rf, threshold="mean", prefit=True)
        trainX = selector.transform(trainX)
        if testX is not None:
            testX = selector.transform(testX)
        print(f"Selected {trainX.shape[1]} features based on feature importance.")

    # Scaling the inputs
    scaler = MinMaxScaler()
    trainX_scaled = scaler.fit_transform(trainX)
    if testX is not None:
        testX_scaled = scaler.transform(testX)

    # Reshape for CNN input: (samples, features, channels)
    trainX = trainX_scaled.reshape((trainX.shape[0], trainX.shape[1], 1))
    if testX is not None:
        testX = testX_scaled.reshape((testX.shape[0], testX.shape[1], 1))

    model = Sequential()

    # Convolutional layers
    model.add(Conv1D(256, kernel_size=3, activation='relu', input_shape=(trainX.shape[1], 1),
                     kernel_regularizer=regularizers.l1_l2(l1=l1_reg, l2=l2_reg)))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Dropout(dropout_rate))

    model.add(Conv1D(128, kernel_size=3, activation='relu',
                     kernel_regularizer=regularizers.l1_l2(l1=l1_reg, l2=l2_reg)))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Dropout(dropout_rate))

    # Flatten and Dense layers
    model.add(Flatten())
    model.add(Dense(64, kernel_regularizer=regularizers.l1_l2(l1=l1_reg, l2=l2_reg)))
    model.add(LeakyReLU(alpha=0.1))
    model.add(Dropout(dropout_rate))

    model.add(Dense(1, activation='linear'))

    # Compile the model
    model.compile(loss='mse', optimizer=Adam(learning_rate=learning_rate), metrics=['mse'])

    # Callbacks
    learning_rate_reduction = ReduceLROnPlateau(monitor='val_loss', patience=5, verbose=1, factor=0.5, min_lr=1e-6)
    early_stopping = EarlyStopping(monitor='val_loss', verbose=1, restore_best_weights=True, patience=10)

    # Train the model (the target is used in its original scale here)
    history = model.fit(trainX, trainy, epochs=epochs, batch_size=batch_size, validation_split=0.1,
                        verbose=1, callbacks=[learning_rate_reduction, early_stopping])

    predicted_train = model.predict(trainX).flatten()
    predicted_test = model.predict(testX).flatten() if testX is not None else None

    return predicted_train, predicted_test, history
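
# Design note: the Conv1D layers treat the selected markers as a 1-D sequence
# with a single channel, so each kernel_size=3 filter pools information from
# neighbouring columns of the feature matrix; unlike GRUModel, the target here
# is trained in its original scale.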

#-------------------------------------------------------------------------Random Forest----------------------------------------------------
def RFModel(trainX, trainy, testX, testy, n_estimators=100, max_depth=None, feature_selection=True):
    # Optional Random Forest feature selection (same scheme as the other models)
    if feature_selection:
        rf = RandomForestRegressor(n_estimators=100, random_state=42)
        rf.fit(trainX, trainy)
        selector = SelectFromModel(rf, threshold="mean", prefit=True)
        trainX = selector.transform(trainX)
        if testX is not None:
            testX = selector.transform(testX)
        print(f"Selected {trainX.shape[1]} features based on feature importance.")

    # Scaling the feature data
    scaler = MinMaxScaler()
    trainX_scaled = scaler.fit_transform(trainX)
    if testX is not None:
        testX_scaled = scaler.transform(testX)

    # Define and train the RandomForest model; fit() returns the fitted
    # estimator itself, which is passed through as the "history" value
    rf_model = RandomForestRegressor(n_estimators=n_estimators, max_depth=max_depth, random_state=42)
    history = rf_model.fit(trainX_scaled, trainy)

    # Predictions
    predicted_train = rf_model.predict(trainX_scaled)
    predicted_test = rf_model.predict(testX_scaled) if testX is not None else None

    return predicted_train, predicted_test, history
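
# Example call with an explicit depth limit (argument values are illustrative):
# pred_tr, pred_te, rf_fit = RFModel(trainX, trainy, testX, None,
#                                    n_estimators=500, max_depth=20)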

#------------------------------------------------------------------------------XGBoost---------------------------------------------------------------
def XGBoostModel(trainX, trainy, testX, testy, learning_rate, min_child_weight,
                 feature_selection=True, n_estimators=100, max_depth=None):
    # Optional Random Forest feature selection (same scheme as the other models)
    if feature_selection:
        rf = RandomForestRegressor(n_estimators=100, random_state=42)
        rf.fit(trainX, trainy)
        selector = SelectFromModel(rf, threshold="mean", prefit=True)
        trainX = selector.transform(trainX)
        if testX is not None:
            testX = selector.transform(testX)
        print(f"Selected {trainX.shape[1]} features based on feature importance")

    # Scale the features
    scaler = MinMaxScaler()
    trainX_scaled = scaler.fit_transform(trainX)
    if testX is not None:
        testX_scaled = scaler.transform(testX)

    # Define and train the XGBoost model on the scaled features, wiring in the
    # learning_rate and min_child_weight arguments
    xgb_model = XGBRegressor(objective="reg:squarederror", n_estimators=n_estimators,
                             max_depth=max_depth, learning_rate=learning_rate,
                             min_child_weight=min_child_weight, random_state=42)
    history = xgb_model.fit(trainX_scaled, trainy)

    # Predictions
    predicted_train = xgb_model.predict(trainX_scaled)
    predicted_test = xgb_model.predict(testX_scaled) if testX is not None else None

    return predicted_train, predicted_test, history
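
# Example call (hyperparameter values are illustrative):
# pred_tr, pred_te, xgb_fit = XGBoostModel(trainX, trainy, testX, None,
#                                          learning_rate=0.01, min_child_weight=5)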

#----------------------------------------reading file----------------------------------------------------------------------------------------

# Helper function to read an uploaded CSV file from Gradio
def read_csv_file(uploaded_file):
    if uploaded_file is not None:
        if hasattr(uploaded_file, 'data'):    # NamedBytes-style upload
            return pd.read_csv(io.BytesIO(uploaded_file.data))
        elif hasattr(uploaded_file, 'name'):  # NamedString / tempfile-style upload
            return pd.read_csv(uploaded_file.name)
    return None
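
# Usage sketch: returns a DataFrame, or None when nothing was uploaded.
# df = read_csv_file(training_file)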

#-----------------------------------------------------------------calculate topsis score--------------------------------------------------------

def calculate_topsis_score(df):
    # Vector-normalize the training metrics (rows with NaN are excluded)
    metrics = df[['Train_MSE', 'Train_RMSE', 'Train_R2', 'Train_Corr']].dropna()
    norm_metrics = metrics / np.sqrt((metrics ** 2).sum(axis=0))

    # Ideal best and worst values for each metric
    ideal_best = pd.Series(index=norm_metrics.columns, dtype=float)
    ideal_worst = pd.Series(index=norm_metrics.columns, dtype=float)

    # MSE and RMSE are cost criteria: the minimum is best, the maximum is worst
    for col in ['Train_MSE', 'Train_RMSE']:
        ideal_best[col] = norm_metrics[col].min()
        ideal_worst[col] = norm_metrics[col].max()

    # R2 and correlation are benefit criteria: the maximum is best, the minimum is worst
    for col in ['Train_R2', 'Train_Corr']:
        ideal_best[col] = norm_metrics[col].max()
        ideal_worst[col] = norm_metrics[col].min()

    # Euclidean distances to the ideal best and worst
    dist_to_best = np.sqrt(((norm_metrics - ideal_best) ** 2).sum(axis=1))
    dist_to_worst = np.sqrt(((norm_metrics - ideal_worst) ** 2).sum(axis=1))

    # TOPSIS score: relative closeness to the ideal solution
    topsis_score = dist_to_worst / (dist_to_best + dist_to_worst)
    df['TOPSIS_Score'] = np.nan  # Initialize with NaN
    df.loc[metrics.index, 'TOPSIS_Score'] = topsis_score
    return df
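
# For reference, the TOPSIS closeness computed above is
#     S_i = d_i^- / (d_i^+ + d_i^-)
# where d_i^+ and d_i^- are the Euclidean distances of row i to the ideal-best
# and ideal-worst metric vectors, so S_i -> 1 as a model approaches the ideal.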

#--------------------------------------------------- Nested Cross validation---------------------------------------------------------------------------

def NestedKFoldCrossValidation(
    training_data, training_additive, testing_data, testing_additive,
    training_dominance, testing_dominance, epochs, learning_rate, min_child_weight,
    batch_size=64, outer_n_splits=2, output_file='cross_validation_results.csv',
    predicted_phenotype_file='predicted_phenotype.csv', feature_selection=True
):

    if 'phenotypes' not in training_data.columns:
        raise ValueError("Training data does not contain the 'phenotypes' column.")

    # Remove the sample-ID column from the additive and dominance matrices
    training_additive = training_additive.iloc[:, 1:]
    testing_additive = testing_additive.iloc[:, 1:]
    training_dominance = training_dominance.iloc[:, 1:]
    testing_dominance = testing_dominance.iloc[:, 1:]

    # Merge training and testing data with the additive and dominance components
    training_data_merged = pd.concat([training_data, training_additive, training_dominance], axis=1)
    testing_data_merged = pd.concat([testing_data, testing_additive, testing_dominance], axis=1)

    phenotypic_info = training_data['phenotypes'].values
    phenotypic_test_info = testing_data['phenotypes'].values if 'phenotypes' in testing_data.columns else None
    sample_ids = testing_data.iloc[:, 0].values

    # Columns 0 and 1 are assumed to be the sample ID and the phenotype
    training_genotypic_data_merged = training_data_merged.iloc[:, 2:].values
    testing_genotypic_data_merged = testing_data_merged.iloc[:, 2:].values

    # Feature selection on the merged genotypic matrix
    if feature_selection:
        rf = RandomForestRegressor(n_estimators=100, random_state=65)
        rf.fit(training_genotypic_data_merged, phenotypic_info)
        selector = SelectFromModel(rf, threshold="mean", prefit=True)
        training_genotypic_data_merged = selector.transform(training_genotypic_data_merged)
        testing_genotypic_data_merged = selector.transform(testing_genotypic_data_merged)
        print(f"Selected {training_genotypic_data_merged.shape[1]} features based on importance.")

    # Standardize the genotypic data
    scaler = StandardScaler()
    training_genotypic_data_merged = scaler.fit_transform(training_genotypic_data_merged)
    testing_genotypic_data_merged = scaler.transform(testing_genotypic_data_merged)

    outer_kf = KFold(n_splits=outer_n_splits)

    results = []
    all_predicted_phenotypes = []

    def calculate_metrics(true_values, predicted_values):
        mse = mean_squared_error(true_values, predicted_values)
        rmse = np.sqrt(mse)
        r2 = r2_score(true_values, predicted_values)
        corr = pearsonr(true_values, predicted_values)[0]
        return mse, rmse, r2, corr

    models = [
        ('GRUModel', GRUModel),
        ('CNNModel', CNNModel),
        ('RFModel', RFModel),
        ('XGBoostModel', XGBoostModel)
    ]

    for outer_fold, (outer_train_index, outer_test_index) in enumerate(outer_kf.split(phenotypic_info), 1):
        outer_trainX = training_genotypic_data_merged[outer_train_index]
        outer_trainy = phenotypic_info[outer_train_index]

        # The held-out evaluation set is always the external testing data
        outer_testX = testing_genotypic_data_merged
        outer_testy = phenotypic_test_info

        for model_name, model_func in models:
            print(f"Running model: {model_name} for fold {outer_fold}")
            if model_name in ('GRUModel', 'CNNModel'):
                predicted_train, predicted_test, history = model_func(
                    outer_trainX, outer_trainy, outer_testX, outer_testy,
                    epochs=epochs, batch_size=batch_size)
            elif model_name == 'RFModel':
                predicted_train, predicted_test, history = model_func(
                    outer_trainX, outer_trainy, outer_testX, outer_testy)
            else:
                predicted_train, predicted_test, history = model_func(
                    outer_trainX, outer_trainy, outer_testX, outer_testy,
                    learning_rate, min_child_weight)

            # Calculate metrics
            mse_train, rmse_train, r2_train, corr_train = calculate_metrics(outer_trainy, predicted_train)
            mse_test, rmse_test, r2_test, corr_test = (
                calculate_metrics(outer_testy, predicted_test) if outer_testy is not None
                else (None, None, None, None))

            results.append({
                'Model': model_name,
                'Fold': outer_fold,
                'Train_MSE': mse_train,
                'Train_RMSE': rmse_train,
                'Train_R2': r2_train,
                'Train_Corr': corr_train,
                'Test_MSE': mse_test,
                'Test_RMSE': rmse_test,
                'Test_R2': r2_test,
                'Test_Corr': corr_test
            })

            if predicted_test is not None:
                all_predicted_phenotypes.append(pd.DataFrame({
                    'Sample_ID': sample_ids,
                    'Predicted_Phenotype': predicted_test,
                    'Model': model_name
                }))

    # Average the per-fold metrics for each model
    results_df = pd.DataFrame(results)
    avg_results_df = results_df.groupby('Model').agg({
        'Train_MSE': 'mean', 'Train_RMSE': 'mean', 'Train_R2': 'mean', 'Train_Corr': 'mean',
        'Test_MSE': 'mean', 'Test_RMSE': 'mean', 'Test_R2': 'mean', 'Test_Corr': 'mean'
    }).reset_index()

    # TOPSIS over the averaged metrics. MSE and RMSE are cost criteria (lower is
    # better), so their normalized values are inverted before computing distances.
    def topsis_on_averages(df):
        metrics = df.iloc[:, 1:]
        norm_df = (metrics - metrics.min()) / (metrics.max() - metrics.min())
        for col in norm_df.columns:
            if 'MSE' in col:  # matches both the MSE and RMSE columns
                norm_df[col] = 1.0 - norm_df[col]

        # Positive and negative ideal solutions
        ideal_positive = norm_df.max(axis=0)
        ideal_negative = norm_df.min(axis=0)

        # Euclidean distances
        dist_positive = np.sqrt(((norm_df - ideal_positive) ** 2).sum(axis=1))
        dist_negative = np.sqrt(((norm_df - ideal_negative) ** 2).sum(axis=1))

        df['TOPSIS_Score'] = dist_negative / (dist_positive + dist_negative)
        return df

    avg_results_df = topsis_on_averages(avg_results_df)

    # Save the averaged results with TOPSIS scores
    avg_results_df.to_csv(output_file, index=False)

    # Save the predicted phenotypes, if any
    predicted_all_df = None
    if all_predicted_phenotypes:
        predicted_all_df = pd.concat(all_predicted_phenotypes, axis=0, ignore_index=True)
        predicted_all_df.to_csv(predicted_phenotype_file, index=False)

    return avg_results_df, predicted_all_df
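
# A minimal call sketch, assuming the six DataFrames are already loaded
# (argument values are illustrative, matching the defaults used by the GUI):
# avg_df, preds_df = NestedKFoldCrossValidation(
#     training_data, training_additive, testing_data, testing_additive,
#     training_dominance, testing_dominance,
#     epochs=1000, learning_rate=0.001, min_child_weight=5)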

#--------------------------------------------------------------------Gradio interface---------------------------------------------------------------

def run_cross_validation(training_file, training_additive_file, testing_file, testing_additive_file,
                         training_dominance_file, testing_dominance_file, feature_selection,
                         learning_rate=0.001, min_child_weight=5):

    # Default training parameters
    epochs = 1000
    batch_size = 64

    # Load the six datasets from the uploaded files
    training_data = pd.read_csv(training_file.name)
    training_additive = pd.read_csv(training_additive_file.name)
    testing_data = pd.read_csv(testing_file.name)
    testing_additive = pd.read_csv(testing_additive_file.name)
    training_dominance = pd.read_csv(training_dominance_file.name)
    testing_dominance = pd.read_csv(testing_dominance_file.name)

    # Call the cross-validation function
    results, predicted_phenotypes = NestedKFoldCrossValidation(
        training_data=training_data,
        training_additive=training_additive,
        testing_data=testing_data,
        testing_additive=testing_additive,
        training_dominance=training_dominance,
        testing_dominance=testing_dominance,
        epochs=epochs,
        batch_size=batch_size,
        learning_rate=learning_rate,
        min_child_weight=min_child_weight,
        feature_selection=feature_selection
    )

    # Save outputs
    results_file = "cross_validation_results.csv"
    predicted_file = "predicted_phenotype.csv"
    results.to_csv(results_file, index=False)
    if predicted_phenotypes is not None:
        predicted_phenotypes.to_csv(predicted_file, index=False)

    return results_file, predicted_file

# Gradio interface
with gr.Blocks() as interface:
    gr.Markdown("# DeepMap - An Integrated GUI for Genotype to Phenotype Prediction")

    with gr.Row():
        training_file = gr.File(label="Upload Training Data (CSV)")
        training_additive_file = gr.File(label="Upload Training Additive Data (CSV)")
        training_dominance_file = gr.File(label="Upload Training Dominance Data (CSV)")

    with gr.Row():
        testing_file = gr.File(label="Upload Testing Data (CSV)")
        testing_additive_file = gr.File(label="Upload Testing Additive Data (CSV)")
        testing_dominance_file = gr.File(label="Upload Testing Dominance Data (CSV)")

    with gr.Row():
        feature_selection = gr.Checkbox(label="Enable Feature Selection", value=True)

    output1 = gr.File(label="Cross-Validation Results (CSV)")
    output2 = gr.File(label="Predicted Phenotypes (CSV)")

    submit_btn = gr.Button("Run DeepMap")
    submit_btn.click(
        run_cross_validation,
        inputs=[
            training_file, training_additive_file, testing_file,
            testing_additive_file, training_dominance_file, testing_dominance_file,
            feature_selection
        ],
        outputs=[output1, output2]
    )

# Launch the interface
interface.launch()
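
# If remote access is needed, a public share link is one option:
# interface.launch(share=True)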