# Registry of dataset short names -> data source identifiers.
#
# Keys are the short names referenced by the lists defined in this module
# (e.g. `standard_data_benchmark`, `vector_benchmark`). Values have the
# `owner/name` form, with two exceptions ('proteingym_zs' and
# 'proteingym_supervised') that map to themselves.
# NOTE(review): the `owner/name` values look like Hugging Face Hub repository
# ids, and the self-mapped ProteinGym entries are presumably special-cased by
# the loader -- confirm against the code that consumes this mapping.
supported_datasets = {
    # Annotation and localization tasks
    'EC': 'GleghornLab/EC_reg',
    'GO-CC': 'GleghornLab/CC_reg',
    'GO-BP': 'GleghornLab/BP_reg',
    'GO-MF': 'GleghornLab/MF_reg',
    'MB': 'GleghornLab/MB_reg',
    'DeepLoc-2': 'GleghornLab/DL2_reg',
    'DeepLoc-10': 'GleghornLab/DL10_reg',
    'Subcellular': 'GleghornLab/SL_13',

    # Property prediction tasks
    'enzyme-kcat': 'GleghornLab/enzyme_kcat',
    'solubility': 'GleghornLab/solubility_prediction',
    'localization': 'GleghornLab/localization_prediction',
    'temperature-stability': 'GleghornLab/temperature_stability',
    'peptide-HLA-MHC-affinity': 'GleghornLab/peptide_HLA_MHC_affinity_ppi',
    'optimal-temperature': 'GleghornLab/optimal_temperature',
    'optimal-ph': 'GleghornLab/optimal_ph',
    'material-production': 'GleghornLab/material_production',
    'fitness-prediction': 'GleghornLab/fitness_prediction',
    'number-of-folds': 'GleghornLab/fold_prediction',
    'cloning-clf': 'GleghornLab/cloning_clf',
    'stability-prediction': 'GleghornLab/stability_prediction',
    'human-ppi-saprot': 'GleghornLab/HPPI',
    'SecondaryStructure-3': 'GleghornLab/SS3',
    'SecondaryStructure-8': 'GleghornLab/SS8',
    'fluorescence-prediction': 'GleghornLab/fluorescence_prediction',
    # 'plastic' is also listed in `internal_datasets`.
    'plastic': 'GleghornLab/plastic_degradation_benchmark',

    # PPI datasets
    'gold-ppi': 'Synthyra/bernett_gold_ppi',
    'human-ppi-pinui': 'GleghornLab/HPPI_PiNUI',
    'yeast-ppi-pinui': 'GleghornLab/YPPI_PiNUI',
    'shs27-ppi-raw': 'Synthyra/SHS27k',
    'shs148-ppi-raw': 'Synthyra/SHS148k',
    'shs27-ppi-random': 'GleghornLab/ppi_SHS27k_random_2025',
    'shs148-ppi-random': 'GleghornLab/ppi_SHS148k_random_2025',
    'shs27-ppi-dfs': 'GleghornLab/ppi_SHS27k_dfs_2025',
    'shs148-ppi-dfs': 'GleghornLab/ppi_SHS148k_dfs_2025',
    'shs27-ppi-bfs': 'GleghornLab/ppi_SHS27k_bfs_2025',
    'shs148-ppi-bfs': 'GleghornLab/ppi_SHS148k_bfs_2025',
    'string-ppi-random': 'GleghornLab/ppi_STRING_random_2025',
    'string-ppi-dfs': 'GleghornLab/ppi_STRING_dfs_2025',
    'string-ppi-bfs': 'GleghornLab/ppi_STRING_bfs_2025',
    'plm-interact': 'GleghornLab/plm_interact_human_train_cross_ppi',
    'ppi-mutation-effect': 'GleghornLab/ppi_mutation_effect',
    'PPA-ppi': 'Synthyra/ppi_affinity',

    # Both foldseek names intentionally share one source identifier.
    'foldseek-fold': 'lhallee/foldseek_dataset',
    'foldseek-inverse': 'lhallee/foldseek_dataset',
    'ec-active': 'lhallee/ec_active',
    'bernett_processed': 'lhallee/bernett_processed',

    # Self-mapped entries (value equals key).
    'proteingym_zs': 'proteingym_zs',
    'proteingym_supervised': 'proteingym_supervised',

    # Taxonomy datasets, one per rank
    'taxon_domain': 'GleghornLab/taxonomy_domain_0.4_clusters',
    'taxon_kingdom': 'GleghornLab/taxonomy_kingdom_0.4_clusters',
    'taxon_phylum': 'GleghornLab/taxonomy_phylum_0.4_clusters',
    'taxon_class': 'GleghornLab/taxonomy_class_0.4_clusters',
    'taxon_order': 'GleghornLab/taxonomy_order_0.4_clusters',
    'taxon_family': 'GleghornLab/taxonomy_family_0.4_clusters',
    'taxon_genus': 'GleghornLab/taxonomy_genus_0.4_clusters',
    'taxon_species': 'GleghornLab/taxonomy_species_0.4_clusters',
    'diff_phylogeny': 'GleghornLab/diff_phylo',

    # Miscellaneous
    'plddt': 'GleghornLab/af2_plddt',
    'realness': 'GleghornLab/realness_dataset',
    'million_full': 'GleghornLab/millionfull_round_1_oct_2025',
    'soluprot': 'GleghornLab/soluprot',
    'ecoli_expression': 'GleghornLab/ecoli_expression',
    'KSMoFinder-clustered': 'GleghornLab/ksmo_clustered',
    'KSMoFinder': 'GleghornLab/KSmo_fixed',
}
# Subset of the dataset registry flagged as internal. The single entry
# duplicates the 'plastic' key/value pair found in `supported_datasets`.
# NOTE(review): presumably these datasets need private access -- confirm
# against the code that reads this mapping.
internal_datasets = {
    'plastic': 'GleghornLab/plastic_degradation_benchmark',
}
# Dataset short names (keys of `supported_datasets`) grouped by theme.
# NOTE(review): the name suggests these are the datasets that can be run with
# per-sequence vector representations -- confirm against the caller.
# Each name appears exactly once; a second 'string-ppi-bfs' entry that used to
# sit in the raw-PPI group was removed so iterating the list does not visit
# that dataset twice.
possible_with_vector_reps = [
    'EC',

    # GO aspects and subcellular
    'GO-CC',
    'GO-BP',
    'GO-MF',
    'Subcellular',

    # PPI split variants (random / dfs / bfs)
    'shs27-ppi-random',
    'shs27-ppi-dfs',
    'shs27-ppi-bfs',
    'shs148-ppi-random',
    'shs148-ppi-dfs',
    'shs148-ppi-bfs',
    'string-ppi-random',
    'string-ppi-dfs',
    'string-ppi-bfs',

    # Single-sequence tasks
    'MB',
    'DeepLoc-2',
    'DeepLoc-10',
    'solubility',
    'temperature-stability',
    'material-production',
    'fitness-prediction',
    'number-of-folds',
    'cloning-clf',
    'stability-prediction',
    'ec-active',
    'localization',

    # Taxonomy ranks and phylogeny
    'taxon_domain',
    'taxon_kingdom',
    'taxon_phylum',
    'taxon_class',
    'taxon_order',
    'taxon_family',
    'taxon_genus',
    'taxon_species',
    'diff_phylogeny',

    # Other PPI datasets
    'shs27-ppi-raw',
    'shs148-ppi-raw',
    'plm-interact',
    'gold-ppi',
    'human-ppi-saprot',
    'human-ppi-pinui',
    'yeast-ppi-pinui',

    # Numeric-target tasks
    'enzyme-kcat',
    'optimal-temperature',
    'optimal-ph',
    'million_full',

    'PPA-ppi',
]
# Ordered list of dataset short names forming the "standard" benchmark.
# Every entry is a key of `supported_datasets`.
standard_data_benchmark = [
    'ec-active',
    'EC',
    'GO-CC',
    'GO-BP',
    'GO-MF',
    'MB',
    'DeepLoc-2',
    'DeepLoc-10',
    'enzyme-kcat',
    'optimal-temperature',
    'optimal-ph',
    'fitness-prediction',
]
# Ordered list of dataset short names forming the vector benchmark.
# Every entry is a key of `supported_datasets`.
# NOTE(review): 'soluprot' appears here but not in `possible_with_vector_reps`;
# confirm whether it should be added there or dropped here.
vector_benchmark = [
    'EC',

    # GO aspects and subcellular
    'GO-CC',
    'GO-BP',
    'GO-MF',
    'Subcellular',

    # PPI, bfs split variants only
    'shs27-ppi-bfs',
    'shs148-ppi-bfs',
    'string-ppi-bfs',

    # Single-sequence tasks
    'MB',
    'DeepLoc-2',
    'DeepLoc-10',
    'solubility',
    'temperature-stability',
    'material-production',
    'fitness-prediction',
    'number-of-folds',
    'cloning-clf',
    'stability-prediction',
    'ec-active',
    'soluprot',

    # Taxonomy and phylogeny
    'taxon_species',
    'diff_phylogeny',

    # Other PPI datasets
    'plm-interact',
    'gold-ppi',

    # Numeric-target tasks
    'enzyme-kcat',
    'optimal-temperature',
    'optimal-ph',
    'million_full',

    'PPA-ppi',
]
# Small list of dataset short names used for testing.
# Every entry is a key of `supported_datasets`.
testing = [
    'EC',
    'DeepLoc-2',
    'DeepLoc-10',
    'enzyme-kcat',
    # Previously 'human-ppi', which is not a key of `supported_datasets`.
    # NOTE(review): 'human-ppi-saprot' (GleghornLab/HPPI) was picked as the
    # replacement; switch to 'human-ppi-pinui' if that was the intent.
    'human-ppi-saprot',
    'plddt',
    'SecondaryStructure-3',
]