| | from __future__ import annotations |
| |
|
| | from typing import List, Tuple |
| |
|
# Lookup table from internal model identifiers to human-readable display
# labels. Many labels use `$...$` with `_{...}` subscripts, which looks like
# matplotlib mathtext / LaTeX markup -- presumably consumed by plotting code;
# confirm against the caller.
#
# NOTE(review): if these are rendered by a TeX-style math engine, plain spaces
# inside `$...$` (e.g. '$Random ESM2_{8M}$') are typically dropped; verify the
# rendered output before relying on the spacing.
MODEL_NAMES = {
    # Randomly initialised baselines.
    'Random': 'Random vectors',
    'Random-ESM2-8': r'$Random ESM2_{8M}$',
    'Random-ESM2-35': r'$Random ESM2_{35M}$',
    'Random-ESM2-150': r'$Random ESM2_{150M}$',
    'Random-ESM2-650': r'$Random ESM2_{650M}$',
    'Random-Transformer': 'Random Transformer',
    # ESM2 / ESMC / E1 families.
    'ESM2-8': r'$ESM2_{8M}$',
    'ESM2-35': r'$ESM2_{35M}$',
    'ESM2-150': r'$ESM2_{150M}$',
    'ESM2-650': r'$ESM2_{650M}$',
    'ESM2-3B': r'$ESM2_{3B}$',
    'ESM2-diff-150': r'$ESM2_{diff-150M}$',
    'ESM2-diffAV-150': r'$ESM2_{diffAV-150M}$',
    'ESMC-300': r'$ESMC_{300M}$',
    'ESMC-600': r'$ESMC_{600M}$',
    'E1-150': r'$E1_{150M}$',
    'E1-300': r'$E1_{300M}$',
    'E1-600': r'$E1_{600M}$',
    # ProtBert / ProtT5 families.
    'ProtBert': r'$ProtBert_{420M}$',
    'ProtBert-BFD': r'$ProtBert_{BFD}$',
    'ProtT5': r'ProtT5-enc$_{3B}$',
    'ProtT5-XL-UniRef50-full-prec': r'ProtT5-XL$_{UniRef50}$',
    'ProtT5-XXL-UniRef50': r'ProtT5-XXL$_{UniRef50}$',
    'ProtT5-XL-BFD': r'ProtT5-XL$_{BFD}$',
    'ProtT5-XXL-BFD': r'ProtT5-XXL$_{BFD}$',
    # ANKH family.
    'ANKH-Base': r'ANKH-Base$_{400M}$',
    'ANKH-Large': r'ANKH-Large$_{1.2B}$',
    'ANKH2-Large': r'ANKH2-Large$_{1.2B}$',
    # DSM / GLM2 / DPLM / ProtCLM families.
    'DSM-150': r'$DSM_{150M}$',
    'DSM-650': r'$DSM_{650M}$',
    'DSM-PPI': r'$DSM_{PPI}$',
    'GLM2-150': r'$GLM2_{150M}$',
    'GLM2-650': r'$GLM2_{650M}$',
    'GLM2-GAIA': r'$GLM2_{GAIA}$',
    'DPLM-150': r'$DPLM_{150M}$',
    'DPLM-650': r'$DPLM_{650M}$',
    'DPLM-3B': r'$DPLM_{3B}$',
    'ProtCLM-1b': r'$ProtCLM_{1B}$',
    # One-hot baselines.
    'OneHot-Protein': 'OneHot Protein',
    'OneHot-DNA': 'OneHot DNA',
    'OneHot-RNA': 'OneHot RNA',
    'OneHot-Codon': 'OneHot Codon',
    # AMPLIFY family.
    'AMPLIFY-120': r'$AMPLIFY_{120M}$',
    'AMPLIFY-350': r'$AMPLIFY_{350M}$',
}
| |
|
# Lookup table from dataset identifiers to human-readable display labels.
# Most datasets appear under two or more alias keys (e.g. a hyphenated name
# and an underscore/raw name) that map to the same label. Labels using
# `$...$` with `_{...}` look like matplotlib mathtext / LaTeX markup --
# presumably consumed by plotting code; confirm against the caller.
DATASET_NAMES = {
    # EC / GO keys.
    'EC': 'EC',
    'EC_reg': 'EC',
    'GO-CC': r'$GO_{CC}$',
    'CC_reg': r'$GO_{CC}$',
    'GO-BP': r'$GO_{BP}$',
    'BP_reg': r'$GO_{BP}$',
    'GO-MF': r'$GO_{MF}$',
    'MF_reg': r'$GO_{MF}$',

    # Single-sequence property keys.
    'MB': 'MB',
    'MB_reg': 'MB',
    'DeepLoc-2': r'$DL_{2}$',
    'DL2_reg': r'$DL_{2}$',
    'DeepLoc-10': r'$DL_{10}$',
    'DL10_reg': r'$DL_{10}$',
    'Subcellular': 'Subcellular',
    'SL_13': 'Subcellular',
    'enzyme-kcat': r'$k_{cat}$',
    'enzyme_kcat': r'$k_{cat}$',
    'solubility': 'solubility',
    'solubility_prediction': 'solubility',
    'localization': 'localization',
    'localization_prediction': 'localization',
    'temperature-stability': 'temperature stability',
    'temperature_stability': 'temperature stability',
    'optimal-temperature': 'optimal temperature',
    'optimal_temperature': 'optimal temperature',
    'optimal-ph': 'optimal pH',
    'optimal_ph': 'optimal pH',
    'material-production': 'material production',
    'material_production': 'material production',
    'fitness-prediction': 'fitness',
    'fitness_prediction': 'fitness',
    'number-of-folds': 'folds',
    'fold_prediction': 'folds',
    'cloning-clf': 'cloning-clf',
    'cloning_clf': 'cloning-clf',
    'stability-prediction': 'stability',
    'stability_prediction': 'stability',
    'ec-active': r'$EC_{singlelabel}$',
    'ec_active': r'$EC_{singlelabel}$',
    'ecoli_expression': 'E. coli expression',
    'soluprot': 'soluprot',
    'KSMoFinder-clustered': r'$KSMoFinder_{clustered}$',
    'ksmo_clustered': r'$KSMoFinder_{clustered}$',
    'KSMoFinder': 'KSMoFinder',
    'KSmo_fixed': 'KSMoFinder',

    # PPI / affinity keys.
    'human-ppi-saprot': r'$Human-PPI_{saprot}$',
    'HPPI': r'$Human-PPI_{saprot}$',
    'human-ppi-pinui': r'$Human-PPI_{PiNUI}$',
    'HPPI_PiNUI': r'$Human-PPI_{PiNUI}$',
    'yeast-ppi-pinui': r'$Yeast-PPI_{PiNUI}$',
    'YPPI_PiNUI': r'$Yeast-PPI_{PiNUI}$',
    'peptide-HLA-MHC-affinity': 'peptide HLA MHC affinity',
    'peptide_HLA_MHC_affinity_ppi': 'peptide HLA MHC affinity',
    'shs27-ppi-raw': r'$SHS_{27k-raw}-ppi$',
    'SHS27k': r'$SHS_{27k-raw}-ppi$',
    'shs148-ppi-raw': r'$SHS_{148k-raw}-ppi$',
    'SHS148k': r'$SHS_{148k-raw}-ppi$',
    'shs27-ppi-random': r'$SHS_{27k-random}-ppi$',
    'shs148-ppi-random': r'$SHS_{148k-random}-ppi$',
    'shs27-ppi-dfs': r'$SHS_{27k-dfs}-ppi$',
    'shs148-ppi-dfs': r'$SHS_{148k-dfs}-ppi$',
    'shs27-ppi-bfs': r'$SHS_{27k-bfs}-ppi$',
    'shs148-ppi-bfs': r'$SHS_{148k-bfs}-ppi$',
    # The string-ppi-* labels use the same `$NAME_{split}-ppi$` form as the
    # SHS entries and as their ppi_STRING_*_2025 aliases below, so both
    # spellings of a dataset resolve to one display label.
    'string-ppi-random': r'$STRING_{random}-ppi$',
    'string-ppi-dfs': r'$STRING_{dfs}-ppi$',
    'string-ppi-bfs': r'$STRING_{bfs}-ppi$',
    'ppi_SHS148k_bfs_2025': r'$SHS_{148k-bfs}-ppi$',
    'ppi_SHS148k_dfs_2025': r'$SHS_{148k-dfs}-ppi$',
    'ppi_SHS27k_bfs_2025': r'$SHS_{27k-bfs}-ppi$',
    'ppi_SHS27k_dfs_2025': r'$SHS_{27k-dfs}-ppi$',
    'ppi_SHS27k_random_2025': r'$SHS_{27k-random}-ppi$',
    'ppi_SHS148k_random_2025': r'$SHS_{148k-random}-ppi$',
    'ppi_STRING_random_2025': r'$STRING_{random}-ppi$',
    'ppi_STRING_dfs_2025': r'$STRING_{dfs}-ppi$',
    'ppi_STRING_bfs_2025': r'$STRING_{bfs}-ppi$',
    'gold-ppi': r'$Human PPI_{bernett}$',
    'bernett_gold_ppi': r'$Human PPI_{bernett}$',
    'plm-interact': r'$PLM-Interact_{human / cross}$',
    'plm_interact_human_train_cross_ppi': r'$PLM-Interact_{human / cross}$',
    'ppi-mutation-effect': r'$PPI_{mutation effect}$',
    'ppi_mutation_effect': r'$PPI_{mutation effect}$',
    'PPA-ppi': r'$PPA_{PPI}$',
    'ppi_affinity': r'$PPA_{PPI}$',
    'ProteinProteinAffinity': 'PPI binding affinity',

    # Secondary-structure keys.
    'SecondaryStructure-3': r'$SS_{3}$',
    'SecondaryStructure-8': r'$SS_{8}$',
    'SS3': r'$SS_{3}$',
    'SS8': r'$SS_{8}$',

    # Fluorescence / AtOMT1 keys.
    'fluorescence-prediction': 'fluorescence',
    'fluorescence_prediction': 'fluorescence',
    'millionfull_round_1_oct_2025': r'$AtOMT1_{millionfull}$',
    'million_full': r'$AtOMT1_{millionfull}$',

    # Plastic-degradation, foldseek and Bernett keys.
    'plastic': r'$plastic degradation_{benchmark}$',
    'plastic_degradation_benchmark': r'$plastic degradation_{benchmark}$',
    'foldseek-fold': 'foldseek fold',
    'foldseek-inverse': 'foldseek inverse',
    'foldseek_dataset': 'foldseek',
    'bernett_processed': r'$Bernett_{processed}$',

    # ProteinGym keys.
    'proteingym_zs': r'$ProteinGym_{zero-shot}$',
    'proteingym_supervised': r'$ProteinGym_{supervised}$',

    # Taxonomy keys (three alias families share one label per rank).
    'taxonomy_domain': r'$taxonomy_{domain}$',
    'taxonomy_kingdom': r'$taxonomy_{kingdom}$',
    'taxonomy_phylum': r'$taxonomy_{phylum}$',
    'taxonomy_class': r'$taxonomy_{class}$',
    'taxonomy_order': r'$taxonomy_{order}$',
    'taxonomy_family': r'$taxonomy_{family}$',
    'taxonomy_genus': r'$taxonomy_{genus}$',
    'taxonomy_species': r'$taxonomy_{species}$',
    'diff_phylogeny': r'$taxonomy_{different}$',
    'diff_phylo': r'$taxonomy_{different}$',
    'taxonomy_domain_0.4_clusters': r'$taxonomy_{domain}$',
    'taxonomy_kingdom_0.4_clusters': r'$taxonomy_{kingdom}$',
    'taxonomy_phylum_0.4_clusters': r'$taxonomy_{phylum}$',
    'taxonomy_class_0.4_clusters': r'$taxonomy_{class}$',
    'taxonomy_order_0.4_clusters': r'$taxonomy_{order}$',
    'taxonomy_family_0.4_clusters': r'$taxonomy_{family}$',
    'taxonomy_genus_0.4_clusters': r'$taxonomy_{genus}$',
    'taxonomy_species_0.4_clusters': r'$taxonomy_{species}$',
    'taxon_domain': r'$taxonomy_{domain}$',
    'taxon_kingdom': r'$taxonomy_{kingdom}$',
    'taxon_phylum': r'$taxonomy_{phylum}$',
    'taxon_class': r'$taxonomy_{class}$',
    'taxon_order': r'$taxonomy_{order}$',
    'taxon_family': r'$taxonomy_{family}$',
    'taxon_genus': r'$taxonomy_{genus}$',
    'taxon_species': r'$taxonomy_{species}$',

    # pLDDT and realness keys.
    'plddt': r'$pLDDT_{AlphaFold2}$',
    'af2_plddt': r'$pLDDT_{AlphaFold2}$',
    'realness': r'$Realness_{dataset}$',
    'realness_dataset': r'$Realness_{dataset}$',
}
| |
|
# Ordered (metric key, display label) pairs. The name suggests this is the
# preference order for classification metrics -- confirm against the consumer.
CLS_PREFS: List[Tuple[str, str]] = [
    ("f1", "F1"),
    ("mcc", "MCC"),
    ("accuracy", "Accuracy"),
]
| |
|
# Ordered (metric key, display label) pairs. The name suggests this is the
# preference order for regression metrics -- confirm against the consumer.
REG_PREFS: List[Tuple[str, str]] = [
    ("spearman", "Spearman rho"),
    ("r_squared", "R²"),
    ("pearson", "Pearson r"),
]
| |
|