Spaces:
Running
Running
Commit ·
ef814bf
0
Parent(s):
init
Browse files- .gitattributes +4 -0
- .gitignore +1 -0
- analysis/de_all_48.tsv +171 -0
- analysis/final_results.csv +201 -0
- analysis/re_all_48.tsv +145 -0
- ckp/Multi_seed0/multi_seed0_fold1.pth +3 -0
- ckp/Multi_seed0/multi_seed0_fold2.pth +3 -0
- ckp/Multi_seed0/multi_seed0_fold3.pth +3 -0
- ckp/Multi_seed0/multi_seed0_fold4.pth +3 -0
- ckp/Multi_seed0/multi_seed0_fold5.pth +3 -0
- config.py +5 -0
- data/__init__.py +0 -0
- data/create_dataset.py +210 -0
- data/datasets/atac_labelled.pkl +3 -0
- data/datasets/clones.csv +3 -0
- data/datasets/flux_labelled_11nov.csv +3 -0
- data/datasets/metabolic_model_metadata.csv +3 -0
- data/datasets/rna_labelled.pkl +3 -0
- data/datasets/rna_labelled_all.pkl +3 -0
- data/load_data.py +291 -0
- data/preprocess_data.py +163 -0
- interpretation/__init__.py +0 -0
- interpretation/attentions.py +244 -0
- interpretation/latentspace.py +219 -0
- interpretation/metrics.py +113 -0
- interpretation/predictions.py +359 -0
- interpretation/shapvalues.py +126 -0
- interpretation/similarity.py +26 -0
- interpretation/visualization.py +560 -0
- models/__init__.py +1 -0
- models/transformers.py +339 -0
- notebooks/analysis_plots.ipynb +0 -0
- objects/degs.pkl +3 -0
- objects/fi_shift_atac.pkl +3 -0
- objects/fi_shift_flux.pkl +3 -0
- objects/fi_shift_rna.pkl +3 -0
- objects/fold_results_multi.pkl +3 -0
- objects/mutlimodal_dataset.pkl +3 -0
- utils/__init__.py +4 -0
- utils/helpers.py +132 -0
- utils/losses.py +24 -0
.gitattributes
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
data/datasets/*.csv filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
.gitignore
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
__pycache__/
|
analysis/de_all_48.tsv
ADDED
|
@@ -0,0 +1,171 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Category Term Count % PValue Genes List Total Pop Hits Pop Total Fold Enrichment Bonferroni Benjamini FDR
|
| 2 |
+
GOTERM_CC_DIRECT GO:0005615~extracellular space 23 48.93617021276596 3.0643470340330746E-15 SPARC, VCAM1, GPX3, SERPINF1, COL12A1, BGN, ADM, PTN, CXCL14, COL1A1, GREM2, VCAN, COL3A1, COL1A2, FLRT2, PTPRZ1, CRISPLD2, COL6A2, COL8A1, S100A4, TIMP1, IGFBP6, FBN1 47 1809 29722 8.040247932912271 3.979039320256561E-13 3.9223642035623355E-13 3.4627121484573744E-13
|
| 3 |
+
GOTERM_CC_DIRECT GO:0031012~extracellular matrix 12 25.53191489361702 1.6644129825397194E-13 COL1A1, COL3A1, VCAN, SPARC, PTPRZ1, CRISPLD2, COL6A2, COL12A1, BGN, TIMP1, PTN, FBN1 47 257 29722 29.52760990148191 2.1302071218087804E-11 1.0652243088254204E-11 9.403933351349415E-12
|
| 4 |
+
GOTERM_CC_DIRECT GO:0062023~collagen-containing extracellular matrix 12 25.53191489361702 2.6096757316181293E-12 COL1A1, COL3A1, VCAN, SPARC, COL1A2, COL6A2, COL12A1, BGN, COL8A1, IGFBP6, FBLN2, FBN1 47 331 29722 22.926271131966313 3.340403509355383E-10 1.1134616454904018E-10 9.829778589094954E-11
|
| 5 |
+
REACTOME_PATHWAY R-MMU-1474244~Extracellular matrix organization 14 29.78723404255319 9.412796932920763E-12 SPARC, VCAM1, COL12A1, BGN, FBLN2, COL1A1, ACTA2, VCAN, COL3A1, COL1A2, COL6A2, COL8A1, TIMP1, FBN1 35 287 9277 12.929616724738676 2.183770497765636E-9 2.1931816853705377E-9 1.97668735591336E-9
|
| 6 |
+
GOTERM_CC_DIRECT GO:0005576~extracellular region 19 40.42553191489361 5.071027105503714E-11 CTLA2A, VCAM1, GPX3, SERPINF1, COL12A1, BGN, PTN, FBLN2, COL1A1, VCAN, COL3A1, COL1A2, FLRT2, PTPRZ1, CRISPLD2, COL6A2, S100A4, TIMP1, FBN1 47 1780 29722 6.750155390867797 6.490921578006237E-9 1.6227286737611885E-9 1.4325651573047992E-9
|
| 7 |
+
GOTERM_MF_DIRECT GO:0005201~extracellular matrix structural constituent 7 14.893617021276595 4.554153190661396E-9 COL1A1, COL3A1, SPARC, COL1A2, IGFBP6, FBLN2, FBN1 44 91 28924 50.56643356643356 5.829314366767591E-7 4.787582223598892E-7 4.4883583346239615E-7
|
| 8 |
+
GOTERM_MF_DIRECT GO:0030020~extracellular matrix structural constituent conferring tensile strength 6 12.76595744680851 7.480597224373269E-9 COL1A1, COL3A1, COL1A2, COL6A2, COL12A1, COL8A1 44 46 28924 85.74308300395256 9.575159934938071E-7 4.787582223598892E-7 4.4883583346239615E-7
|
| 9 |
+
REACTOME_PATHWAY R-MMU-3000178~ECM proteoglycans 7 14.893617021276595 1.9030924365705653E-8 COL1A1, COL3A1, VCAN, SPARC, COL1A2, COL6A2, BGN 35 49 9277 37.86530612244898 4.415164754023593E-6 2.2171026886047086E-6 1.9982470583990936E-6
|
| 10 |
+
KEGG_PATHWAY mmu04820:Cytoskeleton in muscle cells 10 21.27659574468085 2.2847972312714302E-8 COL1A1, COL3A1, VCAN, COL1A2, COL6A2, BGN, MYH11, FBLN2, MYL9, FBN1 31 232 9565 13.299499443826473 1.5993567997751867E-6 1.5993580618900011E-6 1.439422255701001E-6
|
| 11 |
+
GOTERM_CC_DIRECT GO:0005604~basement membrane 7 14.893617021276595 3.460258164716589E-8 ACTA2, SPARC, SERPINF1, COL8A1, TIMP1, PTN, FBN1 47 122 29722 36.28426927101499 4.4291207241808905E-6 8.858260901674469E-7 7.820183452259493E-7
|
| 12 |
+
GOTERM_CC_DIRECT GO:0005581~collagen trimer 6 12.76595744680851 1.771586448374384E-7 COL1A1, COL3A1, COL1A2, COL6A2, COL12A1, COL8A1 47 82 29722 46.271925272444214 2.2676051445791323E-5 3.7793844231986857E-6 3.3364878111050898E-6
|
| 13 |
+
REACTOME_PATHWAY R-MMU-216083~Integrin cell surface interactions 7 14.893617021276595 3.2418830335220263E-7 COL1A1, COL3A1, VCAM1, COL1A2, COL6A2, COL8A1, FBN1 35 78 9277 23.78717948717949 7.520887023115819E-5 1.5661310271364195E-5 1.4115344021401206E-5
|
| 14 |
+
REACTOME_PATHWAY R-MMU-8948216~Collagen chain trimerization 6 12.76595744680851 3.3185861712795075E-7 COL1A1, COL3A1, COL1A2, COL6A2, COL12A1, COL8A1 35 41 9277 38.78885017421603 7.698824820467909E-5 1.5661310271364195E-5 1.4115344021401206E-5
|
| 15 |
+
REACTOME_PATHWAY R-MMU-1474228~Degradation of the extracellular matrix 8 17.02127659574468 3.3607961955717157E-7 COL1A1, COL3A1, COL1A2, COL6A2, COL12A1, COL8A1, TIMP1, FBN1 35 129 9277 16.43765227021041 7.796744522503563E-5 1.5661310271364195E-5 1.4115344021401206E-5
|
| 16 |
+
REACTOME_PATHWAY R-MMU-2022090~Assembly of collagen fibrils and other multimeric structures 6 12.76595744680851 9.16462494236312E-7 COL1A1, COL3A1, COL1A2, COL6A2, COL12A1, COL8A1 35 50 9277 31.806857142857144 2.125967941876139E-4 3.558929352617678E-5 3.207618729827092E-5
|
| 17 |
+
REACTOME_PATHWAY R-MMU-1442490~Collagen degradation 6 12.76595744680851 1.1183145403746236E-6 COL1A1, COL3A1, COL1A2, COL6A2, COL12A1, COL8A1 35 52 9277 30.583516483516483 2.5941546443741803E-4 3.722389827246961E-5 3.354943621123871E-5
|
| 18 |
+
REACTOME_PATHWAY R-MMU-9006934~Signaling by Receptor Tyrosine Kinases 11 23.404255319148938 2.068970107329577E-6 ACTA2, COL1A1, COL3A1, COL1A2, FLRT2, PTPRZ1, SH3KBP1, COL6A2, CAV1, TRIB3, PTN 35 432 9277 6.749140211640212 4.7988637914020416E-4 6.0258754375973924E-5 5.431046531740139E-5
|
| 19 |
+
REACTOME_PATHWAY R-MMU-1650814~Collagen biosynthesis and modifying enzymes 6 12.76595744680851 2.500401462818154E-6 COL1A1, COL3A1, COL1A2, COL6A2, COL12A1, COL8A1 35 61 9277 26.071194379391102 5.79925642680168E-4 6.473261564851443E-5 5.834270079909026E-5
|
| 20 |
+
GOTERM_MF_DIRECT GO:0005178~integrin binding 6 12.76595744680851 3.951504130333476E-6 COL3A1, VCAM1, PTPRZ1, PTN, THY1, FBN1 44 160 28924 24.65113636363636 5.056656360248324E-4 1.6859750956089496E-4 1.5806016521333904E-4
|
| 21 |
+
REACTOME_PATHWAY R-MMU-162582~Signal Transduction 22 46.808510638297875 4.113563511183184E-6 SH3KBP1, CAV1, ADM, PTN, GNG11, AKAP12, COL1A1, ACTA2, APBB1IP, GREM2, COL3A1, COL1A2, GNGT2, FLRT2, PTPRZ1, COL6A2, TRIB3, NCAM1, MYH11, TIMP1, MYL9, FBN1 35 2270 9277 2.568835745752045 9.538934515905817E-4 9.584602981056819E-5 8.638483373484686E-5
|
| 22 |
+
REACTOME_PATHWAY R-MMU-1474290~Collagen formation 6 12.76595744680851 7.46650304695044E-6 COL1A1, COL3A1, COL1A2, COL6A2, COL12A1, COL8A1 35 76 9277 20.925563909774436 0.0017307357203480978 1.581541099944957E-4 1.4254233089632657E-4
|
| 23 |
+
GOTERM_BP_DIRECT GO:0007155~cell adhesion 8 17.02127659574468 1.4860488733480935E-5 VCAN, VCAM1, FLRT2, COL6A2, COL12A1, COL8A1, NCAM1, THY1 47 523 29712 9.669907652251739 0.008331615419916782 0.008351594668216285 0.008232710758348439
|
| 24 |
+
GOTERM_MF_DIRECT GO:0050840~extracellular matrix binding 4 8.51063829787234 1.6185727179649803E-5 SPARC, BGN, CD248, FBLN2 44 33 28924 79.68044077134986 0.0020696451705318752 5.179432697487937E-4 4.855718153894941E-4
|
| 25 |
+
KEGG_PATHWAY mmu04974:Protein digestion and absorption 6 12.76595744680851 1.9031883608157167E-5 COL1A1, COL3A1, COL1A2, COL6A2, COL12A1, COL8A1 31 108 9565 17.1415770609319 0.0013313574863942357 6.661159262855009E-4 5.995043336569508E-4
|
| 26 |
+
KEGG_PATHWAY mmu04926:Relaxin signaling pathway 6 12.76595744680851 4.657577768124781E-5 ACTA2, COL1A1, COL3A1, GNGT2, COL1A2, GNG11 31 130 9565 14.240694789081886 0.0032550710972372165 0.0010867681458957821 9.78091331306204E-4
|
| 27 |
+
GOTERM_BP_DIRECT GO:0001501~skeletal system development 5 10.638297872340425 4.660868393689977E-5 COL1A1, COL3A1, VCAN, COL1A2, FBN1 47 128 29712 24.694148936170212 0.02589998966698781 0.013097040186268834 0.012910605450521235
|
| 28 |
+
GOTERM_MF_DIRECT GO:0048407~platelet-derived growth factor binding 3 6.382978723404255 1.411417082181159E-4 COL1A1, COL3A1, COL1A2 44 12 28924 164.34090909090907 0.017905176533635325 0.0030924960334964167 0.002899215031402891
|
| 29 |
+
GOTERM_MF_DIRECT GO:0008201~heparin binding 5 10.638297872340425 1.4496075157014455E-4 GREM2, CRISPLD2, NCAM1, PTN, FBN1 44 179 28924 18.362112747587606 0.01838521269043336 0.0030924960334964167 0.002899215031402891
|
| 30 |
+
GOTERM_BP_DIRECT GO:0030198~extracellular matrix organization 5 10.638297872340425 1.8179852502727632E-4 COL1A1, COL3A1, COL1A2, CRISPLD2, COL8A1 47 182 29712 17.367313537526304 0.09729717257054937 0.0340569236884431 0.03357212762170369
|
| 31 |
+
GOTERM_BP_DIRECT GO:0071560~cellular response to transforming growth factor beta stimulus 4 8.51063829787234 2.6246708451716275E-4 ACTA2, COL1A1, CAV1, FBN1 47 80 29712 31.608510638297872 0.13738634446279108 0.03687662537466137 0.03635169120562704
|
| 32 |
+
GOTERM_CC_DIRECT GO:0009986~cell surface 8 17.02127659574468 2.734695051633465E-4 VCAN, SPARC, VCAM1, CAV1, BGN, NCAM1, PTN, THY1 47 833 29722 6.0733059181119255 0.03440316195956494 0.004573569985160761 0.004037604752524734
|
| 33 |
+
GOTERM_CC_DIRECT GO:0005925~focal adhesion 5 10.638297872340425 2.8584812407254754E-4 ACTA2, APBB1IP, FLRT2, SH3KBP1, CAV1 47 205 29722 15.42397509081474 0.03593233050495981 0.004573569985160761 0.004037604752524734
|
| 34 |
+
REACTOME_PATHWAY R-MMU-76002~Platelet activation, signaling and aggregation 7 14.893617021276595 2.924308405792443E-4 APBB1IP, COL1A1, SPARC, GNGT2, COL1A2, TIMP1, GNG11 35 256 9277 7.24765625 0.06560299930047075 0.005678032154580326 0.005117539710136775
|
| 35 |
+
GOTERM_CC_DIRECT GO:0098978~glutamatergic synapse 8 17.02127659574468 3.5308599610066246E-4 ACTA2, SPARC, FLRT2, PTPRZ1, SH3KBP1, NCAM1, KIF21B, THY1 47 869 29722 5.82170751413951 0.04419655472052142 0.0050216675000983105 0.004433190839930539
|
| 36 |
+
GOTERM_MF_DIRECT GO:0005509~calcium ion binding 7 14.893617021276595 6.720928802693075E-4 VCAN, SPARC, S100A4, CD248, FBLN2, MYL9, FBN1 44 725 28924 6.346959247648902 0.08245789674491544 0.010974246337109035 0.010288355941039721
|
| 37 |
+
GOTERM_MF_DIRECT GO:0005539~glycosaminoglycan binding 3 6.382978723404255 6.858903960693147E-4 CRISPLD2, BGN, PTN 44 26 28924 75.84965034965035 0.08407802035171197 0.010974246337109035 0.010288355941039721
|
| 38 |
+
GOTERM_BP_DIRECT GO:0071711~basement membrane organization 3 6.382978723404255 6.876796015460026E-4 COL3A1, FLRT2, CAV1 47 25 29712 75.86042553191488 0.32111042871880047 0.0772951872137707 0.07619489985129708
|
| 39 |
+
REACTOME_PATHWAY R-MMU-109582~Hemostasis 9 19.148936170212767 0.001193372143584831 APBB1IP, COL1A1, SPARC, GNGT2, COL1A2, CAV1, TIMP1, KIF21B, GNG11 35 601 9277 3.969241739957214 0.24196647702718177 0.021388900727328124 0.01927755001175496
|
| 40 |
+
REACTOME_PATHWAY R-MMU-3000171~Non-integrin membrane-ECM interactions 4 8.51063829787234 0.0013345928767598207 ACTA2, COL1A1, COL3A1, COL1A2 35 60 9277 17.67047619047619 0.2664300662778777 0.022211438591788445 0.02001889315139731
|
| 41 |
+
GOTERM_BP_DIRECT GO:0030324~lung development 4 8.51063829787234 0.0013957293409172478 COL3A1, SPARC, CRISPLD2, FBN1 47 142 29712 17.80761162721007 0.5444931430558122 0.12253814172238674 0.12079382653772641
|
| 42 |
+
GOTERM_BP_DIRECT GO:0007507~heart development 5 10.638297872340425 0.0015262757865777707 COL3A1, VCAN, VCAM1, ADM, FBN1 47 321 29712 9.846888049313979 0.5768166300241827 0.12253814172238674 0.12079382653772641
|
| 43 |
+
GOTERM_CC_DIRECT GO:0005856~cytoskeleton 6 12.76595744680851 0.0015874202044843396 ACTA2, AKAP12, APBB1IP, CNN1, SH3KBP1, KIF21B 47 552 29722 6.873728029602219 0.18400841032032722 0.02008754888351565 0.017733539248728662
|
| 44 |
+
GOTERM_CC_DIRECT GO:0098685~Schaffer collateral - CA1 synapse 4 8.51063829787234 0.0017262737321771262 ACTA2, AKAP12, NCAM1, PTN 47 153 29722 16.53288833263802 0.19840675760949855 0.02008754888351565 0.017733539248728662
|
| 45 |
+
GOTERM_MF_DIRECT GO:0002020~protease binding 4 8.51063829787234 0.001805749921593141 COL1A1, COL3A1, COL1A2, TIMP1 44 162 28924 16.231200897867566 0.20653427005763692 0.025681776662658 0.02407666562124188
|
| 46 |
+
REACTOME_PATHWAY R-MMU-6806834~Signaling by MET 4 8.51063829787234 0.0019158848129537348 COL1A1, COL3A1, COL1A2, SH3KBP1 35 68 9277 15.591596638655462 0.3591190806267751 0.029760077427881348 0.026822387381352286
|
| 47 |
+
REACTOME_PATHWAY R-MMU-8874081~MET activates PTK2 signaling 3 6.382978723404255 0.0021441142870256523 COL1A1, COL3A1, COL1A2 35 19 9277 41.85112781954887 0.39223595505715503 0.03122366430481106 0.028141500017211687
|
| 48 |
+
GOTERM_BP_DIRECT GO:0010811~positive regulation of cell-substrate adhesion 3 6.382978723404255 0.0023239440007780917 COL8A1, PTN, FBLN2 47 46 29712 41.22849213691027 0.7301538058040932 0.16325706605466095 0.16093312205388285
|
| 49 |
+
REACTOME_PATHWAY R-MMU-419037~NCAM1 interactions 3 6.382978723404255 0.0026210681651430974 COL3A1, COL6A2, NCAM1 35 21 9277 37.865306122448985 0.456043571162699 0.03592405191049069 0.03237790086353238
|
| 50 |
+
GOTERM_CC_DIRECT GO:0005584~collagen type I trimer 2 4.25531914893617 0.003093006940036085 COL1A1, COL1A2 47 2 29722 632.3829787234042 0.3273421151461945 0.032813108433459165 0.028967822288913165
|
| 51 |
+
GOTERM_BP_DIRECT GO:0061870~positive regulation of hepatic stellate cell migration 2 4.25531914893617 0.003094047146850327 ACTA2, AKAP12 47 2 29712 632.1702127659574 0.8252936912696514 0.19320605516998707 0.19045579103945345
|
| 52 |
+
GOTERM_CC_DIRECT GO:0030426~growth cone 4 8.51063829787234 0.0033325813252731962 PTPRZ1, NCAM1, KIF21B, THY1 47 193 29722 13.106382978723403 0.3477209273273387 0.032813108433459165 0.028967822288913165
|
| 53 |
+
KEGG_PATHWAY mmu04510:Focal adhesion 5 10.638297872340425 0.003440887948325218 COL1A1, COL1A2, COL6A2, CAV1, MYL9 31 202 9565 7.637336314276589 0.21437641383300066 0.0528119397434033 0.047530745769062965
|
| 54 |
+
KEGG_PATHWAY mmu04933:AGE-RAGE signaling pathway in diabetic complications 4 8.51063829787234 0.0037722814102430927 COL1A1, COL3A1, VCAM1, COL1A2 31 101 9565 12.219738102842543 0.23245564414851905 0.0528119397434033 0.047530745769062965
|
| 55 |
+
GOTERM_BP_DIRECT GO:0030199~collagen fibril organization 3 6.382978723404255 0.004048871433762763 COL1A1, COL3A1, COL1A2 47 61 29712 31.090338332752005 0.8981385622095254 0.21354606501462295 0.2105062633774041
|
| 56 |
+
GOTERM_BP_DIRECT GO:0048144~fibroblast proliferation 3 6.382978723404255 0.004179727251175894 COL3A1, CAV1, CD248 47 62 29712 30.58888126286891 0.9054019132488886 0.21354606501462295 0.2105062633774041
|
| 57 |
+
GOTERM_BP_DIRECT GO:0009612~response to mechanical stimulus 3 6.382978723404255 0.0047225678923152655 COL1A1, COL3A1, CAV1 47 66 29712 28.735009671179878 0.9304078673769033 0.22117359629009828 0.2180252176952214
|
| 58 |
+
GOTERM_BP_DIRECT GO:0043588~skin development 3 6.382978723404255 0.0052961861338741084 COL1A1, COL3A1, BCL11B 47 70 29712 27.09300911854103 0.9496963048556181 0.22895820055671148 0.225699009089712
|
| 59 |
+
REACTOME_PATHWAY R-MMU-8875878~MET promotes cell motility 3 6.382978723404255 0.005318524831050393 COL1A1, COL3A1, COL1A2 35 30 9277 26.505714285714287 0.7098016718719486 0.0688453492019301 0.06204945636225458
|
| 60 |
+
KEGG_PATHWAY mmu04611:Platelet activation 4 8.51063829787234 0.006990280107083817 APBB1IP, COL1A1, COL3A1, COL1A2 31 126 9565 9.795186891961086 0.38800909648327053 0.08155326791597786 0.07339794112438008
|
| 61 |
+
GOTERM_BP_DIRECT GO:0001568~blood vessel development 3 6.382978723404255 0.00702920913564421 COL1A1, COL3A1, COL1A2 47 81 29712 23.413711583924346 0.9811533315568713 0.2821725381594319 0.278155847224778
|
| 62 |
+
REACTOME_PATHWAY R-MMU-76009~Platelet Aggregation (Plug Formation) 3 6.382978723404255 0.007597674902301182 APBB1IP, COL1A1, COL1A2 35 36 9277 22.08809523809524 0.8295632040329622 0.08851291261180877 0.07977558647416241
|
| 63 |
+
REACTOME_PATHWAY R-MMU-445355~Smooth Muscle Contraction 3 6.382978723404255 0.007597674902301182 ACTA2, MYH11, MYL9 35 36 9277 22.08809523809524 0.8295632040329622 0.08851291261180877 0.07977558647416241
|
| 64 |
+
GOTERM_CC_DIRECT GO:0030485~smooth muscle contractile fiber 2 4.25531914893617 0.007714977227092966 ACTA2, MYH11 47 5 29722 252.9531914893617 0.6289231621374838 0.07053693464770712 0.06227088761867894
|
| 65 |
+
GOTERM_CC_DIRECT GO:0030175~filopodium 3 6.382978723404255 0.008607069231545979 ACTA2, VCAM1, PTPRZ1 47 90 29722 21.079432624113473 0.6692767873049683 0.07344699077585902 0.06483992154431303
|
| 66 |
+
GOTERM_BP_DIRECT GO:0071333~cellular response to glucose stimulus 3 6.382978723404255 0.00861261412687383 COL1A1, VCAM1, SERPINF1 47 90 29712 21.072340425531912 0.9923258925516195 0.32268594262020617 0.31809254841920676
|
| 67 |
+
REACTOME_PATHWAY R-MMU-375165~NCAM signaling for neurite out-growth 3 6.382978723404255 0.009321043691936523 COL3A1, COL6A2, NCAM1 35 40 9277 19.879285714285714 0.8861215354469785 0.10218246897287005 0.09209578748627771
|
| 68 |
+
REACTOME_PATHWAY R-MMU-381426~Regulation of Insulin-like Growth Factor (IGF) transport and uptake by Insulin-like Growth Factor Binding Proteins (IGFBPs) 4 8.51063829787234 0.009648130117610046 VCAN, TIMP1, IGFBP6, FBN1 35 121 9277 8.762219598583235 0.8945200268145299 0.10218246897287005 0.09209578748627771
|
| 69 |
+
GOTERM_MF_DIRECT GO:0019901~protein kinase binding 5 10.638297872340425 0.00997328528044866 ACTA2, CAV1, TRIB3, PTN, THY1 44 569 28924 5.776481866112798 0.7227925154539387 0.12765805158974283 0.11967942336538391
|
| 70 |
+
KEGG_PATHWAY mmu04270:Vascular smooth muscle contraction 4 8.51063829787234 0.010077412705929597 ACTA2, MYH11, ADM, MYL9 31 144 9565 8.57078853046595 0.5078626079957046 0.10077412705929598 0.09069671435336638
|
| 71 |
+
REACTOME_PATHWAY R-MMU-9035034~RHOF GTPase cycle 3 6.382978723404255 0.010242106338136615 ACTA2, AKAP12, CAV1 35 42 9277 18.932653061224492 0.9082255082256379 0.10375699029503614 0.09351488395689952
|
| 72 |
+
GOTERM_MF_DIRECT GO:0042802~identical protein binding 9 19.148936170212767 0.011040722018064282 ACTA2, COL1A1, GREM2, COL1A2, GPX3, CAV1, S100A4, IGFBP6, FBN1 44 2094 28924 2.8253451419640534 0.7585449045018153 0.12847385621020255 0.12044424019706489
|
| 73 |
+
GOTERM_CC_DIRECT GO:0001725~stress fiber 3 6.382978723404255 0.011353928452809301 ACTA2, MYH11, MYL9 47 104 29722 18.24181669394435 0.7681387568481325 0.09083142762247441 0.08018711969796569
|
| 74 |
+
GOTERM_BP_DIRECT GO:0016525~negative regulation of angiogenesis 3 6.382978723404255 0.011361191063208454 SPARC, SERPINF1, PTN 47 104 29712 18.235679214402616 0.9983923201432293 0.39906183609519696 0.3933812405635927
|
| 75 |
+
GOTERM_MF_DIRECT GO:0005515~protein binding 16 34.04255319148936 0.01210659079383143 SPARC, BCL11B, SH3KBP1, CAV1, FBLN2, AKAP12, COL1A1, ACTA2, APBB1IP, COL1A2, FLRT2, PTPRZ1, TRIB3, S100A4, NCAM1, FBN1 44 5596 28924 1.8795243355643643 0.7896748214385103 0.12913696846753525 0.1210659079383143
|
| 76 |
+
GOTERM_BP_DIRECT GO:0007229~integrin-mediated signaling pathway 3 6.382978723404255 0.012642281717714013 COL3A1, PTN, THY1 47 110 29712 17.241005802707928 0.999225256705481 0.41793896031501626 0.41198965127138604
|
| 77 |
+
REACTOME_PATHWAY R-MMU-9856530~High laminar flow shear stress activates signaling by PIEZO1 and PECAM1:CDH5:KDR in endothelial cells 3 6.382978723404255 0.012713235397761534 GNGT2, ADM, GNG11 35 47 9277 16.91854103343465 0.9486134957461371 0.12342432698660157 0.11124080973041342
|
| 78 |
+
GOTERM_BP_DIRECT GO:0007165~signal transduction 6 12.76595744680851 0.013812657132885098 AKAP12, APBB1IP, PTPRZ1, TIMP1, CXCL14, FBN1 47 925 29712 4.10056354226567 0.9996026658524191 0.43126185048230137 0.4251228917565747
|
| 79 |
+
REACTOME_PATHWAY R-MMU-9851151~MDK and PTN in ALK signaling 2 4.25531914893617 0.014581860949365014 PTPRZ1, PTN 35 4 9277 132.52857142857144 0.9668893179133152 0.13590294404808193 0.12248763197466613
|
| 80 |
+
GOTERM_CC_DIRECT GO:0072534~perineuronal net 2 4.25531914893617 0.015371717738213922 VCAN, PTPRZ1 47 10 29722 126.47659574468085 0.8623249655426033 0.115739992381846 0.10217671202459844
|
| 81 |
+
GOTERM_BP_DIRECT GO:0071356~cellular response to tumor necrosis factor 3 6.382978723404255 0.015863396868583804 AKAP12, COL1A1, VCAM1 47 124 29712 15.294440631434455 0.9998769216432507 0.46922258106021564 0.46254325606291724
|
| 82 |
+
REACTOME_PATHWAY R-MMU-422475~Axon guidance 5 10.638297872340425 0.01658609753515625 ACTA2, COL3A1, SH3KBP1, COL6A2, NCAM1 35 271 9277 4.890353189246178 0.9793542001091287 0.1448953872170437 0.13059240907973896
|
| 83 |
+
REACTOME_PATHWAY R-MMU-9675108~Nervous system development 5 10.638297872340425 0.016790452595966437 ACTA2, COL3A1, SH3KBP1, COL6A2, NCAM1 35 272 9277 4.872373949579832 0.980326020912232 0.1448953872170437 0.13059240907973896
|
| 84 |
+
GOTERM_BP_DIRECT GO:0048251~elastic fiber assembly 2 4.25531914893617 0.01690175894231393 COL3A1, MYH11 47 11 29712 114.94003868471954 0.9999320694021577 0.47493942627902147 0.4681787227020959
|
| 85 |
+
GOTERM_MF_DIRECT GO:0030021~extracellular matrix structural constituent conferring compression resistance 2 4.25531914893617 0.017698045366969135 VCAN, BGN 44 12 28924 109.56060606060606 0.8982906091634677 0.17425767745938842 0.1633665726181766
|
| 86 |
+
GOTERM_CC_DIRECT GO:0043005~neuron projection 4 8.51063829787234 0.018217140041640493 FLRT2, BCL11B, SH3KBP1, NCAM1 47 360 29722 7.0264775413711575 0.9049445388655092 0.12954410696277682 0.11436315692807643
|
| 87 |
+
REACTOME_PATHWAY R-MMU-373080~Class B/2 (Secretin family receptors) 3 6.382978723404255 0.01895919255440626 GNGT2, ADM, GNG11 35 58 9277 13.709852216748768 0.9882130655965335 0.1577675666134521 0.14219394415804695
|
| 88 |
+
REACTOME_PATHWAY R-MMU-198933~Immunoregulatory interactions between a Lymphoid and a non-Lymphoid cell 4 8.51063829787234 0.021050515502072722 COL1A1, COL3A1, VCAM1, COL1A2 35 162 9277 6.544620811287478 0.9928156839319909 0.16913000386148086 0.15243476742880246
|
| 89 |
+
GOTERM_BP_DIRECT GO:0043589~skin morphogenesis 2 4.25531914893617 0.021462618908622375 COL1A1, COL1A2 47 14 29712 90.31003039513678 0.9999950444951733 0.5401315663764478 0.5324428608052527
|
| 90 |
+
GOTERM_CC_DIRECT GO:0042383~sarcolemma 3 6.382978723404255 0.021538058955703262 VCAM1, COL6A2, BGN 47 146 29722 12.994170795686388 0.9383938562638995 0.14509850243842196 0.12809477168391942
|
| 91 |
+
GOTERM_BP_DIRECT GO:0071230~cellular response to amino acid stimulus 3 6.382978723404255 0.021551542418191412 COL1A1, COL3A1, COL1A2 47 146 29712 12.98979889245118 0.9999952916632523 0.5401315663764478 0.5324428608052527
|
| 92 |
+
GOTERM_BP_DIRECT GO:0050804~modulation of chemical synaptic transmission 3 6.382978723404255 0.022105028517185585 AKAP12, NCAM1, PTN 47 148 29712 12.814261069580217 0.999996576141018 0.5401315663764478 0.5324428608052527
|
| 93 |
+
GOTERM_BP_DIRECT GO:0008285~negative regulation of cell population proliferation 4 8.51063829787234 0.02352574980497766 BCL11B, PTPRZ1, CAV1, ADM 47 397 29712 6.369473176483198 0.9999984898232426 0.5508946412665602 0.543052724664901
|
| 94 |
+
KEGG_PATHWAY mmu04814:Motor proteins 4 8.51063829787234 0.023528517164250855 ACTA2, MYH11, KIF21B, MYL9 31 198 9565 6.233300749429781 0.8111271567898347 0.20587452518719498 0.1852870726684755
|
| 95 |
+
GOTERM_BP_DIRECT GO:0043116~negative regulation of vascular permeability 2 4.25531914893617 0.02600277769491187 AKAP12, ADM 47 17 29712 74.3729662077597 0.9999996385939455 0.5845424425816188 0.576221553719247
|
| 96 |
+
KEGG_PATHWAY mmu04151:PI3K-Akt signaling pathway 5 10.638297872340425 0.026566971633494064 COL1A1, GNGT2, COL1A2, COL6A2, GNG11 31 367 9565 4.203656499956052 0.8481462794097964 0.20663200159384273 0.18596880143445846
|
| 97 |
+
REACTOME_PATHWAY R-MMU-9860931~Response of endothelial cells to shear stress 3 6.382978723404255 0.026953906695871145 GNGT2, ADM, GNG11 35 70 9277 11.359591836734694 0.9982341577907143 0.20148907322762719 0.1815995938961447
|
| 98 |
+
REACTOME_PATHWAY R-MMU-9855142~Cellular responses to mechanical stimuli 3 6.382978723404255 0.026953906695871145 GNGT2, ADM, GNG11 35 70 9277 11.359591836734694 0.9982341577907143 0.20148907322762719 0.1815995938961447
|
| 99 |
+
REACTOME_PATHWAY R-MMU-9009391~Extra-nuclear estrogen signaling 3 6.382978723404255 0.027672319069888714 GNGT2, CAV1, GNG11 35 71 9277 11.199597585513079 0.9985122336239464 0.20148907322762719 0.1815995938961447
|
| 100 |
+
GOTERM_BP_DIRECT GO:0010812~negative regulation of cell-substrate adhesion 2 4.25531914893617 0.029018094958165423 COL1A1, PTPRZ1 47 19 29712 66.54423292273236 0.9999999369231642 0.6272372833264988 0.6183086387239863
|
| 101 |
+
KEGG_PATHWAY mmu04512:ECM-receptor interaction 3 6.382978723404255 0.03145983615597811 COL1A1, COL1A2, COL6A2 31 89 9565 10.400507430228345 0.8932827941129542 0.22021885309184677 0.19819696778266208
|
| 102 |
+
GOTERM_BP_DIRECT GO:0010976~positive regulation of neuron projection development 3 6.382978723404255 0.03336511698960931 PTPRZ1, SERPINF1, PTN 47 185 29712 10.251408855664174 0.9999999949560944 0.6560667894799646 0.6467277604482212
|
| 103 |
+
GOTERM_CC_DIRECT GO:0043209~myelin sheath 3 6.382978723404255 0.03500646466076349 ACTA2, NCAM1, THY1 47 190 29722 9.984994400895856 0.9895498216097234 0.22404137382888634 0.1977865253333137
|
| 104 |
+
GOTERM_BP_DIRECT GO:1900006~positive regulation of dendrite development 2 4.25531914893617 0.035021358869037254 PTPRZ1, PTN 47 23 29712 54.97132284921369 0.9999999980792735 0.6560667894799646 0.6467277604482212
|
| 105 |
+
GOTERM_BP_DIRECT GO:0007413~axonal fasciculation 2 4.25531914893617 0.035021358869037254 PTPRZ1, NCAM1 47 23 29712 54.97132284921369 0.9999999980792735 0.6560667894799646 0.6467277604482212
|
| 106 |
+
GOTERM_BP_DIRECT GO:0002026~regulation of the force of heart contraction 2 4.25531914893617 0.035021358869037254 CAV1, ADM 47 23 29712 54.97132284921369 0.9999999980792735 0.6560667894799646 0.6467277604482212
|
| 107 |
+
GOTERM_MF_DIRECT GO:0050998~nitric-oxide synthase binding 2 4.25531914893617 0.035090027971754874 ACTA2, CAV1 44 24 28924 54.78030303030303 0.9896650180202876 0.3065780981312424 0.28741696699803976
|
| 108 |
+
GOTERM_BP_DIRECT GO:0032496~response to lipopolysaccharide 3 6.382978723404255 0.03638100432685884 AKAP12, VCAM1, ADM 47 194 29712 9.775828032463258 0.9999999991316126 0.6595524010224086 0.6501637547445096
|
| 109 |
+
GOTERM_MF_DIRECT GO:0031681~G-protein beta-subunit binding 2 4.25531914893617 0.036525707166659205 GNGT2, GNG11 44 25 28924 52.589090909090906 0.9914584473706006 0.3065780981312424 0.28741696699803976
|
| 110 |
+
GOTERM_CC_DIRECT GO:0016460~myosin II complex 2 4.25531914893617 0.03799680254416647 MYH11, MYL9 47 25 29722 50.59063829787234 0.9929758466043639 0.23159955836444324 0.20445898511861008
|
| 111 |
+
GOTERM_MF_DIRECT GO:0005516~calmodulin binding 3 6.382978723404255 0.0383222622664053 AKAP12, CNN1, MYH11 44 208 28924 9.481206293706293 0.9932735797026461 0.3065780981312424 0.28741696699803976
|
| 112 |
+
REACTOME_PATHWAY R-MMU-2022923~Dermatan sulfate biosynthesis 2 4.25531914893617 0.03960501750302037 VCAN, BGN 35 11 9277 48.19220779220779 0.9999152045147619 0.2796354266122347 0.2520319295646751
|
| 113 |
+
GOTERM_CC_DIRECT GO:0030027~lamellipodium 3 6.382978723404255 0.04089080075110019 ACTA2, APBB1IP, PTPRZ1 47 207 29722 9.164970706136293 0.9952234840173367 0.23791011346094654 0.2100300220397419
|
| 114 |
+
REACTOME_PATHWAY R-MMU-75892~Platelet Adhesion to exposed collagen 2 4.25531914893617 0.04312902240333435 COL1A1, COL1A2 35 12 9277 44.17619047619047 0.999963860285541 0.2871160634279115 0.2587741344200061
|
| 115 |
+
REACTOME_PATHWAY R-MMU-430116~GP1b-IX-V activation signalling 2 4.25531914893617 0.04312902240333435 COL1A1, COL1A2 35 12 9277 44.17619047619047 0.999963860285541 0.2871160634279115 0.2587741344200061
|
| 116 |
+
GOTERM_BP_DIRECT GO:0001666~response to hypoxia 3 6.382978723404255 0.04454169753211946 VCAM1, CAV1, ADM 47 217 29712 8.739680360819687 0.9999999999927688 0.7420384315816297 0.7314756069327809
|
| 117 |
+
KEGG_PATHWAY mmu05146:Amoebiasis 3 6.382978723404255 0.04475473166569457 COL1A1, COL3A1, COL1A2 31 108 9565 8.57078853046595 0.9594451629558901 0.2848028378726018 0.2563225540853416
|
| 118 |
+
GOTERM_CC_DIRECT GO:0016514~SWI/SNF complex 2 4.25531914893617 0.04542486433193445 ACTA2, BCL11B 47 30 29722 42.15886524822695 0.9973956755508281 0.25279924497772216 0.2231743334568953
|
| 119 |
+
GOTERM_BP_DIRECT GO:0034113~heterotypic cell-cell adhesion 2 4.25531914893617 0.045439818870130586 VCAM1, THY1 47 30 29712 42.144680851063825 0.9999999999957414 0.7420384315816297 0.7314756069327809
|
| 120 |
+
GOTERM_BP_DIRECT GO:0031641~regulation of myelination 2 4.25531914893617 0.045439818870130586 PTPRZ1, PTN 47 30 29712 42.144680851063825 0.9999999999957414 0.7420384315816297 0.7314756069327809
|
| 121 |
+
REACTOME_PATHWAY R-MMU-4420097~VEGFA-VEGFR2 Pathway 3 6.382978723404255 0.047986780957221205 ACTA2, CAV1, TRIB3 35 96 9277 8.283035714285715 0.9999889038818522 0.3105811100842372 0.2799228889171237
|
| 122 |
+
GOTERM_BP_DIRECT GO:0031032~actomyosin structure organization 2 4.25531914893617 0.04839625590584158 CNN1, MYH11 47 32 29712 39.51063829787234 0.9999999999992573 0.7420384315816297 0.7314756069327809
|
| 123 |
+
GOTERM_CC_DIRECT GO:0005911~cell-cell junction 3 6.382978723404255 0.04978098308079648 FLRT2, SH3KBP1, NCAM1 47 231 29722 8.212765957446807 0.9985497733648512 0.26336646882698656 0.23250321076132405
|
| 124 |
+
GOTERM_BP_DIRECT GO:0035987~endodermal cell differentiation 2 4.25531914893617 0.04987111345690221 COL12A1, COL8A1 47 33 29712 38.313346228239844 0.9999999999996898 0.7420384315816297 0.7314756069327809
|
| 125 |
+
REACTOME_PATHWAY R-MMU-2024101~CS/DS degradation 2 4.25531914893617 0.050139420245921884 VCAN, BGN 35 14 9277 37.86530612244898 0.9999934372096564 0.3157428356026973 0.2845750878822593
|
| 126 |
+
GOTERM_BP_DIRECT GO:0001935~endothelial cell proliferation 2 4.25531914893617 0.05134373479969049 CAV1, COL8A1 47 34 29712 37.18648310387985 0.9999999999998704 0.7420384315816297 0.7314756069327809
|
| 127 |
+
GOTERM_CC_DIRECT GO:0005834~heterotrimeric G-protein complex 2 4.25531914893617 0.052796808120785466 GNGT2, GNG11 47 35 29722 36.136170212765954 0.9990345638277794 0.26336646882698656 0.23250321076132405
|
| 128 |
+
GOTERM_BP_DIRECT GO:1904706~negative regulation of vascular associated smooth muscle cell proliferation 2 4.25531914893617 0.05281412324424411 CNN1, CAV1 47 35 29712 36.12401215805471 0.9999999999999459 0.7420384315816297 0.7314756069327809
|
| 129 |
+
GOTERM_BP_DIRECT GO:0001937~negative regulation of endothelial cell proliferation 2 4.25531914893617 0.05281412324424411 SPARC, CAV1 47 35 29712 36.12401215805471 0.9999999999999459 0.7420384315816297 0.7314756069327809
|
| 130 |
+
GOTERM_BP_DIRECT GO:0048714~positive regulation of oligodendrocyte differentiation 2 4.25531914893617 0.05281412324424411 PTPRZ1, PTN 47 35 29712 36.12401215805471 0.9999999999999459 0.7420384315816297 0.7314756069327809
|
| 131 |
+
KEGG_PATHWAY mmu04670:Leukocyte transendothelial migration 3 6.382978723404255 0.053248636904474234 VCAM1, THY1, MYL9 31 119 9565 7.7785307671455675 0.978297201653411 0.310617048609433 0.2795553437484897
|
| 132 |
+
REACTOME_PATHWAY R-MMU-1280218~Adaptive Immune System 7 14.893617021276595 0.05332719632891561 COL1A1, COL3A1, VCAM1, COL1A2, SH3KBP1, TRIB3, KIF21B 35 753 9277 2.464010624169987 0.9999969913132601 0.3269799143325615 0.29470292708084944
|
| 133 |
+
GOTERM_CC_DIRECT GO:0009897~external side of plasma membrane 4 8.51063829787234 0.053496313980481644 VCAM1, CD248, NCAM1, THY1 47 551 29722 4.590802023400394 0.9991216742227382 0.26336646882698656 0.23250321076132405
|
| 134 |
+
GOTERM_MF_DIRECT GO:0030246~carbohydrate binding 3 6.382978723404255 0.05405303258732842 VCAN, CD248, PTN 44 252 28924 7.825757575757574 0.9991853913375791 0.40698753948106103 0.38155081826349474
|
| 135 |
+
REACTOME_PATHWAY R-MMU-194138~Signaling by VEGF 3 6.382978723404255 0.05535634863677548 ACTA2, CAV1, TRIB3 35 104 9277 7.645879120879121 0.9999981711488538 0.33071869826586375 0.29807264650571413
|
| 136 |
+
GOTERM_BP_DIRECT GO:0043113~receptor clustering 2 4.25531914893617 0.05867341417023918 PTN, THY1 47 39 29712 32.4189852700491 0.9999999999999983 0.804255091796937 0.7928066207393294
|
| 137 |
+
GOTERM_BP_DIRECT GO:0060325~face morphogenesis 2 4.25531914893617 0.06304459867195403 COL1A1, CRISPLD2 47 42 29712 30.103343465045594 0.9999999999999999 0.8435967727056706 0.8315882777205365
|
| 138 |
+
REACTOME_PATHWAY R-MMU-8964315~G beta:gamma signalling through BTK 2 4.25531914893617 0.06401097702134163 GNGT2, GNG11 35 18 9277 29.45079365079365 0.9999997838201198 0.3513482431393882 0.31666579853764604
|
| 139 |
+
REACTOME_PATHWAY R-MMU-2243919~Crosslinking of collagen fibrils 2 4.25531914893617 0.06401097702134163 COL1A1, COL1A2 35 18 9277 29.45079365079365 0.9999997838201198 0.3513482431393882 0.31666579853764604
|
| 140 |
+
GOTERM_BP_DIRECT GO:0030335~positive regulation of cell migration 3 6.382978723404255 0.06473817961805624 COL1A1, PTPRZ1, CAV1 47 268 29712 7.076532232454746 1.0 0.8456168204255533 0.833579570312734
|
| 141 |
+
REACTOME_PATHWAY R-MMU-8957275~Post-translational protein phosphorylation 3 6.382978723404255 0.0660933225286726 VCAN, TIMP1, FBN1 35 115 9277 6.914534161490684 0.9999998710541089 0.3513482431393882 0.31666579853764604
|
| 142 |
+
GOTERM_BP_DIRECT GO:0008284~positive regulation of cell population proliferation 4 8.51063829787234 0.06620487562050595 ACTA2, ADM, TIMP1, PTN 47 602 29712 4.200466530006361 1.0 0.8456168204255533 0.833579570312734
|
| 143 |
+
REACTOME_PATHWAY R-MMU-392851~Prostacyclin signalling through prostacyclin receptor 2 4.25531914893617 0.06744802495190409 GNGT2, GNG11 35 19 9277 27.900751879699246 0.9999999079239411 0.3513482431393882 0.31666579853764604
|
| 144 |
+
REACTOME_PATHWAY R-MMU-201556~Signaling by ALK 2 4.25531914893617 0.06744802495190409 PTPRZ1, PTN 35 19 9277 27.900751879699246 0.9999999079239411 0.3513482431393882 0.31666579853764604
|
| 145 |
+
KEGG_PATHWAY mmu04371:Apelin signaling pathway 3 6.382978723404255 0.07001366206124844 ACTA2, GNGT2, GNG11 31 139 9565 6.6593177071246235 0.9937861638310146 0.3769966418682609 0.33929697768143474
|
| 146 |
+
GOTERM_CC_DIRECT GO:0045211~postsynaptic membrane 3 6.382978723404255 0.07069360841698624 FLRT2, PTPRZ1, NCAM1 47 282 29722 6.727478497057492 0.999915987812827 0.3351400695323792 0.2958658426340535
|
| 147 |
+
REACTOME_PATHWAY R-MMU-8964616~G beta:gamma signalling through CDC42 2 4.25531914893617 0.07087282157747317 GNGT2, GNG11 35 20 9277 26.505714285714284 0.9999999607862784 0.3513482431393882 0.31666579853764604
|
| 148 |
+
REACTOME_PATHWAY R-MMU-418217~G beta:gamma signalling through PLC beta 2 4.25531914893617 0.07087282157747317 GNGT2, GNG11 35 20 9277 26.505714285714284 0.9999999607862784 0.3513482431393882 0.31666579853764604
|
| 149 |
+
REACTOME_PATHWAY R-MMU-2022870~Chondroitin sulfate biosynthesis 2 4.25531914893617 0.07087282157747317 VCAN, BGN 35 20 9277 26.505714285714284 0.9999999607862784 0.3513482431393882 0.31666579853764604
|
| 150 |
+
GOTERM_BP_DIRECT GO:0001525~angiogenesis 3 6.382978723404255 0.07335901502654828 CAV1, COL8A1, THY1 47 288 29712 6.585106382978723 1.0 0.9161725876648918 0.9031309849935055
|
| 151 |
+
REACTOME_PATHWAY R-MMU-500657~Presynaptic function of Kainate receptors 2 4.25531914893617 0.07428540924757544 GNGT2, GNG11 35 21 9277 25.243537414965985 0.9999999833010456 0.35333368400170256 0.31845525167535427
|
| 152 |
+
REACTOME_PATHWAY R-MMU-9006936~Signaling by TGFB family members 3 6.382978723404255 0.074306225390916 GREM2, TIMP1, FBN1 35 123 9277 6.464808362369338 0.9999999833879362 0.35333368400170256 0.31845525167535427
|
| 153 |
+
REACTOME_PATHWAY R-MMU-392170~ADP signalling through P2Y purinoceptor 12 2 4.25531914893617 0.07768583017184705 GNGT2, GNG11 35 22 9277 24.096103896103894 0.9999999928894963 0.36201596860080726 0.32628048672175763
|
| 154 |
+
GOTERM_CC_DIRECT GO:0030016~myofibril 2 4.25531914893617 0.08030567201598654 MYH11, MYL9 47 54 29722 23.42159180457053 0.9999777991790781 0.3671116435016527 0.3240907477788028
|
| 155 |
+
REACTOME_PATHWAY R-MMU-5627123~RHO GTPases activate PAKs 2 4.25531914893617 0.0810741264198134 MYH11, MYL9 35 23 9277 23.048447204968944 0.9999999969725895 0.3632744510733947 0.3274147413107849
|
| 156 |
+
REACTOME_PATHWAY R-MMU-400042~Adrenaline,noradrenaline inhibits insulin secretion 2 4.25531914893617 0.0810741264198134 GNGT2, GNG11 35 23 9277 23.048447204968944 0.9999999969725895 0.3632744510733947 0.3274147413107849
|
| 157 |
+
GOTERM_MF_DIRECT GO:0046332~SMAD binding 2 4.25531914893617 0.08138174072253183 COL3A1, COL1A2 44 57 28924 23.065390749601278 0.9999808887356442 0.5787146006935596 0.5425449381502122
|
| 158 |
+
GOTERM_BP_DIRECT GO:0031623~receptor internalization 2 4.25531914893617 0.08318230175034744 CAV1, ADM 47 56 29712 22.577507598784194 1.0 1.0 0.9875222816399287
|
| 159 |
+
GOTERM_CC_DIRECT GO:0043025~neuronal cell body 4 8.51063829787234 0.08437953214468305 AKAP12, PTPRZ1, SERPINF1, NCAM1 47 668 29722 3.7867244234934385 0.999987422827345 0.37243379705239416 0.3287892114603167
|
| 160 |
+
REACTOME_PATHWAY R-MMU-428930~Thromboxane signalling through TP receptor 2 4.25531914893617 0.08445033991921927 GNGT2, GNG11 35 24 9277 22.088095238095235 0.9999999987111508 0.36438757779959424 0.3284179885747416
|
| 161 |
+
REACTOME_PATHWAY R-MMU-202040~G-protein activation 2 4.25531914893617 0.08445033991921927 GNGT2, GNG11 35 24 9277 22.088095238095235 0.9999999987111508 0.36438757779959424 0.3284179885747416
|
| 162 |
+
GOTERM_BP_DIRECT GO:0016477~cell migration 3 6.382978723404255 0.08462237292548197 SH3KBP1, CD248, IGFBP6 47 313 29712 6.05913941948202 1.0 1.0 0.9875222816399287
|
| 163 |
+
REACTOME_PATHWAY R-MMU-418592~ADP signalling through P2Y purinoceptor 1 2 4.25531914893617 0.08781451245013751 GNGT2, GNG11 35 25 9277 21.20457142857143 0.9999999994513533 0.36537109644432214 0.32930442168801566
|
| 164 |
+
REACTOME_PATHWAY R-MMU-392451~G beta:gamma signalling through PI3Kgamma 2 4.25531914893617 0.08781451245013751 GNGT2, GNG11 35 25 9277 21.20457142857143 0.9999999994513533 0.36537109644432214 0.32930442168801566
|
| 165 |
+
GOTERM_BP_DIRECT GO:0030514~negative regulation of BMP signaling pathway 2 4.25531914893617 0.09027122324048699 GREM2, CAV1 47 61 29712 20.72689222183467 1.0 1.0 0.9875222816399287
|
| 166 |
+
REACTOME_PATHWAY R-MMU-1971475~A tetrasaccharide linker sequence is required for GAG synthesis 2 4.25531914893617 0.09116668566564533 VCAN, BGN 35 26 9277 20.389010989010988 0.9999999997664697 0.36816123724365857 0.33181914086338327
|
| 167 |
+
REACTOME_PATHWAY R-MMU-418555~G alpha (s) signalling events 3 6.382978723404255 0.09164528652417252 GNGT2, ADM, GNG11 35 139 9277 5.720657759506681 0.9999999997933335 0.36816123724365857 0.33181914086338327
|
| 168 |
+
GOTERM_CC_DIRECT GO:0030133~transport vesicle 2 4.25531914893617 0.09165311839877811 CRISPLD2, BGN 47 62 29722 20.399450926561425 0.999995468714926 0.3910533051681199 0.3452267459687309
|
| 169 |
+
GOTERM_BP_DIRECT GO:0006469~negative regulation of protein kinase activity 2 4.25531914893617 0.09168255924097848 TRIB3, THY1 47 62 29712 20.392587508579275 1.0 1.0 0.9875222816399287
|
| 170 |
+
REACTOME_PATHWAY R-MMU-9634597~GPER1 signaling 2 4.25531914893617 0.09450690106454322 GNGT2, GNG11 35 27 9277 19.633862433862433 0.9999999999006075 0.3698855141266195 0.333373210157039
|
| 171 |
+
GOTERM_BP_DIRECT GO:0045597~positive regulation of cell differentiation 2 4.25531914893617 0.09870717283967474 ACTA2, PTN 47 67 29712 18.870752619879326 1.0 1.0 0.9875222816399287
|
analysis/final_results.csv
ADDED
|
@@ -0,0 +1,201 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model,seed,fold,combination,train_auc,val_auc,precision,recall,f1,accuracy,specificity,n_samples,tn,fp,fn,tp,model_path,selection_criteria,run_id,timestamp
|
| 2 |
+
ATAC,0,1,all_samples,0.9283870967741935,0.831578947368421,0.8947368421052632,0.8947368421052632,0.8947368421052632,0.8490566037735849,0.7333333333333333,53,11,4,4,34,ckp/ATAC_seed0/best_ATAC_seed0_fold1_AUC_0.832.pth,all_samples,ATAC_seed0_fold1_all_samples,2025-10-13 01:52:06
|
| 3 |
+
ATAC,0,1,common_samples,0.9283870967741935,0.8888888888888888,0.92,0.9583333333333334,0.9387755102040817,0.9090909090909091,0.7777777777777778,33,7,2,1,23,ckp/ATAC_seed0/best_ATAC_seed0_fold1_AUC_0.832.pth,all_samples,ATAC_seed0_fold1_common_samples,2025-10-13 01:52:06
|
| 4 |
+
ATAC,0,2,all_samples,0.8974193548387097,0.9666666666666667,0.9743589743589743,1.0,0.9870129870129869,0.9811320754716981,0.9333333333333333,53,14,1,0,38,ckp/ATAC_seed0/best_ATAC_seed0_fold2_AUC_0.967.pth,all_samples,ATAC_seed0_fold2_all_samples,2025-10-13 01:52:06
|
| 5 |
+
ATAC,0,2,common_samples,0.8974193548387097,0.949074074074074,0.96,1.0,0.9795918367346939,0.9696969696969697,0.8888888888888888,33,8,1,0,24,ckp/ATAC_seed0/best_ATAC_seed0_fold2_AUC_0.967.pth,all_samples,ATAC_seed0_fold2_common_samples,2025-10-13 01:52:06
|
| 6 |
+
ATAC,0,3,all_samples,0.9127130604711757,0.7905405405405406,0.7954545454545454,0.9459459459459459,0.8641975308641975,0.7924528301886793,0.4375,53,7,9,2,35,ckp/ATAC_seed0/best_ATAC_seed0_fold3_AUC_0.791.pth,all_samples,ATAC_seed0_fold3_all_samples,2025-10-13 01:52:06
|
| 7 |
+
ATAC,0,3,common_samples,0.9127130604711757,0.7922705314009663,0.8148148148148148,0.9565217391304348,0.8800000000000001,0.8125,0.4444444444444444,32,4,5,1,22,ckp/ATAC_seed0/best_ATAC_seed0_fold3_AUC_0.791.pth,all_samples,ATAC_seed0_fold3_common_samples,2025-10-13 01:52:06
|
| 8 |
+
ATAC,0,4,all_samples,0.8872000868526762,0.8817567567567568,0.9,0.972972972972973,0.935064935064935,0.9056603773584906,0.75,53,12,4,1,36,ckp/ATAC_seed0/best_ATAC_seed0_fold4_AUC_0.882.pth,all_samples,ATAC_seed0_fold4_all_samples,2025-10-13 01:52:06
|
| 9 |
+
ATAC,0,4,common_samples,0.8872000868526762,0.8194444444444444,0.8518518518518519,0.9583333333333334,0.9019607843137256,0.8484848484848485,0.5555555555555556,33,5,4,1,23,ckp/ATAC_seed0/best_ATAC_seed0_fold4_AUC_0.882.pth,all_samples,ATAC_seed0_fold4_common_samples,2025-10-13 01:52:06
|
| 10 |
+
ATAC,0,5,all_samples,0.896236559139785,0.9280701754385965,0.7169811320754716,1.0,0.8351648351648352,0.7169811320754716,0.0,53,0,15,0,38,ckp/ATAC_seed0/best_ATAC_seed0_fold5_AUC_0.928.pth,all_samples,ATAC_seed0_fold5_all_samples,2025-10-13 01:52:06
|
| 11 |
+
ATAC,0,5,common_samples,0.896236559139785,0.9166666666666667,0.75,1.0,0.8571428571428571,0.75,0.0,32,0,8,0,24,ckp/ATAC_seed0/best_ATAC_seed0_fold5_AUC_0.928.pth,all_samples,ATAC_seed0_fold5_common_samples,2025-10-13 01:52:06
|
| 12 |
+
ATAC,6,1,all_samples,0.890752688172043,0.9140350877192983,0.7169811320754716,1.0,0.8351648351648352,0.7169811320754716,0.0,53,0,15,0,38,ckp/ATAC_seed6/best_ATAC_seed6_fold1_AUC_0.914.pth,all_samples,ATAC_seed6_fold1_all_samples,2025-10-13 01:52:06
|
| 13 |
+
ATAC,6,1,common_samples,0.890752688172043,0.8842592592592593,0.7272727272727273,1.0,0.8421052631578948,0.7272727272727273,0.0,33,0,9,0,24,ckp/ATAC_seed6/best_ATAC_seed6_fold1_AUC_0.914.pth,all_samples,ATAC_seed6_fold1_common_samples,2025-10-13 01:52:06
|
| 14 |
+
ATAC,6,2,all_samples,0.8852688172043011,0.9456140350877194,0.8444444444444444,1.0,0.9156626506024096,0.8679245283018868,0.5333333333333333,53,8,7,0,38,ckp/ATAC_seed6/best_ATAC_seed6_fold2_AUC_0.946.pth,all_samples,ATAC_seed6_fold2_all_samples,2025-10-13 01:52:06
|
| 15 |
+
ATAC,6,2,common_samples,0.8852688172043011,0.9166666666666667,0.8571428571428571,1.0,0.923076923076923,0.8787878787878788,0.5555555555555556,33,5,4,0,24,ckp/ATAC_seed6/best_ATAC_seed6_fold2_AUC_0.946.pth,all_samples,ATAC_seed6_fold2_common_samples,2025-10-13 01:52:06
|
| 16 |
+
ATAC,6,3,all_samples,0.9215068939311692,0.8006756756756757,0.8333333333333334,0.9459459459459459,0.8860759493670887,0.8301886792452831,0.5625,53,9,7,2,35,ckp/ATAC_seed6/best_ATAC_seed6_fold3_AUC_0.801.pth,all_samples,ATAC_seed6_fold3_all_samples,2025-10-13 01:52:06
|
| 17 |
+
ATAC,6,3,common_samples,0.9215068939311692,0.7198067632850242,0.8148148148148148,0.9565217391304348,0.8800000000000001,0.8125,0.4444444444444444,32,4,5,1,22,ckp/ATAC_seed6/best_ATAC_seed6_fold3_AUC_0.801.pth,all_samples,ATAC_seed6_fold3_common_samples,2025-10-13 01:52:06
|
| 18 |
+
ATAC,6,4,all_samples,0.9110845727933992,0.9070945945945946,0.875,0.9459459459459459,0.9090909090909091,0.8679245283018868,0.6875,53,11,5,2,35,ckp/ATAC_seed6/best_ATAC_seed6_fold4_AUC_0.907.pth,all_samples,ATAC_seed6_fold4_all_samples,2025-10-13 01:52:06
|
| 19 |
+
ATAC,6,4,common_samples,0.9110845727933992,0.925925925925926,0.88,0.9166666666666666,0.8979591836734694,0.8484848484848485,0.6666666666666666,33,6,3,2,22,ckp/ATAC_seed6/best_ATAC_seed6_fold4_AUC_0.907.pth,all_samples,ATAC_seed6_fold4_common_samples,2025-10-13 01:52:06
|
| 20 |
+
ATAC,6,5,all_samples,0.9196774193548387,0.8192982456140351,0.9210526315789473,0.9210526315789473,0.9210526315789473,0.8867924528301887,0.8,53,12,3,3,35,ckp/ATAC_seed6/best_ATAC_seed6_fold5_AUC_0.819.pth,all_samples,ATAC_seed6_fold5_all_samples,2025-10-13 01:52:06
|
| 21 |
+
ATAC,6,5,common_samples,0.9196774193548387,0.796875,0.92,0.9583333333333334,0.9387755102040817,0.90625,0.75,32,6,2,1,23,ckp/ATAC_seed6/best_ATAC_seed6_fold5_AUC_0.819.pth,all_samples,ATAC_seed6_fold5_common_samples,2025-10-13 01:52:06
|
| 22 |
+
ATAC,42,1,all_samples,0.9063440860215053,0.8157894736842106,0.7169811320754716,1.0,0.8351648351648352,0.7169811320754716,0.0,53,0,15,0,38,ckp/ATAC_seed42/best_ATAC_seed42_fold1_AUC_0.816.pth,all_samples,ATAC_seed42_fold1_all_samples,2025-10-13 01:52:06
|
| 23 |
+
ATAC,42,1,common_samples,0.9063440860215053,0.8055555555555556,0.7272727272727273,1.0,0.8421052631578948,0.7272727272727273,0.0,33,0,9,0,24,ckp/ATAC_seed42/best_ATAC_seed42_fold1_AUC_0.816.pth,all_samples,ATAC_seed42_fold1_common_samples,2025-10-13 01:52:06
|
| 24 |
+
ATAC,42,2,all_samples,0.92,0.8789473684210527,0.9230769230769231,0.9473684210526315,0.935064935064935,0.9056603773584906,0.8,53,12,3,2,36,ckp/ATAC_seed42/best_ATAC_seed42_fold2_AUC_0.879.pth,all_samples,ATAC_seed42_fold2_all_samples,2025-10-13 01:52:06
|
| 25 |
+
ATAC,42,2,common_samples,0.92,0.875,0.9583333333333334,0.9583333333333334,0.9583333333333334,0.9393939393939394,0.8888888888888888,33,8,1,1,23,ckp/ATAC_seed42/best_ATAC_seed42_fold2_AUC_0.879.pth,all_samples,ATAC_seed42_fold2_common_samples,2025-10-13 01:52:06
|
| 26 |
+
ATAC,42,3,all_samples,0.906741938985995,0.8902027027027027,0.8409090909090909,1.0,0.9135802469135803,0.8679245283018868,0.5625,53,9,7,0,37,ckp/ATAC_seed42/best_ATAC_seed42_fold3_AUC_0.890.pth,all_samples,ATAC_seed42_fold3_all_samples,2025-10-13 01:52:06
|
| 27 |
+
ATAC,42,3,common_samples,0.906741938985995,0.8985507246376812,0.7931034482758621,1.0,0.8846153846153846,0.8125,0.3333333333333333,32,3,6,0,23,ckp/ATAC_seed42/best_ATAC_seed42_fold3_AUC_0.890.pth,all_samples,ATAC_seed42_fold3_common_samples,2025-10-13 01:52:06
|
| 28 |
+
ATAC,42,4,all_samples,0.9086961241993269,0.8226351351351351,0.8536585365853658,0.9459459459459459,0.8974358974358975,0.8490566037735849,0.625,53,10,6,2,35,ckp/ATAC_seed42/best_ATAC_seed42_fold4_AUC_0.823.pth,all_samples,ATAC_seed42_fold4_all_samples,2025-10-13 01:52:06
|
| 29 |
+
ATAC,42,4,common_samples,0.9086961241993269,0.7592592592592593,0.8846153846153846,0.9583333333333334,0.9199999999999999,0.8787878787878788,0.6666666666666666,33,6,3,1,23,ckp/ATAC_seed42/best_ATAC_seed42_fold4_AUC_0.823.pth,all_samples,ATAC_seed42_fold4_common_samples,2025-10-13 01:52:06
|
| 30 |
+
ATAC,42,5,all_samples,0.8983870967741936,0.980701754385965,0.925,0.9736842105263158,0.9487179487179489,0.9245283018867925,0.8,53,12,3,1,37,ckp/ATAC_seed42/best_ATAC_seed42_fold5_AUC_0.981.pth,all_samples,ATAC_seed42_fold5_all_samples,2025-10-13 01:52:06
|
| 31 |
+
ATAC,42,5,common_samples,0.8983870967741936,0.9791666666666667,0.8888888888888888,1.0,0.9411764705882353,0.90625,0.625,32,5,3,0,24,ckp/ATAC_seed42/best_ATAC_seed42_fold5_AUC_0.981.pth,all_samples,ATAC_seed42_fold5_common_samples,2025-10-13 01:52:06
|
| 32 |
+
ATAC,123,1,all_samples,0.8980645161290323,0.8649122807017544,0.9024390243902439,0.9736842105263158,0.9367088607594938,0.9056603773584906,0.7333333333333333,53,11,4,1,37,ckp/ATAC_seed123/best_ATAC_seed123_fold1_AUC_0.865.pth,all_samples,ATAC_seed123_fold1_all_samples,2025-10-13 01:52:06
|
| 33 |
+
ATAC,123,1,common_samples,0.8980645161290323,0.8935185185185185,0.9230769230769231,1.0,0.9600000000000001,0.9393939393939394,0.7777777777777778,33,7,2,0,24,ckp/ATAC_seed123/best_ATAC_seed123_fold1_AUC_0.865.pth,all_samples,ATAC_seed123_fold1_common_samples,2025-10-13 01:52:06
|
| 34 |
+
ATAC,123,2,all_samples,0.8958064516129032,0.9842105263157895,0.925,0.9736842105263158,0.9487179487179489,0.9245283018867925,0.8,53,12,3,1,37,ckp/ATAC_seed123/best_ATAC_seed123_fold2_AUC_0.984.pth,all_samples,ATAC_seed123_fold2_all_samples,2025-10-13 01:52:06
|
| 35 |
+
ATAC,123,2,common_samples,0.8958064516129032,0.9953703703703703,0.9230769230769231,1.0,0.9600000000000001,0.9393939393939394,0.7777777777777778,33,7,2,0,24,ckp/ATAC_seed123/best_ATAC_seed123_fold2_AUC_0.984.pth,all_samples,ATAC_seed123_fold2_common_samples,2025-10-13 01:52:06
|
| 36 |
+
ATAC,123,3,all_samples,0.9157529041363587,0.8395270270270271,0.8536585365853658,0.9459459459459459,0.8974358974358975,0.8490566037735849,0.625,53,10,6,2,35,ckp/ATAC_seed123/best_ATAC_seed123_fold3_AUC_0.840.pth,all_samples,ATAC_seed123_fold3_all_samples,2025-10-13 01:52:06
|
| 37 |
+
ATAC,123,3,common_samples,0.9157529041363587,0.7777777777777778,0.8148148148148148,0.9565217391304348,0.8800000000000001,0.8125,0.4444444444444444,32,4,5,1,22,ckp/ATAC_seed123/best_ATAC_seed123_fold3_AUC_0.840.pth,all_samples,ATAC_seed123_fold3_common_samples,2025-10-13 01:52:06
|
| 38 |
+
ATAC,123,4,all_samples,0.8875257843882316,0.9138513513513513,0.9210526315789473,0.9459459459459459,0.9333333333333332,0.9056603773584906,0.8125,53,13,3,2,35,ckp/ATAC_seed123/best_ATAC_seed123_fold4_AUC_0.914.pth,all_samples,ATAC_seed123_fold4_all_samples,2025-10-13 01:52:06
|
| 39 |
+
ATAC,123,4,common_samples,0.8875257843882316,0.9212962962962963,0.9230769230769231,1.0,0.9600000000000001,0.9393939393939394,0.7777777777777778,33,7,2,0,24,ckp/ATAC_seed123/best_ATAC_seed123_fold4_AUC_0.914.pth,all_samples,ATAC_seed123_fold4_common_samples,2025-10-13 01:52:06
|
| 40 |
+
ATAC,123,5,all_samples,0.9141935483870969,0.775438596491228,0.7169811320754716,1.0,0.8351648351648352,0.7169811320754716,0.0,53,0,15,0,38,ckp/ATAC_seed123/best_ATAC_seed123_fold5_AUC_0.775.pth,all_samples,ATAC_seed123_fold5_all_samples,2025-10-13 01:52:06
|
| 41 |
+
ATAC,123,5,common_samples,0.9141935483870969,0.7708333333333334,0.75,1.0,0.8571428571428571,0.75,0.0,32,0,8,0,24,ckp/ATAC_seed123/best_ATAC_seed123_fold5_AUC_0.775.pth,all_samples,ATAC_seed123_fold5_common_samples,2025-10-13 01:52:06
|
| 42 |
+
ATAC,1000,1,all_samples,0.8870967741935484,0.8596491228070176,0.8780487804878049,0.9473684210526315,0.9113924050632912,0.8679245283018868,0.6666666666666666,53,10,5,2,36,ckp/ATAC_seed1000/best_ATAC_seed1000_fold1_AUC_0.860.pth,all_samples,ATAC_seed1000_fold1_all_samples,2025-10-13 01:52:06
|
| 43 |
+
ATAC,1000,1,common_samples,0.8870967741935484,0.888888888888889,0.8846153846153846,0.9583333333333334,0.9199999999999999,0.8787878787878788,0.6666666666666666,33,6,3,1,23,ckp/ATAC_seed1000/best_ATAC_seed1000_fold1_AUC_0.860.pth,all_samples,ATAC_seed1000_fold1_common_samples,2025-10-13 01:52:06
|
| 44 |
+
ATAC,1000,2,all_samples,0.9063440860215054,0.8894736842105264,0.9024390243902439,0.9736842105263158,0.9367088607594938,0.9056603773584906,0.7333333333333333,53,11,4,1,37,ckp/ATAC_seed1000/best_ATAC_seed1000_fold2_AUC_0.889.pth,all_samples,ATAC_seed1000_fold2_all_samples,2025-10-13 01:52:06
|
| 45 |
+
ATAC,1000,2,common_samples,0.9063440860215054,0.8518518518518519,0.8888888888888888,1.0,0.9411764705882353,0.9090909090909091,0.6666666666666666,33,6,3,0,24,ckp/ATAC_seed1000/best_ATAC_seed1000_fold2_AUC_0.889.pth,all_samples,ATAC_seed1000_fold2_common_samples,2025-10-13 01:52:06
|
| 46 |
+
ATAC,1000,3,all_samples,0.9130387580067312,0.972972972972973,0.9722222222222222,0.9459459459459459,0.9589041095890412,0.9433962264150944,0.9375,53,15,1,2,35,ckp/ATAC_seed1000/best_ATAC_seed1000_fold3_AUC_0.973.pth,all_samples,ATAC_seed1000_fold3_all_samples,2025-10-13 01:52:06
|
| 47 |
+
ATAC,1000,3,common_samples,0.9130387580067312,0.9420289855072463,0.9545454545454546,0.9130434782608695,0.9333333333333332,0.90625,0.8888888888888888,32,8,1,2,21,ckp/ATAC_seed1000/best_ATAC_seed1000_fold3_AUC_0.973.pth,all_samples,ATAC_seed1000_fold3_common_samples,2025-10-13 01:52:06
|
| 48 |
+
ATAC,1000,4,all_samples,0.9325806101400499,0.8496621621621622,0.6981132075471698,1.0,0.8222222222222222,0.6981132075471698,0.0,53,0,16,0,37,ckp/ATAC_seed1000/best_ATAC_seed1000_fold4_AUC_0.850.pth,all_samples,ATAC_seed1000_fold4_all_samples,2025-10-13 01:52:06
|
| 49 |
+
ATAC,1000,4,common_samples,0.9325806101400499,0.8796296296296297,0.7272727272727273,1.0,0.8421052631578948,0.7272727272727273,0.0,33,0,9,0,24,ckp/ATAC_seed1000/best_ATAC_seed1000_fold4_AUC_0.850.pth,all_samples,ATAC_seed1000_fold4_common_samples,2025-10-13 01:52:06
|
| 50 |
+
ATAC,1000,5,all_samples,0.9166666666666666,0.7964912280701755,0.875,0.9210526315789473,0.8974358974358975,0.8490566037735849,0.6666666666666666,53,10,5,3,35,ckp/ATAC_seed1000/best_ATAC_seed1000_fold5_AUC_0.796.pth,all_samples,ATAC_seed1000_fold5_all_samples,2025-10-13 01:52:06
|
| 51 |
+
ATAC,1000,5,common_samples,0.9166666666666666,0.75,0.8888888888888888,1.0,0.9411764705882353,0.90625,0.625,32,5,3,0,24,ckp/ATAC_seed1000/best_ATAC_seed1000_fold5_AUC_0.796.pth,all_samples,ATAC_seed1000_fold5_common_samples,2025-10-13 01:52:06
|
| 52 |
+
Flux,0,1,all_samples,0.8590538365883988,0.8308255269320842,0.8503401360544217,0.8928571428571429,0.8710801393728222,0.8159203980099502,0.639344262295082,402,78,44,30,250,ckp/Flux_seed0/best_Flux_seed0_fold1_AUC_0.831.pth,all_samples,Flux_seed0_fold1_all_samples,2025-10-13 01:52:06
|
| 53 |
+
Flux,0,1,common_samples,0.8590538365883988,0.9114583333333333,0.88,0.9166666666666666,0.8979591836734694,0.84375,0.625,32,5,3,2,22,ckp/Flux_seed0/best_Flux_seed0_fold1_AUC_0.831.pth,all_samples,Flux_seed0_fold1_common_samples,2025-10-13 01:52:06
|
| 54 |
+
Flux,0,2,all_samples,0.8484016308656146,0.8173208613806568,0.8661710037174721,0.8351254480286738,0.8503649635036497,0.7960199004975125,0.7073170731707317,402,87,36,46,233,ckp/Flux_seed0/best_Flux_seed0_fold2_AUC_0.817.pth,all_samples,Flux_seed0_fold2_all_samples,2025-10-13 01:52:06
|
| 55 |
+
Flux,0,2,common_samples,0.8484016308656146,0.7053140096618358,0.8518518518518519,1.0,0.92,0.875,0.5555555555555556,32,5,4,0,23,ckp/Flux_seed0/best_Flux_seed0_fold2_AUC_0.817.pth,all_samples,Flux_seed0_fold2_common_samples,2025-10-13 01:52:06
|
| 56 |
+
Flux,0,3,all_samples,0.8441450496418064,0.8383891365795377,0.8875502008032129,0.7921146953405018,0.8371212121212123,0.7860696517412935,0.7723577235772358,402,95,28,58,221,ckp/Flux_seed0/best_Flux_seed0_fold3_AUC_0.838.pth,all_samples,Flux_seed0_fold3_all_samples,2025-10-13 01:52:06
|
| 57 |
+
Flux,0,3,common_samples,0.8441450496418064,0.9351851851851852,0.9545454545454546,0.875,0.9130434782608695,0.8787878787878788,0.8888888888888888,33,8,1,3,21,ckp/Flux_seed0/best_Flux_seed0_fold3_AUC_0.838.pth,all_samples,Flux_seed0_fold3_common_samples,2025-10-13 01:52:06
|
| 58 |
+
Flux,0,4,all_samples,0.8507207717464783,0.8417650860802632,0.7575757575757576,0.985663082437276,0.8566978193146417,0.770573566084788,0.2786885245901639,401,34,88,4,275,ckp/Flux_seed0/best_Flux_seed0_fold4_AUC_0.842.pth,all_samples,Flux_seed0_fold4_all_samples,2025-10-13 01:52:06
|
| 59 |
+
Flux,0,4,common_samples,0.8507207717464783,0.5138888888888888,0.7666666666666667,0.9583333333333334,0.8518518518518519,0.7575757575757576,0.2222222222222222,33,2,7,1,23,ckp/Flux_seed0/best_Flux_seed0_fold4_AUC_0.842.pth,all_samples,Flux_seed0_fold4_common_samples,2025-10-13 01:52:06
|
| 60 |
+
Flux,0,5,all_samples,0.8503663237900353,0.8269874845760621,0.8811475409836066,0.7706093189964157,0.8221797323135754,0.7680798004987531,0.7622950819672131,401,93,29,64,215,ckp/Flux_seed0/best_Flux_seed0_fold5_AUC_0.827.pth,all_samples,Flux_seed0_fold5_all_samples,2025-10-13 01:52:06
|
| 61 |
+
Flux,0,5,common_samples,0.8503663237900353,0.8842592592592593,0.9230769230769231,1.0,0.9600000000000001,0.9393939393939394,0.7777777777777778,33,7,2,0,24,ckp/Flux_seed0/best_Flux_seed0_fold5_AUC_0.827.pth,all_samples,Flux_seed0_fold5_common_samples,2025-10-13 01:52:06
|
| 62 |
+
Flux,6,1,all_samples,0.8451174017994295,0.8394613583138173,0.8727272727272727,0.8571428571428571,0.8648648648648648,0.8134328358208955,0.7131147540983607,402,87,35,40,240,ckp/Flux_seed6/best_Flux_seed6_fold1_AUC_0.839.pth,all_samples,Flux_seed6_fold1_all_samples,2025-10-13 01:52:06
|
| 63 |
+
Flux,6,1,common_samples,0.8451174017994295,0.7708333333333334,0.9166666666666666,0.9166666666666666,0.9166666666666666,0.875,0.75,32,6,2,2,22,ckp/Flux_seed6/best_Flux_seed6_fold1_AUC_0.839.pth,all_samples,Flux_seed6_fold1_common_samples,2025-10-13 01:52:06
|
| 64 |
+
Flux,6,2,all_samples,0.8480199116461893,0.8381560159687618,0.8581818181818182,0.8458781362007168,0.851985559566787,0.7960199004975125,0.6829268292682927,402,84,39,43,236,ckp/Flux_seed6/best_Flux_seed6_fold2_AUC_0.838.pth,all_samples,Flux_seed6_fold2_all_samples,2025-10-13 01:52:06
|
| 65 |
+
Flux,6,2,common_samples,0.8480199116461893,0.9420289855072465,0.9545454545454546,0.9130434782608695,0.9333333333333332,0.90625,0.8888888888888888,32,8,1,2,21,ckp/Flux_seed6/best_Flux_seed6_fold2_AUC_0.838.pth,all_samples,Flux_seed6_fold2_common_samples,2025-10-13 01:52:06
|
| 66 |
+
Flux,6,3,all_samples,0.8609187258450458,0.8010606987790309,0.8680851063829788,0.7311827956989247,0.7937743190661478,0.736318407960199,0.7479674796747967,402,92,31,75,204,ckp/Flux_seed6/best_Flux_seed6_fold3_AUC_0.801.pth,all_samples,Flux_seed6_fold3_all_samples,2025-10-13 01:52:06
|
| 67 |
+
Flux,6,3,common_samples,0.8609187258450458,0.8425925925925927,0.88,0.9166666666666666,0.8979591836734694,0.8484848484848485,0.6666666666666666,33,6,3,2,22,ckp/Flux_seed6/best_Flux_seed6_fold3_AUC_0.801.pth,all_samples,Flux_seed6_fold3_common_samples,2025-10-13 01:52:06
|
| 68 |
+
Flux,6,4,all_samples,0.836520928872892,0.8746988659733239,0.8701754385964913,0.8888888888888888,0.8794326241134752,0.830423940149626,0.6967213114754098,401,85,37,31,248,ckp/Flux_seed6/best_Flux_seed6_fold4_AUC_0.875.pth,all_samples,Flux_seed6_fold4_all_samples,2025-10-13 01:52:06
|
| 69 |
+
Flux,6,4,common_samples,0.836520928872892,0.8472222222222222,0.8518518518518519,0.9583333333333334,0.9019607843137256,0.8484848484848485,0.5555555555555556,33,5,4,1,23,ckp/Flux_seed6/best_Flux_seed6_fold4_AUC_0.875.pth,all_samples,Flux_seed6_fold4_common_samples,2025-10-13 01:52:06
|
| 70 |
+
Flux,6,5,all_samples,0.857159300604754,0.8207297726070863,0.8715953307392996,0.8028673835125448,0.8358208955223881,0.7805486284289277,0.7295081967213115,401,89,33,55,224,ckp/Flux_seed6/best_Flux_seed6_fold5_AUC_0.821.pth,all_samples,Flux_seed6_fold5_all_samples,2025-10-13 01:52:06
|
| 71 |
+
Flux,6,5,common_samples,0.857159300604754,0.7037037037037037,0.8461538461538461,0.9166666666666666,0.8799999999999999,0.8181818181818182,0.5555555555555556,33,5,4,2,22,ckp/Flux_seed6/best_Flux_seed6_fold5_AUC_0.821.pth,all_samples,Flux_seed6_fold5_common_samples,2025-10-13 01:52:06
|
| 72 |
+
Flux,42,1,all_samples,0.8687861165971764,0.8190573770491805,0.8692307692307693,0.8071428571428572,0.837037037037037,0.7810945273631841,0.7213114754098361,402,88,34,54,226,ckp/Flux_seed42/best_Flux_seed42_fold1_AUC_0.819.pth,all_samples,Flux_seed42_fold1_all_samples,2025-10-13 01:52:06
|
| 73 |
+
Flux,42,1,common_samples,0.8687861165971764,0.5885416666666666,0.84,0.875,0.8571428571428572,0.78125,0.5,32,4,4,3,21,ckp/Flux_seed42/best_Flux_seed42_fold1_AUC_0.819.pth,all_samples,Flux_seed42_fold1_common_samples,2025-10-13 01:52:06
|
| 74 |
+
Flux,42,2,all_samples,0.8456517878556535,0.8228866159629339,0.8316498316498316,0.8853046594982079,0.857638888888889,0.7960199004975125,0.5934959349593496,402,73,50,32,247,ckp/Flux_seed42/best_Flux_seed42_fold2_AUC_0.823.pth,all_samples,Flux_seed42_fold2_all_samples,2025-10-13 01:52:06
|
| 75 |
+
Flux,42,2,common_samples,0.8456517878556535,0.8695652173913044,0.8214285714285714,1.0,0.9019607843137255,0.84375,0.4444444444444444,32,4,5,0,23,ckp/Flux_seed42/best_Flux_seed42_fold2_AUC_0.823.pth,all_samples,Flux_seed42_fold2_common_samples,2025-10-13 01:52:06
|
| 76 |
+
Flux,42,3,all_samples,0.846706321526584,0.8512107701722178,0.9,0.7741935483870968,0.8323699421965317,0.7835820895522388,0.8048780487804879,402,99,24,63,216,ckp/Flux_seed42/best_Flux_seed42_fold3_AUC_0.851.pth,all_samples,Flux_seed42_fold3_all_samples,2025-10-13 01:52:06
|
| 77 |
+
Flux,42,3,common_samples,0.846706321526584,0.8287037037037037,0.8636363636363636,0.7916666666666666,0.8260869565217391,0.7575757575757576,0.6666666666666666,33,6,3,5,19,ckp/Flux_seed42/best_Flux_seed42_fold3_AUC_0.851.pth,all_samples,Flux_seed42_fold3_common_samples,2025-10-13 01:52:06
|
| 78 |
+
Flux,42,4,all_samples,0.8438583669815285,0.8358305423350374,0.85,0.8530465949820788,0.851520572450805,0.7930174563591023,0.6557377049180327,401,80,42,41,238,ckp/Flux_seed42/best_Flux_seed42_fold4_AUC_0.836.pth,all_samples,Flux_seed42_fold4_all_samples,2025-10-13 01:52:06
|
| 79 |
+
Flux,42,4,common_samples,0.8438583669815285,0.9259259259259259,0.9166666666666666,0.9166666666666666,0.9166666666666666,0.8787878787878788,0.7777777777777778,33,7,2,2,22,ckp/Flux_seed42/best_Flux_seed42_fold4_AUC_0.836.pth,all_samples,Flux_seed42_fold4_common_samples,2025-10-13 01:52:06
|
| 80 |
+
Flux,42,5,all_samples,0.8543931449034403,0.8448792526000352,0.879245283018868,0.8351254480286738,0.8566176470588235,0.8054862842892768,0.7377049180327869,401,90,32,46,233,ckp/Flux_seed42/best_Flux_seed42_fold5_AUC_0.845.pth,all_samples,Flux_seed42_fold5_all_samples,2025-10-13 01:52:06
|
| 81 |
+
Flux,42,5,common_samples,0.8543931449034403,0.8472222222222222,0.8846153846153846,0.9583333333333334,0.9199999999999999,0.8787878787878788,0.6666666666666666,33,6,3,1,23,ckp/Flux_seed42/best_Flux_seed42_fold5_AUC_0.845.pth,all_samples,Flux_seed42_fold5_common_samples,2025-10-13 01:52:06
|
| 82 |
+
Flux,123,1,all_samples,0.8501755540926047,0.8135831381733022,0.8148148148148148,0.8642857142857143,0.8388214904679375,0.7686567164179104,0.5491803278688525,402,67,55,38,242,ckp/Flux_seed123/best_Flux_seed123_fold1_AUC_0.814.pth,all_samples,Flux_seed123_fold1_all_samples,2025-10-13 01:52:06
|
| 83 |
+
Flux,123,1,common_samples,0.8501755540926047,0.75,0.8214285714285714,0.9583333333333334,0.8846153846153847,0.8125,0.375,32,3,5,1,23,ckp/Flux_seed123/best_Flux_seed123_fold1_AUC_0.814.pth,all_samples,Flux_seed123_fold1_common_samples,2025-10-13 01:52:06
|
| 84 |
+
Flux,123,2,all_samples,0.844562469219883,0.8737943293411429,0.8680555555555556,0.8960573476702509,0.8818342151675486,0.8333333333333334,0.6910569105691057,402,85,38,29,250,ckp/Flux_seed123/best_Flux_seed123_fold2_AUC_0.874.pth,all_samples,Flux_seed123_fold2_all_samples,2025-10-13 01:52:06
|
| 85 |
+
Flux,123,2,common_samples,0.844562469219883,0.8695652173913044,0.875,0.9130434782608695,0.8936170212765957,0.84375,0.6666666666666666,32,6,3,2,21,ckp/Flux_seed123/best_Flux_seed123_fold2_AUC_0.874.pth,all_samples,Flux_seed123_fold2_common_samples,2025-10-13 01:52:06
|
| 86 |
+
Flux,123,3,all_samples,0.8554007319488917,0.8134160911501589,0.8252427184466019,0.9139784946236559,0.8673469387755103,0.8059701492537313,0.5609756097560976,402,69,54,24,255,ckp/Flux_seed123/best_Flux_seed123_fold3_AUC_0.813.pth,all_samples,Flux_seed123_fold3_all_samples,2025-10-13 01:52:06
|
| 87 |
+
Flux,123,3,common_samples,0.8554007319488917,0.8611111111111112,0.8888888888888888,1.0,0.9411764705882353,0.9090909090909091,0.6666666666666666,33,6,3,0,24,ckp/Flux_seed123/best_Flux_seed123_fold3_AUC_0.813.pth,all_samples,Flux_seed123_fold3_common_samples,2025-10-13 01:52:06
|
| 88 |
+
Flux,123,4,all_samples,0.8516598761259202,0.84382161114049,0.8745098039215686,0.7992831541218638,0.8352059925093633,0.7805486284289277,0.7377049180327869,401,90,32,56,223,ckp/Flux_seed123/best_Flux_seed123_fold4_AUC_0.844.pth,all_samples,Flux_seed123_fold4_all_samples,2025-10-13 01:52:06
|
| 89 |
+
Flux,123,4,common_samples,0.8516598761259202,0.625,0.8518518518518519,0.9583333333333334,0.9019607843137256,0.8484848484848485,0.5555555555555556,33,5,4,1,23,ckp/Flux_seed123/best_Flux_seed123_fold4_AUC_0.844.pth,all_samples,Flux_seed123_fold4_common_samples,2025-10-13 01:52:06
|
| 90 |
+
Flux,123,5,all_samples,0.84918970273875,0.8277513367412893,0.8614232209737828,0.8243727598566308,0.8424908424908425,0.7855361596009975,0.6967213114754098,401,85,37,49,230,ckp/Flux_seed123/best_Flux_seed123_fold5_AUC_0.828.pth,all_samples,Flux_seed123_fold5_all_samples,2025-10-13 01:52:06
|
| 91 |
+
Flux,123,5,common_samples,0.84918970273875,0.875,0.9166666666666666,0.9166666666666666,0.9166666666666666,0.8787878787878788,0.7777777777777778,33,7,2,2,22,ckp/Flux_seed123/best_Flux_seed123_fold5_AUC_0.828.pth,all_samples,Flux_seed123_fold5_common_samples,2025-10-13 01:52:06
|
| 92 |
+
Flux,1000,1,all_samples,0.8517207958452198,0.8416861826697892,0.8355263157894737,0.9071428571428571,0.8698630136986301,0.8109452736318408,0.5901639344262295,402,72,50,26,254,ckp/Flux_seed1000/best_Flux_seed1000_fold1_AUC_0.842.pth,all_samples,Flux_seed1000_fold1_all_samples,2025-10-13 01:52:06
|
| 93 |
+
Flux,1000,1,common_samples,0.8517207958452198,0.8645833333333334,0.8888888888888888,1.0,0.9411764705882353,0.90625,0.625,32,5,3,0,24,ckp/Flux_seed1000/best_Flux_seed1000_fold1_AUC_0.842.pth,all_samples,Flux_seed1000_fold1_common_samples,2025-10-13 01:52:06
|
| 94 |
+
Flux,1000,2,all_samples,0.8500182163368504,0.8088702392400268,0.8526315789473684,0.8709677419354839,0.8617021276595744,0.8059701492537313,0.6585365853658537,402,81,42,36,243,ckp/Flux_seed1000/best_Flux_seed1000_fold2_AUC_0.809.pth,all_samples,Flux_seed1000_fold2_all_samples,2025-10-13 01:52:06
|
| 95 |
+
Flux,1000,2,common_samples,0.8500182163368504,0.8599033816425121,0.9166666666666666,0.9565217391304348,0.9361702127659574,0.90625,0.7777777777777778,32,7,2,1,22,ckp/Flux_seed1000/best_Flux_seed1000_fold2_AUC_0.809.pth,all_samples,Flux_seed1000_fold2_common_samples,2025-10-13 01:52:06
|
| 96 |
+
Flux,1000,3,all_samples,0.8456462954927839,0.8495789258967859,0.8769230769230769,0.8172043010752689,0.8460111317254174,0.7935323383084577,0.7398373983739838,402,91,32,51,228,ckp/Flux_seed1000/best_Flux_seed1000_fold3_AUC_0.850.pth,all_samples,Flux_seed1000_fold3_all_samples,2025-10-13 01:52:06
|
| 97 |
+
Flux,1000,3,common_samples,0.8456462954927839,0.8981481481481481,0.8846153846153846,0.9583333333333334,0.9199999999999999,0.8787878787878788,0.6666666666666666,33,6,3,1,23,ckp/Flux_seed1000/best_Flux_seed1000_fold3_AUC_0.850.pth,all_samples,Flux_seed1000_fold3_common_samples,2025-10-13 01:52:06
|
| 98 |
+
Flux,1000,4,all_samples,0.8509071309813093,0.826987484576062,0.8392857142857143,0.8422939068100358,0.8407871198568874,0.7780548628428927,0.6311475409836066,401,77,45,44,235,ckp/Flux_seed1000/best_Flux_seed1000_fold4_AUC_0.827.pth,all_samples,Flux_seed1000_fold4_all_samples,2025-10-13 01:52:06
|
| 99 |
+
Flux,1000,4,common_samples,0.8509071309813093,0.7222222222222222,0.8260869565217391,0.7916666666666666,0.8085106382978724,0.7272727272727273,0.5555555555555556,33,5,4,5,19,ckp/Flux_seed1000/best_Flux_seed1000_fold4_AUC_0.827.pth,all_samples,Flux_seed1000_fold4_common_samples,2025-10-13 01:52:06
|
| 100 |
+
Flux,1000,5,all_samples,0.8567189812361828,0.8422351489511721,0.8478260869565217,0.8387096774193549,0.8432432432432434,0.7830423940149626,0.6557377049180327,401,80,42,45,234,ckp/Flux_seed1000/best_Flux_seed1000_fold5_AUC_0.842.pth,all_samples,Flux_seed1000_fold5_all_samples,2025-10-13 01:52:06
|
| 101 |
+
Flux,1000,5,common_samples,0.8567189812361828,0.7592592592592593,0.8461538461538461,0.9166666666666666,0.8799999999999999,0.8181818181818182,0.5555555555555556,33,5,4,2,22,ckp/Flux_seed1000/best_Flux_seed1000_fold5_AUC_0.842.pth,all_samples,Flux_seed1000_fold5_common_samples,2025-10-13 01:52:06
|
| 102 |
+
RNA,0,1,all_samples,0.9977031672884208,0.9213407494145199,0.8877887788778878,0.9607142857142857,0.9228130360205833,0.8880597014925373,0.7213114754098361,402,88,34,11,269,ckp/RNA_seed0/best_RNA_seed0_fold1_AUC_0.921.pth,all_samples,RNA_seed0_fold1_all_samples,2025-10-13 01:52:06
|
| 103 |
+
RNA,0,1,common_samples,0.9977031672884208,0.9270833333333333,0.92,0.9583333333333334,0.9387755102040817,0.90625,0.75,32,6,2,1,23,ckp/RNA_seed0/best_RNA_seed0_fold1_AUC_0.921.pth,all_samples,RNA_seed0_fold1_common_samples,2025-10-13 01:52:06
|
| 104 |
+
RNA,0,2,all_samples,0.9974844978058011,0.8986799545414809,0.8640776699029126,0.956989247311828,0.9081632653061223,0.8656716417910447,0.6585365853658537,402,81,42,12,267,ckp/RNA_seed0/best_RNA_seed0_fold2_AUC_0.899.pth,all_samples,RNA_seed0_fold2_all_samples,2025-10-13 01:52:06
|
| 105 |
+
RNA,0,2,common_samples,0.9974844978058011,0.748792270531401,0.8214285714285714,1.0,0.9019607843137255,0.84375,0.4444444444444444,32,4,5,0,23,ckp/RNA_seed0/best_RNA_seed0_fold2_AUC_0.899.pth,all_samples,RNA_seed0_fold2_common_samples,2025-10-13 01:52:06
|
| 106 |
+
RNA,0,3,all_samples,0.9918017330235641,0.9314334003555089,0.9190140845070423,0.9354838709677419,0.9271758436944938,0.8980099502487562,0.8130081300813008,402,100,23,18,261,ckp/RNA_seed0/best_RNA_seed0_fold3_AUC_0.931.pth,all_samples,RNA_seed0_fold3_all_samples,2025-10-13 01:52:06
|
| 107 |
+
RNA,0,3,common_samples,0.9918017330235641,0.9490740740740741,0.9583333333333334,0.9583333333333334,0.9583333333333334,0.9393939393939394,0.8888888888888888,33,8,1,1,23,ckp/RNA_seed0/best_RNA_seed0_fold3_AUC_0.931.pth,all_samples,RNA_seed0_fold3_common_samples,2025-10-13 01:52:06
|
| 108 |
+
RNA,0,4,all_samples,0.990707616976961,0.9351019448851285,0.8973509933774835,0.9713261648745519,0.9328743545611016,0.9027431421446384,0.7459016393442623,401,91,31,8,271,ckp/RNA_seed0/best_RNA_seed0_fold4_AUC_0.935.pth,all_samples,RNA_seed0_fold4_all_samples,2025-10-13 01:52:06
|
| 109 |
+
RNA,0,4,common_samples,0.990707616976961,0.6481481481481481,0.8518518518518519,0.9583333333333334,0.9019607843137256,0.8484848484848485,0.5555555555555556,33,5,4,1,23,ckp/RNA_seed0/best_RNA_seed0_fold4_AUC_0.935.pth,all_samples,RNA_seed0_fold4_common_samples,2025-10-13 01:52:06
|
| 110 |
+
RNA,0,5,all_samples,0.9906966546690297,0.9159175039661555,0.8770226537216829,0.9713261648745519,0.9217687074829931,0.885286783042394,0.6885245901639344,401,84,38,8,271,ckp/RNA_seed0/best_RNA_seed0_fold5_AUC_0.916.pth,all_samples,RNA_seed0_fold5_all_samples,2025-10-13 01:52:06
|
| 111 |
+
RNA,0,5,common_samples,0.9906966546690297,0.9351851851851851,0.8888888888888888,1.0,0.9411764705882353,0.9090909090909091,0.6666666666666666,33,6,3,0,24,ckp/RNA_seed0/best_RNA_seed0_fold5_AUC_0.916.pth,all_samples,RNA_seed0_fold5_common_samples,2025-10-13 01:52:06
|
| 112 |
+
RNA,6,1,all_samples,0.9952088362226611,0.9352751756440281,0.9381818181818182,0.9214285714285714,0.9297297297297297,0.9029850746268657,0.860655737704918,402,105,17,22,258,ckp/RNA_seed6/best_RNA_seed6_fold1_AUC_0.935.pth,all_samples,RNA_seed6_fold1_all_samples,2025-10-13 01:52:06
|
| 113 |
+
RNA,6,1,common_samples,0.9952088362226611,0.890625,0.92,0.9583333333333334,0.9387755102040817,0.90625,0.75,32,6,2,1,23,ckp/RNA_seed6/best_RNA_seed6_fold1_AUC_0.935.pth,all_samples,RNA_seed6_fold1_common_samples,2025-10-13 01:52:06
|
| 114 |
+
RNA,6,2,all_samples,0.9930741304216488,0.9105982457674038,0.8969072164948454,0.9354838709677419,0.9157894736842105,0.8805970149253731,0.7560975609756098,402,93,30,18,261,ckp/RNA_seed6/best_RNA_seed6_fold2_AUC_0.911.pth,all_samples,RNA_seed6_fold2_all_samples,2025-10-13 01:52:06
|
| 115 |
+
RNA,6,2,common_samples,0.9930741304216488,0.9420289855072463,0.92,1.0,0.9583333333333334,0.9375,0.7777777777777778,32,7,2,0,23,ckp/RNA_seed6/best_RNA_seed6_fold2_AUC_0.911.pth,all_samples,RNA_seed6_fold2_common_samples,2025-10-13 01:52:06
|
| 116 |
+
RNA,6,3,all_samples,0.9984328457945892,0.9058775533991898,0.8758169934640523,0.9605734767025089,0.9162393162393162,0.8781094527363185,0.6910569105691057,402,85,38,11,268,ckp/RNA_seed6/best_RNA_seed6_fold3_AUC_0.906.pth,all_samples,RNA_seed6_fold3_all_samples,2025-10-13 01:52:06
|
| 117 |
+
RNA,6,3,common_samples,0.9984328457945892,0.9166666666666667,0.8571428571428571,1.0,0.923076923076923,0.8787878787878788,0.5555555555555556,33,5,4,0,24,ckp/RNA_seed6/best_RNA_seed6_fold3_AUC_0.906.pth,all_samples,RNA_seed6_fold3_common_samples,2025-10-13 01:52:06
|
| 118 |
+
RNA,6,4,all_samples,0.9914932490453657,0.9473529584581937,0.9206896551724137,0.956989247311828,0.9384885764499121,0.912718204488778,0.8114754098360656,401,99,23,12,267,ckp/RNA_seed6/best_RNA_seed6_fold4_AUC_0.947.pth,all_samples,RNA_seed6_fold4_all_samples,2025-10-13 01:52:06
|
| 119 |
+
RNA,6,4,common_samples,0.9914932490453657,0.7916666666666666,0.8518518518518519,0.9583333333333334,0.9019607843137256,0.8484848484848485,0.5555555555555556,33,5,4,1,23,ckp/RNA_seed6/best_RNA_seed6_fold4_AUC_0.947.pth,all_samples,RNA_seed6_fold4_common_samples,2025-10-13 01:52:06
|
| 120 |
+
RNA,6,5,all_samples,0.9894177187437195,0.9125389270814972,0.9106529209621993,0.9498207885304659,0.9298245614035087,0.9002493765586035,0.7868852459016393,401,96,26,14,265,ckp/RNA_seed6/best_RNA_seed6_fold5_AUC_0.913.pth,all_samples,RNA_seed6_fold5_all_samples,2025-10-13 01:52:06
|
| 121 |
+
RNA,6,5,common_samples,0.9894177187437195,0.7407407407407408,0.88,0.9166666666666666,0.8979591836734694,0.8484848484848485,0.6666666666666666,33,6,3,2,22,ckp/RNA_seed6/best_RNA_seed6_fold5_AUC_0.913.pth,all_samples,RNA_seed6_fold5_common_samples,2025-10-13 01:52:06
|
| 122 |
+
RNA,42,1,all_samples,0.9967787652695487,0.9374414519906323,0.9290780141843972,0.9357142857142857,0.9323843416370108,0.9054726368159204,0.8360655737704918,402,102,20,18,262,ckp/RNA_seed42/best_RNA_seed42_fold1_AUC_0.937.pth,all_samples,RNA_seed42_fold1_all_samples,2025-10-13 01:52:06
|
| 123 |
+
RNA,42,1,common_samples,0.9967787652695487,0.7864583333333333,0.8888888888888888,1.0,0.9411764705882353,0.90625,0.625,32,5,3,0,24,ckp/RNA_seed42/best_RNA_seed42_fold1_AUC_0.937.pth,all_samples,RNA_seed42_fold1_common_samples,2025-10-13 01:52:06
|
| 124 |
+
RNA,42,2,all_samples,0.9959978982558086,0.9107148060727919,0.8786885245901639,0.9605734767025089,0.9178082191780821,0.8805970149253731,0.6991869918699187,402,86,37,11,268,ckp/RNA_seed42/best_RNA_seed42_fold2_AUC_0.911.pth,all_samples,RNA_seed42_fold2_all_samples,2025-10-13 01:52:06
|
| 125 |
+
RNA,42,2,common_samples,0.9959978982558086,0.7729468599033817,0.88,0.9565217391304348,0.9166666666666666,0.875,0.6666666666666666,32,6,3,1,22,ckp/RNA_seed42/best_RNA_seed42_fold2_AUC_0.911.pth,all_samples,RNA_seed42_fold2_common_samples,2025-10-13 01:52:06
|
| 126 |
+
RNA,42,3,all_samples,0.9824317619683164,0.9190197278316868,0.8881578947368421,0.967741935483871,0.9262435677530019,0.8930348258706468,0.7235772357723578,402,89,34,9,270,ckp/RNA_seed42/best_RNA_seed42_fold3_AUC_0.919.pth,all_samples,RNA_seed42_fold3_all_samples,2025-10-13 01:52:06
|
| 127 |
+
RNA,42,3,common_samples,0.9824317619683164,0.7824074074074074,0.8888888888888888,1.0,0.9411764705882353,0.9090909090909091,0.6666666666666666,33,6,3,0,24,ckp/RNA_seed42/best_RNA_seed42_fold3_AUC_0.919.pth,all_samples,RNA_seed42_fold3_common_samples,2025-10-13 01:52:06
|
| 128 |
+
RNA,42,4,all_samples,0.993795333710924,0.9258769610435396,0.9,0.967741935483871,0.9326424870466321,0.9027431421446384,0.7540983606557377,401,92,30,9,270,ckp/RNA_seed42/best_RNA_seed42_fold4_AUC_0.926.pth,all_samples,RNA_seed42_fold4_all_samples,2025-10-13 01:52:06
|
| 129 |
+
RNA,42,4,common_samples,0.993795333710924,0.9722222222222222,0.9230769230769231,1.0,0.9600000000000001,0.9393939393939394,0.7777777777777778,33,7,2,0,24,ckp/RNA_seed42/best_RNA_seed42_fold4_AUC_0.926.pth,all_samples,RNA_seed42_fold4_common_samples,2025-10-13 01:52:06
|
| 130 |
+
RNA,42,5,all_samples,0.983532786435971,0.9294024325753569,0.9190140845070423,0.9354838709677419,0.9271758436944938,0.8977556109725686,0.8114754098360656,401,99,23,18,261,ckp/RNA_seed42/best_RNA_seed42_fold5_AUC_0.929.pth,all_samples,RNA_seed42_fold5_all_samples,2025-10-13 01:52:06
|
| 131 |
+
RNA,42,5,common_samples,0.983532786435971,0.8611111111111112,0.8888888888888888,1.0,0.9411764705882353,0.9090909090909091,0.6666666666666666,33,6,3,0,24,ckp/RNA_seed42/best_RNA_seed42_fold5_AUC_0.929.pth,all_samples,RNA_seed42_fold5_common_samples,2025-10-13 01:52:06
|
| 132 |
+
RNA,123,1,all_samples,0.9946181698485846,0.9142271662763466,0.8881578947368421,0.9642857142857143,0.9246575342465754,0.8905472636815921,0.7213114754098361,402,88,34,10,270,ckp/RNA_seed123/best_RNA_seed123_fold1_AUC_0.914.pth,all_samples,RNA_seed123_fold1_all_samples,2025-10-13 01:52:06
|
| 133 |
+
RNA,123,1,common_samples,0.9946181698485846,0.8958333333333333,0.92,0.9583333333333334,0.9387755102040817,0.90625,0.75,32,6,2,1,23,ckp/RNA_seed123/best_RNA_seed123_fold1_AUC_0.914.pth,all_samples,RNA_seed123_fold1_common_samples,2025-10-13 01:52:06
|
| 134 |
+
RNA,123,2,all_samples,0.9941085254287247,0.9536381385319229,0.9432624113475178,0.953405017921147,0.948306595365419,0.927860696517413,0.8699186991869918,402,107,16,13,266,ckp/RNA_seed123/best_RNA_seed123_fold2_AUC_0.954.pth,all_samples,RNA_seed123_fold2_all_samples,2025-10-13 01:52:06
|
| 135 |
+
RNA,123,2,common_samples,0.9941085254287247,0.9903381642512077,0.92,1.0,0.9583333333333334,0.9375,0.7777777777777778,32,7,2,0,23,ckp/RNA_seed123/best_RNA_seed123_fold2_AUC_0.954.pth,all_samples,RNA_seed123_fold2_common_samples,2025-10-13 01:52:06
|
| 136 |
+
RNA,123,3,all_samples,0.9964592567368408,0.9091412419500539,0.865814696485623,0.9713261648745519,0.9155405405405406,0.8756218905472637,0.6585365853658537,402,81,42,8,271,ckp/RNA_seed123/best_RNA_seed123_fold3_AUC_0.909.pth,all_samples,RNA_seed123_fold3_all_samples,2025-10-13 01:52:06
|
| 137 |
+
RNA,123,3,common_samples,0.9964592567368408,0.8101851851851852,0.8888888888888888,1.0,0.9411764705882353,0.9090909090909091,0.6666666666666666,33,6,3,0,24,ckp/RNA_seed123/best_RNA_seed123_fold3_AUC_0.909.pth,all_samples,RNA_seed123_fold3_common_samples,2025-10-13 01:52:06
|
| 138 |
+
RNA,123,4,all_samples,0.9955977198399504,0.9346025030847877,0.9054054054054054,0.9605734767025089,0.9321739130434782,0.9027431421446384,0.7704918032786885,401,94,28,11,268,ckp/RNA_seed123/best_RNA_seed123_fold4_AUC_0.935.pth,all_samples,RNA_seed123_fold4_all_samples,2025-10-13 01:52:06
|
| 139 |
+
RNA,123,4,common_samples,0.9955977198399504,0.7453703703703705,0.8275862068965517,1.0,0.9056603773584906,0.8484848484848485,0.4444444444444444,33,4,5,0,24,ckp/RNA_seed123/best_RNA_seed123_fold4_AUC_0.935.pth,all_samples,RNA_seed123_fold4_common_samples,2025-10-13 01:52:06
|
| 140 |
+
RNA,123,5,all_samples,0.992043191493249,0.9150067571537693,0.92,0.9068100358422939,0.9133574007220215,0.8802992518703242,0.819672131147541,401,100,22,26,253,ckp/RNA_seed123/best_RNA_seed123_fold5_AUC_0.915.pth,all_samples,RNA_seed123_fold5_all_samples,2025-10-13 01:52:06
|
| 141 |
+
RNA,123,5,common_samples,0.992043191493249,0.9074074074074074,0.9230769230769231,1.0,0.9600000000000001,0.9393939393939394,0.7777777777777778,33,7,2,0,24,ckp/RNA_seed123/best_RNA_seed123_fold5_AUC_0.915.pth,all_samples,RNA_seed123_fold5_common_samples,2025-10-13 01:52:06
|
| 142 |
+
RNA,1000,1,all_samples,0.9847469095164948,0.93711943793911,0.9146757679180887,0.9571428571428572,0.9354275741710297,0.9079601990049752,0.7950819672131147,402,97,25,12,268,ckp/RNA_seed1000/best_RNA_seed1000_fold1_AUC_0.937.pth,all_samples,RNA_seed1000_fold1_all_samples,2025-10-13 01:52:06
|
| 143 |
+
RNA,1000,1,common_samples,0.9847469095164948,0.8020833333333333,0.8846153846153846,0.9583333333333334,0.9199999999999999,0.875,0.625,32,5,3,1,23,ckp/RNA_seed1000/best_RNA_seed1000_fold1_AUC_0.937.pth,all_samples,RNA_seed1000_fold1_common_samples,2025-10-13 01:52:06
|
| 144 |
+
RNA,1000,2,all_samples,0.9944197593246591,0.9213509339394469,0.8918918918918919,0.946236559139785,0.9182608695652175,0.8830845771144279,0.7398373983739838,402,91,32,15,264,ckp/RNA_seed1000/best_RNA_seed1000_fold2_AUC_0.921.pth,all_samples,RNA_seed1000_fold2_all_samples,2025-10-13 01:52:06
|
| 145 |
+
RNA,1000,2,common_samples,0.9944197593246591,0.9323671497584541,0.92,1.0,0.9583333333333334,0.9375,0.7777777777777778,32,7,2,0,23,ckp/RNA_seed1000/best_RNA_seed1000_fold2_AUC_0.921.pth,all_samples,RNA_seed1000_fold2_common_samples,2025-10-13 01:52:06
|
| 146 |
+
RNA,1000,3,all_samples,0.9929423137127823,0.8946586240055949,0.869281045751634,0.953405017921147,0.9094017094017094,0.8681592039800995,0.6747967479674797,402,83,40,13,266,ckp/RNA_seed1000/best_RNA_seed1000_fold3_AUC_0.895.pth,all_samples,RNA_seed1000_fold3_all_samples,2025-10-13 01:52:06
|
| 147 |
+
RNA,1000,3,common_samples,0.9929423137127823,0.8796296296296297,0.8846153846153846,0.9583333333333334,0.9199999999999999,0.8787878787878788,0.6666666666666666,33,6,3,1,23,ckp/RNA_seed1000/best_RNA_seed1000_fold3_AUC_0.895.pth,all_samples,RNA_seed1000_fold3_common_samples,2025-10-13 01:52:06
|
| 148 |
+
RNA,1000,4,all_samples,0.9947234757824348,0.937951701040014,0.9228070175438596,0.942652329749104,0.9326241134751773,0.9052369077306733,0.819672131147541,401,100,22,16,263,ckp/RNA_seed1000/best_RNA_seed1000_fold4_AUC_0.938.pth,all_samples,RNA_seed1000_fold4_all_samples,2025-10-13 01:52:06
|
| 149 |
+
RNA,1000,4,common_samples,0.9947234757824348,0.7685185185185186,0.8571428571428571,1.0,0.923076923076923,0.8787878787878788,0.5555555555555556,33,5,4,0,24,ckp/RNA_seed1000/best_RNA_seed1000_fold4_AUC_0.938.pth,all_samples,RNA_seed1000_fold4_common_samples,2025-10-13 01:52:06
|
| 150 |
+
RNA,1000,5,all_samples,0.9877313503736319,0.9158881250367237,0.9078014184397163,0.9175627240143369,0.9126559714795008,0.8778054862842892,0.7868852459016393,401,96,26,23,256,ckp/RNA_seed1000/best_RNA_seed1000_fold5_AUC_0.916.pth,all_samples,RNA_seed1000_fold5_all_samples,2025-10-13 01:52:06
|
| 151 |
+
RNA,1000,5,common_samples,0.9877313503736319,0.925925925925926,0.8888888888888888,1.0,0.9411764705882353,0.9090909090909091,0.6666666666666666,33,6,3,0,24,ckp/RNA_seed1000/best_RNA_seed1000_fold5_AUC_0.916.pth,all_samples,RNA_seed1000_fold5_common_samples,2025-10-13 01:52:06
|
| 152 |
+
Multi,0,1,all_samples,0.9959861712864237,0.9038052721088435,0.9078947368421053,0.9387755102040817,0.923076923076923,0.8909952606635071,0.78125,422,100,28,18,276,ckp/Multi_seed0/multi_seed0_fold1.pth,all_samples,Multi_seed0_fold1_all_samples,2025-10-13 01:52:06
|
| 153 |
+
Multi,0,1,common_samples,0.9959861712864237,0.8802083333333334,0.9230769230769231,1.0,0.9600000000000001,0.9375,0.75,32,6,2,0,24,ckp/Multi_seed0/multi_seed0_fold1.pth,all_samples,Multi_seed0_fold1_common_samples,2025-10-13 01:52:06
|
| 154 |
+
Multi,0,2,all_samples,0.9977147657221472,0.919226393629124,0.9303135888501742,0.9112627986348123,0.9206896551724137,0.8909952606635071,0.8449612403100775,422,109,20,26,267,ckp/Multi_seed0/multi_seed0_fold2.pth,all_samples,Multi_seed0_fold2_all_samples,2025-10-13 01:52:06
|
| 155 |
+
Multi,0,2,common_samples,0.9977147657221472,0.8985507246376812,0.9166666666666666,0.9565217391304348,0.9361702127659574,0.90625,0.7777777777777778,32,7,2,1,22,ckp/Multi_seed0/multi_seed0_fold2.pth,all_samples,Multi_seed0_fold2_common_samples,2025-10-13 01:52:06
|
| 156 |
+
Multi,0,3,all_samples,0.9947706900404738,0.9427555321390937,0.9249146757679181,0.928082191780822,0.9264957264957264,0.8981042654028436,0.8307692307692308,422,108,22,21,271,ckp/Multi_seed0/multi_seed0_fold3.pth,all_samples,Multi_seed0_fold3_all_samples,2025-10-13 01:52:06
|
| 157 |
+
Multi,0,3,common_samples,0.9947706900404738,0.949074074074074,0.9583333333333334,0.9583333333333334,0.9583333333333334,0.9393939393939394,0.8888888888888888,33,8,1,1,23,ckp/Multi_seed0/multi_seed0_fold3.pth,all_samples,Multi_seed0_fold3_common_samples,2025-10-13 01:52:06
|
| 158 |
+
Multi,0,4,all_samples,0.9942133304230494,0.9401275233484139,0.93,0.9522184300341296,0.9409780775716695,0.9170616113744076,0.8372093023255814,422,108,21,14,279,ckp/Multi_seed0/multi_seed0_fold4.pth,all_samples,Multi_seed0_fold4_all_samples,2025-10-13 01:52:06
|
| 159 |
+
Multi,0,4,common_samples,0.9942133304230494,0.9490740740740742,0.8888888888888888,1.0,0.9411764705882353,0.9090909090909091,0.6666666666666666,33,6,3,0,24,ckp/Multi_seed0/multi_seed0_fold4.pth,all_samples,Multi_seed0_fold4_common_samples,2025-10-13 01:52:06
|
| 160 |
+
Multi,0,5,all_samples,0.9918892372410509,0.8986692065507845,0.871875,0.9522184300341296,0.9102773246329526,0.8696682464454977,0.6821705426356589,422,88,41,14,279,ckp/Multi_seed0/multi_seed0_fold5.pth,all_samples,Multi_seed0_fold5_all_samples,2025-10-13 01:52:06
|
| 161 |
+
Multi,0,5,common_samples,0.9918892372410509,0.8101851851851852,0.8846153846153846,0.9583333333333334,0.9199999999999999,0.8787878787878788,0.6666666666666666,33,6,3,1,23,ckp/Multi_seed0/multi_seed0_fold5.pth,all_samples,Multi_seed0_fold5_common_samples,2025-10-13 01:52:06
|
| 162 |
+
Multi,6,1,all_samples,0.979498089714853,0.9367559523809523,0.9188311688311688,0.9625850340136054,0.9401993355481727,0.9146919431279621,0.8046875,422,103,25,11,283,ckp/Multi_seed6/best_Multi_seed6_fold1_AUC_0.937.pth,all_samples,Multi_seed6_fold1_all_samples,2025-10-13 01:52:06
|
| 163 |
+
Multi,6,1,common_samples,0.979498089714853,0.84375,0.92,0.9583333333333334,0.9387755102040817,0.90625,0.75,32,6,2,1,23,ckp/Multi_seed6/best_Multi_seed6_fold1_AUC_0.937.pth,all_samples,Multi_seed6_fold1_common_samples,2025-10-13 01:52:06
|
| 164 |
+
Multi,6,2,all_samples,0.9946854247691616,0.9230362198058046,0.8814102564102564,0.9385665529010239,0.9090909090909091,0.8696682464454977,0.7131782945736435,422,92,37,18,275,ckp/Multi_seed6/best_Multi_seed6_fold2_AUC_0.923.pth,all_samples,Multi_seed6_fold2_all_samples,2025-10-13 01:52:06
|
| 165 |
+
Multi,6,2,common_samples,0.9946854247691616,0.9710144927536232,0.92,1.0,0.9583333333333334,0.9375,0.7777777777777778,32,7,2,0,23,ckp/Multi_seed6/best_Multi_seed6_fold2_AUC_0.923.pth,all_samples,Multi_seed6_fold2_common_samples,2025-10-13 01:52:06
|
| 166 |
+
Multi,6,3,all_samples,0.9967289913010371,0.9115121180189673,0.8706624605678234,0.9452054794520548,0.9064039408866994,0.8649289099526066,0.6846153846153846,422,89,41,16,276,ckp/Multi_seed6/best_Multi_seed6_fold3_AUC_0.912.pth,all_samples,Multi_seed6_fold3_all_samples,2025-10-13 01:52:06
|
| 167 |
+
Multi,6,3,common_samples,0.9967289913010371,0.9398148148148148,0.9230769230769231,1.0,0.9600000000000001,0.9393939393939394,0.7777777777777778,33,7,2,0,24,ckp/Multi_seed6/best_Multi_seed6_fold3_AUC_0.912.pth,all_samples,Multi_seed6_fold3_common_samples,2025-10-13 01:52:06
|
| 168 |
+
Multi,6,4,all_samples,0.9915419874593222,0.9445987776807683,0.9038461538461539,0.962457337883959,0.9322314049586776,0.9028436018957346,0.7674418604651163,422,99,30,11,282,ckp/Multi_seed6/best_Multi_seed6_fold4_AUC_0.945.pth,all_samples,Multi_seed6_fold4_all_samples,2025-10-13 01:52:06
|
| 169 |
+
Multi,6,4,common_samples,0.9915419874593222,0.875,0.8846153846153846,0.9583333333333334,0.9199999999999999,0.8787878787878788,0.6666666666666666,33,6,3,1,23,ckp/Multi_seed6/best_Multi_seed6_fold4_AUC_0.945.pth,all_samples,Multi_seed6_fold4_common_samples,2025-10-13 01:52:06
|
| 170 |
+
Multi,6,5,all_samples,0.9950029102838849,0.9193851363864857,0.9347826086956522,0.8805460750853242,0.9068541300527242,0.8744075829383886,0.8604651162790697,422,111,18,35,258,ckp/Multi_seed6/best_Multi_seed6_fold5_AUC_0.919.pth,all_samples,Multi_seed6_fold5_all_samples,2025-10-13 01:52:06
|
| 171 |
+
Multi,6,5,common_samples,0.9950029102838849,0.9537037037037037,0.9166666666666666,0.9166666666666666,0.9166666666666666,0.8787878787878788,0.7777777777777778,33,7,2,2,22,ckp/Multi_seed6/best_Multi_seed6_fold5_AUC_0.919.pth,all_samples,Multi_seed6_fold5_common_samples,2025-10-13 01:52:06
|
| 172 |
+
Multi,42,1,all_samples,0.9918781910351219,0.9233896683673469,0.9009584664536742,0.9591836734693877,0.9291598023064251,0.8981042654028436,0.7578125,422,97,31,12,282,ckp/Multi_seed42/best_Multi_seed42_fold1_AUC_0.923.pth,all_samples,Multi_seed42_fold1_all_samples,2025-10-13 01:52:06
|
| 173 |
+
Multi,42,1,common_samples,0.9918781910351219,0.9166666666666666,0.9230769230769231,1.0,0.9600000000000001,0.9375,0.75,32,6,2,0,24,ckp/Multi_seed42/best_Multi_seed42_fold1_AUC_0.923.pth,all_samples,Multi_seed42_fold1_common_samples,2025-10-13 01:52:06
|
| 174 |
+
Multi,42,2,all_samples,0.989594081540863,0.9052570309812948,0.9081632653061225,0.9112627986348123,0.909710391822828,0.8744075829383886,0.7906976744186046,422,102,27,26,267,ckp/Multi_seed42/best_Multi_seed42_fold2_AUC_0.905.pth,all_samples,Multi_seed42_fold2_all_samples,2025-10-13 01:52:06
|
| 175 |
+
Multi,42,2,common_samples,0.989594081540863,0.9227053140096619,0.9130434782608695,0.9130434782608695,0.9130434782608695,0.875,0.7777777777777778,32,7,2,2,21,ckp/Multi_seed42/best_Multi_seed42_fold2_AUC_0.905.pth,all_samples,Multi_seed42_fold2_common_samples,2025-10-13 01:52:06
|
| 176 |
+
Multi,42,3,all_samples,0.9926667163277298,0.9284246575342465,0.898360655737705,0.9383561643835616,0.9179229480737019,0.8838862559241706,0.7615384615384615,422,99,31,18,274,ckp/Multi_seed42/best_Multi_seed42_fold3_AUC_0.928.pth,all_samples,Multi_seed42_fold3_all_samples,2025-10-13 01:52:06
|
| 177 |
+
Multi,42,3,common_samples,0.9926667163277298,0.9212962962962963,0.8846153846153846,0.9583333333333334,0.9199999999999999,0.8787878787878788,0.6666666666666666,33,6,3,1,23,ckp/Multi_seed42/best_Multi_seed42_fold3_AUC_0.928.pth,all_samples,Multi_seed42_fold3_common_samples,2025-10-13 01:52:06
|
| 178 |
+
Multi,42,4,all_samples,0.9914667500066143,0.9289361589544144,0.9201388888888888,0.9044368600682594,0.9122203098106714,0.8791469194312796,0.8217054263565892,422,106,23,28,265,ckp/Multi_seed42/best_Multi_seed42_fold4_AUC_0.929.pth,all_samples,Multi_seed42_fold4_all_samples,2025-10-13 01:52:06
|
| 179 |
+
Multi,42,4,common_samples,0.9914667500066143,0.949074074074074,0.8888888888888888,1.0,0.9411764705882353,0.9090909090909091,0.6666666666666666,33,6,3,0,24,ckp/Multi_seed42/best_Multi_seed42_fold4_AUC_0.929.pth,all_samples,Multi_seed42_fold4_common_samples,2025-10-13 01:52:06
|
| 180 |
+
Multi,42,5,all_samples,0.9991550255311269,0.9331428420244993,0.9105960264900662,0.9385665529010239,0.9243697478991597,0.8933649289099526,0.7906976744186046,422,102,27,18,275,ckp/Multi_seed42/best_Multi_seed42_fold5_AUC_0.933.pth,all_samples,Multi_seed42_fold5_all_samples,2025-10-13 01:52:06
|
| 181 |
+
Multi,42,5,common_samples,0.9991550255311269,0.9722222222222222,0.8888888888888888,1.0,0.9411764705882353,0.9090909090909091,0.6666666666666666,33,6,3,0,24,ckp/Multi_seed42/best_Multi_seed42_fold5_AUC_0.933.pth,all_samples,Multi_seed42_fold5_common_samples,2025-10-13 01:52:06
|
| 182 |
+
Multi,123,1,all_samples,0.9849753967165891,0.9181813350340136,0.8711656441717791,0.9659863945578231,0.9161290322580644,0.8767772511848341,0.671875,422,86,42,10,284,ckp/Multi_seed123/best_Multi_seed123_fold1_AUC_0.918.pth,all_samples,Multi_seed123_fold1_all_samples,2025-10-13 01:52:06
|
| 183 |
+
Multi,123,1,common_samples,0.9849753967165891,0.75,0.875,0.875,0.875,0.8125,0.625,32,5,3,3,21,ckp/Multi_seed123/best_Multi_seed123_fold1_AUC_0.918.pth,all_samples,Multi_seed123_fold1_common_samples,2025-10-13 01:52:06
|
| 184 |
+
Multi,123,2,all_samples,0.9950343281212795,0.9552080852977749,0.9096774193548387,0.962457337883959,0.9353233830845771,0.9075829383886256,0.7829457364341085,422,101,28,11,282,ckp/Multi_seed123/best_Multi_seed123_fold2_AUC_0.955.pth,all_samples,Multi_seed123_fold2_all_samples,2025-10-13 01:52:06
|
| 185 |
+
Multi,123,2,common_samples,0.9950343281212795,0.9903381642512077,0.92,1.0,0.9583333333333334,0.9375,0.7777777777777778,32,7,2,0,23,ckp/Multi_seed123/best_Multi_seed123_fold2_AUC_0.955.pth,all_samples,Multi_seed123_fold2_common_samples,2025-10-13 01:52:06
|
| 186 |
+
Multi,123,3,all_samples,0.9933106547811189,0.9123551106427819,0.9,0.9246575342465754,0.9121621621621622,0.8767772511848341,0.7692307692307693,422,100,30,22,270,ckp/Multi_seed123/best_Multi_seed123_fold3_AUC_0.912.pth,all_samples,Multi_seed123_fold3_all_samples,2025-10-13 01:52:06
|
| 187 |
+
Multi,123,3,common_samples,0.9933106547811189,0.8611111111111112,0.92,0.9583333333333334,0.9387755102040817,0.9090909090909091,0.7777777777777778,33,7,2,1,23,ckp/Multi_seed123/best_Multi_seed123_fold3_AUC_0.912.pth,all_samples,Multi_seed123_fold3_common_samples,2025-10-13 01:52:06
|
| 188 |
+
Multi,123,4,all_samples,0.9793932058099849,0.9063682302828268,0.8892405063291139,0.9590443686006825,0.922824302134647,0.8886255924170616,0.7286821705426356,422,94,35,12,281,ckp/Multi_seed123/best_Multi_seed123_fold4_AUC_0.906.pth,all_samples,Multi_seed123_fold4_all_samples,2025-10-13 01:52:06
|
| 189 |
+
Multi,123,4,common_samples,0.9793932058099849,0.8935185185185185,0.8275862068965517,1.0,0.9056603773584906,0.8484848484848485,0.4444444444444444,33,4,5,0,24,ckp/Multi_seed123/best_Multi_seed123_fold4_AUC_0.906.pth,all_samples,Multi_seed123_fold4_common_samples,2025-10-13 01:52:06
|
| 190 |
+
Multi,123,5,all_samples,0.9993914860967801,0.9235918194565705,0.9161073825503355,0.931740614334471,0.9238578680203046,0.8933649289099526,0.8062015503875969,422,104,25,20,273,ckp/Multi_seed123/best_Multi_seed123_fold5_AUC_0.924.pth,all_samples,Multi_seed123_fold5_all_samples,2025-10-13 01:52:06
|
| 191 |
+
Multi,123,5,common_samples,0.9993914860967801,0.9537037037037036,0.9230769230769231,1.0,0.9600000000000001,0.9393939393939394,0.7777777777777778,33,7,2,0,24,ckp/Multi_seed123/best_Multi_seed123_fold5_AUC_0.924.pth,all_samples,Multi_seed123_fold5_common_samples,2025-10-13 01:52:06
|
| 192 |
+
Multi,1000,1,all_samples,0.9992913857950106,0.9387223639455782,0.9254237288135593,0.9285714285714286,0.9269949066213922,0.8981042654028436,0.828125,422,106,22,21,273,ckp/Multi_seed1000/best_Multi_seed1000_fold1_AUC_0.939.pth,all_samples,Multi_seed1000_fold1_all_samples,2025-10-13 01:52:06
|
| 193 |
+
Multi,1000,1,common_samples,0.9992913857950106,0.875,0.92,0.9583333333333334,0.9387755102040817,0.90625,0.75,32,6,2,1,23,ckp/Multi_seed1000/best_Multi_seed1000_fold1_AUC_0.939.pth,all_samples,Multi_seed1000_fold1_common_samples,2025-10-13 01:52:06
|
| 194 |
+
Multi,1000,2,all_samples,0.9862505622139324,0.9068444585549118,0.871875,0.9522184300341296,0.9102773246329526,0.8696682464454977,0.6821705426356589,422,88,41,14,279,ckp/Multi_seed1000/best_Multi_seed1000_fold2_AUC_0.907.pth,all_samples,Multi_seed1000_fold2_all_samples,2025-10-13 01:52:06
|
| 195 |
+
Multi,1000,2,common_samples,0.9862505622139324,0.9855072463768116,0.9166666666666666,0.9565217391304348,0.9361702127659574,0.90625,0.7777777777777778,32,7,2,1,22,ckp/Multi_seed1000/best_Multi_seed1000_fold2_AUC_0.907.pth,all_samples,Multi_seed1000_fold2_common_samples,2025-10-13 01:52:06
|
| 196 |
+
Multi,1000,3,all_samples,0.9946299836946175,0.8979847207586934,0.8996655518394648,0.9212328767123288,0.910321489001692,0.8744075829383886,0.7692307692307693,422,100,30,23,269,ckp/Multi_seed1000/best_Multi_seed1000_fold3_AUC_0.898.pth,all_samples,Multi_seed1000_fold3_all_samples,2025-10-13 01:52:06
|
| 197 |
+
Multi,1000,3,common_samples,0.9946299836946175,0.9351851851851851,0.92,0.9583333333333334,0.9387755102040817,0.9090909090909091,0.7777777777777778,33,7,2,1,23,ckp/Multi_seed1000/best_Multi_seed1000_fold3_AUC_0.898.pth,all_samples,Multi_seed1000_fold3_common_samples,2025-10-13 01:52:06
|
| 198 |
+
Multi,1000,4,all_samples,0.9942736857422546,0.9377463819879884,0.9261744966442953,0.9419795221843004,0.934010152284264,0.9075829383886256,0.8294573643410853,422,107,22,17,276,ckp/Multi_seed1000/best_Multi_seed1000_fold4_AUC_0.938.pth,all_samples,Multi_seed1000_fold4_all_samples,2025-10-13 01:52:06
|
| 199 |
+
Multi,1000,4,common_samples,0.9942736857422546,0.962962962962963,0.9230769230769231,1.0,0.9600000000000001,0.9393939393939394,0.7777777777777778,33,7,2,0,24,ckp/Multi_seed1000/best_Multi_seed1000_fold4_AUC_0.938.pth,all_samples,Multi_seed1000_fold4_common_samples,2025-10-13 01:52:06
|
| 200 |
+
Multi,1000,5,all_samples,0.997084755403868,0.9168187951424714,0.9090909090909091,0.9215017064846417,0.9152542372881356,0.8815165876777251,0.7906976744186046,422,102,27,23,270,ckp/Multi_seed1000/best_Multi_seed1000_fold5_AUC_0.917.pth,all_samples,Multi_seed1000_fold5_all_samples,2025-10-13 01:52:06
|
| 201 |
+
Multi,1000,5,common_samples,0.997084755403868,0.9398148148148149,0.9230769230769231,1.0,0.9600000000000001,0.9393939393939394,0.7777777777777778,33,7,2,0,24,ckp/Multi_seed1000/best_Multi_seed1000_fold5_AUC_0.917.pth,all_samples,Multi_seed1000_fold5_common_samples,2025-10-13 01:52:06
|
analysis/re_all_48.tsv
ADDED
|
@@ -0,0 +1,145 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Category Term Count % PValue Genes List Total Pop Hits Pop Total Fold Enrichment Bonferroni Benjamini FDR
|
| 2 |
+
GOTERM_CC_DIRECT GO:0005615~extracellular space 16 34.04255319148936 4.663616024540596E-8 "FGB, SERPINB11, IGFBP4, IGFBP2, APOA1, NDNF, MSLN, KRT18, S100A16, S100A13, TIMP3, DPEP1, APOE, ANGPTL4, APOB, RAMP1" 47 1809 29722 5.593215953330275 6.0626825979515075E-6 6.062700831902774E-6 5.269886107730873E-6
|
| 3 |
+
GOTERM_BP_DIRECT GO:0071402~cellular response to lipoprotein particle stimulus 3 6.382978723404255 1.3431633526269892E-5 "APOA1, APOE, APOB" 46 4 29712 484.4347826086956 0.006680019 0.003278001 0.003231925
|
| 4 |
+
GOTERM_BP_DIRECT GO:0070328~triglyceride homeostasis 4 8.510638298 1.7157082277104606E-5 "HNF4A, APOA1, ANGPTL4, APOE" 46 33 29712 78.29249011857706 0.008524913 0.003278001 0.003231925
|
| 5 |
+
GOTERM_BP_DIRECT GO:0042632~cholesterol homeostasis 5 10.638297872340425 2.2737647712678613E-5 "HNF4A, EPHX2, APOA1, APOE, APOB" 46 109 29712 29.629038691663343 0.011282089659063299 0.003278001 0.003231925
|
| 6 |
+
GOTERM_BP_DIRECT GO:0006629~lipid metabolic process 6 12.76595744680851 2.632932918372177E-5 "HNF4A, MGST2, DPEP1, ANGPTL4, APOE, APOB" 46 232 29712 16.70464767616192 0.013052574785678228 0.003278001 0.003231925
|
| 7 |
+
GOTERM_BP_DIRECT GO:0042158~lipoprotein biosynthetic process 3 6.382978723404255 3.3514330785070164E-5 "APOA1, APOE, APOB" 46 6 29712 322.9565217391304 0.016584862706917503 0.003338027 0.003291107
|
| 8 |
+
REACTOME_PATHWAY R-MMU-381426~Regulation of IGF transport and uptake by IGFBPs 6 12.76595744680851 4.499995756510954E-5 "IGFBP4, IGFBP2, APOA1, APOE, APOB, MSLN" 32 121 9277 14.37551652892562 0.006682715 0.006749994 0.006164994
|
| 9 |
+
GOTERM_CC_DIRECT GO:0034363~intermediate-density lipoprotein particle 3 6.382978723404255 4.8967094457451444E-5 "APOA1, APOE, APOB" 47 7 29722 271.02127659574467 0.006345659 0.003182861 0.002766641
|
| 10 |
+
REACTOME_PATHWAY R-MMU-8963899~Plasma lipoprotein remodeling 4 8.510638298 9.360819206260126E-5 "APOA1, ANGPTL4, APOE, APOB" 32 27 9277 42.949074074074076 0.013851447007621642 0.007020614 0.006412161
|
| 11 |
+
GOTERM_CC_DIRECT GO:0016324~apical plasma membrane 7 14.893617021276595 9.972581619651291E-5 "UPK1B, IGFBP2, DPEP1, CD9, SORBS2, ATP1B1, EZR" 47 486 29722 9.108396812888538 0.012881318986070145 0.004321452 0.003756339
|
| 12 |
+
KEGG_PATHWAY mmu04979:Cholesterol metabolism 4 8.510638298 1.6730513704172898E-4 "APOA1, ANGPTL4, APOE, APOB" 22 50 9565 34.78181818181818 0.016592712395294584 0.016730514 0.016563208567131168
|
| 13 |
+
GOTERM_CC_DIRECT GO:0042627~chylomicron 3 6.382978723404255 1.8080415559233024E-4 "APOA1, APOE, APOB" 47 13 29722 145.9345335515548 0.023232536 0.005876135 0.005107717
|
| 14 |
+
GOTERM_CC_DIRECT GO:0034362~low-density lipoprotein particle 3 6.382978723404255 2.4291054284262555E-4 "APOA1, APOE, APOB" 47 15 29722 126.47659574468085 0.031088698 0.006315674 0.005489778
|
| 15 |
+
REACTOME_PATHWAY R-MMU-975634~Retinoid metabolism and transport 4 8.510638298 3.5513433187466735E-4 "RBP1, APOA1, APOE, APOB" 32 42 9277 27.610119047619047 0.0515483 0.010937765361186881 0.009989826
|
| 16 |
+
GOTERM_CC_DIRECT GO:0005902~microvillus 4 8.510638298 4.213662153132071E-4 "FOXA1, STARD10, CALML4, EZR" 47 94 29722 26.909913988229967 0.053315266 0.009129601 0.00793573
|
| 17 |
+
REACTOME_PATHWAY R-MMU-6806667~Metabolism of fat-soluble vitamins 4 8.510638298 4.6536661052986125E-4 "RBP1, APOA1, APOE, APOB" 32 46 9277 25.209239130434785 0.067005305 0.010937765361186881 0.009989826
|
| 18 |
+
REACTOME_PATHWAY R-MMU-3000480~Scavenging by Class A Receptors 3 6.382978723404255 4.782798164994853E-4 "APOA1, APOE, APOB" 32 10 9277 86.971875 0.06879957 0.010937765361186881 0.009989826
|
| 19 |
+
REACTOME_PATHWAY R-MMU-8957275~Post-translational protein phosphorylation 5 10.638297872340425 5.445284931620396E-4 "IGFBP4, APOA1, APOE, APOB, MSLN" 32 115 9277 12.60461956521739 0.077950937 0.010937765361186881 0.009989826
|
| 20 |
+
REACTOME_PATHWAY R-MMU-8963888~Chylomicron assembly 3 6.382978723404255 5.83347485929967E-4 "APOA1, APOE, APOB" 32 11 9277 79.06534091 0.083271695 0.010937765361186881 0.009989826
|
| 21 |
+
REACTOME_PATHWAY R-MMU-8963901~Chylomicron remodeling 3 6.382978723404255 5.83347485929967E-4 "APOA1, APOE, APOB" 32 11 9277 79.06534091 0.083271695 0.010937765361186881 0.009989826
|
| 22 |
+
GOTERM_MF_DIRECT GO:0120020~cholesterol transfer activity 3 6.382978723404255 6.391569962462111E-4 "APOA1, APOE, APOB" 46 24 28924 78.59782608695652 0.10356616276729458 0.064222692 0.060091407
|
| 23 |
+
GOTERM_BP_DIRECT GO:0006869~lipid transport 4 8.510638298 7.119332909555852E-4 "STARD10, APOA1, APOE, APOB" 46 115 29712 22.466540642722116 0.29909381494470433 0.059090463 0.058259874309865387
|
| 24 |
+
GOTERM_MF_DIRECT GO:0005319~lipid transporter activity 3 6.382978723404255 7.511425919989355E-4 "APOA1, APOE, APOB" 46 26 28924 72.55183946488295 0.12058085135426433 0.064222692 0.060091407
|
| 25 |
+
GOTERM_CC_DIRECT GO:0034361~very-low-density lipoprotein particle 3 6.382978723404255 8.02467227887457E-4 "APOA1, APOE, APOB" 47 27 29722 70.26477541371158 0.099101445 0.014287577651110425 0.012419202112119061
|
| 26 |
+
GOTERM_CC_DIRECT GO:0071944~cell periphery 4 8.510638298 8.792355477606416E-4 "KRT19, KRT18, KRT8, EZR" 47 121 29722 20.905222437137333 0.10805508185429236 0.014287577651110425 0.012419202112119061
|
| 27 |
+
GOTERM_CC_DIRECT GO:0034364~high-density lipoprotein particle 3 6.382978723404255 9.915764026191033E-4 "APOA1, APOE, APOB" 47 30 29722 63.238297872340425 0.12099868401909286 0.014322770260053715 0.012449792610662074
|
| 28 |
+
GOTERM_BP_DIRECT GO:0042157~lipoprotein metabolic process 3 6.382978723404255 0.001014208 "APOA1, APOE, APOB" 46 31 29712 62.507713884992995 0.3973066691945585 0.071532922 0.070527439
|
| 29 |
+
GOTERM_BP_DIRECT GO:0033344~cholesterol efflux 3 6.382978723404255 0.001220065 "APOA1, APOE, APOB" 46 34 29712 56.99232737 0.4562056575077009 0.071532922 0.070527439
|
| 30 |
+
GOTERM_BP_DIRECT GO:2000352~negative regulation of endothelial cell apoptotic process 3 6.382978723404255 0.001292764 "FGB, NDNF, ANGPTL4" 46 35 29712 55.36397516 0.475603075 0.071532922 0.070527439
|
| 31 |
+
REACTOME_PATHWAY "R-MMU-174824~Plasma lipoprotein assembly, remodeling, and clearance" 4 8.510638298 0.001340879 "APOA1, ANGPTL4, APOE, APOB" 32 66 9277 17.570075757575758 0.18120790705085665 0.022347988 0.020411162549310708
|
| 32 |
+
GOTERM_CC_DIRECT GO:0042383~sarcolemma 4 8.510638298 0.001509937 "KRT19, KRT8, ANXA8, ATP1B1" 47 146 29722 17.325561060915184 0.17834954313428197 0.018671911 0.016230199410954965
|
| 33 |
+
GOTERM_CC_DIRECT GO:0005856~cytoskeleton 6 12.76595744680851 0.00158742 "ACTA1, KRT19, KRT18, SORBS2, FRMD4B, EZR" 47 552 29722 6.873728029602219 0.186596997 0.018671911 0.016230199410954965
|
| 34 |
+
GOTERM_CC_DIRECT GO:0005829~cytosol 16 34.04255319148936 0.001723561 "BEX4, GSS, EPHX2, STARD10, APOA1, KRT18, S100A16, HNF4A, RBP1, S100A13, PIR, ANXA8, APOE, 2200002D01RIK, APOB, EZR" 47 4363 29722 2.319075787204783 0.2008896600123118 0.018671911 0.016230199410954965
|
| 35 |
+
GOTERM_MF_DIRECT GO:0042803~protein homodimerization activity 7 14.893617021276595 0.001762493 "S100A16, HNF4A, GSS, EPHX2, S100A13, APOA1, APOE" 46 834 28924 5.277551871546241 0.26040470565960694 0.10046207797952855 0.093999605
|
| 36 |
+
REACTOME_PATHWAY R-MMU-8963898~Plasma lipoprotein assembly 3 6.382978723404255 0.001783723 "APOA1, APOE, APOB" 32 19 9277 45.77467105263158 0.23357015038480022 0.026755851 0.024437010768742114
|
| 37 |
+
GOTERM_BP_DIRECT GO:0055088~lipid homeostasis 3 6.382978723404255 0.003279104 "HNF4A, RBP1, APOE" 46 56 29712 34.60248447204969 0.8058184128002348 0.14845398213697608 0.14636727957681778
|
| 38 |
+
GOTERM_BP_DIRECT GO:0031623~receptor internalization 3 6.382978723404255 0.003279104 "CD9, EZR, RAMP1" 46 56 29712 34.60248447204969 0.8058184128002348 0.14845398213697608 0.14636727957681778
|
| 39 |
+
REACTOME_PATHWAY R-MMU-2187338~Visual phototransduction 4 8.510638298 0.003364668 "RBP1, APOA1, APOE, APOB" 32 91 9277 12.743131868131869 0.3947906556263795 0.045881838 0.041905411759735486
|
| 40 |
+
GOTERM_MF_DIRECT GO:0005102~signaling receptor binding 5 10.638297872340425 0.00384581 "FGB, BEX1, HNF4A, APOA1, APOE" 46 413 28924 7.612380251 0.4825805306247958 0.16440839053758885 0.15383241219891353
|
| 41 |
+
KEGG_PATHWAY mmu05418:Fluid shear stress and atherosclerosis 4 8.510638298 0.003935318 "GSTA4, DUSP1, MGST2, CALML4" 22 148 9565 11.75061425061425 0.32585374062731987 0.19676590780176212 0.1947982487237445
|
| 42 |
+
GOTERM_CC_DIRECT GO:0034365~discoidal high-density lipoprotein particle 2 4.255319149 0.004635999 "APOA1, APOE" 47 3 29722 421.58865248226954 0.4534236118352485 0.046359989 0.040297529
|
| 43 |
+
GOTERM_MF_DIRECT GO:0008289~lipid binding 4 8.510638298 0.005375197 "STARD10, S100A13, APOA1, APOE" 46 228 28924 11.031273836765827 0.6021331379269204 0.1838317259373572 0.17200629327472017
|
| 44 |
+
REACTOME_PATHWAY R-MMU-8964043~Plasma lipoprotein clearance 3 6.382978723404255 0.006343615 "APOA1, APOE, APOB" 32 36 9277 24.158854166666668 0.6125658275230705 0.079295184 0.072422935
|
| 45 |
+
GOTERM_BP_DIRECT GO:0055090~acylglycerol homeostasis 2 4.255319149 0.007550301 "APOA1, APOE" 46 5 29712 258.3652173913043 0.9772205273239976 0.26898569617640505 0.2652047727361745
|
| 46 |
+
GOTERM_BP_DIRECT GO:1902995~positive regulation of phospholipid efflux 2 4.255319149 0.007550301 "APOA1, APOE" 46 5 29712 258.3652173913043 0.9772205273239976 0.26898569617640505 0.2652047727361745
|
| 47 |
+
GOTERM_BP_DIRECT GO:0007596~blood coagulation 3 6.382978723404255 0.007561847 "HNF4A, ANXA8, ANGPTL4" 46 86 29712 22.53185035389282 0.9773523846967987 0.26898569617640505 0.2652047727361745
|
| 48 |
+
GOTERM_MF_DIRECT GO:0060228~phosphatidylcholine-sterol O-acyltransferase activator activity 2 4.255319149 0.007755374 "APOA1, APOE" 46 5 28924 251.51304347826084 0.7358764214212566 0.19401925415601676 0.18153848342083442
|
| 49 |
+
GOTERM_MF_DIRECT GO:0005515~protein binding 17 36.17021276595745 0.007942309 "BEX1, IGFBP4, TMEM176A, KRT8, APOA1, SORBS2, FRMD4B, MSLN, ATP1B1, PLAC8, ACTA1, KRT19, KRT18, HNF4A, CD9, APOE, EZR" 46 5596 28924 1.9101687540790004 0.7442505105038177 0.19401925415601676 0.18153848342083442
|
| 50 |
+
REACTOME_PATHWAY R-MMU-114608~Platelet degranulation 4 8.510638298 0.008332442 "FGB, TIMP3, APOA1, CD9" 32 126 9277 9.203373015873016 0.7125583342810163 0.096143567 0.087811124
|
| 51 |
+
GOTERM_BP_DIRECT GO:0060706~cell differentiation involved in embryonic placenta development 2 4.255319149 0.009053658 "KRT19, KRT8" 46 6 29712 215.30434782608694 0.9893089785197537 0.281795117 0.27783414098302806
|
| 52 |
+
GOTERM_BP_DIRECT GO:0034380~high-density lipoprotein particle assembly 2 4.255319149 0.009053658 "APOA1, APOE" 46 6 29712 215.30434782608694 0.9893089785197537 0.281795117 0.27783414098302806
|
| 53 |
+
GOTERM_CC_DIRECT GO:0009986~cell surface 6 12.76595744680851 0.009063279 "FGB, APOA1, CD9, APOE, MSLN, RAMP1" 47 833 29722 4.554979438583944 0.6938248720271083 0.084159018 0.073153608
|
| 54 |
+
REACTOME_PATHWAY R-MMU-76005~Response to elevated platelet cytosolic Ca2+ 4 8.510638298 0.009268231 "FGB, TIMP3, APOA1, CD9" 32 131 9277 8.852099236641221 0.7502776882727377 0.09930247 0.090696256
|
| 55 |
+
GOTERM_BP_DIRECT GO:0008203~cholesterol metabolic process 3 6.382978723404255 0.010299088947230067 "APOA1, APOE, APOB" 46 101 29712 19.185535944898838 0.9942920382543627 0.29201582770897955 0.28791118756045975
|
| 56 |
+
GOTERM_BP_DIRECT GO:0042159~lipoprotein catabolic process 2 4.255319149 0.010554789 "APOE, APOB" 46 7 29712 184.54658385093165 0.9949825428698913 0.29201582770897955 0.28791118756045975
|
| 57 |
+
GOTERM_CC_DIRECT GO:0005882~intermediate filament 3 6.382978723404255 0.010736695583574613 "KRT19, KRT18, KRT8" 47 101 29722 18.783652833368443 0.7542197600415235 0.093051362 0.080883107
|
| 58 |
+
KEGG_PATHWAY mmu00480:Glutathione metabolism 3 6.382978723404255 0.010985222690159977 "GSTA4, GSS, MGST2" 22 73 9565 17.867372353673723 0.6686567991786101 0.2892295889407606 0.286337293
|
| 59 |
+
KEGG_PATHWAY mmu04971:Gastric acid secretion 3 6.382978723404255 0.011569183557630425 "CALML4, ATP1B1, EZR" 22 75 9565 17.39090909090909 0.6876599324114097 0.2892295889407606 0.286337293
|
| 60 |
+
GOTERM_BP_DIRECT GO:0010628~positive regulation of gene expression 5 10.638297872340425 0.011780649958481128 "ACTA1, HNF4A, EPHX2, APOB, EZR" 46 586 29712 5.511203442647277 0.9972971411699633 0.29339524474205136 0.2892712151974844
|
| 61 |
+
GOTERM_BP_DIRECT GO:0032489~regulation of Cdc42 protein signal transduction 2 4.255319149 0.012053696023619767 "APOA1, APOE" 46 8 29712 161.47826086956522 0.9976452919411164 0.29339524474205136 0.2892712151974844
|
| 62 |
+
GOTERM_MF_DIRECT GO:0031995~insulin-like growth factor II binding 2 4.255319149 0.012380337288475646 "IGFBP4, IGFBP2" 46 8 28924 157.19565217391303 0.8811926104424386 0.2234078151622103 0.20903655219855938
|
| 63 |
+
REACTOME_PATHWAY R-MMU-3000471~Scavenging by Class B Receptors 2 4.255319149 0.013301681639626462 "APOA1, APOB" 32 4 9277 144.953125 0.8640203820754289 0.13301681639626461 0.12148869230858834
|
| 64 |
+
GOTERM_BP_DIRECT GO:1905920~positive regulation of CoA-transferase activity 2 4.255319149 0.013550383 "APOA1, APOE" 46 9 29712 143.53623188405797 0.9988949564325077 0.29339524474205136 0.2892712151974844
|
| 65 |
+
GOTERM_BP_DIRECT GO:1903753~negative regulation of p38MAPK cascade 2 4.255319149 0.013550383 "DUSP1, EZR" 46 9 29712 143.53623188405797 0.9988949564325077 0.29339524474205136 0.2892712151974844
|
| 66 |
+
GOTERM_BP_DIRECT GO:0043567~regulation of insulin-like growth factor receptor signaling pathway 2 4.255319149 0.013550383 "IGFBP4, IGFBP2" 46 9 29712 143.53623188405797 0.9988949564325077 0.29339524474205136 0.2892712151974844
|
| 67 |
+
GOTERM_MF_DIRECT GO:0071813~lipoprotein particle binding 2 4.255319149 0.013917302457922998 "APOA1, APOE" 46 9 28924 139.7294685990338 0.9089707687621291 0.2234078151622103 0.20903655219855938
|
| 68 |
+
GOTERM_MF_DIRECT GO:0042802~identical protein binding 9 19.148936170212767 0.014445321808808755 "S100A16, GSS, MGST2, APOA1, SORBS2, ANGPTL4, APOE, EZR, IFI27L2B" 46 2094 28924 2.7025040488351815 0.9169377358336714 0.2234078151622103 0.20903655219855938
|
| 69 |
+
GOTERM_MF_DIRECT GO:0019904~protein domain specific binding 4 8.510638298 0.015141379454204905 "FOXA1, HNF4A, SORBS2, EZR" 46 334 28924 7.530330643061703 0.9263902562792856 0.2234078151622103 0.20903655219855938
|
| 70 |
+
GOTERM_CC_DIRECT GO:0044297~cell body 3 6.382978723404255 0.015853359283249914 "ACTA1, RBP1, EZR" 47 124 29722 15.299588194921068 0.8747505373027604 0.12880854417640555 0.11196434993795251
|
| 71 |
+
KEGG_PATHWAY mmu03320:PPAR signaling pathway 3 6.382978723404255 0.016028100085975338 "ACSL1, APOA1, ANGPTL4" 22 89 9565 14.655260469867212 0.8012671728700713 0.3205620017195068 0.31735638170231173
|
| 72 |
+
GOTERM_MF_DIRECT GO:0005543~phospholipid binding 3 6.382978723404255 0.016248545272126637 "APOA1, APOE, APOB" 46 125 28924 15.090782608695651 0.93927028 0.2234078151622103 0.20903655219855938
|
| 73 |
+
GOTERM_BP_DIRECT GO:0006750~glutathione biosynthetic process 2 4.255319149 0.016537108 "GSS, MGST2" 46 11 29712 117.43873517786561 0.9997566502791231 0.3232287937051377 0.3186854170867924
|
| 74 |
+
GOTERM_BP_DIRECT GO:0090205~positive regulation of cholesterol metabolic process 2 4.255319149 0.016537108 "APOA1, APOE" 46 11 29712 117.43873517786561 0.9997566502791231 0.3232287937051377 0.3186854170867924
|
| 75 |
+
REACTOME_PATHWAY R-MMU-8964026~Chylomicron clearance 2 4.255319149 0.016600252530351765 "APOE, APOB" 32 5 9277 115.96249999999999 0.9174373218398605 0.1556273674720478 0.142139662
|
| 76 |
+
GOTERM_BP_DIRECT GO:0007219~Notch signaling pathway 3 6.382978723404255 0.016875399 "FOXA1, KRT19, SORBS2" 46 131 29712 14.791901759044142 0.9997950380142201 0.3232287937051377 0.3186854170867924
|
| 77 |
+
GOTERM_CC_DIRECT GO:1903561~extracellular vesicle 2 4.255319149 0.016896115321687888 "APOA1, APOE" 47 11 29722 114.97872340425532 0.8908751686921086 0.12920558775408386 0.11230947243239597
|
| 78 |
+
GOTERM_MF_DIRECT GO:0005520~insulin-like growth factor binding 2 4.255319149 0.01698422 "IGFBP4, IGFBP2" 46 11 28924 114.32411067193675 0.9465627982585717 0.2234078151622103 0.20903655219855938
|
| 79 |
+
GOTERM_BP_DIRECT GO:0043066~negative regulation of apoptotic process 5 10.638297872340425 0.018037932391729884 "PLAC8, KRT18, DUSP1, DPEP1, ANGPTL4" 46 666 29712 4.849197023110066 0.9998864306543682 0.3326996418919067 0.3280231409014583
|
| 80 |
+
GOTERM_CC_DIRECT GO:0005576~extracellular region 8 17.02127659574468 0.018787582756394895 "IGFBP4, IGFBP2, TIMP3, APOA1, NDNF, ANGPTL4, APOE, APOB" 47 1780 29722 2.842170690891704 0.9150438890190896 0.13568809768507425 0.11794426952625683
|
| 81 |
+
GOTERM_BP_DIRECT GO:0034374~low-density lipoprotein particle remodeling 2 4.255319149 0.019514991027873532 "APOE, APOB" 46 13 29712 99.37123745819397 0.9999464156436694 0.3470880547100364 0.342209307
|
| 82 |
+
GOTERM_MF_DIRECT GO:0031994~insulin-like growth factor I binding 2 4.255319149 0.02004181 "IGFBP4, IGFBP2" 46 13 28924 96.73578595317726 0.9686317330025632 0.24479639195869982 0.22904925563387118
|
| 83 |
+
GOTERM_MF_DIRECT GO:0044877~protein-containing complex binding 5 10.638297872340425 0.021788815279992987 "KRT19, HNF4A, KRT8, APOE, EZR" 46 687 28924 4.576292639706347 0.9768804842486836 0.24839249419192005 0.23241402965325852
|
| 84 |
+
GOTERM_BP_DIRECT GO:0033700~phospholipid efflux 2 4.255319149 0.022484057 "APOA1, APOE" 46 15 29712 86.12173913043478 0.9999882022037042 0.386105525 0.38067833891367864
|
| 85 |
+
REACTOME_PATHWAY R-MMU-9709957~Sensory Perception 4 8.510638298 0.022959227 "RBP1, APOA1, APOE, APOB" 32 184 9277 6.302309782608696 0.9685956035032179 0.1967871190120001 0.1797322353642934
|
| 86 |
+
GOTERM_CC_DIRECT GO:0030018~Z disc 3 6.382978723404255 0.023215134 "KRT19, KRT8, SORBS2" 47 152 29722 12.48124300111982 0.9528089266310974 0.15884039360273036 0.13806895751621948
|
| 87 |
+
REACTOME_PATHWAY R-MMU-196854~Metabolism of vitamins and cofactors 4 8.510638298 0.023614454 "RBP1, APOA1, APOE, APOB" 32 186 9277 6.234543010752688 0.9715828910914607 0.1967871190120001 0.1797322353642934
|
| 88 |
+
GOTERM_CC_DIRECT GO:0016010~dystrophin-associated glycoprotein complex 2 4.255319149 0.024483538975750146 "KRT19, KRT8" 47 16 29722 79.04787234042553 0.960143581 0.15914300334237597 0.13833199521298833
|
| 89 |
+
GOTERM_BP_DIRECT GO:0090181~regulation of cholesterol metabolic process 2 4.255319149 0.025444330723180475 "EPHX2, APOE" 46 17 29712 75.98976982097187 0.9999974027158138 0.41397828215724164 0.40815931031969005
|
| 90 |
+
GOTERM_MF_DIRECT GO:0015643~toxic substance binding 2 4.255319149 0.026129118460428674 "GSTA4, EPHX2" 46 17 28924 73.97442455242967 0.9891922296548761 0.27925495354583146 0.26129118460428674
|
| 91 |
+
GOTERM_BP_DIRECT GO:0034375~high-density lipoprotein particle remodeling 2 4.255319149 0.026921178849089584 "APOA1, APOE" 46 18 29712 71.76811594202898 0.9999987813972694 0.41397828215724164 0.40815931031969005
|
| 92 |
+
GOTERM_BP_DIRECT GO:0055091~phospholipid homeostasis 2 4.255319149 0.026921178849089584 "HNF4A, APOA1" 46 18 29712 71.76811594202898 0.9999987813972694 0.41397828215724164 0.40815931031969005
|
| 93 |
+
GOTERM_CC_DIRECT GO:0005737~cytoplasm 17 36.17021276595745 0.027089245251105277 "BEX4, SERPINB11, BEX1, DUSP1, KRT8, FRMD4B, CALML4, SPINT2, ACTA1, KRT18, S100A16, HNF4A, S100A13, PIR, ANXA8, APOB, EZR" 47 6408 29722 1.6776701994846868 0.9718492126874074 0.16769532774493742 0.14576593873213792
|
| 94 |
+
GOTERM_BP_DIRECT GO:0072659~protein localization to plasma membrane 3 6.382978723404255 0.027432296 "ATP1B1, EZR, RAMP1" 46 170 29712 11.398465473145778 0.9999990624333748 0.41397828215724164 0.40815931031969005
|
| 95 |
+
GOTERM_BP_DIRECT GO:0043691~reverse cholesterol transport 2 4.255319149 0.029868313177000345 "APOA1, APOE" 46 20 29712 64.59130434782608 0.9999997317657274 0.4374829400631227 0.43133358146785794
|
| 96 |
+
GOTERM_CC_DIRECT GO:0043034~costamere 2 4.255319149 0.030512205131596002 "KRT19, KRT8" 47 20 29722 63.238297872340425 0.9821965167408635 0.18029939395943093 0.15672178090319766
|
| 97 |
+
GOTERM_MF_DIRECT GO:0043295~glutathione binding 2 4.255319149 0.030670321 "GSS, MGST2" 46 20 28924 62.87826086956521 0.9951400898727204 0.30100541018029503 0.28164248905758604
|
| 98 |
+
GOTERM_BP_DIRECT GO:0097284~hepatocyte apoptotic process 2 4.255319149 0.031338606 "KRT18, KRT8" 46 21 29712 61.51552795031056 0.999999874 0.4459035898618767 0.43963586871923993
|
| 99 |
+
GOTERM_MF_DIRECT GO:0008201~heparin binding 3 6.382978723404255 0.031684780018978426 "NDNF, APOE, APOB" 46 179 28924 10.538256011658975 0.9959368239172737 0.30100541018029503 0.28164248905758604
|
| 100 |
+
REACTOME_PATHWAY R-MMU-8964058~HDL remodeling 2 4.255319149 0.032933683 "APOA1, APOE" 32 10 9277 57.981249999999996 0.9931925837731957 0.2576843899493848 0.23535174282043808
|
| 101 |
+
KEGG_PATHWAY mmu04915:Estrogen signaling pathway 3 6.382978723404255 0.034381445 "KRT19, KRT18, CALML4" 22 134 9565 9.733717775 0.9697615004494204 0.5730240852231167 0.5672938443708855
|
| 102 |
+
GOTERM_CC_DIRECT GO:0005886~plasma membrane 16 34.04255319148936 0.03515941 "MGST2, SORBS2, SPINT2, AIG1, MSLN, ATP1B1, KRT19, S100A16, S100A13, DPEP1, CD9, ANXA8, APOE, 2200002D01RIK, EZR, RAMP1" 47 6054 29722 1.6713127947760933 0.990467133 0.19872709854747708 0.17273970873742236
|
| 103 |
+
REACTOME_PATHWAY R-MMU-2173782~Binding and Uptake of Ligands by Scavenger Receptors 3 6.382978723404255 0.035346598 "APOA1, APOE, APOB" 32 89 9277 9.772120786516854 0.9953083704498519 0.2576843899493848 0.23535174282043808
|
| 104 |
+
REACTOME_PATHWAY R-MMU-6809371~Formation of the cornified envelope 3 6.382978723404255 0.036075815 "KRT19, KRT18, KRT8" 32 90 9277 9.663541666666665 0.9958083145176094 0.2576843899493848 0.23535174282043808
|
| 105 |
+
GOTERM_BP_DIRECT GO:0030301~cholesterol transport 2 4.255319149 0.037198019 "APOA1, APOB" 46 25 29712 51.673043478260865 0.999999994 0.5145725915032304 0.5073396434298918
|
| 106 |
+
GOTERM_MF_DIRECT GO:0061629~RNA polymerase II-specific DNA-binding transcription factor binding 3 6.382978723404255 0.039129978779947616 "FOXA1, BEX1, HNF4A" 46 201 28924 9.384815055158988 0.9989144393685045 0.35216980901952855 0.3295156107785062
|
| 107 |
+
REACTOME_PATHWAY R-MMU-9707616~Heme signaling 2 4.255319149 0.042607394659846705 "APOA1, APOB" 32 13 9277 44.60096153846154 0.9984779803905748 0.2905049635898639 0.26532786674540904
|
| 108 |
+
GOTERM_CC_DIRECT GO:0048471~perinuclear region of cytoplasm 5 10.638297872340425 0.042648795 "KRT18, S100A16, S100A13, SORBS2, EZR" 47 854 29722 3.7024764562260204 0.9965384748157866 0.22850752460083787 0.19862577138380522
|
| 109 |
+
GOTERM_BP_DIRECT GO:0048844~artery morphogenesis 2 4.255319149 0.043022771126108805 "APOE, APOB" 46 29 29712 44.54572713643178 0.9999999997049468 0.5790632438054645 0.5709238006194439
|
| 110 |
+
GOTERM_CC_DIRECT GO:0016327~apicolateral plasma membrane 2 4.255319149 0.043943755 "KRT19, KRT8" 47 29 29722 43.61261922230374 0.9970969964780505 0.22850752460083787 0.19862577138380522
|
| 111 |
+
GOTERM_MF_DIRECT GO:0050750~low-density lipoprotein particle receptor binding 2 4.255319149 0.044170053263193135 "APOE, APOB" 46 29 28924 43.36431784107946 0.9995583398030792 0.37765395540030133 0.3533604261055451
|
| 112 |
+
REACTOME_PATHWAY R-MMU-109582~Hemostasis 6 12.76595744680851 0.047050720844276105 "FGB, TIMP3, APOA1, CD9, APOB, ATP1B1" 32 601 9277 2.8942387687188025 0.9992389764514836 0.30685252724527895 0.2802586415506881
|
| 113 |
+
GOTERM_CC_DIRECT GO:0005635~nuclear envelope 3 6.382978723404255 0.048634109 "MGST2, APOE, IFI27L2B" 47 228 29722 8.320828667413213 0.9984682603589135 0.2431705471484874 0.21137132175214673
|
| 114 |
+
GOTERM_MF_DIRECT GO:0004364~glutathione transferase activity 2 4.255319149 0.050110762 "GSTA4, MGST2" 46 33 28924 38.108036890645586 0.9998479181489063 0.4080447774347403 0.38179628297987395
|
| 115 |
+
GOTERM_BP_DIRECT GO:0010875~positive regulation of cholesterol efflux 2 4.255319149 0.050255276 "APOA1, APOE" 46 34 29712 37.994884910485936 0.9999999999933037 0.6586086111263479 0.6493510603675439
|
| 116 |
+
GOTERM_CC_DIRECT GO:0031528~microvillus membrane 2 4.255319149 0.051326889 "DPEP1, EZR" 47 34 29722 37.19899874843554 0.9989403636102837 0.24712946488060694 0.21481253485775834
|
| 117 |
+
GOTERM_BP_DIRECT GO:0015908~fatty acid transport 2 4.255319149 0.051695349432729076 "ACSL1, RBP1" 46 35 29712 36.90931677018633 0.9999999999968595 0.6601098466025406 0.6508311941402558
|
| 118 |
+
REACTOME_PATHWAY "R-MMU-76002~Platelet activation, signaling and aggregation" 4 8.510638298 0.052913301576377775 "FGB, TIMP3, APOA1, CD9" 32 256 9277 4.529785156 0.9996965613979936 0.3307081348523611 0.30204676316515644
|
| 119 |
+
GOTERM_BP_DIRECT GO:0019915~lipid storage 2 4.255319149 0.053133288 "RBP1, APOA1" 46 36 29712 35.88405797101449 0.9999999999985272 0.6615094382679285 0.6522111128304277
|
| 120 |
+
KEGG_PATHWAY mmu04977:Vitamin digestion and absorption 2 4.255319149 0.055614442714328305 "APOA1, APOB" 22 26 9565 33.44405594405594 0.9967270929710323 0.7530944537961819 0.745563509
|
| 121 |
+
GOTERM_BP_DIRECT GO:0043651~linoleic acid metabolic process 2 4.255319149 0.056002773 "ACSL1, EPHX2" 46 38 29712 33.995423340961096 0.9999999999996761 0.6802288021130457 0.6706673530873604
|
| 122 |
+
GOTERM_MF_DIRECT GO:0140678~molecular function inhibitor activity 2 4.255319149 0.056015364 "BEX4, BEX1" 46 37 28924 33.98824912 0.9999476396717799 0.435392145 0.40738446277442064
|
| 123 |
+
GOTERM_BP_DIRECT GO:0032870~cellular response to hormone stimulus 2 4.255319149 0.057434325 "IGFBP2, RAMP1" 46 39 29712 33.12374581939799 0.9999999999998481 0.6810069952673832 0.671434608
|
| 124 |
+
GOTERM_CC_DIRECT GO:0031012~extracellular matrix 3 6.382978723404255 0.060120618 "TIMP3, NDNF, APOE" 47 257 29722 7.381902475370477 0.9996842263232665 0.27913144230531395 0.2426296383115421
|
| 125 |
+
GOTERM_BP_DIRECT GO:0043407~negative regulation of MAP kinase activity 2 4.255319149 0.060291064 "DUSP1, APOE" 46 41 29712 31.507953340402967 0.9999999999999666 0.6982546459975671 0.688439822
|
| 126 |
+
GOTERM_BP_DIRECT GO:0019216~regulation of lipid metabolic process 2 4.255319149 0.064560305 "HNF4A, ANGPTL4" 46 44 29712 29.359683794466402 0.9999999999999966 0.7307052741402671 0.7204343164716287
|
| 127 |
+
GOTERM_BP_DIRECT GO:0045214~sarcomere organization 2 4.255319149 0.067395924 "KRT19, KRT8" 46 46 29712 28.083175803402646 0.9999999999999992 0.7458482279235058 0.735364417
|
| 128 |
+
REACTOME_PATHWAY R-MMU-5686938~Regulation of TLR by endogenous ligand 2 4.255319149 0.067948365 "FGB, APOB" 32 21 9277 27.610119047619047 0.9999720387926998 0.4076901901820616 0.37235704
|
| 129 |
+
GOTERM_BP_DIRECT GO:0001525~angiogenesis 3 6.382978723404255 0.07060693 "NDNF, ANGPTL4, RAMP1" 46 288 29712 6.728260869565218 0.9999999999999999 0.7643967642661483 0.7536522314350982
|
| 130 |
+
GOTERM_MF_DIRECT GO:0005504~fatty acid binding 2 4.255319149 0.070620302 "HNF4A, RBP1" 46 47 28924 26.756706753006476 0.9999963605648977 0.5250465940313758 0.49127166692994223
|
| 131 |
+
REACTOME_PATHWAY R-MMU-5423646~Aflatoxin activation and detoxification 2 4.255319149 0.074181462 "MGST2, DPEP1" 32 23 9277 25.209239130434785 0.9999897114296403 0.42796997150625177 0.3908792406423766
|
| 132 |
+
GOTERM_BP_DIRECT GO:0033209~tumor necrosis factor-mediated signaling pathway 2 4.255319149 0.077254559 "KRT18, KRT8" 46 53 29712 24.37407711 1 0.8185695796217634 0.8070635815146302
|
| 133 |
+
GOTERM_BP_DIRECT GO:0098869~cellular oxidant detoxification 2 4.255319149 0.080052545 "MGST2, APOE" 46 55 29712 23.487747035573122 1 0.8305451583344289 0.8188708287996076
|
| 134 |
+
KEGG_PATHWAY mmu05417:Lipid and atherosclerosis 3 6.382978723404255 0.081151305 "APOA1, CALML4, APOB" 22 217 9565 6.0106828655215745 0.9997889427455483 0.7530944537961819 0.745563509
|
| 135 |
+
GOTERM_BP_DIRECT GO:0097191~extrinsic apoptotic signaling pathway 2 4.255319149 0.082842236 "KRT18, KRT8" 46 57 29712 22.663615560640732 1 0.8419476214133508 0.8301130162930828
|
| 136 |
+
KEGG_PATHWAY mmu04216:Ferroptosis 2 4.255319149 0.086346434 "ACSL1, GSS" 22 41 9565 21.208425720620845 0.9998802831246382 0.7530944537961819 0.745563509
|
| 137 |
+
KEGG_PATHWAY mmu05207:Chemical carcinogenesis - receptor activation 3 6.382978723404255 0.088337907 "GSTA4, EPHX2, MGST2" 22 228 9565 5.720693779904305 0.9999037524423499 0.7530944537961819 0.745563509
|
| 138 |
+
GOTERM_BP_DIRECT GO:0031397~negative regulation of protein ubiquitination 2 4.255319149 0.088396824 "BEX4, BEX1" 46 61 29712 21.177476835352817 1 0.8804323699067076 0.8680568145064125
|
| 139 |
+
KEGG_PATHWAY mmu05208:Chemical carcinogenesis - reactive oxygen species 3 6.382978723404255 0.089000791 "GSTA4, EPHX2, MGST2" 22 229 9565 5.695712584358872 0.9999105047499052 0.7530944537961819 0.745563509
|
| 140 |
+
KEGG_PATHWAY mmu04975:Fat digestion and absorption 2 4.255319149 0.090371334 "APOA1, APOB" 22 43 9565 20.221987315010573 0.9999230136372668 0.7530944537961819 0.745563509
|
| 141 |
+
GOTERM_BP_DIRECT GO:0032526~response to retinoic acid 2 4.255319149 0.092541166 "IGFBP2, RBP1" 46 64 29712 20.184782608695652 1 0.9036372722225943 0.8909355434965738
|
| 142 |
+
GOTERM_CC_DIRECT GO:0062023~collagen-containing extracellular matrix 3 6.382978723404255 0.092985917 "FGB, S100A13, ANGPTL4" 47 331 29722 5.731567782991578 0.9999969109571304 0.4079055725078437 0.35456407456451033
|
| 143 |
+
GOTERM_CC_DIRECT GO:0005654~nucleoplasm 10 21.27659574468085 0.094132055 "PLAC8, BEX4, FOXA1, HNF4A, RBP1, KRT8, S100A13, PIR, DPEP1, 2200002D01RIK" 47 3572 29722 1.7703890781730243 0.9999973791875169 0.4079055725078437 0.35456407456451033
|
| 144 |
+
GOTERM_BP_DIRECT GO:0009612~response to mechanical stimulus 2 4.255319149 0.095293821 "ACTA1, IGFBP2" 46 66 29712 19.573122529644266 1 0.9083058114261483 0.895538461
|
| 145 |
+
GOTERM_BP_DIRECT GO:0051592~response to calcium ion 2 4.255319149 0.096667084 "FGB, S100A16" 46 67 29712 19.280986372485398 1 0.9083058114261483 0.895538461
|
ckp/Multi_seed0/multi_seed0_fold1.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:25af2f56d064c28a72cd02cbc8dfbbb3e9d5925ed624d781a94e0edfe7720777
|
| 3 |
+
size 6992494
|
ckp/Multi_seed0/multi_seed0_fold2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0e45bb566c1dbca12289c587f48283766e437c483aef5593258a3645d4a10d11
|
| 3 |
+
size 6992494
|
ckp/Multi_seed0/multi_seed0_fold3.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:20bf5b58bbc7a6ffe5e81e270b8773562b4f965eb97850c34ee78f00680021ff
|
| 3 |
+
size 6992494
|
ckp/Multi_seed0/multi_seed0_fold4.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f8c10ffbe33a618aa58f56dd335b0849ebb221594a2774cdbdd43dfc98684352
|
| 3 |
+
size 6992494
|
ckp/Multi_seed0/multi_seed0_fold5.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7d4b2c284762c7f7ed955e5d72b423ffd2affa245fe7ac43d0619d374278d152
|
| 3 |
+
size 6992494
|
config.py
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
MLM_RNA_CKP = "ckp/MLM/MLM_RNA_ValLoss0.4277.pth"
|
| 3 |
+
MLM_ATAC_CKP = "ckp/MLM/MLM_ATAC_ValLoss0.0019.pth"
|
| 4 |
+
MLM_FLUX_CKP = "ckp/MLM/MLM_Flux_ValLoss0.1001.pth"
|
| 5 |
+
SEED = 6
|
data/__init__.py
ADDED
|
File without changes
|
data/create_dataset.py
ADDED
|
@@ -0,0 +1,210 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
from torch.utils.data import DataLoader, TensorDataset, random_split
|
| 3 |
+
from torch.utils.data.dataset import Dataset
|
| 4 |
+
from anndata import AnnData
|
| 5 |
+
import pandas as pd
|
| 6 |
+
import random
|
| 7 |
+
import numpy as np
|
| 8 |
+
|
| 9 |
+
def get_mlm_loaders(train_data, val_data, batch_size=32, batch_key='batch_no', data_dtype=torch.float32):
|
| 10 |
+
if isinstance(train_data, AnnData) and \
|
| 11 |
+
isinstance(val_data, AnnData):
|
| 12 |
+
X_train = torch.tensor(train_data.X.toarray().copy(), dtype=data_dtype)
|
| 13 |
+
b_train = torch.tensor(train_data.obs[batch_key], dtype=torch.int32)
|
| 14 |
+
|
| 15 |
+
X_val = torch.tensor(val_data.X.toarray().copy(), dtype=data_dtype)
|
| 16 |
+
b_val = torch.tensor(val_data.obs[batch_key], dtype=torch.int32)
|
| 17 |
+
|
| 18 |
+
elif isinstance(train_data, tuple) and \
|
| 19 |
+
isinstance(train_data[0], (pd.DataFrame)) and \
|
| 20 |
+
isinstance(val_data, (tuple)) and \
|
| 21 |
+
isinstance(val_data[0], (pd.DataFrame)):
|
| 22 |
+
|
| 23 |
+
X_train = torch.tensor(train_data[0].values, dtype=data_dtype)
|
| 24 |
+
b_train = torch.tensor(train_data[1], dtype=torch.int32)
|
| 25 |
+
|
| 26 |
+
X_val = torch.tensor(val_data[0].values, dtype=data_dtype)
|
| 27 |
+
b_val = torch.tensor(val_data[1], dtype=torch.int32)
|
| 28 |
+
else:
|
| 29 |
+
raise ValueError("Data must be an AnnData object or a tuple of (pd.DataFrame, list).")
|
| 30 |
+
|
| 31 |
+
mlm_train_dataset = TensorDataset(X_train, b_train)
|
| 32 |
+
mlm_train_loader = DataLoader(mlm_train_dataset, batch_size=batch_size, shuffle=True)
|
| 33 |
+
|
| 34 |
+
mlm_val_dataset = TensorDataset(X_val, b_val)
|
| 35 |
+
mlm_val_loader = DataLoader(mlm_val_dataset, batch_size=batch_size, shuffle=False)
|
| 36 |
+
|
| 37 |
+
return mlm_train_loader, mlm_val_loader
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def get_cls_dataset(data, batch_key='batch_no', label_key='label',
|
| 41 |
+
pct_key='pct', filter_pcts=50.0,
|
| 42 |
+
data_dtype=torch.float32):
|
| 43 |
+
|
| 44 |
+
if isinstance(data, AnnData):
|
| 45 |
+
X = torch.tensor(data.X.toarray().copy(), dtype=data_dtype)
|
| 46 |
+
y = torch.tensor([{'reprogramming':1, 'dead-end':0}[i] for i in list(data.obs[label_key])], dtype=torch.float32)
|
| 47 |
+
b = torch.tensor(data.obs[batch_key], dtype=torch.int32)
|
| 48 |
+
pcts = torch.tensor(data.obs[pct_key], dtype=torch.float32)
|
| 49 |
+
|
| 50 |
+
X = X[pcts > filter_pcts]
|
| 51 |
+
y = y[pcts > filter_pcts]
|
| 52 |
+
b = b[pcts > filter_pcts]
|
| 53 |
+
pcts = pcts[pcts > filter_pcts]
|
| 54 |
+
feature_names = data.var_names.tolist()
|
| 55 |
+
|
| 56 |
+
elif isinstance(data, tuple) and isinstance(data[0], pd.DataFrame):
|
| 57 |
+
X = torch.tensor(data[0].values, dtype=data_dtype)
|
| 58 |
+
y = torch.tensor([{'reprogramming':1, 'dead-end':0}[i] for i in list(data[1])], dtype=torch.float32)
|
| 59 |
+
b = torch.tensor(data[2], dtype=torch.int32)
|
| 60 |
+
pcts = torch.tensor(data[3], dtype=torch.float32)
|
| 61 |
+
X = X[pcts > filter_pcts]
|
| 62 |
+
y = y[pcts > filter_pcts]
|
| 63 |
+
b = b[pcts > filter_pcts]
|
| 64 |
+
pcts = pcts[pcts > filter_pcts]
|
| 65 |
+
feature_names = data[0].columns.tolist()
|
| 66 |
+
|
| 67 |
+
else:
|
| 68 |
+
raise ValueError("Data must be an AnnData object or a tuple of (pd.DataFrame, list, list, list).")
|
| 69 |
+
|
| 70 |
+
dataset = TensorDataset(X, b, y)
|
| 71 |
+
|
| 72 |
+
return dataset, pcts, feature_names
|
| 73 |
+
|
| 74 |
+
def get_pair_modalities(adata_rna, adata_atac, flux_df, include_unused_atacs=False, seed=42):
|
| 75 |
+
"""
|
| 76 |
+
Pair RNA, ATAC and Flux data based on clone IDs.
|
| 77 |
+
Args:
|
| 78 |
+
adata_rna (AnnData): RNA data.
|
| 79 |
+
adata_atac (AnnData): ATAC data.
|
| 80 |
+
flux_df (pd.DataFrame): Flux data.
|
| 81 |
+
include_unused_atacs (bool): Include ATAC samples that do not have a paired RNA sample.
|
| 82 |
+
Returns:
|
| 83 |
+
tuple:
|
| 84 |
+
- rna_data (pd.DataFrame): RNA data matched by clone IDs, with rows representing samples and columns representing gene expressions.
|
| 85 |
+
- atac_data (pd.DataFrame): ATAC data matched by clone IDs, with rows representing samples and columns representing chromatin accessibility features.
|
| 86 |
+
- flux_data (pd.DataFrame): Flux data matched by clone IDs, with rows representing samples and columns representing flux measurements.
|
| 87 |
+
|
| 88 |
+
np.array: labels. np.array of labels.
|
| 89 |
+
np.array: batch indices. np.array of batch indices.
|
| 90 |
+
pd.DataFrame: indices. A DataFrame where each row contains the indices of matched RNA and ATAC samples.
|
| 91 |
+
If no match is found for one modality, the corresponding value is None.
|
| 92 |
+
np.array: pcts. Array of dominant fate percentages for each paired sample.
|
| 93 |
+
"""
|
| 94 |
+
|
| 95 |
+
# Create a dictionary to map ATAC clone IDs to their indices
|
| 96 |
+
atac_clone_to_indices = {clone_id: [] for clone_id in adata_atac.obs['clone_id'].unique()}
|
| 97 |
+
adata_atac.obs['index'] = adata_atac.obs.index
|
| 98 |
+
grouped = adata_atac.obs.groupby('clone_id')['index'].apply(list)
|
| 99 |
+
atac_clone_to_indices.update(grouped)
|
| 100 |
+
|
| 101 |
+
rna_data, atac_data, flux_data, labels, batch_ind, indices, pcts = [], [], [], [], [], [], []
|
| 102 |
+
|
| 103 |
+
used_atac_indices = set()
|
| 104 |
+
|
| 105 |
+
for rna_index, row in adata_rna.obs.iterrows():
|
| 106 |
+
clone_id = row['clone_id']
|
| 107 |
+
sibling_atac_indices = [idx for idx in atac_clone_to_indices.get(clone_id, []) if idx not in used_atac_indices]
|
| 108 |
+
|
| 109 |
+
if sibling_atac_indices:
|
| 110 |
+
random.seed(seed)
|
| 111 |
+
atac_index = random.choice(sibling_atac_indices)
|
| 112 |
+
# atac_index = sibling_atac_indices[0]
|
| 113 |
+
|
| 114 |
+
used_atac_indices.add(atac_index)
|
| 115 |
+
|
| 116 |
+
rna_sample = adata_rna[rna_index].X.toarray().flatten() if hasattr(adata_rna[rna_index].X, 'toarray') else adata_rna[rna_index].X
|
| 117 |
+
atac_sample = adata_atac[atac_index].X.toarray().flatten() if hasattr(adata_atac[atac_index].X, 'toarray') else adata_atac[atac_index].X
|
| 118 |
+
else:
|
| 119 |
+
rna_sample = adata_rna[rna_index].X.toarray().flatten() if hasattr(adata_rna[rna_index].X, 'toarray') else adata_rna[rna_index].X
|
| 120 |
+
atac_sample = np.zeros(adata_atac.shape[1]) # Fill with zeros if no ATAC pair is found
|
| 121 |
+
|
| 122 |
+
flux_sample = flux_df.loc[rna_index].values
|
| 123 |
+
|
| 124 |
+
label = row['label']
|
| 125 |
+
bt = row['batch_no']
|
| 126 |
+
pct = row['pct']
|
| 127 |
+
|
| 128 |
+
rna_data.append(rna_sample)
|
| 129 |
+
atac_data.append(atac_sample)
|
| 130 |
+
flux_data.append(flux_sample)
|
| 131 |
+
labels.append(label)
|
| 132 |
+
batch_ind.append(bt)
|
| 133 |
+
pcts.append(pct)
|
| 134 |
+
indices.append((rna_index, atac_index) if sibling_atac_indices else (rna_index, None))
|
| 135 |
+
|
| 136 |
+
|
| 137 |
+
if include_unused_atacs:
|
| 138 |
+
all_atac_indices = set(adata_atac.obs.index)
|
| 139 |
+
unused_atac_indices = sorted(list(all_atac_indices - used_atac_indices))
|
| 140 |
+
unused_atac_samples = adata_atac[list(unused_atac_indices)]
|
| 141 |
+
|
| 142 |
+
for atac_index in unused_atac_indices:
|
| 143 |
+
atac_sample = unused_atac_samples[atac_index].X.toarray().flatten() if hasattr(unused_atac_samples[atac_index].X, 'toarray') else unused_atac_samples[atac_index].X
|
| 144 |
+
rna_sample = np.zeros(adata_rna.shape[1]) # Fill with zeros for RNA
|
| 145 |
+
flux_sample = np.zeros(flux_df.shape[1]) # Fill with zeros for flux
|
| 146 |
+
|
| 147 |
+
label = adata_atac.obs.loc[atac_index, 'label']
|
| 148 |
+
bt = adata_atac.obs.loc[atac_index, 'batch_no']
|
| 149 |
+
pct = adata_atac.obs.loc[atac_index, 'pct']
|
| 150 |
+
|
| 151 |
+
rna_data.append(rna_sample)
|
| 152 |
+
atac_data.append(atac_sample)
|
| 153 |
+
flux_data.append(flux_sample)
|
| 154 |
+
labels.append(label)
|
| 155 |
+
batch_ind.append(bt)
|
| 156 |
+
pcts.append(pct)
|
| 157 |
+
indices.append((None, atac_index))
|
| 158 |
+
|
| 159 |
+
rna_data = pd.DataFrame(rna_data, columns=adata_rna.var_names, index=indices)
|
| 160 |
+
atac_data = pd.DataFrame(atac_data, columns=adata_atac.var_names, index=indices)
|
| 161 |
+
flux_data = pd.DataFrame(flux_data, columns=flux_df.columns, index=indices)
|
| 162 |
+
|
| 163 |
+
X_i = (rna_data, atac_data, flux_data)
|
| 164 |
+
y_i = np.array(labels)
|
| 165 |
+
b_i = np.array(batch_ind)
|
| 166 |
+
indices = pd.DataFrame(np.array(indices), columns=["RNA", "ATAC"])
|
| 167 |
+
pcts = np.array(pcts)
|
| 168 |
+
|
| 169 |
+
return X_i, y_i, b_i, indices, pcts
|
| 170 |
+
|
| 171 |
+
class MultiModalDataset(Dataset):
|
| 172 |
+
"""
|
| 173 |
+
Multi-modal dataset for RNA, ATAC, and Flux data.
|
| 174 |
+
Args:
|
| 175 |
+
X (tuple): Tuple of (RNA, ATAC, Flux) data.
|
| 176 |
+
batch_no (list): List of batch indices.
|
| 177 |
+
labels (list): List of labels.
|
| 178 |
+
"""
|
| 179 |
+
def __init__(self, X, batch_no, labels, df_indics=None, pcts=None, label_names=None):
|
| 180 |
+
if isinstance(X[0], pd.DataFrame):
|
| 181 |
+
self.rna_data = torch.tensor(X[0].values, dtype=torch.int32)
|
| 182 |
+
self.atac_data = torch.tensor(X[1].values, dtype=torch.float32)
|
| 183 |
+
self.flux_data = torch.tensor(X[2].values, dtype=torch.float32)
|
| 184 |
+
else:
|
| 185 |
+
self.rna_data = torch.tensor(X[0], dtype=torch.int32)
|
| 186 |
+
self.atac_data = torch.tensor(X[1], dtype=torch.float32)
|
| 187 |
+
self.flux_data = torch.tensor(X[2], dtype=torch.float32)
|
| 188 |
+
|
| 189 |
+
self.batch_no = torch.tensor(batch_no, dtype=torch.int32)
|
| 190 |
+
self.labels = torch.tensor(labels, dtype=torch.float32)
|
| 191 |
+
self.df_indics = df_indics
|
| 192 |
+
self.pcts = pcts
|
| 193 |
+
self.label_names = label_names
|
| 194 |
+
def __len__(self):
|
| 195 |
+
return len(self.labels)
|
| 196 |
+
|
| 197 |
+
def get_df_indices(self):
|
| 198 |
+
return self.df_indics
|
| 199 |
+
def get_pcts(self):
|
| 200 |
+
return self.pcts
|
| 201 |
+
def get_label_names(self):
|
| 202 |
+
return self.label_names
|
| 203 |
+
def __getitem__(self, idx):
|
| 204 |
+
rna_sample = self.rna_data[idx]
|
| 205 |
+
atac_sample = self.atac_data[idx]
|
| 206 |
+
flux_sample = self.flux_data[idx]
|
| 207 |
+
batch_no = self.batch_no[idx]
|
| 208 |
+
label = self.labels[idx]
|
| 209 |
+
return (rna_sample, atac_sample, flux_sample), batch_no, label
|
| 210 |
+
|
data/datasets/atac_labelled.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:07fb45d79bff16f7fad9d7f9009053629be37d9bfc71dec9bd65a26fc7e74660
|
| 3 |
+
size 2869395
|
data/datasets/clones.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6d64e578c90f4033771f50132897c5f28c6dc7da73bd249cca000adb345bf0a3
|
| 3 |
+
size 5572281
|
data/datasets/flux_labelled_11nov.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b439720848f7b97ee08662603d8cf0cddb9502b973a151280d63b30427075bc3
|
| 3 |
+
size 4726079
|
data/datasets/metabolic_model_metadata.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1cc3192a8fe075f24e2435a3af0d963928af4d32b6576e179e687089a451b257
|
| 3 |
+
size 16995
|
data/datasets/rna_labelled.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c3190a03dd53b4022952e647836021d5752bc5d6c1139eb3fa1f68c6a6b407b8
|
| 3 |
+
size 425798344
|
data/datasets/rna_labelled_all.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:437cc1a740d127f89c8e17f380e35881cb71b8189a14b68000ff9e54c0d531ab
|
| 3 |
+
size 326163366
|
data/load_data.py
ADDED
|
@@ -0,0 +1,291 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import anndata as ad
|
| 3 |
+
import pandas as pd
|
| 4 |
+
from sklearn.preprocessing import StandardScaler
|
| 5 |
+
from . import preprocess_data
|
| 6 |
+
|
| 7 |
+
def load_clones(data_path):
|
| 8 |
+
df_clone = pd.read_csv(data_path, index_col=["cell.bc"])
|
| 9 |
+
df_clone = df_clone[["assay", 'state/fate', 'cell_type',
|
| 10 |
+
'most_dominant_fate', 'most_dominant_fate_pct',
|
| 11 |
+
"clone_id", "clone.size (RNA & ATAC)", 'clone.size (RNA)', 'clone.size (ATAC)',
|
| 12 |
+
'# of D3 cells (RNA)', '# of D3 cells (ATAC)']]
|
| 13 |
+
df_clone.rename({"clone.size (RNA & ATAC)": "clone_size",
|
| 14 |
+
"clone.size (RNA)": "cells_RNA",
|
| 15 |
+
'clone.size (ATAC)': "cells_ATAC",
|
| 16 |
+
'# of D3 cells (ATAC)' : "cells_ATAC_D3",
|
| 17 |
+
'# of D3 cells (RNA)' : "cells_RNA_D3",
|
| 18 |
+
'most_dominant_fate': 'label',
|
| 19 |
+
'most_dominant_fate_pct': 'pct',
|
| 20 |
+
'state/fate': 'day3_day21'}, inplace=True, axis=1)
|
| 21 |
+
return df_clone
|
| 22 |
+
|
| 23 |
+
def add_clone_info(adata, clone_path, split=False):
|
| 24 |
+
"""
|
| 25 |
+
Adds clone information to the given AnnData object.
|
| 26 |
+
Parameters:
|
| 27 |
+
adata (AnnData): The AnnData object to which clone information will be added.
|
| 28 |
+
clone_path (str): The file path to the clone data.
|
| 29 |
+
split (bool): Whether to split the data into labelled and unlabelled. Default is False.
|
| 30 |
+
Returns:
|
| 31 |
+
AnnData: The modified AnnData object with clone information added.
|
| 32 |
+
"""
|
| 33 |
+
|
| 34 |
+
df_clone = load_clones(clone_path)
|
| 35 |
+
filtered_obs = adata.obs.join(df_clone, how='inner')
|
| 36 |
+
|
| 37 |
+
if split:
|
| 38 |
+
filtered_obs = filtered_obs[(filtered_obs.label=='reprogramming') | (filtered_obs.label=='dead-end')]
|
| 39 |
+
adata_labelled = adata[filtered_obs.index].copy()
|
| 40 |
+
adata_labelled.obs = filtered_obs
|
| 41 |
+
adata_unlabelled = adata[~adata.obs.index.isin(adata_labelled.obs.index)].copy()
|
| 42 |
+
return adata_labelled, adata_unlabelled
|
| 43 |
+
|
| 44 |
+
adata = adata[filtered_obs.index]
|
| 45 |
+
adata.obs = filtered_obs
|
| 46 |
+
return adata
|
| 47 |
+
|
| 48 |
+
def load_rna(data_path, return_raw=True, clone_info=False, clone_path=None):
|
| 49 |
+
"""
|
| 50 |
+
Load RNA data from a given file path.
|
| 51 |
+
Parameters:
|
| 52 |
+
- data_path (str): The file path to the RNA data.
|
| 53 |
+
- return_raw (bool): Whether to return the raw counts or not. Default is False.
|
| 54 |
+
- add_clone_info (bool): Whether to add clone information or not. Default is True.
|
| 55 |
+
- clone_path (str): The file path to the clone information. Required if add_clone_info is True.
|
| 56 |
+
Returns:
|
| 57 |
+
- adata_RNA (AnnData): Annotated data object containing the loaded RNA data.
|
| 58 |
+
"""
|
| 59 |
+
|
| 60 |
+
# Load RNA data
|
| 61 |
+
adata_RNA = ad.read_h5ad(data_path)
|
| 62 |
+
adata_RNA.obs.index = adata_RNA.obs.index.str.replace('_', '-')
|
| 63 |
+
|
| 64 |
+
# Restore raw counts if necessary
|
| 65 |
+
if return_raw:
|
| 66 |
+
adata_RNA.X = adata_RNA.raw.X.copy() # Copy raw counts to the expression matrix
|
| 67 |
+
|
| 68 |
+
# Add batch information
|
| 69 |
+
adata_RNA.obs['batch_no'] = adata_RNA.obs.index.to_series().apply(lambda idx: 1 if 'r1' in idx else (2 if 'r2' in idx else 0))
|
| 70 |
+
|
| 71 |
+
# Add clone information
|
| 72 |
+
if clone_info:
|
| 73 |
+
if clone_path is None:
|
| 74 |
+
raise ValueError("clone_path must be provided if add_clone_info is True.")
|
| 75 |
+
else:
|
| 76 |
+
adata_RNA = add_clone_info(adata_RNA, clone_path)
|
| 77 |
+
|
| 78 |
+
# Remove unwanted columns
|
| 79 |
+
columns_to_remove = ['orig.ident', 'old_ident', 'cc_score_diff', 'snn_res_0_8',
|
| 80 |
+
'seurat_clusters',
|
| 81 |
+
'predicted__cca_co_id', 'prediction_score_fib_1', 'prediction_score_fib_0',
|
| 82 |
+
'prediction_score_fib_2',
|
| 83 |
+
'prediction_score_early_0', 'prediction_score_transition_0',
|
| 84 |
+
'prediction_score_transition_1',
|
| 85 |
+
'prediction_score_early_1', 'prediction_score_early_2', 'prediction_score_iep_1',
|
| 86 |
+
'prediction_score_transition_2', 'prediction_score_iep_2', 'prediction_score_dead_end_1',
|
| 87 |
+
'prediction_score_dead_end_0', 'prediction_score_iep_0', 'prediction_score_dead_end_2',
|
| 88 |
+
'prediction_score_max', 'snn_res_0_2', 'cellranger_ident', 'metadata_fate_coarse_rev1',
|
| 89 |
+
'md_fate_rev1', 'md_fate_coarse_rev1', 'metadata_fate_rev1', 'day3_day21', 'sample_id',
|
| 90 |
+
'replicate_id', 'cell_type', 'assay']
|
| 91 |
+
intersection = set(columns_to_remove).intersection(adata_RNA.obs.columns)
|
| 92 |
+
if intersection:
|
| 93 |
+
adata_RNA.obs.drop(intersection, axis=1, inplace=True)
|
| 94 |
+
|
| 95 |
+
# Rename columns
|
| 96 |
+
columns_to_rename = {'S.Score': 'S_score',
|
| 97 |
+
'G2M.Score': 'G2M_score',
|
| 98 |
+
'nCount_RNA': 'total_counts',
|
| 99 |
+
'nFeature_RNA': 'n_genes_by_counts',
|
| 100 |
+
'Phase': 'phase',
|
| 101 |
+
'percent.mt': 'pct_counts_mt',
|
| 102 |
+
}
|
| 103 |
+
intersection = set(columns_to_rename.keys()).intersection(adata_RNA.obs.columns)
|
| 104 |
+
if intersection:
|
| 105 |
+
adata_RNA.obs.rename(columns=columns_to_rename, inplace=True)
|
| 106 |
+
|
| 107 |
+
return adata_RNA
|
| 108 |
+
|
| 109 |
+
def load_atac(data_path, clone_info=False, clone_path=None):
|
| 110 |
+
"""
|
| 111 |
+
Load ATAC data from a given file path.
|
| 112 |
+
Parameters:
|
| 113 |
+
- data_path (str): The file path to the ATAC data.
|
| 114 |
+
- clone_info (bool): Whether to add clone information or not. Default is False.
|
| 115 |
+
- clone_path (str): The file path to the clone information. Required if add_clone_info is True.
|
| 116 |
+
Returns:
|
| 117 |
+
- adata_atac (AnnData): Annotated data object containing the loaded ATAC data.
|
| 118 |
+
"""
|
| 119 |
+
adata_atac = ad.read_h5ad(data_path)
|
| 120 |
+
adata_atac = adata_atac[:,adata_atac.var['name'] != "Crebzf_122"]
|
| 121 |
+
adata_atac.obs.index = adata_atac.obs.index.str.replace('_', '-')
|
| 122 |
+
|
| 123 |
+
adata_atac = adata_atac.copy()
|
| 124 |
+
adata_atac.obs['batch_no'] = adata_atac.obs.index.to_series().apply(lambda idx: 1 if 'r1' in idx else (2 if 'r2' in idx else 0))
|
| 125 |
+
|
| 126 |
+
columns_to_remove = ['BlacklistRatio', 'CellNames', 'DoubletEnrichment',
|
| 127 |
+
'DoubletScore', 'NucleosomeRatio', 'PassQC', 'PromoterRatio',
|
| 128 |
+
'ReadsInBlacklist', 'ReadsInPromoter', 'ReadsInTSS', 'TSSEnrichment',
|
| 129 |
+
'nDiFrags', 'nFrags', 'nMonoFrags', 'nMultiFrags',
|
| 130 |
+
'origin']
|
| 131 |
+
|
| 132 |
+
intersection = set(columns_to_remove).intersection(adata_atac.obs.columns)
|
| 133 |
+
if intersection:
|
| 134 |
+
adata_atac.obs.drop(intersection, axis=1, inplace=True)
|
| 135 |
+
|
| 136 |
+
if clone_info:
|
| 137 |
+
if clone_path is None:
|
| 138 |
+
raise ValueError("clone_path must be provided if add_clone_info is True.")
|
| 139 |
+
else:
|
| 140 |
+
adata_atac_labelled, adata_atac_unlabelled = add_clone_info(adata_atac, clone_path, split=True)
|
| 141 |
+
return adata_atac_labelled, adata_atac_unlabelled
|
| 142 |
+
else:
|
| 143 |
+
# warning that without clone info, the data will be returned as a single object
|
| 144 |
+
print("Warning: Clone information not provided. Returning a single object.")
|
| 145 |
+
|
| 146 |
+
return adata_atac
|
| 147 |
+
|
| 148 |
+
def concat_fluxes(directory, prefix):
|
| 149 |
+
df_list = []
|
| 150 |
+
for filename in os.listdir(directory):
|
| 151 |
+
if filename.startswith(prefix) and filename.endswith('.csv'):
|
| 152 |
+
file_path = os.path.join(directory, filename)
|
| 153 |
+
df = pd.read_csv(file_path, index_col=0)
|
| 154 |
+
df_list.append(df)
|
| 155 |
+
|
| 156 |
+
if df_list:
|
| 157 |
+
concatenated_df = pd.concat(df_list, axis=0)
|
| 158 |
+
else:
|
| 159 |
+
concatenated_df = pd.DataFrame()
|
| 160 |
+
|
| 161 |
+
return concatenated_df
|
| 162 |
+
|
| 163 |
+
def load_flux(data_path, prefix='flux_un', clone_info=False, clone_path=None, scale=True, flux_metadata_path=None):
|
| 164 |
+
"""
|
| 165 |
+
Load Flux data from a given file path.
|
| 166 |
+
Parameters:
|
| 167 |
+
- data_path (str): The file path to the Flux data.
|
| 168 |
+
- prefix (str): The prefix of the Flux files. Default is 'flux_un'.
|
| 169 |
+
- clone_info (bool): Whether to add clone information or not. Default is False.
|
| 170 |
+
- clone_path (str): The file path to the clone information. Required if add_clone_info is True.
|
| 171 |
+
Returns:
|
| 172 |
+
- adata_Flux_labelled (pd.DataFrame): Annotated data object containing the labelled Flux data.
|
| 173 |
+
- adata_Flux_unlabelled (pd.DataFrame): Annotated data object containing the unlabelled Flux data.
|
| 174 |
+
- bi_labelled (list): List of binary labels for the labelled Flux data.
|
| 175 |
+
- bi_unlabelled (list): List of binary labels for the unlabelled Flux data.
|
| 176 |
+
- labels (list): List of labels for the labelled Flux data.
|
| 177 |
+
"""
|
| 178 |
+
|
| 179 |
+
adata_Flux_labelled = pd.read_csv(data_path, index_col=0)
|
| 180 |
+
directory = os.path.dirname(data_path)
|
| 181 |
+
adata_Flux_unlabelled = concat_fluxes(directory, prefix)
|
| 182 |
+
|
| 183 |
+
adata_Flux_labelled.index = adata_Flux_labelled.index.str.replace('_', '-')
|
| 184 |
+
if not adata_Flux_unlabelled.empty:
|
| 185 |
+
adata_Flux_unlabelled.index = adata_Flux_unlabelled.index.str.replace('_', '-')
|
| 186 |
+
else:
|
| 187 |
+
# Keep schema consistent when unlabeled files are not shipped.
|
| 188 |
+
adata_Flux_unlabelled = pd.DataFrame(columns=adata_Flux_labelled.columns)
|
| 189 |
+
|
| 190 |
+
if scale:
|
| 191 |
+
std_sc = StandardScaler()
|
| 192 |
+
if not adata_Flux_unlabelled.empty:
|
| 193 |
+
scaled_unl = std_sc.fit_transform(adata_Flux_unlabelled.values)
|
| 194 |
+
scaled_unl += abs(scaled_unl.min())
|
| 195 |
+
adata_Flux_unlabelled = pd.DataFrame(
|
| 196 |
+
scaled_unl,
|
| 197 |
+
index=adata_Flux_unlabelled.index,
|
| 198 |
+
columns=adata_Flux_unlabelled.columns,
|
| 199 |
+
)
|
| 200 |
+
scaled_la = std_sc.transform(adata_Flux_labelled.values)
|
| 201 |
+
scaled_la += abs(scaled_la.min())
|
| 202 |
+
else:
|
| 203 |
+
# Fallback for minimal/portable app packages: scale from labelled only.
|
| 204 |
+
scaled_la = std_sc.fit_transform(adata_Flux_labelled.values)
|
| 205 |
+
scaled_la += abs(scaled_la.min())
|
| 206 |
+
|
| 207 |
+
adata_Flux_labelled = pd.DataFrame(
|
| 208 |
+
scaled_la,
|
| 209 |
+
index=adata_Flux_labelled.index,
|
| 210 |
+
columns=adata_Flux_labelled.columns,
|
| 211 |
+
)
|
| 212 |
+
if flux_metadata_path is not None:
|
| 213 |
+
md = pd.read_csv(flux_metadata_path)[['X', 'rxnName']]
|
| 214 |
+
else:
|
| 215 |
+
md = pd.read_csv("data/datasets/flux/metabolic_model_metadata.csv")[['X', 'rxnName']]
|
| 216 |
+
dict_rename = {}
|
| 217 |
+
for col in adata_Flux_labelled.columns:
|
| 218 |
+
reaction = md[md['X'] == col]['rxnName'].str.replace(" -> ", "→").values
|
| 219 |
+
dict_rename[col] = reaction[0]
|
| 220 |
+
adata_Flux_labelled = adata_Flux_labelled.rename(columns=dict_rename)
|
| 221 |
+
adata_Flux_unlabelled = adata_Flux_unlabelled.rename(columns=dict_rename)
|
| 222 |
+
|
| 223 |
+
|
| 224 |
+
if clone_info:
|
| 225 |
+
if clone_path is None:
|
| 226 |
+
raise ValueError("clone_path must be provided if add_clone_info is True.")
|
| 227 |
+
else:
|
| 228 |
+
df_clone = load_clones(clone_path)
|
| 229 |
+
filtered_obs = adata_Flux_labelled.join(df_clone, how='inner')
|
| 230 |
+
labels = filtered_obs['label']
|
| 231 |
+
pcts = filtered_obs['pct']
|
| 232 |
+
bi_labelled = adata_Flux_labelled.index.map(lambda x: 2 if 'r2' in x else 1 if 'r1' in x else 0)
|
| 233 |
+
bi_unlabelled = adata_Flux_unlabelled.index.map(lambda x: 2 if 'r2' in x else 1 if 'r1' in x else 0)
|
| 234 |
+
adata_Flux_labelled = adata_Flux_labelled.loc[filtered_obs.index]
|
| 235 |
+
return adata_Flux_labelled, adata_Flux_unlabelled, bi_labelled, bi_unlabelled, labels, pcts
|
| 236 |
+
else:
|
| 237 |
+
print("Warning: Clone information not provided. Returning raw data.")
|
| 238 |
+
return adata_Flux_labelled, adata_Flux_unlabelled
|
| 239 |
+
|
| 240 |
+
|
| 241 |
+
def load_processed_rna(verbose=True, return_raw=True, return_all_features=False):
|
| 242 |
+
|
| 243 |
+
if verbose:
|
| 244 |
+
print('Loading RNA data...')
|
| 245 |
+
# Load RNA data labelled
|
| 246 |
+
adata_RNA_labelled = load_rna("data/datasets/rna/all_rna_d3_labelled.h5ad",
|
| 247 |
+
return_raw=True,
|
| 248 |
+
clone_info=True,
|
| 249 |
+
clone_path="data/datasets/clone/clones.csv")
|
| 250 |
+
# Load RNA data unlabelled
|
| 251 |
+
adata_RNA_unlabelled = load_rna("data/datasets/rna/all_rna_d3_unlabelled.h5ad",
|
| 252 |
+
return_raw=True,
|
| 253 |
+
clone_info=False)
|
| 254 |
+
|
| 255 |
+
if verbose:
|
| 256 |
+
print('Filtering RNA data...')
|
| 257 |
+
adata_RNA_labelled = preprocess_data.filter_rna_cells_genes(adata_RNA_labelled.copy())
|
| 258 |
+
adata_RNA_unlabelled = preprocess_data.filter_rna_cells_genes(adata_RNA_unlabelled.copy())
|
| 259 |
+
|
| 260 |
+
if verbose:
|
| 261 |
+
print('Feature Selection by DEGs...')
|
| 262 |
+
deg_list = preprocess_data.get_degs(adata_RNA_labelled, method='t-test')
|
| 263 |
+
|
| 264 |
+
if verbose:
|
| 265 |
+
print('Filtering Genes...')
|
| 266 |
+
genes_intersection = set(adata_RNA_labelled.var_names).intersection(set(adata_RNA_unlabelled.var_names)).intersection(set(deg_list.gene))
|
| 267 |
+
adata_RNA_labelled_all = adata_RNA_labelled.copy()
|
| 268 |
+
adata_RNA_labelled = adata_RNA_labelled[:, list(genes_intersection)]
|
| 269 |
+
adata_RNA_unlabelled = adata_RNA_unlabelled[:, list(genes_intersection)]
|
| 270 |
+
|
| 271 |
+
|
| 272 |
+
if return_raw:
|
| 273 |
+
gene_indices = [adata_RNA_labelled.raw.var_names.get_loc(gene) for gene in adata_RNA_labelled.var_names]
|
| 274 |
+
adata_RNA_labelled.X = adata_RNA_labelled.raw.X[:, gene_indices].toarray().copy()
|
| 275 |
+
adata_RNA_unlabelled.X = adata_RNA_unlabelled.raw.X[:, gene_indices].copy()
|
| 276 |
+
|
| 277 |
+
if return_all_features:
|
| 278 |
+
return adata_RNA_labelled, adata_RNA_unlabelled, deg_list, adata_RNA_labelled_all
|
| 279 |
+
return adata_RNA_labelled, adata_RNA_unlabelled, deg_list
|
| 280 |
+
|
| 281 |
+
if __name__ == '__main__':
|
| 282 |
+
adata_ATAC_labelled, adata_ATAC_unlabelled = load_atac("data/datasets/atac/all_atac_d3_motif.h5ad",
|
| 283 |
+
clone_info=True,
|
| 284 |
+
clone_path="data/datasets/clone/clones.csv")
|
| 285 |
+
print(adata_ATAC_labelled.obs.columns, adata_ATAC_labelled.obs.shape, adata_ATAC_labelled.obs.index[:10])
|
| 286 |
+
print(adata_ATAC_unlabelled.obs.columns, adata_ATAC_unlabelled.obs.shape, adata_ATAC_unlabelled.obs.index[:10])
|
| 287 |
+
print("Data loaded successfully!")
|
| 288 |
+
|
| 289 |
+
|
| 290 |
+
|
| 291 |
+
|
data/preprocess_data.py
ADDED
|
@@ -0,0 +1,163 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import scanpy as sc
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import numpy as np
|
| 4 |
+
from scipy.stats import ttest_ind
|
| 5 |
+
from statsmodels.stats.multitest import multipletests
|
| 6 |
+
|
| 7 |
+
def filter_rna_cells_genes(adata, min_genes=100, min_cells=10):
    """Apply basic scanpy quality-control filters to an RNA AnnData object.

    Cells with fewer than ``min_genes`` detected genes are removed first,
    then genes present in fewer than ``min_cells`` of the remaining cells
    are dropped. The object is filtered in place.

    Parameters
    ----------
    adata : AnnData
        Annotated RNA expression matrix; modified in place.
    min_genes : int, optional
        Minimum number of detected genes required to keep a cell (default 100).
    min_cells : int, optional
        Minimum number of cells a gene must appear in to be kept (default 10).

    Returns
    -------
    AnnData
        The same (now filtered) object, returned for call chaining.
    """
    sc.pp.filter_cells(adata, min_genes=min_genes)
    sc.pp.filter_genes(adata, min_cells=min_cells)
    return adata
|
| 20 |
+
|
| 21 |
+
def get_degs(adata, method='t-test', p_val=0.05,
             batch_remove=True, batch_key='batch_no', label_key='label',
             reference='dead-end', target='reprogramming'):
    """
    Get differentially expressed genes (DEGs) from the RNA data.

    Pipeline (all in-place on ``adata``): total-count normalization to 1e4,
    log1p, optional ComBat batch correction, then scanpy rank_genes_groups
    comparing ``target`` vs ``reference`` on ``label_key``.

    Returns a DataFrame of significant DEGs with per-group mean/std
    expression, raw/adjusted p-values and log fold change, filtered to
    pval_adj < p_val and |log_fc| > 1 (with an upper cap log_fc < 7 on the
    target side, presumably to drop outlier fold changes — TODO confirm).
    """

    sc.pp.normalize_total(adata, target_sum=1e4, exclude_highly_expressed=False)
    sc.pp.log1p(adata)
    if batch_remove:
        # ComBat batch-effect removal keyed on the batch column.
        sc.pp.combat(adata, key=batch_key)

    # Rank ALL genes (n_genes = full width) so the result frames are complete.
    sc.tl.rank_genes_groups(adata, groupby=label_key, method=method, n_genes=adata.shape[1], use_raw=False, reference=reference)

    de_results = adata.uns['rank_genes_groups']
    # Gene names in the ranking order produced for the target group.
    gene_list = list(pd.DataFrame(de_results['names'])[target])

    # Compute mean and std for each gene in both groups.
    # These are Series indexed by gene names (from adata.var_names).
    group_a_mean_expression = adata[adata.obs[label_key] == reference].to_df().mean()
    group_a_std_expression = adata[adata.obs[label_key] == reference].to_df().std()
    group_b_mean_expression = adata[adata.obs[label_key] == target].to_df().mean()
    group_b_std_expression = adata[adata.obs[label_key] == target].to_df().std()

    # Reorder (or reindex) the computed series so that they match the order in gene_list.
    group_a_mean_expression = group_a_mean_expression.reindex(gene_list)
    group_a_std_expression = group_a_std_expression.reindex(gene_list)
    group_b_mean_expression = group_b_mean_expression.reindex(gene_list)
    group_b_std_expression = group_b_std_expression.reindex(gene_list)

    # Create the DEG DataFrame. ``.values`` strips the gene index so all
    # columns align positionally with gene_list.
    df = pd.DataFrame({
        'gene': gene_list,
        'mean_exp_de': group_a_mean_expression.values,  # 'dead-end' (reference)
        'mean_exp_re': group_b_mean_expression.values,  # 'reprogramming' (target)
        'std_exp_de': group_a_std_expression.values,
        'std_exp_re': group_b_std_expression.values,
        'pval': de_results['pvals'][target],
        'pval_adj': de_results['pvals_adj'][target],
        'log_fc': de_results['logfoldchanges'][target],
    })

    # Negative log-fold-change means higher expression in the reference group.
    df['group'] = df.apply(lambda row: reference if row['log_fc'] < 0 else target, axis=1)

    df.sort_values(by='pval_adj', inplace=True)
    df.reset_index(drop=True, inplace=True)
    df['pval_adj_log'] = -np.log10(df['pval_adj'])

    # Keep significant genes with |log_fc| > 1; target-side fold changes are
    # additionally capped at 7.
    df = df[(df.pval_adj < p_val) & ((df.log_fc < -1) | ((df.log_fc > 1) & (df.log_fc < 7)))]
    return df
|
| 71 |
+
|
| 72 |
+
def get_flux_degs(adata_Flux_labelled, labels):
    """Differential analysis of metabolic flux features between the two fates.

    Parameters
    ----------
    adata_Flux_labelled : DataFrame-like
        Flux matrix (samples x reactions) supporting column access and
        boolean-row selection; assumed to be a pandas DataFrame — TODO confirm.
    labels : Series
        Per-sample fate labels ('dead-end' / 'reprogramming'), aligned with
        the rows of ``adata_Flux_labelled``.

    Returns
    -------
    DataFrame with per-feature group means/stds, log2 fold change, Welch-style
    t-test p-values, BH-adjusted p-values, and the favored group, sorted by
    adjusted p-value.
    """
    dead_end = adata_Flux_labelled[labels.values == "dead-end"]
    reprogramming = adata_Flux_labelled[labels.values == "reprogramming"]

    features = []
    log_fold_changes = []
    p_values = []
    mean_des = []
    mean_res = []
    std_des = []
    std_res = []

    for feature in adata_Flux_labelled.columns:
        mean_de = dead_end[feature].mean()
        mean_re = reprogramming[feature].mean()
        std_de = dead_end[feature].std()
        std_re = reprogramming[feature].std()

        # 1e-10 pseudo-count guards against log2(0) on zero-mean fluxes.
        log_fold_change = np.log2(mean_re + 1e-10) - np.log2(mean_de + 1e-10)
        t_stat, p_value = ttest_ind(dead_end[feature], reprogramming[feature], nan_policy="omit")
        mean_des.append(mean_de)
        mean_res.append(mean_re)
        std_des.append(std_de)
        std_res.append(std_re)
        features.append(feature)
        log_fold_changes.append(log_fold_change)
        p_values.append(p_value)

    # Benjamini-Hochberg FDR correction across all flux features.
    adjusted_p_values = multipletests(p_values, method="fdr_bh")[1]

    df_flux_degs = pd.DataFrame({
        "feature": features,
        "mean_de": mean_des,
        "mean_re": mean_res,
        "mean_diff": np.array(mean_res) - np.array(mean_des),
        "std_de": std_des,
        "std_re": std_res,
        "log_fc": log_fold_changes,
        "pval": p_values,
        "pval_adj": adjusted_p_values,
        'pval_adj_log' : -np.log10(adjusted_p_values)
    })
    # Attribute each feature to whichever group has the higher mean flux.
    df_flux_degs['group'] = df_flux_degs.apply(lambda row: 'dead-end' if row['mean_de'] > row['mean_re'] else 'reprogramming', axis=1)
    df_flux_degs = df_flux_degs.sort_values(by="pval_adj").reset_index(drop=True)
    return df_flux_degs
|
| 117 |
+
|
| 118 |
+
def get_atac_degs(adata, method='t-test', label_key='label',
                  reference='dead-end', target='reprogramming'):
    """
    Get differentially expressed genes (DEGs) from the ATAC data.

    Ranks all ATAC features (`target` vs `reference` on ``label_key``) with
    scanpy and returns a DataFrame with p-values, a shifted log2 fold change
    (ATAC motif scores can be negative, so both means are shifted to be
    positive before the log), per-group means/stds and the favored group.
    No normalization is applied here — the data is assumed to be already
    preprocessed upstream (TODO confirm).
    """

    sc.tl.rank_genes_groups(adata, groupby=label_key, method=method,
                            n_genes=adata.shape[1], use_raw=False, reference=reference)

    # Per-feature statistics as Series indexed by var_names.
    group_a_mean_expression = adata[adata.obs[label_key] == reference].to_df().mean()
    group_a_std_expression = adata[adata.obs[label_key] == reference].to_df().std()
    group_b_mean_expression = adata[adata.obs[label_key] == target].to_df().mean()
    group_b_std_expression = adata[adata.obs[label_key] == target].to_df().std()
    de_results = adata.uns['rank_genes_groups']
    features = list(pd.DataFrame(de_results['names'])[target])

    # Reindex the mean and std Series to this feature list
    mean_de = group_a_mean_expression.reindex(features)
    mean_re = group_b_mean_expression.reindex(features)
    std_de = group_a_std_expression.reindex(features)
    std_re = group_b_std_expression.reindex(features)

    min_val = min(mean_de.min(), mean_re.min())
    # Determine a shift value so that the smallest value becomes a small positive number.
    shift = 0
    if min_val <= 0:
        shift = abs(min_val) + 1e-10
    # NOTE: the Series below carry the feature-name index, so the DataFrame
    # index becomes the feature names; list/array columns align positionally,
    # which matches because the Series were reindexed to ``features``.
    df = pd.DataFrame({
        'feature': list(pd.DataFrame(de_results['names'])[target]),
        'pval': de_results['pvals'][target],
        'pval_adj': de_results['pvals_adj'][target],
        'log_fc': np.log2(mean_re + shift) - np.log2(mean_de + shift),
        'mean_de': mean_de,
        'mean_re': mean_re,
        'mean_diff': mean_re - mean_de,
        'std_de': std_de,
        'std_re': std_re,

    })

    # Attribute each feature to whichever group has the higher mean signal.
    df['group'] = df.apply(lambda row: 'dead-end' if row['mean_de'] > row['mean_re'] else 'reprogramming', axis=1)

    df.sort_values(by='pval_adj', inplace=True)
    df.reset_index(drop=True, inplace=True)
    df['pval_adj_log'] = -np.log10(df['pval_adj'])
    return df
|
interpretation/__init__.py
ADDED
|
File without changes
|
interpretation/attentions.py
ADDED
|
@@ -0,0 +1,244 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import torch
|
| 3 |
+
from torch.utils.data import DataLoader, Subset
|
| 4 |
+
from utils.helpers import create_multimodal_model
|
| 5 |
+
from models import SingleTransformer
|
| 6 |
+
from scipy.sparse import csr_matrix
|
| 7 |
+
|
| 8 |
+
def filter_idx(dataset, idx):
    """
    Filter ``idx`` down to samples where no modality row is entirely zero.

    Args:
        dataset: object exposing ``rna_data``, ``atac_data`` and ``flux_data``
            arrays of shape (n_samples, n_features).
        idx: iterable of integer sample indices to filter.
    Returns:
        list of indices whose RNA, ATAC and flux rows each contain at least
        one non-zero value.
    """
    keep = None
    for modality in (dataset.rna_data, dataset.atac_data, dataset.flux_data):
        has_signal = (modality != 0).any(axis=1)
        keep = has_signal if keep is None else keep & has_signal
    return [i for i in idx if keep[i]]
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
def analyze_cls_attention(id, fold_results, dataset, model_config, device, indices,
                          average_heads=True, return_flow_attention=False):
    """
    Extracts the attention weights of the validation set of each fold.

    Args:
        id: The type of data to use. Must be one of 'RNA', 'ATAC', 'Flux', 'Multi'.
        fold_results: List of dicts with per-fold 'val_idx' and 'best_model_path'.
        dataset: Dataset object containing the data.
        model_config: Dictionary containing the model configuration.
        device: Device to run the model on.
        indices: Only validation samples also present in this collection are used.
        average_heads: Whether to average the attention weights across heads. Defaults to True.
        return_flow_attention: If True, the model is expected to return a dict of
            per-layer attentions keyed 'rna'/'atac'/'flux'/'cls' instead of a tensor.
    Returns:
        If return_flow_attention is False: numpy array (n_samples, seq_len) or
        (n_samples, num_heads, seq_len) of CLS attention over the validation sets.
        Otherwise: dict mapping modality -> list of per-layer attention tensors,
        concatenated over all validation samples.
    """
    if id not in ['RNA', 'ATAC', 'Flux', 'Multi']:
        raise ValueError("id must be one of 'RNA', 'ATAC', 'Flux', 'Multi'")

    all_attention_weights = []

    for fold in fold_results:

        val_idx = fold['val_idx']
        # Restrict the fold's validation indices to the caller-supplied subset.
        val_idx = [i for i in val_idx if i in indices]

        if id == 'Multi':
            # Multimodal models only get samples with signal in every modality.
            val_idx = filter_idx(dataset, val_idx)

        if len(val_idx) == 0:
            print('No samples of the specified type in the validation set. Skipping...')
            continue

        val_ds = Subset(dataset, val_idx)
        val_loader = DataLoader(val_ds, batch_size=32, shuffle=False)

        if id=='Multi':
            model = create_multimodal_model(model_config, device, use_mlm=False)
        else:
            model = SingleTransformer(id=id, **model_config).to(device)

        # Load to CPU first so checkpoints saved on CUDA also load on MPS/CPU.
        model_path = fold['best_model_path']
        state_dict = torch.load(model_path, map_location='cpu')
        model.load_state_dict(state_dict)
        model.eval()

        with torch.no_grad():
            for batch in val_loader:
                x, b, _ = batch
                if isinstance(x, list):
                    # Multimodal batch: move each modality tensor to the device.
                    rna = x[0].to(device)
                    atac = x[1].to(device)
                    flux = x[2].to(device)
                    x = (rna, atac, flux)
                else:
                    x = x.to(device)
                b = b.to(device)

                _, _, attention_weights = model(x, b, return_attention=True, return_flow_attention=return_flow_attention)

                if not return_flow_attention:
                    if average_heads:
                        attention_weights = attention_weights.squeeze(-2).mean(dim=1) # Average across heads (batch, 1, seq_len) -> (batch, seq_len)
                    else:
                        attention_weights = attention_weights.squeeze(-2) # (batch, num_heads, 1, seq_len) -> (batch, num_heads, seq_len)

                # if hasattr(attention_weights, 'numpy'):
                #     attention_weights = attention_weights.cpu().numpy()
                all_attention_weights.append(attention_weights)

    if not return_flow_attention:
        return np.concatenate(all_attention_weights, axis=0) # (n_samples, seq_len) or (n_samples, num_heads, seq_len)
    else:
        att_w = {'rna': [], 'atac': [], 'flux': [], 'cls': []}
        # Now we have a dict per batch; concatenate all batch values for each key.
        num_layers_mlm = len(all_attention_weights[0]['rna'])
        # 'cls' may be a single tensor rather than a per-layer list.
        num_layers_cls = len(all_attention_weights[0]['cls']) if isinstance(all_attention_weights[0]['cls'], list) else 1

        for key in all_attention_weights[0].keys():
            key_all_attentions = []
            for batch_row in all_attention_weights:
                modality_batch_attention_layers = batch_row[key]
                if isinstance(modality_batch_attention_layers, list):
                    # Move every layer tensor to CPU (in place in the list).
                    for i, modality_attention_layers in enumerate(modality_batch_attention_layers):
                        modality_batch_attention_layers[i] = modality_attention_layers.cpu()
                    key_all_attentions.append(modality_batch_attention_layers)
                else:
                    # Wrap a single tensor as a one-layer list for uniform handling.
                    key_all_attentions.append([modality_batch_attention_layers.cpu()])
            # key_all_attentions is [[layer0_batch1, layer1_batch1, ...], [layer0_batch2, ...], ...];
            # concatenate the attention weights across batches for each layer.
            num_layers = num_layers_cls if key == 'cls' else num_layers_mlm
            att_w[key] = [torch.cat([layer[i] for layer in key_all_attentions], axis=0) for i in range(num_layers)]
        return att_w
|
| 120 |
+
|
| 121 |
+
|
| 122 |
+
# def compute_attention_rollout(attention_weights):
|
| 123 |
+
# num_layers = len(attention_weights)
|
| 124 |
+
# combined_attention = torch.eye(attention_weights[0].size(-1)).to(attention_weights[0].device)
|
| 125 |
+
# for layer in range(num_layers):
|
| 126 |
+
# layer_attention = attention_weights[layer].mean(dim=1) # Average over heads
|
| 127 |
+
# combined_attention = torch.matmul(layer_attention, combined_attention)
|
| 128 |
+
# return combined_attention
|
| 129 |
+
def compute_attention_rollout(attention_weights):
    """
    Compute the per-sample attention rollout for a batch.

    Args:
        attention_weights: list of length num_layers of tensors with shape
            (batch, num_heads, seq_len, seq_len).

    Returns:
        Tensor of shape (batch, seq_len, seq_len): the layer-by-layer product
        of head-averaged attention matrices, i.e. the effective attention
        from the input token (typically CLS) to every token.
    """
    first_layer = attention_weights[0]
    batch, _, tokens, _ = first_layer.shape

    # Begin with one identity matrix per sample in the batch.
    identity = torch.eye(tokens, device=first_layer.device)
    rollout = identity.unsqueeze(0).repeat(batch, 1, 1)

    # Left-multiply each layer's head-averaged attention onto the running product.
    for layer_attn in attention_weights:
        rollout = torch.bmm(layer_attn.mean(dim=1), rollout)
    return rollout
|
| 154 |
+
def multimodal_attention_rollout(all_attention_weights):
    """Combine per-modality attention rollouts with the CLS fusion attention.

    Expects the dict produced by analyze_cls_attention(return_flow_attention=True):
    'rna'/'atac'/'flux' map to per-layer attention lists, 'cls' to the fusion
    layer's attention. Returns a (samples, tokens) tensor of CLS attention
    propagated through each modality's rollout and concatenated over modalities.
    """
    rna_rollout = compute_attention_rollout(all_attention_weights['rna'])
    atac_rollout = compute_attention_rollout(all_attention_weights['atac'])
    flux_rollout = compute_attention_rollout(all_attention_weights['flux'])

    cls_attention = all_attention_weights['cls'][0].mean(dim=1).squeeze(1) # Average over heads

    # Split CLS attention for each modality
    rna_cls_attn, atac_cls_attn, flux_cls_attn = cls_attention.split(
        [rna_rollout.size(1), atac_rollout.size(1), flux_rollout.size(1)], dim=1)

    # Propagate CLS attention through each modality's rollout (batched vec-mat
    # product) and concatenate the three modality segments token-wise.
    final_rollout = torch.cat([
        rna_cls_attn.unsqueeze(1) @ rna_rollout,
        atac_cls_attn.unsqueeze(1) @ atac_rollout,
        flux_cls_attn.unsqueeze(1) @ flux_rollout
    ], dim=2)

    return final_rollout.squeeze(1) # remove head dimension [samples, tokens]
|
| 172 |
+
|
| 173 |
+
def print_top_features(attention_weights, feature_names, top_n=5, modality=None):
    """Print the ``top_n`` features with the highest mean attention.

    Args:
        attention_weights: (n_samples, n_features) tensor or array.
        feature_names: sequence mapping feature index -> display name.
        top_n: how many features to print.
        modality: label included in the header line.
    """
    print(f"\nTop {top_n} attended features ({modality} samples):")
    mean_attn = attention_weights.mean(axis=0)
    if hasattr(attention_weights, 'numpy'):
        mean_attn = mean_attn.numpy()
    for idx in mean_attn.argsort()[-top_n:][::-1]:
        print(f"{feature_names[idx]}: {mean_attn[idx]:.4f}")
|
| 179 |
+
|
| 180 |
+
def get_top_features(attention_weights, feature_names, top_n=100, modality=None):
    """Return (feature_name, mean_attention) pairs, highest attention first.

    Args:
        attention_weights: (n_samples, n_features) tensor or array.
        feature_names: sequence mapping feature index -> name.
        top_n: number of features to return; falsy (0/None) returns all.
        modality: unused; kept for signature parity with print_top_features.

    Returns:
        list of (name, score) tuples sorted by descending mean attention.
    """
    mean_attn = attention_weights.mean(axis=0)
    if hasattr(attention_weights, 'numpy'):
        mean_attn = mean_attn.numpy()
    order = mean_attn.argsort()[::-1]
    if top_n:
        order = order[:top_n]
    return [(feature_names[i], mean_attn[i]) for i in order]
|
| 191 |
+
|
| 192 |
+
from scipy.sparse.csgraph import maximum_flow
|
| 193 |
+
|
| 194 |
+
def compute_attention_flow(attention_weights):
    """Compute token-to-token attention flow via max-flow on a layered graph.

    Node ``l * num_tokens + t`` represents token ``t`` after layer ``l``.
    Edge capacities between consecutive layers are the head-averaged attention
    weights plus an identity (residual) connection. The flow from token ``i``
    (layer 0) to token ``j`` (last layer) is the maximum flow between the two
    nodes.

    Args:
        attention_weights: list (num_layers) of tensors whose head-averaged
            form ``attn.mean(dim=1)`` is a (num_tokens, num_tokens) matrix.

    Returns:
        (num_tokens, num_tokens) float tensor of pairwise max-flow values, on
        the same device as the input attention.
    """
    num_layers = len(attention_weights)
    num_tokens = attention_weights[0].size(-1)

    # Adjacency (capacity) matrix for the layered flow network.
    adj_matrix = np.zeros((num_layers * num_tokens, num_layers * num_tokens))

    for i in range(num_layers - 1):
        layer_attention = attention_weights[i].mean(dim=1).cpu().numpy()  # Average over heads
        start_idx = i * num_tokens
        end_idx = (i + 1) * num_tokens
        adj_matrix[start_idx:end_idx, end_idx:(end_idx + num_tokens)] = layer_attention

    # Residual (skip) connections: every token can also pass straight through
    # to its counterpart in the next layer.
    for i in range(num_layers - 1):
        start_idx = i * num_tokens
        end_idx = (i + 1) * num_tokens
        adj_matrix[start_idx:end_idx, end_idx:(end_idx + num_tokens)] += np.eye(num_tokens)

    # BUGFIX: scipy's maximum_flow only accepts *integer* capacities, so the
    # float attention weights are scaled up and cast; results are scaled back
    # below. Building the CSR matrix once here (instead of per token pair)
    # also avoids O(num_tokens^2) redundant conversions.
    scale = 10 ** 6
    capacities = csr_matrix(np.round(adj_matrix * scale).astype(np.int64))

    flows = np.zeros((num_tokens, num_tokens))
    for i in range(num_tokens):
        source = i
        for j in range(num_tokens):
            sink = (num_layers - 1) * num_tokens + j
            # BUGFIX: maximum_flow returns a MaximumFlowResult object, not a
            # tuple — tuple unpacking raised a TypeError. Use .flow_value.
            result = maximum_flow(capacities, source, sink)
            flows[i, j] = result.flow_value / scale

    return torch.tensor(flows, device=attention_weights[0].device)
|
| 221 |
+
|
| 222 |
+
def multimodal_attention_flow(all_attention_weights):
    """Combine per-modality attention flows with the CLS fusion attention.

    Mirrors multimodal_attention_rollout but uses max-flow based attention
    flow per modality, row-normalized before propagation. Returns a
    (samples, tokens) tensor over the concatenated modality tokens.
    """
    rna_flow = compute_attention_flow(all_attention_weights['rna'])
    atac_flow = compute_attention_flow(all_attention_weights['atac'])
    flux_flow = compute_attention_flow(all_attention_weights['flux'])

    cls_attention = all_attention_weights['cls'][0].mean(dim=1).squeeze(1) # Average over heads

    # Split CLS attention for each modality
    rna_cls_attn, atac_cls_attn, flux_cls_attn = cls_attention.split(
        [rna_flow.size(1), atac_flow.size(1), flux_flow.size(1)], dim=1)

    # Normalize flows row-wise so each token's outgoing flow sums to 1.
    # NOTE(review): an all-zero flow row would produce NaNs here — confirm
    # flows are always strictly positive for the models in use.
    rna_flow = rna_flow / rna_flow.sum(dim=1, keepdim=True)
    atac_flow = atac_flow / atac_flow.sum(dim=1, keepdim=True)
    flux_flow = flux_flow / flux_flow.sum(dim=1, keepdim=True)

    # Propagate CLS attention through each modality's flow matrix and
    # concatenate the modality segments token-wise.
    final_flow = torch.cat([
        rna_cls_attn.unsqueeze(1) @ rna_flow,
        atac_cls_attn.unsqueeze(1) @ atac_flow,
        flux_cls_attn.unsqueeze(1) @ flux_flow
    ], dim=2)

    return final_flow.squeeze(1)
|
interpretation/latentspace.py
ADDED
|
@@ -0,0 +1,219 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
from torch.utils.data import DataLoader, Subset
|
| 3 |
+
import numpy as np
|
| 4 |
+
from tqdm import tqdm
|
| 5 |
+
import matplotlib.pyplot as plt
|
| 6 |
+
from models import SingleTransformer
|
| 7 |
+
from utils.helpers import create_multimodal_model
|
| 8 |
+
from data.create_dataset import MultiModalDataset
|
| 9 |
+
from .attentions import filter_idx
|
| 10 |
+
|
| 11 |
+
def get_latent_space(id, fold_results, labelled_dataset,
                     model_config, device, batch_size=32, common_samples=True):
    """Collect latent representations, labels and predictions over all folds.

    For every fold, loads the fold's best checkpoint, runs its validation
    subset through ``model.get_latent_space`` and accumulates the results.

    Args:
        id: one of 'RNA', 'ATAC', 'Flux', 'Multi' (selects the model class).
        fold_results: list of dicts with 'best_model_path' and 'val_idx'.
        labelled_dataset: dataset yielding (x, batch_no, label) items.
        model_config: kwargs for model construction.
        device: torch device for inference.
        batch_size: DataLoader batch size.
        common_samples: if True, keep only samples non-zero in all modalities.

    Returns:
        (latent_space, labels, preds): numpy arrays concatenated over folds;
        preds are rounded to hard class predictions.
    """
    if id not in ['RNA', 'ATAC', 'Flux', 'Multi']:
        raise ValueError("id must be one of 'RNA', 'ATAC', 'Flux', 'Multi'")

    latent_space = []
    labels = []
    preds = []
    for fold in fold_results:
        model_path = fold['best_model_path']
        val_idx = fold['val_idx']
        if common_samples:
            val_idx = filter_idx(labelled_dataset, val_idx)
        val_ds = Subset(labelled_dataset, val_idx)
        val_loader = DataLoader(val_ds, batch_size=batch_size, shuffle=False)
        if id=='Multi':
            model = create_multimodal_model(model_config, device, use_mlm=False)
        else:
            model = SingleTransformer(id=id, **model_config).to(device)

        # Load weights to CPU first, then move to target device (handles CUDA->MPS/CPU transfer)
        state_dict = torch.load(model_path, map_location='cpu')
        model.load_state_dict(state_dict)
        model = model.to(device)
        model.eval()
        with torch.no_grad():
            for batch in val_loader:
                x, b, y = batch
                if isinstance(x, list):
                    # Multimodal batch: move each modality to the device.
                    rna= x[0].to(device)
                    atac = x[1].to(device)
                    flux = x[2].to(device)
                    x = (rna, atac, flux)
                else:
                    x = x.to(device)
                b = b.to(device)

                ls, pred = model.get_latent_space(x, b)
                latent_space.append(ls.cpu().numpy())
                labels.append(y.numpy())
                preds.append(pred.cpu().numpy())
    latent_space = np.concatenate(latent_space)
    labels = np.concatenate(labels)
    preds = np.concatenate(preds)
    # Round probabilities to hard class predictions.
    preds = np.round(preds)
    return latent_space, labels, preds
|
| 58 |
+
|
| 59 |
+
def get_latent_space_cached(models, fold_results, dataset, device, batch_size=64, common_samples=True):
    """
    Compute latent space using preloaded models.

    Same contract as get_latent_space, but takes already-constructed models
    (one per fold, in the same order as ``fold_results``) so repeated calls —
    e.g. per-feature perturbation sweeps — skip checkpoint loading.

    Returns:
        (latent_space, labels, preds) numpy arrays concatenated over folds;
        preds are rounded to hard class predictions.
    """
    latent_space = []
    labels = []
    preds = []
    for model, fold in zip(models, fold_results):
        val_idx = fold['val_idx']
        if common_samples:
            # Keep only samples with signal in every modality.
            val_idx = filter_idx(dataset, val_idx)
        val_ds = Subset(dataset, val_idx)
        # Increase batch size to speed up inference
        val_loader = DataLoader(val_ds, batch_size=batch_size, shuffle=False)
        model.eval()
        with torch.no_grad():
            for batch in val_loader:
                x, b, y = batch
                if isinstance(x, list):
                    # For multimodal inputs, move each modality to device
                    rna = x[0].to(device)
                    atac = x[1].to(device)
                    flux = x[2].to(device)
                    x = (rna, atac, flux)
                else:
                    x = x.to(device)
                b = b.to(device)
                ls, pred = model.get_latent_space(x, b)
                latent_space.append(ls.cpu().numpy())
                labels.append(y.numpy())
                preds.append(pred.cpu().numpy())
    latent_space = np.concatenate(latent_space)
    labels = np.concatenate(labels)
    preds = np.concatenate(preds)
    # Round probabilities to hard class predictions.
    preds = np.round(preds)
    return latent_space, labels, preds
|
| 95 |
+
|
| 96 |
+
def measure_shift(original_latent, perturbed_latent):
    """Mean Euclidean distance between matched rows of two latent matrices."""
    per_sample_distance = np.linalg.norm(original_latent - perturbed_latent, axis=1)
    return per_sample_distance.mean()
|
| 98 |
+
|
| 99 |
+
def perturb_feature(data, feature_idx, perturbation_type='additive', scale=0.1, min_samples_threshold=10):
    """Perturb one feature column of a 2-D tensor, preserving sparsity.

    Args:
        data: (n_samples, n_features) torch tensor; float or int32.
        feature_idx: index of the column to perturb.
        perturbation_type: 'shuffle' (permute non-zero entries),
            'shuffle_all' (permute whole column), 'additive' (Gaussian noise
            scaled by the column's std), or 'multiplicative' (uniform factor
            in [1 - scale/2, 1 + scale/2]).
        scale: magnitude of the additive/multiplicative perturbation.
        min_samples_threshold: minimum non-zero entries required to perturb.

    Returns:
        (perturbed_data, insufficient_flag): a perturbed copy and False, or
        (None, True) when the column has too few non-zero entries.
    """
    perturbed_data = data.clone()
    non_zero_rows_mask = data[:, feature_idx] != 0

    # Check if feature has enough non-zero samples
    if non_zero_rows_mask.sum() < min_samples_threshold:
        return None, True # Return None and flag indicating insufficient samples

    if perturbation_type == 'shuffle':
        # Shuffle only non-zero values (preserves sparsity pattern)
        non_zero_values = perturbed_data[non_zero_rows_mask, feature_idx].clone()
        shuffled_idx = torch.randperm(non_zero_values.size(0), device=perturbed_data.device)
        perturbed_data[non_zero_rows_mask, feature_idx] = non_zero_values[shuffled_idx]

    elif perturbation_type == 'shuffle_all':
        # Shuffle all values (including zeros)
        shuffled_idx = torch.randperm(perturbed_data.size(0), device=perturbed_data.device)
        perturbed_data[:, feature_idx] = data[shuffled_idx, feature_idx]

    elif perturbation_type == 'additive':
        # Noise std proportional to the column's own std, so the perturbation
        # is scale-invariant across features.
        noise = torch.randn_like(perturbed_data[:, feature_idx].float()) * scale * torch.std(perturbed_data[:, feature_idx].float())
        noise = noise.to(perturbed_data.device)

        if data.dtype == torch.int32:
            # NOTE(review): casting float noise to int32 truncates toward zero,
            # so small noise (|noise| < 1) becomes 0 for count data — confirm
            # this is intended for the 'additive' mode on integer inputs.
            perturbed_data[non_zero_rows_mask, feature_idx] += torch.tensor(noise[non_zero_rows_mask], dtype=torch.int32).to(perturbed_data.device)
        else:
            perturbed_data[non_zero_rows_mask, feature_idx] += noise[non_zero_rows_mask]

    elif perturbation_type == 'multiplicative':
        # One random factor per row, centered at 1.
        factor = 1 + scale * (perturbed_data.shape[0], device=perturbed_data.device) if False else 1 + scale * (torch.rand(perturbed_data.shape[0], device=perturbed_data.device) - 0.5)
        if data.dtype == torch.int32:
            perturbed_data[non_zero_rows_mask, feature_idx] = torch.tensor(
                perturbed_data[non_zero_rows_mask, feature_idx].float() * factor[non_zero_rows_mask],
                dtype=torch.int32).to(perturbed_data.device)
        else:
            perturbed_data[non_zero_rows_mask, feature_idx] *= factor[non_zero_rows_mask]

    return perturbed_data, False # Return perturbed data and flag indicating sufficient samples
|
| 137 |
+
|
| 138 |
+
def analyze_feature_importance_multi(id, model_config, fold_results, dataset, feature_names,
                                     device, analyse_features='all', perturbation_scale=0.1,
                                     min_samples_threshold=10, common_samples=True):
    """Rank input features by the latent-space shift caused by perturbing them.

    For every feature of the requested modality (RNA / ATAC / Flux), the feature
    column is perturbed (shuffled), the perturbed dataset is re-embedded with the
    cross-validated models, and the distance between the original and perturbed
    latent spaces is recorded as that feature's importance score.

    Args:
        id (str): Model identifier; 'Multi' builds the multimodal architecture,
            anything else a single-modality SingleTransformer.
        model_config (dict): Keyword configuration for model construction.
        fold_results (list[dict]): Cross-validation fold records, each with a
            'best_model_path' checkpoint entry.
        dataset: MultiModalDataset exposing rna_data / atac_data / flux_data,
            batch_no and labels.
        feature_names (list[str]): Feature names ordered RNA, ATAC, Flux, with
            one extra slot after the RNA and ATAC blocks (presumably special
            tokens — TODO confirm the +1 offsets against the tokenizer layout).
        device: Torch device used for inference.
        analyse_features (str): One of 'all', 'RNA', 'ATAC', 'Flux'.
        perturbation_scale (float): Scale forwarded to perturb_feature.
        min_samples_threshold (int): Minimum non-zero sample count a feature
            needs to be scored; features below it get importance 0.0.
        common_samples (bool): Forwarded to get_latent_space_cached.

    Returns:
        list[tuple[str, float]]: (feature_name, shift) pairs sorted by
        decreasing shift.

    Raises:
        ValueError: If analyse_features is not an accepted value.
    """
    if analyse_features not in ['all', 'RNA', 'ATAC', 'Flux']:
        raise ValueError("analyse_features must be one of 'all', 'RNA', 'ATAC', 'Flux'")

    # Load the best model of every fold once; they are reused for every perturbation.
    models = []
    for fold in fold_results:
        model_path = fold['best_model_path']
        if id == 'Multi':
            model = create_multimodal_model(model_config, device, use_mlm=False)
        else:
            model = SingleTransformer(id=id, **model_config).to(device)
        # Load weights to CPU first, then move to target device (handles CUDA->MPS/CPU transfer)
        state_dict = torch.load(model_path, map_location='cpu')
        model.load_state_dict(state_dict)
        model = model.to(device)
        model.eval()
        models.append(model)

    # Reference latent space, computed once from the unperturbed data.
    original_latent, _, _ = get_latent_space_cached(models, fold_results, dataset, device,
                                                    batch_size=64, common_samples=common_samples)

    feature_shifts = []
    skipped_features = []  # Features skipped due to insufficient non-zero samples.

    # Unpack multi-modal data.
    X, b, y = (dataset.rna_data, dataset.atac_data, dataset.flux_data), dataset.batch_no, dataset.labels
    rna_input, atac_input, flux_input = X[0], X[1], X[2]
    # Offsets of each modality inside feature_names (the +1 gaps match the original layout).
    atac_start = rna_input.shape[1] + 1
    flux_start = atac_start + atac_input.shape[1] + 1
    print("atac start", atac_start, "flux start", flux_start)

    def _scan_modality(modality, data, name_offset, p_type, rebuild):
        """Perturb every column of `data`, re-embed, and record each latent shift.

        `rebuild` maps the perturbed tensor back into the (rna, atac, flux) triple.
        """
        print(f"Analyzing {modality} features")
        print(f"Permuting {modality} features with", p_type)
        for i in tqdm(range(data.shape[1])):
            perturbed, insufficient_samples = perturb_feature(
                data, i, p_type, scale=perturbation_scale,
                min_samples_threshold=min_samples_threshold)
            fname = feature_names[name_offset + i]
            if insufficient_samples:
                skipped_features.append((fname, modality, (data[:, i] != 0).sum().item()))
                feature_shifts.append((fname, 0.0))  # Keep the feature, with 0 importance.
            else:
                perturbed_dataset = MultiModalDataset(rebuild(perturbed), b, y)
                perturbed_latent, _, _ = get_latent_space_cached(
                    models, fold_results, perturbed_dataset, device,
                    batch_size=64, common_samples=common_samples)
                feature_shifts.append((fname, measure_shift(original_latent, perturbed_latent)))

    if analyse_features in ['RNA', 'all']:
        _scan_modality('RNA', rna_input, 0, 'shuffle',
                       lambda p: (p, atac_input, flux_input))
    if analyse_features in ['ATAC', 'all']:
        _scan_modality('ATAC', atac_input, atac_start, 'shuffle',
                       lambda p: (rna_input, p, flux_input))
    if analyse_features in ['Flux', 'all']:
        # NOTE: Flux deliberately uses 'shuffle_all'; the previous log message
        # wrongly claimed 'shuffle' was being used here.
        _scan_modality('Flux', flux_input, flux_start, 'shuffle_all',
                       lambda p: (rna_input, atac_input, p))

    # Log skipped features so low-coverage columns are visible to the user.
    if skipped_features:
        print(f"\nSkipped {len(skipped_features)} features due to insufficient samples (< {min_samples_threshold}):")
        for feature_name, modality, sample_count in skipped_features:
            print(f"  {feature_name} ({modality}): {sample_count} samples")

    return sorted(feature_shifts, key=lambda x: x[1], reverse=True)
|
interpretation/metrics.py
ADDED
|
@@ -0,0 +1,113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import torch
|
| 3 |
+
from torch.utils.data import DataLoader, Subset
|
| 4 |
+
from sklearn.metrics import confusion_matrix
|
| 5 |
+
from models import SingleTransformer
|
| 6 |
+
from utils.helpers import create_multimodal_model
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
def compute_confusion_matrices(id, model_config, fold_results, dataset, device):
    """
    Compute per-fold confusion matrices on the validation splits, plus their sum.

    Args:
        id (str): Model ID, one of 'RNA', 'ATAC', 'Flux', 'Multi'.
        model_config (dict): Model configuration.
        fold_results (list): Fold dictionaries with 'best_model_path' and 'val_idx'.
        dataset: Dataset indexable by the fold validation indices.
        device (str): Device to run inference on.

    Returns:
        list: One 2x2 confusion matrix per fold, followed by the element-wise
        aggregate over all folds as the final entry.

    Raises:
        ValueError: If id is not a recognised model type.
    """
    if id not in ['RNA', 'ATAC', 'Flux', 'Multi']:
        raise ValueError("id must be one of 'RNA', 'ATAC', 'Flux', 'Multi'")
    # Running element-wise sum over all folds.
    agg_cm = np.zeros((2, 2), dtype=int)
    cms = []

    for i, fold in enumerate(fold_results, 1):
        model_path = fold['best_model_path']
        # Load to CPU first so checkpoints saved on CUDA also load on CPU/MPS hosts.
        state_dict = torch.load(model_path, map_location='cpu')
        val_subset = Subset(dataset, fold['val_idx'])
        cls_valid_loader = DataLoader(val_subset, batch_size=32, shuffle=False)

        if id == 'Multi':
            model = create_multimodal_model(model_config, device, use_mlm=False)
        else:
            model = SingleTransformer(id, **model_config).to(device)

        model.load_state_dict(state_dict, strict=True)
        model = model.to(device)
        model.eval()

        val_preds, val_labels = [], []
        with torch.no_grad():
            for inputs, bi, y in cls_valid_loader:
                if isinstance(inputs, list):
                    # Multimodal batch: move each modality tensor to the device.
                    inputs = (inputs[0].to(device), inputs[1].to(device), inputs[2].to(device))
                else:
                    inputs = inputs.to(device)
                bi, y = bi.to(device), y.to(device)

                preds, _ = model(inputs, bi)
                val_preds.append(preds.cpu().numpy())
                val_labels.append(y.cpu().numpy())

        val_preds = np.concatenate(val_preds).ravel()
        val_labels = np.concatenate(val_labels).ravel()

        binary_preds = (val_preds >= 0.5).astype(int)
        # Pin labels to [0, 1] so a fold missing one class still yields a 2x2
        # matrix and the aggregate sum stays shape-compatible.
        cm = confusion_matrix(val_labels, binary_preds, labels=[0, 1])
        agg_cm += cm
        cms.append(cm)

    cms.append(agg_cm)
    return cms
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
def compute_metrics_from_confusion_matrix(cm):
    """
    Derive classification metrics from a list of per-fold confusion matrices.

    The input is expected to hold one 2x2 matrix per fold followed by the
    aggregated matrix as the final element; the aggregate is ignored here.

    Args:
        cm (list[np.ndarray]): Per-fold confusion matrices plus the aggregate.

    Returns:
        dict: {'average': {...}, 'std': {...}} holding precision, recall, f1
        and accuracy averaged (and their spread) across the folds.
    """
    metric_names = ('precision', 'recall', 'f1', 'accuracy')
    per_fold = {name: [] for name in metric_names}

    for fold_cm in cm[:-1]:  # Final entry is the aggregate; skip it.
        tn, fp, fn, tp = fold_cm.ravel()
        precision = tp / (tp + fp) if (tp + fp) > 0 else 0
        recall = tp / (tp + fn) if (tp + fn) > 0 else 0
        pr_sum = precision + recall
        f1 = 2 * (precision * recall) / pr_sum if pr_sum > 0 else 0
        total = tp + tn + fp + fn
        accuracy = (tp + tn) / total if total > 0 else 0
        for name, value in zip(metric_names, (precision, recall, f1, accuracy)):
            per_fold[name].append(value)

    return {
        'average': {name: np.mean(values) for name, values in per_fold.items()},
        'std': {name: np.std(values) for name, values in per_fold.items()},
    }
|
interpretation/predictions.py
ADDED
|
@@ -0,0 +1,359 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Validation Results Analysis
|
| 3 |
+
This module provides functions to create comprehensive DataFrames containing
|
| 4 |
+
sample-level predictions, labels, and metadata from cross-validation results.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import pandas as pd
|
| 8 |
+
import numpy as np
|
| 9 |
+
import torch
|
| 10 |
+
from torch.utils.data import DataLoader, Subset
|
| 11 |
+
from utils.helpers import create_multimodal_model
|
| 12 |
+
from models import SingleTransformer
|
| 13 |
+
|
| 14 |
+
def get_sample_predictions_dataframe(
    model_type,
    multimodal_dataset,
    fold_results,
    model_config,
    device='cpu',
    batch_size=32,
    adata_rna=None,
    adata_atac=None,
    threshold=0.5
):
    """
    Creates a comprehensive DataFrame with sample-level predictions and metadata.

    Parameters
    ----------
    model_type : str
        Type of model: 'Multi', 'RNA', 'ATAC', or 'Flux'
    multimodal_dataset : MultiModalDataset
        The multimodal dataset containing all samples
    fold_results : list
        List of fold result dictionaries from cross-validation
    model_config : dict
        Model configuration dictionary
    device : str, optional
        Device to run predictions on ('cpu', 'cuda', 'mps')
    batch_size : int, optional
        Batch size for predictions
    adata_rna : AnnData, optional
        RNA AnnData object for additional metadata
    adata_atac : AnnData, optional
        ATAC AnnData object for additional metadata
    threshold : float, optional
        Classification threshold for binary predictions (default: 0.5)

    Returns
    -------
    pd.DataFrame
        DataFrame with columns:
        - ind: Sample index in the dataset
        - fold: Fold number
        - label_numeric: Actual label (0 or 1)
        - label: Actual label name ('dead-end' or 'reprogramming')
        - predicted_value: Predicted probability [0, 1]
        - predicted_class_numeric: Predicted class (0 or 1)
        - predicted_class: Predicted class name ('dead-end' or 'reprogramming')
        - correct: Whether prediction matches label
        - abs_error: Absolute error of prediction
        - modality: Available modalities for this sample (e.g., 'RAF', 'A', 'RF')
        - batch_no: Batch number
        - pct: Percentage metadata (if available)
        - clone_size: Clone size (if available)
        - clone_id: Clone ID (if available)
        - (additional RNA/ATAC metadata if adata objects provided)
    """
    # Collect predictions / labels / dataset indices / fold ids across all folds.
    all_predictions = []
    all_labels = []
    all_indices = []
    all_folds = []

    print(f"Processing {len(fold_results)} folds...")

    for fold_idx, fold in enumerate(fold_results):
        model_path = fold['best_model_path']
        val_idx = fold['val_idx']

        # Create validation subset for this fold
        val_subset = Subset(multimodal_dataset, val_idx)
        val_loader = DataLoader(val_subset, batch_size=batch_size, shuffle=False)

        # Load model architecture for the requested model type
        if model_type == 'Multi':
            model = create_multimodal_model(model_config, device, use_mlm=False)
        else:
            model = SingleTransformer(id=model_type, **model_config).to(device)

        # Load weights to CPU first, then move (handles CUDA->MPS/CPU transfer)
        state_dict = torch.load(model_path, map_location='cpu')
        model.load_state_dict(state_dict)
        model = model.to(device)
        model.eval()

        # Get predictions for this fold's validation split
        fold_preds = []
        fold_labels = []

        with torch.no_grad():
            for batch in val_loader:
                x, b, y = batch

                if isinstance(x, list):
                    rna = x[0].to(device)
                    atac = x[1].to(device)
                    flux = x[2].to(device)
                    x = (rna, atac, flux)
                else:
                    x = x.to(device)

                b = b.to(device)

                preds, _ = model(x, b)
                preds = preds.squeeze()

                # Keep a 1-D shape even for a final batch holding a single sample
                if preds.dim() == 0:
                    preds = preds.unsqueeze(0)
                if y.dim() == 0:
                    y = y.unsqueeze(0)

                fold_preds.extend(preds.cpu().numpy())
                fold_labels.extend(y.numpy())

        all_predictions.extend(fold_preds)
        all_labels.extend(fold_labels)
        all_indices.extend(val_idx)
        all_folds.extend([fold_idx + 1] * len(val_idx))

        print(f"  Fold {fold_idx + 1}: {len(val_idx)} samples processed")

    # Convert to arrays for vectorised downstream use
    all_predictions = np.array(all_predictions)
    all_labels = np.array(all_labels)
    all_indices = np.array(all_indices)
    all_folds = np.array(all_folds)

    # Determine modality availability ('R'/'A'/'F' flags) for each sample
    modalities = _get_modality_info(multimodal_dataset, all_indices)

    # Optional dataset attributes. NOTE: 'df_indics' (sic) is the attribute's
    # actual spelling on the dataset object.
    df_indices = multimodal_dataset.df_indics if hasattr(multimodal_dataset, 'df_indics') else None
    pcts = multimodal_dataset.pcts if hasattr(multimodal_dataset, 'pcts') else None

    # Build base dataframe row by row
    samples_data = []

    for i, (idx, pred, label, fold) in enumerate(zip(all_indices, all_predictions, all_labels, all_folds)):
        # Compute error
        abs_error = abs(label - pred)

        # Determine if correct
        pred_class = int(pred >= threshold)
        is_correct = pred_class == int(label)

        # Get batch number
        batch_no = int(multimodal_dataset.batch_no[idx].item())

        # Base sample info
        sample_info = {
            'ind': idx,
            'fold': fold,
            'label_numeric': int(label),
            'label': 'reprogramming' if label == 1 else 'dead-end',
            'predicted_value': float(pred),
            'predicted_class_numeric': pred_class,
            'predicted_class': 'reprogramming' if pred_class == 1 else 'dead-end',
            'correct': int(is_correct),
            'abs_error': float(abs_error),
            'modality': modalities[i],
            'batch_no': batch_no,
        }

        # Add percentage if available
        if pcts is not None:
            sample_info['pct'] = float(pcts[idx])

        # Enrich with AnnData obs metadata when the sample id can be resolved
        if df_indices is not None and (adata_rna is not None or adata_atac is not None):
            rna_id = df_indices.iloc[idx, 0] if df_indices.shape[1] > 0 else None
            atac_id = df_indices.iloc[idx, 1] if df_indices.shape[1] > 1 else None

            metadata_added = False

            if adata_rna is not None and rna_id is not None and rna_id in adata_rna.obs.index:
                obs = adata_rna.obs.loc[rna_id]
                _add_obs_metadata(sample_info, obs)
                metadata_added = True

            # Fall back to ATAC metadata only when RNA metadata was unavailable
            if not metadata_added and adata_atac is not None and atac_id is not None and atac_id in adata_atac.obs.index:
                obs = adata_atac.obs.loc[atac_id]
                _add_obs_metadata(sample_info, obs)

        samples_data.append(sample_info)

    # Create DataFrame, sorted by dataset index for easier analysis
    df_samples = pd.DataFrame(samples_data)
    df_samples = df_samples.sort_values('ind').reset_index(drop=True)

    print(f"\nTotal samples: {len(df_samples)}")
    print(f"Correct predictions: {df_samples['correct'].sum()} ({100 * df_samples['correct'].mean():.2f}%)")
    print(f"Mean absolute error: {df_samples['abs_error'].mean():.4f}")

    return df_samples
|
| 213 |
+
|
| 214 |
+
|
| 215 |
+
def _get_modality_info(dataset, indices):
|
| 216 |
+
"""
|
| 217 |
+
Determine which modalities are available for each sample.
|
| 218 |
+
|
| 219 |
+
Returns a list of modality strings:
|
| 220 |
+
- 'RAF': RNA, ATAC, Flux all available
|
| 221 |
+
- 'RA': RNA and ATAC available
|
| 222 |
+
- 'RF': RNA and Flux available
|
| 223 |
+
- 'AF': ATAC and Flux available
|
| 224 |
+
- 'R': Only RNA available
|
| 225 |
+
- 'A': Only ATAC available
|
| 226 |
+
- 'F': Only Flux available
|
| 227 |
+
"""
|
| 228 |
+
modalities = []
|
| 229 |
+
|
| 230 |
+
for idx in indices:
|
| 231 |
+
# Check if each modality has data
|
| 232 |
+
has_rna = (dataset.rna_data[idx] != 0).any().item()
|
| 233 |
+
has_atac = (dataset.atac_data[idx] != 0).any().item()
|
| 234 |
+
has_flux = (dataset.flux_data[idx] != 0).any().item()
|
| 235 |
+
|
| 236 |
+
# Build modality string
|
| 237 |
+
modality = ''
|
| 238 |
+
if has_rna:
|
| 239 |
+
modality += 'R'
|
| 240 |
+
if has_atac:
|
| 241 |
+
modality += 'A'
|
| 242 |
+
if has_flux:
|
| 243 |
+
modality += 'F'
|
| 244 |
+
|
| 245 |
+
modalities.append(modality if modality else 'None')
|
| 246 |
+
|
| 247 |
+
return modalities
|
| 248 |
+
|
| 249 |
+
|
| 250 |
+
def _add_obs_metadata(sample_info, obs):
|
| 251 |
+
"""Add metadata from AnnData obs to sample_info dictionary."""
|
| 252 |
+
metadata_fields = [
|
| 253 |
+
'clone_size', 'clone_id', 'cells_RNA', 'cells_ATAC',
|
| 254 |
+
'cells_RNA_D3', 'cells_ATAC_D3', 'n_genes', 'phase',
|
| 255 |
+
'G2M_score', 'pct_counts_mt', 'total_counts'
|
| 256 |
+
]
|
| 257 |
+
|
| 258 |
+
for field in metadata_fields:
|
| 259 |
+
if field in obs:
|
| 260 |
+
value = obs[field]
|
| 261 |
+
# Handle different data types
|
| 262 |
+
if pd.notna(value):
|
| 263 |
+
if isinstance(value, (int, float, np.integer, np.floating)):
|
| 264 |
+
sample_info[field] = value
|
| 265 |
+
else:
|
| 266 |
+
sample_info[field] = str(value)
|
| 267 |
+
|
| 268 |
+
|
| 269 |
+
def summarize_by_modality(df_samples):
    """
    Aggregate prediction quality per modality string.

    Parameters
    ----------
    df_samples : pd.DataFrame
        Output of get_sample_predictions_dataframe.

    Returns
    -------
    pd.DataFrame
        One row per modality with sample count, accuracy, mean absolute error
        and prediction mean/std, sorted by sample count (descending).
    """
    grouped = df_samples.groupby('modality')
    summary = grouped.agg({
        'ind': 'count',
        'correct': 'mean',
        'abs_error': 'mean',
        'predicted_value': ['mean', 'std'],
    })
    summary = summary.round(4)
    summary.columns = ['n_samples', 'accuracy', 'mean_abs_error', 'mean_pred', 'std_pred']
    summary = summary.reset_index()
    return summary.sort_values('n_samples', ascending=False)
|
| 295 |
+
|
| 296 |
+
|
| 297 |
+
def summarize_by_fold(df_samples):
    """
    Aggregate prediction quality per cross-validation fold.

    Parameters
    ----------
    df_samples : pd.DataFrame
        Output of get_sample_predictions_dataframe.

    Returns
    -------
    pd.DataFrame
        One row per fold with sample count, accuracy, mean absolute error
        and prediction mean/std.
    """
    grouped = df_samples.groupby('fold')
    summary = grouped.agg({
        'ind': 'count',
        'correct': 'mean',
        'abs_error': 'mean',
        'predicted_value': ['mean', 'std'],
    })
    summary = summary.round(4)
    summary.columns = ['n_samples', 'accuracy', 'mean_abs_error', 'mean_pred', 'std_pred']
    return summary.reset_index()
|
| 322 |
+
def get_misclassified_samples(df_samples):
    """
    Select the rows whose prediction did not match the true label.

    Parameters
    ----------
    df_samples : pd.DataFrame
        Output of get_sample_predictions_dataframe.

    Returns
    -------
    pd.DataFrame
        Independent copy containing only the misclassified samples.
    """
    wrong = df_samples['correct'] == 0
    return df_samples.loc[wrong].copy()
|
| 337 |
+
def get_samples_by_modality(df_samples, modality):
    """
    Select the rows matching a given modality string.

    Parameters
    ----------
    df_samples : pd.DataFrame
        Output of get_sample_predictions_dataframe.
    modality : str
        Modality string to keep (e.g., 'RAF', 'A', 'RF').

    Returns
    -------
    pd.DataFrame
        Independent copy containing only the matching samples.
    """
    mask = df_samples['modality'] == modality
    return df_samples.loc[mask].copy()
|
| 354 |
+
|
| 355 |
+
|
| 356 |
+
# Informational entry point: importing this module has no side effects, and
# running it directly only prints usage hints.
if __name__ == "__main__":
    # Example usage
    print("This module provides functions to analyze validation results.")
    print("Main function: get_sample_predictions_dataframe()")
|
interpretation/shapvalues.py
ADDED
|
@@ -0,0 +1,126 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
from utils.helpers import create_multimodal_model
|
| 3 |
+
from models import SingleTransformer
|
| 4 |
+
from utils.helpers import get_all_modalities_available_samples
|
| 5 |
+
from data import create_dataset
|
| 6 |
+
import shap
|
| 7 |
+
|
| 8 |
+
def filter_ds(dataset, indices):
    """Return a new MultiModalDataset restricted to the given sample indices.

    The RNA/ATAC/Flux tensors, batch numbers and labels are all sliced with
    the same index collection, keeping the sample alignment intact.
    """
    modalities = tuple(
        tensor[indices]
        for tensor in (dataset.rna_data, dataset.atac_data, dataset.flux_data)
    )
    return create_dataset.MultiModalDataset(modalities,
                                            dataset.batch_no[indices],
                                            dataset.labels[indices])
|
| 16 |
+
|
| 17 |
+
def get_background_data(id, dataset, samples=100, return_other_samples=False):
    """
    Get background data with balanced samples from each label.

    Args:
        id (str): Model ID; must be one of 'RNA', 'ATAC', 'Flux', 'Multi'
            (only 'Multi' is implemented).
        dataset: MultiModalDataset object.
        samples (int): Total number of background samples, split evenly
            across the unique labels.
        return_other_samples (bool): If True, also return the remaining samples.

    Returns:
        tuple: (bg_ds, background_indices), or
        (bg_ds, background_indices, other_ds, other_indices) when
        return_other_samples is True. Indices refer to the modality-filtered
        dataset, not the original one.

    Raises:
        ValueError: For unknown ids or not-yet-implemented single modalities.
    """
    if id not in ['RNA', 'ATAC', 'Flux', 'Multi']:
        raise ValueError("id must be one of 'RNA', 'ATAC', 'Flux', 'Multi'")
    if id != 'Multi':
        # Single-modality background sampling is not supported yet; raising up
        # front avoids referencing never-assigned locals further down.
        raise ValueError("Not Implemented")

    # Restrict to samples where all three modalities are present.
    dataset = get_all_modalities_available_samples(dataset)
    labels = dataset.labels

    # Deterministically take the first samples_per_label samples of each label.
    samples_per_label = samples // len(torch.unique(labels))
    background_indices = []
    for label in torch.unique(labels):
        label_indices = torch.where(labels == label)[0]
        background_indices.extend(label_indices[:samples_per_label])
    background_indices = torch.tensor(background_indices)

    bg_ds = filter_ds(dataset, background_indices)

    if return_other_samples:
        # Use a plain set for O(1) membership tests instead of scanning the
        # tensor for every candidate index.
        chosen = set(background_indices.tolist())
        other_indices = torch.tensor([i for i in range(len(labels)) if i not in chosen])
        other_ds = filter_ds(dataset, other_indices)
        return bg_ds, background_indices, other_ds, other_indices
    return bg_ds, background_indices
|
| 63 |
+
|
| 64 |
+
class ShapWrapper(torch.nn.Module):
    """Adapter that lets SHAP explainers call the multimodal model.

    SHAP passes one flat tensor per sample; this wrapper splits it back into
    the (rna, atac, flux) modality tensors plus the trailing batch-index
    column, runs the wrapped model and returns sigmoid probabilities.

    Args:
        model: Trained multimodal model (switched to eval mode on wrapping).
        rna_dim (int): Number of RNA features at the start of the flat input.
        atac_dim (int): Number of ATAC features following the RNA block.
            All remaining columns except the last are flux features.
    """

    def __init__(self, model, rna_dim=944, atac_dim=883):
        super().__init__()
        self.model = model
        self.model.eval()
        self.rna_dim = rna_dim
        self.atac_dim = atac_dim

    def forward(self, x):
        # The caller appends exactly ONE extra column (the batch index), so
        # features are x[:, :-1]. The previous x[:, :-2] silently dropped the
        # final flux feature from every sample.
        inputs, b = x[:, :-1], x[:, -1].long()
        rna_end = self.rna_dim
        atac_end = rna_end + self.atac_dim
        inputs = (inputs[:, :rna_end].long(),       # RNA token ids
                  inputs[:, rna_end:atac_end].float(),  # ATAC features
                  inputs[:, atac_end:].float())     # Flux features
        preds, _ = self.model(inputs, b)
        return torch.sigmoid(preds)
|
| 77 |
+
|
| 78 |
+
def compute_shap_values(id, fold_results, dataset, model_config, device):
    """
    Compute SHAP values for the validation samples of every fold.

    A balanced background dataset is drawn once; for each fold, the fold's
    best model is wrapped for SHAP and a GradientExplainer is run over the
    fold's eligible validation samples.

    Args:
        id (str): Model ID; only 'Multi' is currently supported.
        fold_results (list): Fold dicts with 'fold', 'val_idx', 'best_model_path'.
        dataset: MultiModalDataset object.
        model_config (dict): Model configuration.
        device: Torch device.

    Returns:
        list: One SHAP explanation object per fold that had eligible samples.

    Raises:
        ValueError: For unknown or unsupported ids.
    """
    if id not in ['RNA', 'ATAC', 'Flux', 'Multi']:
        raise ValueError("id must be one of 'RNA', 'ATAC', 'Flux', 'Multi'")
    if id != 'Multi':
        # Previously non-'Multi' ids fell through and crashed later with a
        # NameError on the never-assigned background dataset.
        raise ValueError("Not Implemented")

    all_shap_values = []

    bg_ds, bg_idx, other_ds, other_idx = get_background_data(id, dataset, samples=50, return_other_samples=True)
    print("total background samples: ", len(bg_idx), "total test samples: ", len(other_idx))
    other_idx_set = set(other_idx.tolist())  # O(1) membership tests below.

    # Background tensor in the flat SHAP layout: features concatenated, batch
    # index appended as the last column. Invariant across folds, so built once.
    bg_x = torch.cat([bg_ds.rna_data, bg_ds.atac_data, bg_ds.flux_data], dim=1).to(device)
    bg_b = bg_ds.batch_no.to(device)
    bgx = torch.cat([bg_x, bg_b[..., None]], dim=-1)

    for fold in fold_results:
        # Keep only validation indices that appear in the non-background set.
        # NOTE(review): fold['val_idx'] presumably indexes the same filtered
        # dataset as other_idx — confirm the alignment with get_background_data.
        val_idx = [i for i in fold['val_idx'] if i in other_idx_set]

        if len(val_idx) == 0:
            print('No samples of the specified type in the validation set. Skipping...')
            continue
        print(f'fold {fold["fold"]} -> {len(val_idx)} samples')

        val_ds = filter_ds(dataset, val_idx)

        model = create_multimodal_model(model_config, device, use_mlm=False)
        model_path = fold['best_model_path']
        # Load on CPU first so CUDA checkpoints also work on CPU/MPS hosts.
        model.load_state_dict(torch.load(model_path, map_location='cpu'))
        model = model.to(device)
        model.eval()
        wrapped_model = ShapWrapper(model).to(device)

        explainer = shap.GradientExplainer(wrapped_model, bgx)

        # Validation samples in the same flat layout as the background.
        inputs = torch.cat([val_ds.rna_data, val_ds.atac_data, val_ds.flux_data], dim=1).to(device)
        batch_indices = val_ds.batch_no.to(device)
        val_x = torch.cat([inputs, batch_indices[..., None]], dim=-1)
        all_shap_values.append(explainer(val_x))

    return all_shap_values
|
interpretation/similarity.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
from utils.helpers import get_token_embeddings
|
| 3 |
+
|
| 4 |
+
def compute_similarity_matrix(model, dataset, device):
    """
    Compute a token-to-token cosine-similarity matrix for the dataset.

    Token embeddings are extracted for every sample, averaged per token
    position across all samples, L2-normalized, and compared pairwise
    via a single matrix product (cosine similarity).

    Args:
        model (torch.nn.Module): Model to extract embeddings from.
        dataset (torch.utils.data.Dataset): Dataset to embed.
        device (str): Device to run on.

    Returns:
        np.ndarray: (seq_len, seq_len) cosine-similarity matrix.
    """
    # (n_samples, seq_len, d_model) — per-sample token embeddings.
    token_embeddings = get_token_embeddings(model, dataset, device)

    # Collapse the sample axis: one mean embedding per token position.
    centroids = token_embeddings.mean(dim=0)  # (seq_len, d_model)

    # Unit-normalize rows so the dot product below equals cosine similarity.
    unit_centroids = centroids / centroids.norm(dim=1, keepdim=True)

    # All-pairs cosine similarity in one matrix multiplication.
    return (unit_centroids @ unit_centroids.T).cpu().numpy()
|
interpretation/visualization.py
ADDED
|
@@ -0,0 +1,560 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import numpy as np
|
| 3 |
+
import matplotlib.pyplot as plt
|
| 4 |
+
import seaborn as sns
|
| 5 |
+
from sklearn.metrics import roc_curve, roc_auc_score
|
| 6 |
+
from scipy import stats
|
| 7 |
+
from scipy.stats import ttest_rel
|
| 8 |
+
import pandas as pd
|
| 9 |
+
|
| 10 |
+
def plot_conf_matrix_mlm_vs_nomlm(cms_mlm, cms_nomlm, m_type, only_agg=True, suptitle="Confusion Matrix Comparison"):
    """Plot side-by-side confusion matrices for the MLM vs. no-MLM models.

    Args:
        cms_mlm: Per-fold confusion matrices for the MLM-pretrained model;
            the last entry is assumed to be the aggregated matrix.
        cms_nomlm: Same structure for the model trained without MLM.
        m_type: Model-type tag used in the saved figure's file name.
        only_agg: If True, plot only the aggregated (last) matrices;
            otherwise plot one row of per-fold matrices.
        suptitle: Figure-level title.
    """
    labels = ['Dead-end', 'Reprogramming']

    if only_agg:
        # Plot only the aggregated confusion matrices (last one in each list)
        cms_mlm_agg = cms_mlm[-1]
        cms_nomlm_agg = cms_nomlm[-1]

        f = plt.figure(figsize=(12, 5))
        plt.suptitle(suptitle, fontsize=16)

        # Aggregated MLM matrix (left panel)
        plt.subplot(1, 2, 1)
        sns.heatmap(cms_mlm_agg, annot=True, cmap='Blues', fmt='g', xticklabels=labels, yticklabels=labels)
        plt.xlabel('Predicted')
        plt.ylabel('Actual')
        plt.title('Confusion Matrix - MLM (Aggregated)')

        # Aggregated no-MLM matrix (right panel)
        plt.subplot(1, 2, 2)
        sns.heatmap(cms_nomlm_agg, annot=True, cmap='Blues', fmt='g', xticklabels=labels, yticklabels=labels)
        plt.xlabel('Predicted')
        plt.ylabel('Actual')
        plt.title('Confusion Matrix - No MLM (Aggregated)')

        # Fix: apply the layout BEFORE saving so the PDF matches the on-screen figure.
        plt.tight_layout()
        f.savefig(f'./figures/confusion_matrices_{m_type}.pdf', bbox_inches='tight')
        plt.show()

    else:
        # One row per fold: MLM in the left column, no-MLM in the right column.
        n_folds = len(cms_mlm)
        f = plt.figure(figsize=(15, 2 * n_folds))  # scale figure height with the fold count
        plt.suptitle(suptitle, fontsize=16)

        for i in range(n_folds):
            plt.subplot(n_folds, 2, i * 2 + 1)  # first column (MLM)
            sns.heatmap(cms_mlm[i], annot=True, cmap='Blues', fmt='g', xticklabels=labels, yticklabels=labels)
            plt.xlabel('Predicted')
            plt.ylabel('Actual')
            plt.title(f'Confusion Matrix - MLM (Fold {i+1})')

            plt.subplot(n_folds, 2, i * 2 + 2)  # second column (No MLM)
            sns.heatmap(cms_nomlm[i], annot=True, cmap='Blues', fmt='g', xticklabels=labels, yticklabels=labels)
            plt.xlabel('Predicted')
            plt.ylabel('Actual')
            plt.title(f'Confusion Matrix - No MLM (Fold {i+1})')

        # Fix: layout first (leaving room for the suptitle), then save.
        plt.tight_layout(rect=[0, 0, 1, 0.96])
        f.savefig(f'./figures/confusion_matrices_folds_{m_type}.pdf', bbox_inches='tight')
        plt.show()
|
| 64 |
+
|
| 65 |
+
def plot_training_vs_validation_losses(train_losses, val_losses, title="Losses"):
    """Plot train and validation loss curves side by side and save to ./figures/losses.pdf.

    Args:
        train_losses: Sequence of per-epoch training losses.
        val_losses: Sequence of per-epoch validation losses
            (assumed to be the same length as ``train_losses``).
        title: Figure-level title.
    """
    epochs = len(train_losses)
    f = plt.figure(figsize=(10, 3))
    plt.suptitle(title)

    plt.subplot(1, 2, 1)
    plt.plot(range(1, epochs + 1), train_losses)
    plt.xlabel('Epoch')
    plt.ylabel('Train Loss')
    plt.title('Train Loss')

    plt.subplot(1, 2, 2)
    plt.plot(range(1, epochs + 1), val_losses)
    plt.xlabel('Epoch')
    plt.ylabel('Validation Loss')
    plt.title('Validation Loss')

    # Fix: apply the layout BEFORE saving so the PDF matches the on-screen figure.
    plt.tight_layout()
    f.savefig('./figures/losses.pdf', bbox_inches='tight')
    plt.show()
|
| 84 |
+
|
| 85 |
+
def plot_roc_auc_curve(val_preds, val_labels, m_type, aggregate=False):
    """Plot ROC curves (aggregated or per fold) and save the figure.

    Args:
        val_preds: List of per-fold prediction arrays.
        val_labels: List of per-fold ground-truth label arrays.
        m_type: Model-type tag used in the saved figure's file name.
        aggregate: If True, pool all folds into a single ROC curve;
            otherwise draw one curve per fold.
    """
    f = plt.figure()

    if aggregate:
        # Pool every fold into one flat prediction/label vector.
        pooled_labels = np.concatenate(val_labels).ravel()
        pooled_preds = np.concatenate(val_preds).ravel()

        auc = roc_auc_score(pooled_labels, pooled_preds)
        fpr, tpr, _ = roc_curve(pooled_labels, pooled_preds)
        plt.plot(fpr, tpr, label=f'Aggregated AUC: {auc:.4f}')
        curve_title = 'ROC Curve (Aggregated)'
    else:
        # One ROC curve per fold on the same axes.
        for fold_no, (fold_labels, fold_preds) in enumerate(zip(val_labels, val_preds), 1):
            auc = roc_auc_score(fold_labels, fold_preds)
            fpr, tpr, _ = roc_curve(fold_labels, fold_preds)
            plt.plot(fpr, tpr, label=f'Fold {fold_no} AUC: {auc:.4f}')
        curve_title = 'ROC Curve (Each Fold)'

    # Chance-level diagonal for reference.
    plt.plot([0, 1], [0, 1], linestyle='--')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title(curve_title)
    plt.legend()
    f.savefig(f'./figures/roc_curve_{m_type}.pdf', bbox_inches='tight')
    plt.show()
|
| 120 |
+
|
| 121 |
+
|
| 122 |
+
def plot_auc_boxplot_comparison(fold_results1, fold_results2, title="AUC Comparison"):
    """Box-plot per-fold train/validation AUC for two models and report paired t-tests.

    Args:
        fold_results1: Per-fold result dicts for the first model (with MLM);
            each dict needs 'train_auc' and 'best_val_auc' keys.
        fold_results2: Same structure for the second model (without MLM),
            with folds matched one-to-one with ``fold_results1``.
        title: Figure-level title.
    """
    train_auc_scores_mlm = [fold['train_auc'] for fold in fold_results1]
    train_auc_scores_nomlm = [fold['train_auc'] for fold in fold_results2]
    val_auc_scores_mlm = [fold['best_val_auc'] for fold in fold_results1]
    val_auc_scores_nomlm = [fold['best_val_auc'] for fold in fold_results2]

    # Paired t-test: the same folds are evaluated by both models.
    train_p_value = ttest_rel(train_auc_scores_mlm, train_auc_scores_nomlm).pvalue
    val_p_value = ttest_rel(val_auc_scores_mlm, val_auc_scores_nomlm).pvalue

    # NOTE(review): these wide-form frames include the string 'Fold' column;
    # seaborn is expected to plot only the numeric columns — verify with the
    # installed seaborn version.
    df_train = pd.DataFrame({
        'Fold': [f'Fold {i+1}' for i in range(len(val_auc_scores_mlm))],
        'with MLM': train_auc_scores_mlm,
        'without MLM': train_auc_scores_nomlm,
    })

    df_valid = pd.DataFrame({
        'Fold': [f'Fold {i+1}' for i in range(len(val_auc_scores_mlm))],
        'with MLM': val_auc_scores_mlm,
        'without MLM': val_auc_scores_nomlm
    })

    f = plt.figure(figsize=(12, 8))
    plt.suptitle(title)

    plt.subplot(1, 2, 1)
    sns.boxplot(data=df_train, palette=["#1f77b4", "#ff7f0e"])  # custom colors
    plt.title(f'Train AUC Comparison (p-value = {train_p_value:.4f})')
    plt.ylabel('AUC')
    plt.ylim(0.5, 1)

    plt.subplot(1, 2, 2)
    sns.boxplot(data=df_valid, palette=["#2ca02c", "#d62728"])  # custom colors
    plt.title(f'Validation AUC Comparison (p-value = {val_p_value:.4f})')
    plt.ylabel('AUC')
    plt.ylim(0.5, 1)

    # Fix: apply the layout BEFORE saving so the PDF matches the on-screen figure.
    plt.tight_layout()
    f.savefig('./figures/auc_comparison.pdf', bbox_inches='tight')
    plt.show()
|
| 162 |
+
|
| 163 |
+
def plot_loss_comparison_mlm_vs_nomlm(fold_results1, fold_results2, title="Loss Comparison"):
    """Overlay per-fold train/validation loss curves for two models.

    Solid markers are the pre-trained (MLM) model, dashed the model
    trained without MLM; one curve quartet is drawn per fold.
    """
    f = plt.figure(figsize=(12, 8))

    for i, fold in enumerate(fold_results1):
        fold_id = fold["fold"]
        mlm_train = fold['metrics']['train_loss']
        mlm_val = fold['metrics']['val_loss']
        nomlm_train = fold_results2[i]['metrics']['train_loss']
        nomlm_val = fold_results2[i]['metrics']['val_loss']
        xs = range(1, len(mlm_train) + 1)

        plt.plot(xs, mlm_train, 'o-', label=f'Train Loss w/ Pre-Training - Fold {fold_id}', alpha=0.5)
        plt.plot(xs, mlm_val, 'x-', label=f'Validation Loss w/ Pre-Training - Fold {fold_id}', alpha=0.5)
        plt.plot(xs, nomlm_train, 'o--', label=f'Train Loss w/o Pre-Training - Fold {fold_id}', alpha=0.5)
        plt.plot(xs, nomlm_val, 'x--', label=f'Validation Loss w/o Pre-Training - Fold {fold_id}', alpha=0.5)

    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.title(title)
    # Legend placed outside the axes on the right.
    plt.legend(loc='upper right', bbox_to_anchor=(1.3, 1))
    f.savefig('./figures/loss_comparison.pdf', bbox_inches='tight')
    plt.show()
|
| 186 |
+
|
| 187 |
+
def plot_fold_losses(fold_results, title="Losses"):
    """Draw train ('o-') and validation ('x-') loss curves for every fold."""
    f = plt.figure(figsize=(12, 8))

    for fold in fold_results:
        fold_id = fold["fold"]
        train_curve = fold['metrics']['train_loss']
        val_curve = fold['metrics']['val_loss']
        xs = range(1, len(train_curve) + 1)

        plt.plot(xs, train_curve, 'o-', label=f'Train Loss - Fold {fold_id}', alpha=0.5)
        plt.plot(xs, val_curve, 'x-', label=f'Validation Loss - Fold {fold_id}', alpha=0.5)

    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.title(title)
    # Legend placed outside the axes on the right.
    plt.legend(loc='upper right', bbox_to_anchor=(1.3, 1))
    f.savefig('./figures/fold_losses.pdf', bbox_inches='tight')
    plt.show()
|
| 206 |
+
|
| 207 |
+
def plot_data_distribution(adata_RNA, adata_ATAC, adata_Flux, title="Data Distribution"):
    """Plot value histograms for the RNA, ATAC, and flux modalities.

    Args:
        adata_RNA: AnnData-like object with a sparse ``.X`` matrix of expression values.
        adata_ATAC: AnnData-like object with a sparse ``.X`` matrix of accessibility values.
        adata_Flux: DataFrame-like object (``.values``) of flux values.
        title: Figure-level title.
    """
    fig, axes = plt.subplots(1, 3, figsize=(15, 5))
    plt.suptitle(title)

    # (values, bar color, panel name, x-axis label, decimals shown for the variance)
    panels = [
        (adata_RNA.X.toarray().flatten(), 'skyblue', 'RNA Distribution', 'Expression level', 2),
        (adata_ATAC.X.toarray().flatten(), 'lightgreen', 'ATAC Distribution', 'Accessibility level', 3),
        (adata_Flux.values.flatten(), 'salmon', 'Fluxomic Distribution', 'Flux value', 5),
    ]

    for ax, (data, color, name, xlabel, var_decimals) in zip(axes, panels):
        sns.histplot(data, bins=100, ax=ax, color=color)
        var, mean = data.var(), data.mean()
        # Variance precision differs per modality (flux values are tiny).
        ax.set_title(f'{name}, var:{var:.{var_decimals}f}, mean:{mean:.2f}')
        ax.set_xlabel(xlabel)
        ax.set_ylabel('Frequency')

    # Fix: apply the layout BEFORE saving so the PDF matches the on-screen figure.
    plt.tight_layout()
    fig.savefig('./figures/data_distribution.pdf', bbox_inches='tight')
    plt.show()
|
| 236 |
+
|
| 237 |
+
|
| 238 |
+
def plot_att_weights(all_attention, dead_end_attention, reprogramming_attention,
                    feature_names=None, print_top_features=False, top_n=5, scale_weights=False, fix_scale=False,
                    use_mean_contribution=False):
    """Visualize sample-averaged attention weights for all / dead-end / reprogramming samples.

    Draws three 1-row heatmaps (one per sample group) over the concatenated
    feature axis, marks the RNA|ATAC|Flux modality boundaries with dashed
    lines, and annotates each modality with sum/mean contribution metrics.

    Args:
        all_attention: (n_samples, n_features) attention weights, all samples.
        dead_end_attention: Same layout, dead-end samples only.
        reprogramming_attention: Same layout, reprogramming samples only.
        feature_names: Per-feature names; required when print_top_features=True.
        print_top_features: If True, also print the top-`top_n` attended features per group.
        top_n: Number of top features to print per group.
        scale_weights: If True, min-max scale each averaged vector to [0, 1].
        fix_scale: If True (and not scaling), pin the color scale of all three
            heatmaps to the min/max of the all-samples average.
        use_mean_contribution: Metric mode passed to the annotation helper:
            False/'sum', True/'mean', 'median', 'trimmed_mean', or 'active_mean'.

    Returns:
        The matplotlib Figure.
    """
    # Debug output of the incoming array shapes.
    print(all_attention.shape, "all_attention.shape")
    print(dead_end_attention.shape, "dead_end_attention.shape")
    print(reprogramming_attention.shape, "reprogramming_attention.shape")

    def minmax_scale(arr):
        # Rescale `arr` to [0, 1]; constant arrays map to all zeros.
        arr = np.asarray(arr)
        min_val = arr.min()
        max_val = arr.max()
        if max_val - min_val == 0:
            return np.zeros_like(arr) # avoid divide by zero
        return (arr - min_val) / (max_val - min_val)

    avg_all_attention = all_attention.mean(axis=0) # Average attention weights across samples
    avg_dead_end_attention = dead_end_attention.mean(axis=0)
    avg_reprogramming_attention = reprogramming_attention.mean(axis=0)

    # Store original unscaled versions for modality contribution calculation
    # (`hasattr` check: inputs may be numpy arrays or torch tensors — TODO confirm).
    avg_all_attention_orig = avg_all_attention.copy() if hasattr(avg_all_attention, 'copy') else np.array(avg_all_attention)
    avg_dead_end_attention_orig = avg_dead_end_attention.copy() if hasattr(avg_dead_end_attention, 'copy') else np.array(avg_dead_end_attention)
    avg_reprogramming_attention_orig = avg_reprogramming_attention.copy() if hasattr(avg_reprogramming_attention, 'copy') else np.array(avg_reprogramming_attention)

    if scale_weights:
        # Independent [0, 1] scaling per group; color scale is then fixed to [0, 1].
        avg_all_attention = minmax_scale(avg_all_attention)
        avg_dead_end_attention = minmax_scale(avg_dead_end_attention)
        avg_reprogramming_attention = minmax_scale(avg_reprogramming_attention)
        vmin, vmax = 0.0, 1.0
    elif fix_scale: # fix scale of all attention weights to the same range
        vmin, vmax = avg_all_attention.min(), avg_all_attention.max()
    else:
        # Let seaborn pick the color range per heatmap.
        vmin, vmax = None, None

    # Visualize average attention weights
    f = plt.figure(figsize=(15, 3))

    # Modality boundaries on the concatenated feature axis:
    # [0, divider1) RNA, [divider1, divider2) ATAC, [divider2, end) Flux.
    # Presumably 945 RNA features and 884 ATAC features — verify against the dataset.
    divider1 = 945
    divider2 = 945 + 884

    def add_modality_labels(ax, attention_weights, attention_weights_orig, use_mean=False):
        # Annotate each modality segment below the heatmap with a contribution
        # metric computed on the *unscaled* averaged weights.
        rna_weights = attention_weights_orig[:divider1]
        atac_weights = attention_weights_orig[divider1:divider2]
        flux_weights = attention_weights_orig[divider2:]

        # Calculate metric based on method
        if use_mean is False or use_mean == 'sum':
            # Sum of all attention weights (original behavior)
            rna_metric = rna_weights.sum()
            atac_metric = atac_weights.sum()
            flux_metric = flux_weights.sum()

        elif use_mean is True or use_mean == 'mean':
            # Mean attention per feature
            rna_metric = rna_weights.mean()
            atac_metric = atac_weights.mean()
            flux_metric = flux_weights.mean()

        elif use_mean == 'median':
            # Median attention per feature (robust to zeros and outliers)
            rna_metric = np.median(rna_weights)
            atac_metric = np.median(atac_weights)
            flux_metric = np.median(flux_weights)

        elif use_mean == 'trimmed_mean':
            # Trimmed mean (15% removed from each tail despite the older comment below)
            rna_metric = stats.trim_mean(rna_weights, proportiontocut=0.15) # removes 15% from each tail
            atac_metric = stats.trim_mean(atac_weights, proportiontocut=0.15)
            flux_metric = stats.trim_mean(flux_weights, proportiontocut=0.15)

        elif use_mean == 'active_mean':
            # Mean of only "active" features (attention > threshold)
            threshold = np.percentile(attention_weights_orig, 25) # bottom 25% considered inactive

            rna_active = rna_weights[rna_weights > threshold]
            atac_active = atac_weights[atac_weights > threshold]
            flux_active = flux_weights[flux_weights > threshold]

            rna_metric = rna_active.mean() if len(rna_active) > 0 else 0
            atac_metric = atac_active.mean() if len(atac_active) > 0 else 0
            flux_metric = flux_active.mean() if len(flux_active) > 0 else 0

        else:
            raise ValueError(f"Invalid use_mean value: {use_mean}")

        # # Normalize to percentages
        # print(rna_metric, atac_metric, flux_metric, "rna_metric, atac_metric, flux_metric")
        # total_metric = rna_metric + atac_metric + flux_metric
        # rna_pct = (rna_metric / total_metric * 100) if total_metric > 0 else 0
        # atac_pct = (atac_metric / total_metric * 100) if total_metric > 0 else 0
        # flux_pct = (flux_metric / total_metric * 100) if total_metric > 0 else 0

        # Calculate center positions for each modality
        n_rna = divider1
        n_atac = divider2 - divider1
        n_flux = len(attention_weights) - divider2
        rna_center = n_rna / 2
        atac_center = divider1 + n_atac / 2
        flux_center = divider2 + n_flux / 2
        # NOTE(review): these divide the chosen metric by the feature count; the
        # "Mean:" label is only a true per-feature mean when the metric is a sum.
        rna_metric_mean = rna_metric / n_rna
        atac_metric_mean = atac_metric / n_atac
        flux_metric_mean = flux_metric / n_flux

        ax.text(rna_center, -0.3, f'Sum: {rna_metric:.3f}\nMean: {rna_metric_mean:.3f}', ha='center', va='bottom', fontsize=10, fontweight='bold')
        ax.text(atac_center, -0.3, f'Sum: {atac_metric:.3f}\nMean: {atac_metric_mean:.3f}', ha='center', va='bottom', fontsize=10, fontweight='bold')
        ax.text(flux_center, -0.3, f'Sum: {flux_metric:.3f}\nMean: {flux_metric_mean:.3f}', ha='center', va='bottom', fontsize=10, fontweight='bold')

    # Panel 1: all samples.
    plt.subplot(1, 3, 1)
    ax1 = plt.gca()
    sns.heatmap(avg_all_attention.reshape(1, -1), cmap='viridis', yticklabels=['All'], vmin=vmin, vmax=vmax, ax=ax1)
    plt.title('Avg Att. W. (All Samples)')
    plt.xlabel('Features')
    plt.xticks([])
    plt.axvline(x=divider1, color='red', linestyle='--', linewidth=2)
    plt.axvline(x=divider2, color='red', linestyle='--', linewidth=2)
    add_modality_labels(ax1, avg_all_attention, avg_all_attention_orig, use_mean=use_mean_contribution)

    # Panel 2: dead-end samples.
    plt.subplot(1, 3, 2)
    ax2 = plt.gca()
    sns.heatmap(avg_dead_end_attention.reshape(1, -1), cmap='viridis', yticklabels=['Dead-end'], vmin=vmin, vmax=vmax, ax=ax2)
    plt.title('Avg Att. W. (Dead-end Samples)')
    plt.xlabel('Features')
    plt.xticks([])
    plt.axvline(x=divider1, color='red', linestyle='--', linewidth=2)
    plt.axvline(x=divider2, color='red', linestyle='--', linewidth=2)
    add_modality_labels(ax2, avg_dead_end_attention, avg_dead_end_attention_orig, use_mean=use_mean_contribution)

    # Panel 3: reprogramming samples.
    plt.subplot(1, 3, 3)
    ax3 = plt.gca()
    sns.heatmap(avg_reprogramming_attention.reshape(1, -1), cmap='viridis', yticklabels=['Reprogramming'], vmin=vmin, vmax=vmax, ax=ax3)
    plt.title('Avg Att. W. (Reprogramming Samples)')
    plt.xlabel('Features')
    plt.xticks([])
    plt.axvline(x=divider1, color='red', linestyle='--', linewidth=2)
    plt.axvline(x=divider2, color='red', linestyle='--', linewidth=2)
    add_modality_labels(ax3, avg_reprogramming_attention, avg_reprogramming_attention_orig, use_mean=use_mean_contribution)

    # f.savefig('./figures/attention_weights.pdf', bbox_inches='tight')
    plt.tight_layout()
    plt.show()

    if print_top_features:
        def get_top_features(attention_weights, feature_names, top_n=top_n):
            # Average over samples (torch tensors exposed via .numpy(); else numpy path),
            # then pick the indices of the `top_n` largest averaged weights.
            avg_attention = attention_weights.mean(axis=0).numpy() if hasattr(attention_weights, 'numpy') else attention_weights.mean(axis=0)
            print(avg_attention.shape, len(feature_names))
            top_indices = avg_attention.argsort()[-top_n:][::-1]
            print(top_indices)
            return [(feature_names[i], avg_attention[i]) for i in top_indices]

        top_all = get_top_features(all_attention, feature_names)
        top_dead_end = get_top_features(dead_end_attention, feature_names)
        top_reprogramming = get_top_features(reprogramming_attention, feature_names)

        print(f"Top {top_n} attended features (All samples):")
        for feature, weight in top_all:
            print(f"{feature}: {weight:.4f}", end=", ")

        print(f"\nTop {top_n} attended features (Dead-end samples):")
        for feature, weight in top_dead_end:
            print(f"{feature}: {weight:.4f}", end=", ")

        print(f"\nTop {top_n} attended features (Reprogramming samples):")
        for feature, weight in top_reprogramming:
            print(f"{feature}: {weight:.4f}", end=", ")
    return f
|
| 405 |
+
|
| 406 |
+
def plot_att_weights_distribution(
    all_attention, dead_end_attention, reprogramming_attention,
    feature_names=None, plot_type='violin', top_n=5, print_means=False
):
    """Plot the per-modality distribution of attention weights for each sample group.

    One subplot per group (All / Dead-end / Reprogramming), each showing the
    RNA, ATAC, and Flux weight distributions as violin or box plots with the
    per-modality mean marked as a dashed red line.

    Args:
        all_attention: (n_samples, n_features) attention weights, all samples.
        dead_end_attention: Same layout, dead-end samples only.
        reprogramming_attention: Same layout, reprogramming samples only.
        feature_names: Unused; kept for interface compatibility.
        plot_type: 'violin' or 'box' ('hist' is named in the error message
            but not implemented).
        top_n: Unused; kept for interface compatibility.
        print_means: If True, print mean/std per (condition, modality).

    Returns:
        The matplotlib Figure.
    """
    # Modality boundaries on the concatenated feature axis:
    # [0, divider1) RNA, [divider1, divider2) ATAC, [divider2, end) Flux.
    # (Removed dead duplicate assignments 944/944+883 that were immediately overwritten.)
    divider1 = 945        # RNA ends
    divider2 = 945 + 884  # ATAC ends, Flux begins

    def prepare_modality_data(attention_weights, condition_name):
        """Extract attention weights by modality"""
        return {
            'RNA': attention_weights[:, :divider1].flatten(),
            'ATAC': attention_weights[:, divider1:divider2].flatten(),
            'Flux': attention_weights[:, divider2:].flatten(),
            'condition': condition_name,
        }

    all_data = prepare_modality_data(all_attention, 'All')
    de_data = prepare_modality_data(dead_end_attention, 'Dead-end')
    re_data = prepare_modality_data(reprogramming_attention, 'Reprogramming')

    if plot_type in ['violin', 'box']:
        # Build the long-form DataFrame one (condition, modality) chunk at a time
        # instead of appending a dict per scalar weight — same rows and order,
        # dramatically faster for large attention matrices.
        frames = []
        for condition_data in [all_data, de_data, re_data]:
            condition = condition_data['condition']
            for modality in ['RNA', 'ATAC', 'Flux']:
                frames.append(pd.DataFrame({
                    'Condition': condition,
                    'Modality': modality,
                    'Attention Weight': condition_data[modality],
                }))
        df = pd.concat(frames, ignore_index=True)

        # One subplot per condition.
        f, axes = plt.subplots(1, 3, figsize=(18, 5))

        conditions = ['All', 'Dead-end', 'Reprogramming']
        colors = ['#1f77b4', '#ff7f0e', '#2ca02c']  # RNA, ATAC, Flux colors

        # Optionally print means
        if print_means:
            print("Mean attention weight values per modality and per condition:")

        for idx, (ax, condition) in enumerate(zip(axes, conditions)):
            condition_df = df[df['Condition'] == condition]

            if plot_type == 'violin':
                sns.violinplot(data=condition_df, x='Modality', y='Attention Weight',
                               palette=colors, ax=ax)
            else:  # box
                sns.boxplot(data=condition_df, x='Modality', y='Attention Weight',
                            palette=colors, ax=ax)

            ax.set_title(f'{condition} Samples', fontsize=12, fontweight='bold')
            ax.set_xlabel('Modality', fontsize=11)
            ax.set_ylabel('Attention Weight', fontsize=11)
            ax.grid(axis='y', alpha=0.3)

            # Mark each modality's mean with a dashed red line.
            for i, modality in enumerate(['RNA', 'ATAC', 'Flux']):
                mod_data = condition_df[condition_df['Modality'] == modality]['Attention Weight']
                mean_val = mod_data.mean()
                std_val = mod_data.std()
                ax.hlines(mean_val, i - 0.4, i + 0.4, colors='red', linestyles='--',
                          linewidth=2, alpha=0.7, label='Mean' if i == 0 else '')
                if print_means:
                    print(f"{condition} - {modality}: mean={mean_val:.8f}, std={std_val:.8f}")

            # Legend only once, on the first subplot.
            if idx == 0:
                ax.legend()

    else:
        raise ValueError(f"plot_type must be 'violin', 'box', or 'hist', got '{plot_type}'")

    plt.tight_layout()
    plt.show()

    return f
|
| 491 |
+
|
| 492 |
+
def plot_att_heads(all_attention_heads, dead_end_attention_heads, reprogramming_attention_heads, stacked=False):
    """Visualize per-head attention weights for the three sample groups.

    Args:
        all_attention_heads: (n_samples, n_heads, n_features) attention weights, all samples.
        dead_end_attention_heads: Same layout, dead-end samples only.
        reprogramming_attention_heads: Same layout, reprogramming samples only.
        stacked: If True, draw one heatmap per group with heads stacked as rows;
            otherwise draw one 1-row heatmap per (head, group) pair.
    """
    n_heads = all_attention_heads.shape[1]  # second dimension is the number of heads

    if stacked:
        # One heatmap per group, heads stacked as rows.
        f = plt.figure(figsize=(15, 10))

        plt.subplot(1, 3, 1)
        stacked_all_attention = all_attention_heads.mean(axis=0).reshape(n_heads, -1)  # sample-averaged, per head
        sns.heatmap(stacked_all_attention, cmap='viridis', yticklabels=[f'Head {i+1}' for i in range(n_heads)])
        plt.title('Stacked Attention Weights (All Samples)')
        plt.xlabel('Features')
        plt.ylabel('Heads')
        plt.xticks(rotation=90)

        plt.subplot(1, 3, 2)
        stacked_dead_end_attention = dead_end_attention_heads.mean(axis=0).reshape(n_heads, -1)
        sns.heatmap(stacked_dead_end_attention, cmap='viridis', yticklabels=[f'Head {i+1}' for i in range(n_heads)])
        plt.title('Stacked Attention Weights (Dead-end Samples)')
        plt.xlabel('Features')
        plt.ylabel('Heads')
        plt.xticks(rotation=90)

        plt.subplot(1, 3, 3)
        stacked_reprogramming_attention = reprogramming_attention_heads.mean(axis=0).reshape(n_heads, -1)
        sns.heatmap(stacked_reprogramming_attention, cmap='viridis', yticklabels=[f'Head {i+1}' for i in range(n_heads)])
        plt.title('Stacked Attention Weights (Reprogramming Samples)')
        plt.xlabel('Features')
        plt.ylabel('Heads')
        plt.xticks(rotation=90)

        # Fix: apply the layout BEFORE saving so the PDF matches the on-screen figure.
        plt.tight_layout()
        f.savefig('./figures/attention_heads_stacked.pdf', bbox_inches='tight')
        plt.show()

    else:
        # One row of three heatmaps (All / Dead-end / Reprogramming) per head.
        f = plt.figure(figsize=(15, 15))

        for head in range(n_heads):
            plt.subplot(n_heads, 3, 3 * head + 1)  # column 1: all samples
            sns.heatmap(all_attention_heads[:, head, :].mean(axis=0).reshape(1, -1), cmap='viridis', yticklabels=[f'Head {head+1}'])
            plt.title(f'Head {head+1} Attention (All Samples)')
            plt.xlabel('Features')
            plt.xticks(rotation=90)

            plt.subplot(n_heads, 3, 3 * head + 2)  # column 2: dead-end samples
            sns.heatmap(dead_end_attention_heads[:, head, :].mean(axis=0).reshape(1, -1), cmap='viridis', yticklabels=[f'Head {head+1}'])
            plt.title(f'Head {head+1} Attention (Dead-end Samples)')
            plt.xlabel('Features')
            plt.xticks(rotation=90)

            plt.subplot(n_heads, 3, 3 * head + 3)  # column 3: reprogramming samples
            sns.heatmap(reprogramming_attention_heads[:, head, :].mean(axis=0).reshape(1, -1), cmap='viridis', yticklabels=[f'Head {head+1}'])
            plt.title(f'Head {head+1} Attention (Reprogramming Samples)')
            plt.xlabel('Features')
            plt.xticks(rotation=90)

        # Fix: apply the layout BEFORE saving so the PDF matches the on-screen figure.
        plt.tight_layout()
        f.savefig('./figures/attention_heads.pdf', bbox_inches='tight')
        plt.show()
|
| 560 |
+
|
models/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
from .transformers import SingleTransformer, MultiModalTransformer
|
models/transformers.py
ADDED
|
@@ -0,0 +1,339 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
from torch import nn
|
| 3 |
+
import math
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
class CustomTransformerEncoderLayer(nn.TransformerEncoderLayer):
    """
    TransformerEncoderLayer variant that also returns per-head attention weights.

    The stock ``nn.TransformerEncoderLayer`` discards the attention weights
    produced by ``self_attn``; this subclass re-implements ``forward`` (post-norm
    path only) so that each call returns ``(output, attn_weights)`` with
    ``attn_weights`` shaped ``(batch, n_heads, tgt_len, src_len)`` thanks to
    ``average_attn_weights=False``.

    Note: the redundant ``__init__`` override that only delegated to ``super()``
    has been removed — construction is inherited unchanged.
    """

    def forward(self, src, src_mask=None, src_key_padding_mask=None):
        """
        Run one post-norm encoder layer and expose the attention weights.

        Args:
            src (torch.Tensor): Input tokens, ``(batch, seq, d_model)`` when
                the layer was built with ``batch_first=True``.
            src_mask (torch.Tensor, optional): Additive/boolean attention mask.
            src_key_padding_mask (torch.Tensor, optional): Per-key padding mask.

        Returns:
            tuple[torch.Tensor, torch.Tensor]: (encoded tokens, per-head
            attention weights).
        """
        # Obtain the output and attention weights directly from self.self_attn.
        src2, attn_weights = self.self_attn(
            src, src, src,
            attn_mask=src_mask,
            key_padding_mask=src_key_padding_mask,
            average_attn_weights=False,  # keep one weight map per head
            need_weights=True
        )
        # Post-norm residual blocks (attention sublayer, then feed-forward).
        src = src + self.dropout1(src2)
        src = self.norm1(src)
        src2 = self.linear2(self.dropout(self.activation(self.linear1(src))))
        src = src + self.dropout2(src2)
        src = self.norm2(src)
        return src, attn_weights
|
| 24 |
+
|
| 25 |
+
class SingleTransformer(nn.Module):

    """
    Transformer-based encoder/classifier for a single modality (RNA, ATAC or Flux).

    RNA inputs are integer counts embedded via a fixed sinusoidal lookup table
    (``create_count_embeddings``); ATAC/Flux inputs are continuous scalars
    projected to ``d_model`` with a linear layer. A learned per-feature
    ("id") embedding is added, a batch-covariate embedding token is appended
    to the sequence, and the result runs through a stack of
    ``CustomTransformerEncoderLayer`` blocks. A learned CLS token then
    cross-attends over the encoded tokens to produce a latent vector and a
    sigmoid-activated scalar prediction.

    Args:
        model_type (str): One of 'RNA', 'ATAC', 'Flux'.
        vocab_size (int): For RNA, the count vocabulary size (max count); also
            the output width of the masked-LM head. Set 1 if projection is used.
        seq_len (int): Sequence length (number of features).
        n_encoder_layers (int): Number of transformer encoder layers.
        n_heads (int): Number of attention heads.
        n_batches (int): Number of experimental batches for the batch embedding.
        d_model (int): Dimension of the token embeddings.
        d_ff (int): Dimension of the feedforward layer.
        dropout_rate (float, optional): Dropout rate. Defaults to 0.0.

    Methods:
        forward(x, batch_indices, masked_lm=False, return_attention=False,
                return_embeddings=False, return_flow_attention=False):
            Forward pass; the returned value's arity depends on the flags.
        freeze_pretrained_weights():
            Freeze everything except the CLS head.
        unfreeze_pretrained_weights():
            Unfreeze all parameters.
        create_count_embeddings(max_count, embed_size):
            Build the fixed sinusoidal count-embedding table.
        get_latent_space(inputs, batch_indices, batch_size=32):
            Batched inference returning (latent vectors, predictions).
    """
    def __init__(self, model_type, vocab_size, seq_len,
                 n_encoder_layers, n_heads, n_batches,
                 d_model, d_ff,
                 dropout_rate=0.0):
        super(SingleTransformer, self).__init__()

        if model_type not in ['RNA', 'ATAC', 'Flux']:
            raise ValueError("model_type must be one of 'RNA', 'ATAC', 'Flux'")

        self.model_type = model_type

        if self.model_type == 'RNA':
            # Fixed (non-learned) sinusoidal table indexed by integer count.
            # NOTE(review): plain tensor attribute, not a registered buffer, so
            # it is moved to the input device manually in forward().
            self.count_embedding_fix = self.create_count_embeddings(vocab_size, d_model)
        else:
            # Continuous-valued modalities: project each scalar to d_model.
            self.count_embedding_proj = nn.Linear(1, d_model)

        # Learned per-feature ("positional") embedding, one row per feature.
        self.id_embeddings = nn.Parameter(torch.zeros(1, seq_len, d_model))
        nn.init.normal_(self.id_embeddings, mean=0.0, std=0.02)
        self.batch_embedding = nn.Embedding(n_batches, d_model)

        self.layer_norm = nn.LayerNorm(d_model)
        # The two norms below are unused in the current forward path (kept for
        # the commented-out additive batch-embedding variant).
        self.token_layer_norm = nn.LayerNorm(d_model)
        self.batch_layer_norm = nn.LayerNorm(d_model)
        # self.alpha = nn.Parameter(torch.tensor(1.0))
        # self.beta = nn.Parameter(torch.tensor(1.0))

        self.cls_token = nn.Parameter(torch.zeros(1, 1, d_model))
        nn.init.normal_(self.cls_token, mean=0.0, std=0.02)

        # Custom layer (instead of nn.TransformerEncoderLayer) so attention
        # weights can be collected per layer in forward().
        encoder_layer = CustomTransformerEncoderLayer(
            d_model=d_model,
            nhead=n_heads,
            dim_feedforward=d_ff,
            dropout=dropout_rate,
            batch_first=True
        )
        self.encoder = nn.TransformerEncoder(encoder_layer, num_layers=n_encoder_layers)

        # Masked-LM head: per-token logits over the count vocabulary.
        self.mask_output_layer = nn.Linear(d_model, vocab_size)

        # CLS read-out block: one cross-attention + FFN over encoded tokens.
        self.cls_attention = nn.MultiheadAttention(embed_dim=d_model, num_heads=n_heads, batch_first=True)
        self.cls_norm1 = nn.LayerNorm(d_model)
        self.cls_norm2 = nn.LayerNorm(d_model)
        self.cls_ffn = nn.Sequential(
            nn.Linear(d_model, d_ff),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(d_ff, d_model)
        )
        self.dropout = nn.Dropout(dropout_rate)
        self.cls_output_layer = nn.Linear(d_model, 1)

    def forward(self, x, batch_indices, masked_lm=False, return_attention=False, return_embeddings=False, return_flow_attention=False):
        """
        Forward pass.

        Args:
            x (torch.Tensor): (batch, seq_len) counts (RNA) or values (ATAC/Flux).
            batch_indices (torch.Tensor): (batch,) experimental-batch ids.
            masked_lm (bool): Return per-token vocab logits instead of the
                CLS prediction.
            return_attention (bool): Also return the CLS attention weights.
            return_embeddings (bool): Return (tokens, attention_flow) before
                the CLS read-out (used by MultiModalTransformer).
            return_flow_attention (bool): Also collect per-layer encoder
                attention maps.

        Returns:
            Depending on flags: logits; (tokens, attention_flow);
            (preds, cls_output[, attention_weights[, attention_flow]]).
        """
        # x: [batch_dim, seq_dim] -> embedded to [batch_dim, seq_dim, embed_dim]

        if self.model_type == 'RNA':
            # Table lookup by integer count (table moved lazily to x's device).
            self.count_embedding_fix = self.count_embedding_fix.to(x.device)
            x = x.long()
            x = self.count_embedding_fix[x]
        else:
            x = x.unsqueeze(-1).float()
            x = self.count_embedding_proj(x)

        # Add learned per-feature embeddings (sliced in case seq is shorter).
        x = x + self.id_embeddings[:, :x.size(1), :]

        # One batch-covariate token per sample, appended to the sequence
        # (an additive variant is kept commented out below).
        batch_embeddings = self.batch_embedding(batch_indices).unsqueeze(1)#.expand(-1, x.size(1), -1) # repeat for the token dim

        # token_embeddings = self.token_layer_norm(x)
        # batch_embeddings = self.batch_layer_norm(batch_embeddings)
        # x = token_embeddings + batch_embeddings
        x = torch.cat((x, batch_embeddings), dim=1) #x + batch_embeddings #

        x = self.layer_norm(x)

        # Run encoder layers manually so per-layer attention can be captured.
        attention_flow = []
        for layer in self.encoder.layers:
            x, attn_weights = layer(x)
            if return_flow_attention:
                attention_flow.append(attn_weights)

        other_tokens = x #self.encoder(x)

        if return_embeddings:
            return other_tokens, attention_flow

        if masked_lm:
            # exclude the batch embedding token appended above
            other_tokens = other_tokens[:, :-1, :]
            return self.mask_output_layer(other_tokens)

        # CLS read-out: query = learned CLS token, keys/values = encoded tokens.
        cls_token = self.cls_token.expand(x.size(0), -1, -1) # repeat for the batch dim
        attended_cls, attention_weights = self.cls_attention(cls_token, other_tokens, other_tokens, need_weights=True, average_attn_weights=False)
        attended_cls = attended_cls.squeeze(1)

        cls_output = self.cls_norm1(cls_token.squeeze(1) + self.dropout(attended_cls))
        cls_output = self.cls_norm2(cls_output + self.dropout(self.cls_ffn(cls_output)))

        preds = self.cls_output_layer(cls_output)
        preds = torch.sigmoid(preds)  # binary probability in (0, 1)

        if return_flow_attention:
            return preds, cls_output, attention_weights, attention_flow
        elif return_attention:
            return preds, cls_output, attention_weights
        else:
            return preds, cls_output

    def freeze_pretrained_weights(self):
        """Freeze everything except the CLS read-out head (for fine-tuning)."""
        for name, param in self.named_parameters():
            if not any(x in name for x in ['cls_attention', 'cls_norm', 'cls_ffn', 'cls_token', 'cls_ff_dim', 'cls_output_layer']):
                param.requires_grad = False
        self.pretrained = True

    def unfreeze_pretrained_weights(self):
        """Make all parameters trainable again."""
        for param in self.parameters():
            param.requires_grad = True
        self.pretrained = False

    def create_count_embeddings(self, max_count, embed_size):
        """
        Build a fixed sinusoidal embedding table of shape
        (max_count + 1, embed_size), following the standard transformer
        sin/cos positional-encoding formula with counts in place of positions.
        """
        embeddings = torch.zeros(max_count + 1, embed_size)
        for i in range(max_count + 1):
            embeddings[i] = torch.tensor([math.sin(i / (10000 ** (2 * (j // 2) / embed_size)))
                                          if j % 2 == 0 else math.cos(i / (10000 ** (2 * (j // 2) / embed_size)))
                                          for j in range(embed_size)])
        return embeddings

    def get_latent_space(self, inputs, batch_indices, batch_size=32):
        """
        Get the latent space representation and predictions (batched, no grad).

        Args:
            inputs (torch.Tensor): Input tensor, (n_samples, seq_len).
            batch_indices (torch.Tensor): Batch indices tensor, (n_samples,).
            batch_size (int, optional): Mini-batch size. Defaults to 32.

        Returns:
            torch.Tensor: Latent space representation (CLS outputs).
            torch.Tensor: Predictions.
        """
        self.eval()
        latent_space_list, preds_list = [], []
        with torch.no_grad():
            for i in range(0, inputs.shape[0], batch_size):
                inputs_batch = inputs[i:i + batch_size].float()
                batch_indices_batch = batch_indices[i:i + batch_size].int()
                preds, reduced_dim = self(inputs_batch, batch_indices_batch)
                latent_space_list.append(reduced_dim)
                preds_list.append(preds)
        latent_space = torch.cat(latent_space_list, dim=0)
        preds = torch.cat(preds_list, dim=0)
        return latent_space, preds
|
| 220 |
+
|
| 221 |
+
|
| 222 |
+
class MultiModalTransformer(nn.Module):
    """
    Late-fusion model over three SingleTransformer encoders (RNA, ATAC, Flux).

    Each modality encoder produces token embeddings; the tokens are
    concatenated along the sequence axis and a shared learned CLS token
    cross-attends over all of them. Samples with an all-zero modality are
    treated as missing that modality: their tokens are excluded via the
    attention key padding mask.

    Args:
        rna_model (SingleTransformer): Pretrained RNA encoder.
        atac_model (SingleTransformer): Pretrained ATAC encoder.
        flux_model (SingleTransformer): Pretrained Flux encoder.
        d_model (int): Shared token embedding dimension.
        n_heads_cls (int): Heads in the fusion CLS attention.
        d_ff_cls (int): Hidden size of the fusion CLS feedforward block.
        dropout_rate (float, optional): Dropout rate. Defaults to 0.0.
    """
    def __init__(self, rna_model, atac_model, flux_model, d_model, n_heads_cls, d_ff_cls, dropout_rate=0.0):
        super(MultiModalTransformer, self).__init__()

        self.rna_model = rna_model
        self.atac_model = atac_model
        self.flux_model = flux_model

        self.cls_token = nn.Parameter(torch.zeros(1, 1, d_model))
        nn.init.normal_(self.cls_token, mean=0.0, std=0.02)
        # self.modality_embeddings = nn.Embedding(3, d_model)
        self.layer_norm = nn.LayerNorm(d_model)

        # Fusion CLS read-out: one cross-attention + FFN over all tokens.
        self.cls_attention = nn.MultiheadAttention(embed_dim=d_model, num_heads=n_heads_cls, dropout=dropout_rate, batch_first=True)
        self.cls_norm1 = nn.LayerNorm(d_model)
        self.cls_norm2 = nn.LayerNorm(d_model)
        self.cls_ffn = nn.Sequential(
            nn.Linear(d_model, d_ff_cls),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(d_ff_cls, d_model))
        self.cls_output_layer = nn.Linear(d_model, 1)

        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x, batch_indices, return_attention=False, return_embeddings=False, return_flow_attention=False):
        """
        Forward pass.

        Args:
            x (tuple): (rna_input, atac_input, flux_input) tensors, each
                (batch, modality_seq_len).
            batch_indices (torch.Tensor): (batch,) experimental-batch ids,
                shared across modalities.
            return_attention (bool): Also return the fusion CLS attention.
            return_embeddings (bool): Return the concatenated token
                embeddings and skip the CLS read-out.
            return_flow_attention (bool): Also return per-layer attention
                maps from each modality encoder plus the CLS attention.

        Returns:
            Depending on flags: tokens; (preds, cls_output);
            (preds, cls_output, attention_weights);
            (preds, cls_output, {'rna','atac','flux','cls'} attention dict).
        """
        rna_input, atac_input, flux_input = x[0], x[1], x[2]

        # Per-modality token embeddings (each includes its batch token).
        rna_tokens, rna_attention = self.rna_model(rna_input, batch_indices, return_embeddings=True, return_flow_attention=return_flow_attention) # [32, 944, 128]
        atac_tokens, atac_attention = self.atac_model(atac_input, batch_indices, return_embeddings=True, return_flow_attention=return_flow_attention) # [32, 883, 128]
        flux_tokens, flux_attention = self.flux_model(flux_input, batch_indices, return_embeddings=True, return_flow_attention=return_flow_attention) # [32, 168, 128]
        # rna_tokens += self.modality_embeddings(torch.tensor([0]).to(rna_tokens.device))
        # atac_tokens += self.modality_embeddings(torch.tensor([1]).to(atac_tokens.device))
        # flux_tokens += self.modality_embeddings(torch.tensor([2]).to(flux_tokens.device))
        other_tokens = torch.cat((rna_tokens, atac_tokens, flux_tokens), dim=-2) # [32, 1995, 128]

        if return_embeddings:
            return other_tokens

        # Missing-modality mask: a modality whose raw input sums to zero is
        # treated as absent for that sample (1 = present, 0 = missing).
        rna_mask = (rna_input.sum(dim=1) != 0).float() # [32]
        atac_mask = (atac_input.sum(dim=1) != 0).float() # [32]
        flux_mask = (flux_input.sum(dim=1) != 0).float() # [32]

        # Broadcast per-sample flags over each modality's token positions.
        rna_mask = rna_mask.unsqueeze(-1).expand(-1, rna_tokens.size(1)) # [32, 944]
        atac_mask = atac_mask.unsqueeze(-1).expand(-1, atac_tokens.size(1)) # [32, 883]
        flux_mask = flux_mask.unsqueeze(-1).expand(-1, flux_tokens.size(1)) # [32, 168]
        other_tokens_mask = torch.cat((rna_mask, atac_mask, flux_mask), dim=1) # [32, 1995]

        other_tokens = self.layer_norm(other_tokens)
        cls_token = self.cls_token.expand(other_tokens.size(0), -1, -1) # [32, 1, 128]
        # key_padding_mask expects True = ignore, hence the (1 - mask) flip.
        # NOTE(review): if all three modalities are zero for a sample, every
        # key is masked — confirm upstream data guarantees this cannot happen.
        attended_cls, attention_weights = self.cls_attention(cls_token, other_tokens, other_tokens,
                                                             key_padding_mask=(1 - other_tokens_mask).bool(),
                                                             need_weights=True, average_attn_weights=False)

        attended_cls = attended_cls.squeeze(1)
        cls_output = self.cls_norm1(cls_token.squeeze(1) + self.dropout(attended_cls))
        cls_output = self.cls_norm2(cls_output + self.dropout(self.cls_ffn(cls_output)))

        preds = self.cls_output_layer(cls_output)

        preds = torch.sigmoid(preds)  # binary probability in (0, 1)

        if return_flow_attention:
            return preds, cls_output, {
                'rna': rna_attention,
                'atac': atac_attention,
                'flux': flux_attention,
                'cls': attention_weights
            }
        elif return_attention:
            return preds, cls_output, attention_weights
        else:
            return preds, cls_output

    def freeze_pretrained_weights(self):
        """Freeze all encoder weights and everything but the fusion CLS head."""
        self.rna_model.freeze_pretrained_weights()
        self.atac_model.freeze_pretrained_weights()
        self.flux_model.freeze_pretrained_weights()
        for name, param in self.named_parameters():
            if not any(x in name for x in ['cls_attention', 'cls_norm', 'cls_ffn', 'cls_token', 'cls_output_layer']):
                param.requires_grad = False

    def unfreeze_pretrained_weights(self):
        """Make the full model (encoders included) trainable again."""
        self.rna_model.unfreeze_pretrained_weights()
        self.atac_model.unfreeze_pretrained_weights()
        self.flux_model.unfreeze_pretrained_weights()
        for param in self.parameters():
            param.requires_grad = True

    def get_latent_space(self, X, batch_indices, batch_size=32):
        """
        Batched no-grad inference returning (latent vectors, predictions).

        Args:
            X (tuple): (rna, atac, flux) full-dataset tensors.
            batch_indices (torch.Tensor): (n_samples,) batch ids.
            batch_size (int, optional): Mini-batch size. Defaults to 32.

        Returns:
            torch.Tensor: CLS latent vectors, (n_samples, d_model).
            torch.Tensor: Sigmoid predictions, (n_samples, 1).
        """
        self.eval()
        latent_space_list, preds_list = [], []
        rna_input, atac_input, flux_input = X[0], X[1], X[2]
        with torch.no_grad():
            for i in range(0, rna_input.shape[0], batch_size):
                rna_input_batch = rna_input[i:i + batch_size].float()
                atac_input_batch = atac_input[i:i + batch_size].float()
                flux_input_batch = flux_input[i:i + batch_size].float()
                batch_indices_batch = batch_indices[i:i + batch_size].int()
                preds, reduced_dim = self((rna_input_batch, atac_input_batch, flux_input_batch), batch_indices_batch)
                latent_space_list.append(reduced_dim)
                preds_list.append(preds)
        latent_space = torch.cat(latent_space_list, dim=0)
        preds = torch.cat(preds_list, dim=0)
        return latent_space, preds
|
| 330 |
+
|
| 331 |
+
|
| 332 |
+
if __name__ == '__main__':
    # Smoke test for SingleTransformer.
    # Fixes vs. the original: the constructor was called with keyword args
    # (d_tokens=508, d_batch=4) that SingleTransformer does not accept
    # (TypeError), and `.shape` was taken on calls whose return value is a
    # tuple (default path and return_embeddings=True).
    model = SingleTransformer(model_type='ATAC', vocab_size=1, seq_len=883,
                              n_encoder_layers=2, n_heads=2, n_batches=3,
                              d_model=508, d_ff=128)
    x = torch.rand(32, 883)
    batch_indices = torch.randint(1, 3, (32,))
    # masked_lm=True returns per-token logits: (32, 883, vocab_size)
    print(model(x, batch_indices, masked_lm=True).shape)
    # return_attention=True returns (preds, cls_output, attention_weights)
    print(model(x, batch_indices, return_attention=True)[0].shape)
    # return_embeddings=True returns (tokens, attention_flow)
    embeddings, _ = model(x, batch_indices, return_embeddings=True)
    print(embeddings.shape)
    # default path returns (preds, cls_output)
    preds, cls_output = model(x, batch_indices)
    print(preds.shape)
|
notebooks/analysis_plots.ipynb
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
objects/degs.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:53d798eebf646f4c238db5a5a41e23e4c1ea47a950d0ed412e9ec4bae0bda3f3
|
| 3 |
+
size 185265
|
objects/fi_shift_atac.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:db1ce30c3708df8397ffa064d0c4d9a8ff4b9514d02c8f64d721411f21a69b98
|
| 3 |
+
size 25453
|
objects/fi_shift_flux.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a5d75180a0d4ecf5ef7aadc4ca10ed7023c742fec6b0326c3f289341803874b0
|
| 3 |
+
size 7687
|
objects/fi_shift_rna.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0231c7701e898c975488279721546846fa919a471666ceabaaf90d8778050e46
|
| 3 |
+
size 24332
|
objects/fold_results_multi.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cb4c67a606b1af107afb9061830971126b82c3cf7e2b78c431f668a380102dc5
|
| 3 |
+
size 50371
|
objects/mutlimodal_dataset.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:09ffeeee89cfc06b4d0858434ba60c3eab008ba70bf5ea27101a6ff6c1ec2376
|
| 3 |
+
size 33966477
|
utils/__init__.py
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from . import helpers
|
| 2 |
+
from . import losses
|
| 3 |
+
from .losses import MLMLoss
|
| 4 |
+
from .helpers import create_masked_input, create_multimodal_model, get_max, get_token_embeddings
|
utils/helpers.py
ADDED
|
@@ -0,0 +1,132 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
from torch.utils.data import TensorDataset, DataLoader
|
| 3 |
+
import numpy as np
|
| 4 |
+
from models import SingleTransformer, MultiModalTransformer
|
| 5 |
+
import config
|
| 6 |
+
from data import create_dataset
|
| 7 |
+
|
| 8 |
+
def create_masked_input(input_tensor, mask_token, mask_prob=0.20):
    """
    Randomly corrupt a tensor for masked-language-model style training.

    Each element is independently replaced by `mask_token` with probability
    `mask_prob`; the original tensor is left untouched.

    Args:
        input_tensor (torch.Tensor): Tensor to corrupt.
        mask_token: Replacement value written at the masked positions.
        mask_prob (float, optional): Per-element masking probability.
            Defaults to 0.20.

    Returns:
        torch.Tensor: Copy of `input_tensor` with masked positions replaced.
        torch.Tensor: Boolean tensor, True where an element was masked.
    """
    selection = torch.rand(input_tensor.shape) < mask_prob
    corrupted = input_tensor.clone()
    corrupted[selection] = mask_token
    return corrupted, selection
|
| 24 |
+
|
| 25 |
+
def get_max(adata):
    """
    Get the maximum value across a list of AnnData objects.

    The original implementation densified each `.X`, wrapped it in a torch
    tensor, converted back to numpy and flattened — all unnecessary work.
    This version takes the max of each densified matrix directly.

    Args:
        adata (list): A list of AnnData objects (each with a sparse `.X`).

    Returns:
        The maximum value found in any object's data matrix.

    Raises:
        ValueError: If `adata` is empty.
    """
    assert(isinstance(adata, list)), "adata must be a list of AnnData objects."
    # .toarray() densifies the sparse matrix; .max() scans it once.
    return max(a.X.toarray().max() for a in adata)
|
| 39 |
+
|
| 40 |
+
def get_token_embeddings(model, dataset, device):
    """
    Compute per-token embeddings for every sample in a dataset.

    Args:
        model (torch.nn.Module): Model whose forward accepts
            ``(inputs, batch_indices, return_embeddings=True)`` and returns
            the token embeddings as a tensor.
        dataset (torch.utils.data.Dataset): Yields either
            ``(inputs, batch_idx)`` or ``(inputs, batch_idx, label)``;
            `inputs` may be a single tensor or a list of three modality
            tensors (RNA, ATAC, flux).
        device (str): Device to run inference on.

    Returns:
        torch.Tensor: Embeddings of shape (n_samples, seq_len, d_model).

    Raises:
        ValueError: If a batch does not have 2 or 3 elements. (The original
            code left `inputs`/`bi` unbound in that case, causing a
            confusing NameError downstream.)
    """
    model.eval()
    embeddings = []
    loader = DataLoader(dataset, batch_size=32, shuffle=False)
    with torch.no_grad():
        for batch in loader:
            if len(batch) == 3:
                inputs, bi, _ = batch        # labelled dataset: drop the label
            elif len(batch) == 2:
                inputs, bi = batch           # unlabelled dataset
            else:
                raise ValueError(
                    f"Expected batches of 2 or 3 elements, got {len(batch)}."
                )
            if isinstance(inputs, list):
                # Multimodal input: move each modality tensor separately.
                rna = inputs[0].to(device)
                atac = inputs[1].to(device)
                flux = inputs[2].to(device)
                inputs = (rna, atac, flux)
            else:
                inputs = inputs.to(device)
            bi = bi.to(device)

            # NOTE(review): assumes the model returns a plain tensor for
            # return_embeddings=True (true for MultiModalTransformer).
            output = model(inputs, bi, return_embeddings=True)
            embeddings.append(output.cpu().detach())

    # Concatenate embeddings across batches
    embeddings = torch.cat(embeddings, dim=0)  # shape: (n_samples, seq_len, d_model)
    return embeddings
|
| 74 |
+
|
| 75 |
+
def get_all_modalities_available_samples(dataset):
    """
    Filter a MultiModalDataset down to samples with all three modalities.

    A modality counts as present for a sample when at least one of its
    features is non-zero; samples missing any modality are dropped.

    Args:
        dataset: MultiModalDataset with `rna_data`, `atac_data`, `flux_data`,
            `batch_no` and `labels` attributes.

    Returns:
        A new MultiModalDataset containing only the complete samples.
    """
    rna_mat = dataset.rna_data
    atac_mat = dataset.atac_data
    flux_mat = dataset.flux_data
    has_rna = (rna_mat != 0).any(axis=1)
    has_atac = (atac_mat != 0).any(axis=1)
    has_flux = (flux_mat != 0).any(axis=1)
    keep = has_rna & has_atac & has_flux
    return create_dataset.MultiModalDataset(
        (rna_mat[keep], atac_mat[keep], flux_mat[keep]),
        dataset.batch_no[keep],
        dataset.labels[keep],
    )
|
| 85 |
+
|
| 86 |
+
def separate_dataset(ds):
    """
    Split a labelled TensorDataset into its two label groups.

    Args:
        ds (TensorDataset): Dataset whose tensors are (X, batch_idx, labels).

    Returns:
        TensorDataset: Samples whose label equals 0.
        TensorDataset: Samples whose label equals 1.
    """
    features, batches, labels = ds.tensors

    # Boolean masks selecting each label group.
    is_negative = labels == 0
    is_positive = labels == 1

    negatives = TensorDataset(features[is_negative],
                              batches[is_negative],
                              labels[is_negative])
    positives = TensorDataset(features[is_positive],
                              batches[is_positive],
                              labels[is_positive])
    return negatives, positives
|
| 110 |
+
|
| 111 |
+
def create_multimodal_model(model_config, device, use_mlm=False):
    """
    Create a multimodal model from per-modality configurations.

    Builds one SingleTransformer per modality (sharing the 'Share' config),
    optionally loads MLM-pretraining checkpoints, and wraps the three
    encoders in a MultiModalTransformer.

    Args:
        model_config (dict): Configuration with 'RNA', 'ATAC', 'Flux',
            'Share' and 'Multi' sections.
        device (str): Device to place the model on.
        use_mlm (bool, optional): Whether to load MLM pretraining
            checkpoints into the encoders. Defaults to False.

    Returns:
        MultiModalTransformer: Multimodal model.
    """
    shared = model_config['Share']
    encoders = {
        modality: SingleTransformer(modality, **model_config[modality], **shared).to(device)
        for modality in ('RNA', 'ATAC', 'Flux')
    }
    if use_mlm:
        # strict=False: the MLM checkpoints do not cover the CLS head.
        encoders['RNA'].load_state_dict(torch.load(config.MLM_RNA_CKP), strict=False)
        encoders['ATAC'].load_state_dict(torch.load(config.MLM_ATAC_CKP), strict=False)
        encoders['Flux'].load_state_dict(torch.load(config.MLM_FLUX_CKP), strict=False)
    fused = MultiModalTransformer(encoders['RNA'], encoders['ATAC'], encoders['Flux'],
                                  **model_config['Multi']).to(device)
    return fused
|
utils/losses.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from torch import nn
|
| 2 |
+
|
| 3 |
+
class MLMLoss(nn.Module):
    """
    Masked Language Modeling loss, averaged over masked positions only.

    Uses per-element MSE (regression over continuous values) or
    cross-entropy (classification over a count vocabulary), zeroes out
    unmasked positions, and normalizes by the number of masked positions.

    Fix vs. original: when `mask` contains no True entries the original
    returned NaN (0/0); the denominator is now clamped so the loss is 0.
    """
    def __init__(self, mse_based=False):
        """
        Args:
            mse_based (bool): Use MSE instead of cross-entropy. Defaults
                to False.
        """
        super(MLMLoss, self).__init__()
        self.mse_based = mse_based
        if self.mse_based:
            self.loss_fn = nn.MSELoss(reduction='none')
        else:
            self.loss_fn = nn.CrossEntropyLoss(reduction='none')

    def forward(self, predictions, targets, mask):
        """
        Args:
            predictions (torch.Tensor): (batch, seq, 1) for MSE or
                (batch, seq, vocab) logits for cross-entropy.
            targets (torch.Tensor): (batch, seq) target values/ids.
            mask (torch.Tensor): (batch, seq) True at masked positions.

        Returns:
            torch.Tensor: Scalar mean loss over masked positions (0 if
            nothing is masked).
        """
        if self.mse_based:
            predictions = predictions.squeeze(-1)
        else:
            # CrossEntropyLoss expects class dim second: (batch, vocab, seq).
            predictions = predictions.permute(0, 2, 1)
            targets = targets.long()

        masked_loss = self.loss_fn(predictions, targets) * mask.float()
        # Clamp to avoid 0/0 -> NaN when no position is masked.
        denom = mask.float().sum().clamp_min(1.0)
        return masked_loss.sum() / denom
|