kaveh commited on
Commit
ef814bf
·
0 Parent(s):
.gitattributes ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ *.pkl filter=lfs diff=lfs merge=lfs -text
2
+ *.pth filter=lfs diff=lfs merge=lfs -text
3
+ data/datasets/*.csv filter=lfs diff=lfs merge=lfs -text
4
+ *.parquet filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ __pycache__/
analysis/de_all_48.tsv ADDED
@@ -0,0 +1,171 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Category Term Count % PValue Genes List Total Pop Hits Pop Total Fold Enrichment Bonferroni Benjamini FDR
2
+ GOTERM_CC_DIRECT GO:0005615~extracellular space 23 48.93617021276596 3.0643470340330746E-15 SPARC, VCAM1, GPX3, SERPINF1, COL12A1, BGN, ADM, PTN, CXCL14, COL1A1, GREM2, VCAN, COL3A1, COL1A2, FLRT2, PTPRZ1, CRISPLD2, COL6A2, COL8A1, S100A4, TIMP1, IGFBP6, FBN1 47 1809 29722 8.040247932912271 3.979039320256561E-13 3.9223642035623355E-13 3.4627121484573744E-13
3
+ GOTERM_CC_DIRECT GO:0031012~extracellular matrix 12 25.53191489361702 1.6644129825397194E-13 COL1A1, COL3A1, VCAN, SPARC, PTPRZ1, CRISPLD2, COL6A2, COL12A1, BGN, TIMP1, PTN, FBN1 47 257 29722 29.52760990148191 2.1302071218087804E-11 1.0652243088254204E-11 9.403933351349415E-12
4
+ GOTERM_CC_DIRECT GO:0062023~collagen-containing extracellular matrix 12 25.53191489361702 2.6096757316181293E-12 COL1A1, COL3A1, VCAN, SPARC, COL1A2, COL6A2, COL12A1, BGN, COL8A1, IGFBP6, FBLN2, FBN1 47 331 29722 22.926271131966313 3.340403509355383E-10 1.1134616454904018E-10 9.829778589094954E-11
5
+ REACTOME_PATHWAY R-MMU-1474244~Extracellular matrix organization 14 29.78723404255319 9.412796932920763E-12 SPARC, VCAM1, COL12A1, BGN, FBLN2, COL1A1, ACTA2, VCAN, COL3A1, COL1A2, COL6A2, COL8A1, TIMP1, FBN1 35 287 9277 12.929616724738676 2.183770497765636E-9 2.1931816853705377E-9 1.97668735591336E-9
6
+ GOTERM_CC_DIRECT GO:0005576~extracellular region 19 40.42553191489361 5.071027105503714E-11 CTLA2A, VCAM1, GPX3, SERPINF1, COL12A1, BGN, PTN, FBLN2, COL1A1, VCAN, COL3A1, COL1A2, FLRT2, PTPRZ1, CRISPLD2, COL6A2, S100A4, TIMP1, FBN1 47 1780 29722 6.750155390867797 6.490921578006237E-9 1.6227286737611885E-9 1.4325651573047992E-9
7
+ GOTERM_MF_DIRECT GO:0005201~extracellular matrix structural constituent 7 14.893617021276595 4.554153190661396E-9 COL1A1, COL3A1, SPARC, COL1A2, IGFBP6, FBLN2, FBN1 44 91 28924 50.56643356643356 5.829314366767591E-7 4.787582223598892E-7 4.4883583346239615E-7
8
+ GOTERM_MF_DIRECT GO:0030020~extracellular matrix structural constituent conferring tensile strength 6 12.76595744680851 7.480597224373269E-9 COL1A1, COL3A1, COL1A2, COL6A2, COL12A1, COL8A1 44 46 28924 85.74308300395256 9.575159934938071E-7 4.787582223598892E-7 4.4883583346239615E-7
9
+ REACTOME_PATHWAY R-MMU-3000178~ECM proteoglycans 7 14.893617021276595 1.9030924365705653E-8 COL1A1, COL3A1, VCAN, SPARC, COL1A2, COL6A2, BGN 35 49 9277 37.86530612244898 4.415164754023593E-6 2.2171026886047086E-6 1.9982470583990936E-6
10
+ KEGG_PATHWAY mmu04820:Cytoskeleton in muscle cells 10 21.27659574468085 2.2847972312714302E-8 COL1A1, COL3A1, VCAN, COL1A2, COL6A2, BGN, MYH11, FBLN2, MYL9, FBN1 31 232 9565 13.299499443826473 1.5993567997751867E-6 1.5993580618900011E-6 1.439422255701001E-6
11
+ GOTERM_CC_DIRECT GO:0005604~basement membrane 7 14.893617021276595 3.460258164716589E-8 ACTA2, SPARC, SERPINF1, COL8A1, TIMP1, PTN, FBN1 47 122 29722 36.28426927101499 4.4291207241808905E-6 8.858260901674469E-7 7.820183452259493E-7
12
+ GOTERM_CC_DIRECT GO:0005581~collagen trimer 6 12.76595744680851 1.771586448374384E-7 COL1A1, COL3A1, COL1A2, COL6A2, COL12A1, COL8A1 47 82 29722 46.271925272444214 2.2676051445791323E-5 3.7793844231986857E-6 3.3364878111050898E-6
13
+ REACTOME_PATHWAY R-MMU-216083~Integrin cell surface interactions 7 14.893617021276595 3.2418830335220263E-7 COL1A1, COL3A1, VCAM1, COL1A2, COL6A2, COL8A1, FBN1 35 78 9277 23.78717948717949 7.520887023115819E-5 1.5661310271364195E-5 1.4115344021401206E-5
14
+ REACTOME_PATHWAY R-MMU-8948216~Collagen chain trimerization 6 12.76595744680851 3.3185861712795075E-7 COL1A1, COL3A1, COL1A2, COL6A2, COL12A1, COL8A1 35 41 9277 38.78885017421603 7.698824820467909E-5 1.5661310271364195E-5 1.4115344021401206E-5
15
+ REACTOME_PATHWAY R-MMU-1474228~Degradation of the extracellular matrix 8 17.02127659574468 3.3607961955717157E-7 COL1A1, COL3A1, COL1A2, COL6A2, COL12A1, COL8A1, TIMP1, FBN1 35 129 9277 16.43765227021041 7.796744522503563E-5 1.5661310271364195E-5 1.4115344021401206E-5
16
+ REACTOME_PATHWAY R-MMU-2022090~Assembly of collagen fibrils and other multimeric structures 6 12.76595744680851 9.16462494236312E-7 COL1A1, COL3A1, COL1A2, COL6A2, COL12A1, COL8A1 35 50 9277 31.806857142857144 2.125967941876139E-4 3.558929352617678E-5 3.207618729827092E-5
17
+ REACTOME_PATHWAY R-MMU-1442490~Collagen degradation 6 12.76595744680851 1.1183145403746236E-6 COL1A1, COL3A1, COL1A2, COL6A2, COL12A1, COL8A1 35 52 9277 30.583516483516483 2.5941546443741803E-4 3.722389827246961E-5 3.354943621123871E-5
18
+ REACTOME_PATHWAY R-MMU-9006934~Signaling by Receptor Tyrosine Kinases 11 23.404255319148938 2.068970107329577E-6 ACTA2, COL1A1, COL3A1, COL1A2, FLRT2, PTPRZ1, SH3KBP1, COL6A2, CAV1, TRIB3, PTN 35 432 9277 6.749140211640212 4.7988637914020416E-4 6.0258754375973924E-5 5.431046531740139E-5
19
+ REACTOME_PATHWAY R-MMU-1650814~Collagen biosynthesis and modifying enzymes 6 12.76595744680851 2.500401462818154E-6 COL1A1, COL3A1, COL1A2, COL6A2, COL12A1, COL8A1 35 61 9277 26.071194379391102 5.79925642680168E-4 6.473261564851443E-5 5.834270079909026E-5
20
+ GOTERM_MF_DIRECT GO:0005178~integrin binding 6 12.76595744680851 3.951504130333476E-6 COL3A1, VCAM1, PTPRZ1, PTN, THY1, FBN1 44 160 28924 24.65113636363636 5.056656360248324E-4 1.6859750956089496E-4 1.5806016521333904E-4
21
+ REACTOME_PATHWAY R-MMU-162582~Signal Transduction 22 46.808510638297875 4.113563511183184E-6 SH3KBP1, CAV1, ADM, PTN, GNG11, AKAP12, COL1A1, ACTA2, APBB1IP, GREM2, COL3A1, COL1A2, GNGT2, FLRT2, PTPRZ1, COL6A2, TRIB3, NCAM1, MYH11, TIMP1, MYL9, FBN1 35 2270 9277 2.568835745752045 9.538934515905817E-4 9.584602981056819E-5 8.638483373484686E-5
22
+ REACTOME_PATHWAY R-MMU-1474290~Collagen formation 6 12.76595744680851 7.46650304695044E-6 COL1A1, COL3A1, COL1A2, COL6A2, COL12A1, COL8A1 35 76 9277 20.925563909774436 0.0017307357203480978 1.581541099944957E-4 1.4254233089632657E-4
23
+ GOTERM_BP_DIRECT GO:0007155~cell adhesion 8 17.02127659574468 1.4860488733480935E-5 VCAN, VCAM1, FLRT2, COL6A2, COL12A1, COL8A1, NCAM1, THY1 47 523 29712 9.669907652251739 0.008331615419916782 0.008351594668216285 0.008232710758348439
24
+ GOTERM_MF_DIRECT GO:0050840~extracellular matrix binding 4 8.51063829787234 1.6185727179649803E-5 SPARC, BGN, CD248, FBLN2 44 33 28924 79.68044077134986 0.0020696451705318752 5.179432697487937E-4 4.855718153894941E-4
25
+ KEGG_PATHWAY mmu04974:Protein digestion and absorption 6 12.76595744680851 1.9031883608157167E-5 COL1A1, COL3A1, COL1A2, COL6A2, COL12A1, COL8A1 31 108 9565 17.1415770609319 0.0013313574863942357 6.661159262855009E-4 5.995043336569508E-4
26
+ KEGG_PATHWAY mmu04926:Relaxin signaling pathway 6 12.76595744680851 4.657577768124781E-5 ACTA2, COL1A1, COL3A1, GNGT2, COL1A2, GNG11 31 130 9565 14.240694789081886 0.0032550710972372165 0.0010867681458957821 9.78091331306204E-4
27
+ GOTERM_BP_DIRECT GO:0001501~skeletal system development 5 10.638297872340425 4.660868393689977E-5 COL1A1, COL3A1, VCAN, COL1A2, FBN1 47 128 29712 24.694148936170212 0.02589998966698781 0.013097040186268834 0.012910605450521235
28
+ GOTERM_MF_DIRECT GO:0048407~platelet-derived growth factor binding 3 6.382978723404255 1.411417082181159E-4 COL1A1, COL3A1, COL1A2 44 12 28924 164.34090909090907 0.017905176533635325 0.0030924960334964167 0.002899215031402891
29
+ GOTERM_MF_DIRECT GO:0008201~heparin binding 5 10.638297872340425 1.4496075157014455E-4 GREM2, CRISPLD2, NCAM1, PTN, FBN1 44 179 28924 18.362112747587606 0.01838521269043336 0.0030924960334964167 0.002899215031402891
30
+ GOTERM_BP_DIRECT GO:0030198~extracellular matrix organization 5 10.638297872340425 1.8179852502727632E-4 COL1A1, COL3A1, COL1A2, CRISPLD2, COL8A1 47 182 29712 17.367313537526304 0.09729717257054937 0.0340569236884431 0.03357212762170369
31
+ GOTERM_BP_DIRECT GO:0071560~cellular response to transforming growth factor beta stimulus 4 8.51063829787234 2.6246708451716275E-4 ACTA2, COL1A1, CAV1, FBN1 47 80 29712 31.608510638297872 0.13738634446279108 0.03687662537466137 0.03635169120562704
32
+ GOTERM_CC_DIRECT GO:0009986~cell surface 8 17.02127659574468 2.734695051633465E-4 VCAN, SPARC, VCAM1, CAV1, BGN, NCAM1, PTN, THY1 47 833 29722 6.0733059181119255 0.03440316195956494 0.004573569985160761 0.004037604752524734
33
+ GOTERM_CC_DIRECT GO:0005925~focal adhesion 5 10.638297872340425 2.8584812407254754E-4 ACTA2, APBB1IP, FLRT2, SH3KBP1, CAV1 47 205 29722 15.42397509081474 0.03593233050495981 0.004573569985160761 0.004037604752524734
34
+ REACTOME_PATHWAY R-MMU-76002~Platelet activation, signaling and aggregation 7 14.893617021276595 2.924308405792443E-4 APBB1IP, COL1A1, SPARC, GNGT2, COL1A2, TIMP1, GNG11 35 256 9277 7.24765625 0.06560299930047075 0.005678032154580326 0.005117539710136775
35
+ GOTERM_CC_DIRECT GO:0098978~glutamatergic synapse 8 17.02127659574468 3.5308599610066246E-4 ACTA2, SPARC, FLRT2, PTPRZ1, SH3KBP1, NCAM1, KIF21B, THY1 47 869 29722 5.82170751413951 0.04419655472052142 0.0050216675000983105 0.004433190839930539
36
+ GOTERM_MF_DIRECT GO:0005509~calcium ion binding 7 14.893617021276595 6.720928802693075E-4 VCAN, SPARC, S100A4, CD248, FBLN2, MYL9, FBN1 44 725 28924 6.346959247648902 0.08245789674491544 0.010974246337109035 0.010288355941039721
37
+ GOTERM_MF_DIRECT GO:0005539~glycosaminoglycan binding 3 6.382978723404255 6.858903960693147E-4 CRISPLD2, BGN, PTN 44 26 28924 75.84965034965035 0.08407802035171197 0.010974246337109035 0.010288355941039721
38
+ GOTERM_BP_DIRECT GO:0071711~basement membrane organization 3 6.382978723404255 6.876796015460026E-4 COL3A1, FLRT2, CAV1 47 25 29712 75.86042553191488 0.32111042871880047 0.0772951872137707 0.07619489985129708
39
+ REACTOME_PATHWAY R-MMU-109582~Hemostasis 9 19.148936170212767 0.001193372143584831 APBB1IP, COL1A1, SPARC, GNGT2, COL1A2, CAV1, TIMP1, KIF21B, GNG11 35 601 9277 3.969241739957214 0.24196647702718177 0.021388900727328124 0.01927755001175496
40
+ REACTOME_PATHWAY R-MMU-3000171~Non-integrin membrane-ECM interactions 4 8.51063829787234 0.0013345928767598207 ACTA2, COL1A1, COL3A1, COL1A2 35 60 9277 17.67047619047619 0.2664300662778777 0.022211438591788445 0.02001889315139731
41
+ GOTERM_BP_DIRECT GO:0030324~lung development 4 8.51063829787234 0.0013957293409172478 COL3A1, SPARC, CRISPLD2, FBN1 47 142 29712 17.80761162721007 0.5444931430558122 0.12253814172238674 0.12079382653772641
42
+ GOTERM_BP_DIRECT GO:0007507~heart development 5 10.638297872340425 0.0015262757865777707 COL3A1, VCAN, VCAM1, ADM, FBN1 47 321 29712 9.846888049313979 0.5768166300241827 0.12253814172238674 0.12079382653772641
43
+ GOTERM_CC_DIRECT GO:0005856~cytoskeleton 6 12.76595744680851 0.0015874202044843396 ACTA2, AKAP12, APBB1IP, CNN1, SH3KBP1, KIF21B 47 552 29722 6.873728029602219 0.18400841032032722 0.02008754888351565 0.017733539248728662
44
+ GOTERM_CC_DIRECT GO:0098685~Schaffer collateral - CA1 synapse 4 8.51063829787234 0.0017262737321771262 ACTA2, AKAP12, NCAM1, PTN 47 153 29722 16.53288833263802 0.19840675760949855 0.02008754888351565 0.017733539248728662
45
+ GOTERM_MF_DIRECT GO:0002020~protease binding 4 8.51063829787234 0.001805749921593141 COL1A1, COL3A1, COL1A2, TIMP1 44 162 28924 16.231200897867566 0.20653427005763692 0.025681776662658 0.02407666562124188
46
+ REACTOME_PATHWAY R-MMU-6806834~Signaling by MET 4 8.51063829787234 0.0019158848129537348 COL1A1, COL3A1, COL1A2, SH3KBP1 35 68 9277 15.591596638655462 0.3591190806267751 0.029760077427881348 0.026822387381352286
47
+ REACTOME_PATHWAY R-MMU-8874081~MET activates PTK2 signaling 3 6.382978723404255 0.0021441142870256523 COL1A1, COL3A1, COL1A2 35 19 9277 41.85112781954887 0.39223595505715503 0.03122366430481106 0.028141500017211687
48
+ GOTERM_BP_DIRECT GO:0010811~positive regulation of cell-substrate adhesion 3 6.382978723404255 0.0023239440007780917 COL8A1, PTN, FBLN2 47 46 29712 41.22849213691027 0.7301538058040932 0.16325706605466095 0.16093312205388285
49
+ REACTOME_PATHWAY R-MMU-419037~NCAM1 interactions 3 6.382978723404255 0.0026210681651430974 COL3A1, COL6A2, NCAM1 35 21 9277 37.865306122448985 0.456043571162699 0.03592405191049069 0.03237790086353238
50
+ GOTERM_CC_DIRECT GO:0005584~collagen type I trimer 2 4.25531914893617 0.003093006940036085 COL1A1, COL1A2 47 2 29722 632.3829787234042 0.3273421151461945 0.032813108433459165 0.028967822288913165
51
+ GOTERM_BP_DIRECT GO:0061870~positive regulation of hepatic stellate cell migration 2 4.25531914893617 0.003094047146850327 ACTA2, AKAP12 47 2 29712 632.1702127659574 0.8252936912696514 0.19320605516998707 0.19045579103945345
52
+ GOTERM_CC_DIRECT GO:0030426~growth cone 4 8.51063829787234 0.0033325813252731962 PTPRZ1, NCAM1, KIF21B, THY1 47 193 29722 13.106382978723403 0.3477209273273387 0.032813108433459165 0.028967822288913165
53
+ KEGG_PATHWAY mmu04510:Focal adhesion 5 10.638297872340425 0.003440887948325218 COL1A1, COL1A2, COL6A2, CAV1, MYL9 31 202 9565 7.637336314276589 0.21437641383300066 0.0528119397434033 0.047530745769062965
54
+ KEGG_PATHWAY mmu04933:AGE-RAGE signaling pathway in diabetic complications 4 8.51063829787234 0.0037722814102430927 COL1A1, COL3A1, VCAM1, COL1A2 31 101 9565 12.219738102842543 0.23245564414851905 0.0528119397434033 0.047530745769062965
55
+ GOTERM_BP_DIRECT GO:0030199~collagen fibril organization 3 6.382978723404255 0.004048871433762763 COL1A1, COL3A1, COL1A2 47 61 29712 31.090338332752005 0.8981385622095254 0.21354606501462295 0.2105062633774041
56
+ GOTERM_BP_DIRECT GO:0048144~fibroblast proliferation 3 6.382978723404255 0.004179727251175894 COL3A1, CAV1, CD248 47 62 29712 30.58888126286891 0.9054019132488886 0.21354606501462295 0.2105062633774041
57
+ GOTERM_BP_DIRECT GO:0009612~response to mechanical stimulus 3 6.382978723404255 0.0047225678923152655 COL1A1, COL3A1, CAV1 47 66 29712 28.735009671179878 0.9304078673769033 0.22117359629009828 0.2180252176952214
58
+ GOTERM_BP_DIRECT GO:0043588~skin development 3 6.382978723404255 0.0052961861338741084 COL1A1, COL3A1, BCL11B 47 70 29712 27.09300911854103 0.9496963048556181 0.22895820055671148 0.225699009089712
59
+ REACTOME_PATHWAY R-MMU-8875878~MET promotes cell motility 3 6.382978723404255 0.005318524831050393 COL1A1, COL3A1, COL1A2 35 30 9277 26.505714285714287 0.7098016718719486 0.0688453492019301 0.06204945636225458
60
+ KEGG_PATHWAY mmu04611:Platelet activation 4 8.51063829787234 0.006990280107083817 APBB1IP, COL1A1, COL3A1, COL1A2 31 126 9565 9.795186891961086 0.38800909648327053 0.08155326791597786 0.07339794112438008
61
+ GOTERM_BP_DIRECT GO:0001568~blood vessel development 3 6.382978723404255 0.00702920913564421 COL1A1, COL3A1, COL1A2 47 81 29712 23.413711583924346 0.9811533315568713 0.2821725381594319 0.278155847224778
62
+ REACTOME_PATHWAY R-MMU-76009~Platelet Aggregation (Plug Formation) 3 6.382978723404255 0.007597674902301182 APBB1IP, COL1A1, COL1A2 35 36 9277 22.08809523809524 0.8295632040329622 0.08851291261180877 0.07977558647416241
63
+ REACTOME_PATHWAY R-MMU-445355~Smooth Muscle Contraction 3 6.382978723404255 0.007597674902301182 ACTA2, MYH11, MYL9 35 36 9277 22.08809523809524 0.8295632040329622 0.08851291261180877 0.07977558647416241
64
+ GOTERM_CC_DIRECT GO:0030485~smooth muscle contractile fiber 2 4.25531914893617 0.007714977227092966 ACTA2, MYH11 47 5 29722 252.9531914893617 0.6289231621374838 0.07053693464770712 0.06227088761867894
65
+ GOTERM_CC_DIRECT GO:0030175~filopodium 3 6.382978723404255 0.008607069231545979 ACTA2, VCAM1, PTPRZ1 47 90 29722 21.079432624113473 0.6692767873049683 0.07344699077585902 0.06483992154431303
66
+ GOTERM_BP_DIRECT GO:0071333~cellular response to glucose stimulus 3 6.382978723404255 0.00861261412687383 COL1A1, VCAM1, SERPINF1 47 90 29712 21.072340425531912 0.9923258925516195 0.32268594262020617 0.31809254841920676
67
+ REACTOME_PATHWAY R-MMU-375165~NCAM signaling for neurite out-growth 3 6.382978723404255 0.009321043691936523 COL3A1, COL6A2, NCAM1 35 40 9277 19.879285714285714 0.8861215354469785 0.10218246897287005 0.09209578748627771
68
+ REACTOME_PATHWAY R-MMU-381426~Regulation of Insulin-like Growth Factor (IGF) transport and uptake by Insulin-like Growth Factor Binding Proteins (IGFBPs) 4 8.51063829787234 0.009648130117610046 VCAN, TIMP1, IGFBP6, FBN1 35 121 9277 8.762219598583235 0.8945200268145299 0.10218246897287005 0.09209578748627771
69
+ GOTERM_MF_DIRECT GO:0019901~protein kinase binding 5 10.638297872340425 0.00997328528044866 ACTA2, CAV1, TRIB3, PTN, THY1 44 569 28924 5.776481866112798 0.7227925154539387 0.12765805158974283 0.11967942336538391
70
+ KEGG_PATHWAY mmu04270:Vascular smooth muscle contraction 4 8.51063829787234 0.010077412705929597 ACTA2, MYH11, ADM, MYL9 31 144 9565 8.57078853046595 0.5078626079957046 0.10077412705929598 0.09069671435336638
71
+ REACTOME_PATHWAY R-MMU-9035034~RHOF GTPase cycle 3 6.382978723404255 0.010242106338136615 ACTA2, AKAP12, CAV1 35 42 9277 18.932653061224492 0.9082255082256379 0.10375699029503614 0.09351488395689952
72
+ GOTERM_MF_DIRECT GO:0042802~identical protein binding 9 19.148936170212767 0.011040722018064282 ACTA2, COL1A1, GREM2, COL1A2, GPX3, CAV1, S100A4, IGFBP6, FBN1 44 2094 28924 2.8253451419640534 0.7585449045018153 0.12847385621020255 0.12044424019706489
73
+ GOTERM_CC_DIRECT GO:0001725~stress fiber 3 6.382978723404255 0.011353928452809301 ACTA2, MYH11, MYL9 47 104 29722 18.24181669394435 0.7681387568481325 0.09083142762247441 0.08018711969796569
74
+ GOTERM_BP_DIRECT GO:0016525~negative regulation of angiogenesis 3 6.382978723404255 0.011361191063208454 SPARC, SERPINF1, PTN 47 104 29712 18.235679214402616 0.9983923201432293 0.39906183609519696 0.3933812405635927
75
+ GOTERM_MF_DIRECT GO:0005515~protein binding 16 34.04255319148936 0.01210659079383143 SPARC, BCL11B, SH3KBP1, CAV1, FBLN2, AKAP12, COL1A1, ACTA2, APBB1IP, COL1A2, FLRT2, PTPRZ1, TRIB3, S100A4, NCAM1, FBN1 44 5596 28924 1.8795243355643643 0.7896748214385103 0.12913696846753525 0.1210659079383143
76
+ GOTERM_BP_DIRECT GO:0007229~integrin-mediated signaling pathway 3 6.382978723404255 0.012642281717714013 COL3A1, PTN, THY1 47 110 29712 17.241005802707928 0.999225256705481 0.41793896031501626 0.41198965127138604
77
+ REACTOME_PATHWAY R-MMU-9856530~High laminar flow shear stress activates signaling by PIEZO1 and PECAM1:CDH5:KDR in endothelial cells 3 6.382978723404255 0.012713235397761534 GNGT2, ADM, GNG11 35 47 9277 16.91854103343465 0.9486134957461371 0.12342432698660157 0.11124080973041342
78
+ GOTERM_BP_DIRECT GO:0007165~signal transduction 6 12.76595744680851 0.013812657132885098 AKAP12, APBB1IP, PTPRZ1, TIMP1, CXCL14, FBN1 47 925 29712 4.10056354226567 0.9996026658524191 0.43126185048230137 0.4251228917565747
79
+ REACTOME_PATHWAY R-MMU-9851151~MDK and PTN in ALK signaling 2 4.25531914893617 0.014581860949365014 PTPRZ1, PTN 35 4 9277 132.52857142857144 0.9668893179133152 0.13590294404808193 0.12248763197466613
80
+ GOTERM_CC_DIRECT GO:0072534~perineuronal net 2 4.25531914893617 0.015371717738213922 VCAN, PTPRZ1 47 10 29722 126.47659574468085 0.8623249655426033 0.115739992381846 0.10217671202459844
81
+ GOTERM_BP_DIRECT GO:0071356~cellular response to tumor necrosis factor 3 6.382978723404255 0.015863396868583804 AKAP12, COL1A1, VCAM1 47 124 29712 15.294440631434455 0.9998769216432507 0.46922258106021564 0.46254325606291724
82
+ REACTOME_PATHWAY R-MMU-422475~Axon guidance 5 10.638297872340425 0.01658609753515625 ACTA2, COL3A1, SH3KBP1, COL6A2, NCAM1 35 271 9277 4.890353189246178 0.9793542001091287 0.1448953872170437 0.13059240907973896
83
+ REACTOME_PATHWAY R-MMU-9675108~Nervous system development 5 10.638297872340425 0.016790452595966437 ACTA2, COL3A1, SH3KBP1, COL6A2, NCAM1 35 272 9277 4.872373949579832 0.980326020912232 0.1448953872170437 0.13059240907973896
84
+ GOTERM_BP_DIRECT GO:0048251~elastic fiber assembly 2 4.25531914893617 0.01690175894231393 COL3A1, MYH11 47 11 29712 114.94003868471954 0.9999320694021577 0.47493942627902147 0.4681787227020959
85
+ GOTERM_MF_DIRECT GO:0030021~extracellular matrix structural constituent conferring compression resistance 2 4.25531914893617 0.017698045366969135 VCAN, BGN 44 12 28924 109.56060606060606 0.8982906091634677 0.17425767745938842 0.1633665726181766
86
+ GOTERM_CC_DIRECT GO:0043005~neuron projection 4 8.51063829787234 0.018217140041640493 FLRT2, BCL11B, SH3KBP1, NCAM1 47 360 29722 7.0264775413711575 0.9049445388655092 0.12954410696277682 0.11436315692807643
87
+ REACTOME_PATHWAY R-MMU-373080~Class B/2 (Secretin family receptors) 3 6.382978723404255 0.01895919255440626 GNGT2, ADM, GNG11 35 58 9277 13.709852216748768 0.9882130655965335 0.1577675666134521 0.14219394415804695
88
+ REACTOME_PATHWAY R-MMU-198933~Immunoregulatory interactions between a Lymphoid and a non-Lymphoid cell 4 8.51063829787234 0.021050515502072722 COL1A1, COL3A1, VCAM1, COL1A2 35 162 9277 6.544620811287478 0.9928156839319909 0.16913000386148086 0.15243476742880246
89
+ GOTERM_BP_DIRECT GO:0043589~skin morphogenesis 2 4.25531914893617 0.021462618908622375 COL1A1, COL1A2 47 14 29712 90.31003039513678 0.9999950444951733 0.5401315663764478 0.5324428608052527
90
+ GOTERM_CC_DIRECT GO:0042383~sarcolemma 3 6.382978723404255 0.021538058955703262 VCAM1, COL6A2, BGN 47 146 29722 12.994170795686388 0.9383938562638995 0.14509850243842196 0.12809477168391942
91
+ GOTERM_BP_DIRECT GO:0071230~cellular response to amino acid stimulus 3 6.382978723404255 0.021551542418191412 COL1A1, COL3A1, COL1A2 47 146 29712 12.98979889245118 0.9999952916632523 0.5401315663764478 0.5324428608052527
92
+ GOTERM_BP_DIRECT GO:0050804~modulation of chemical synaptic transmission 3 6.382978723404255 0.022105028517185585 AKAP12, NCAM1, PTN 47 148 29712 12.814261069580217 0.999996576141018 0.5401315663764478 0.5324428608052527
93
+ GOTERM_BP_DIRECT GO:0008285~negative regulation of cell population proliferation 4 8.51063829787234 0.02352574980497766 BCL11B, PTPRZ1, CAV1, ADM 47 397 29712 6.369473176483198 0.9999984898232426 0.5508946412665602 0.543052724664901
94
+ KEGG_PATHWAY mmu04814:Motor proteins 4 8.51063829787234 0.023528517164250855 ACTA2, MYH11, KIF21B, MYL9 31 198 9565 6.233300749429781 0.8111271567898347 0.20587452518719498 0.1852870726684755
95
+ GOTERM_BP_DIRECT GO:0043116~negative regulation of vascular permeability 2 4.25531914893617 0.02600277769491187 AKAP12, ADM 47 17 29712 74.3729662077597 0.9999996385939455 0.5845424425816188 0.576221553719247
96
+ KEGG_PATHWAY mmu04151:PI3K-Akt signaling pathway 5 10.638297872340425 0.026566971633494064 COL1A1, GNGT2, COL1A2, COL6A2, GNG11 31 367 9565 4.203656499956052 0.8481462794097964 0.20663200159384273 0.18596880143445846
97
+ REACTOME_PATHWAY R-MMU-9860931~Response of endothelial cells to shear stress 3 6.382978723404255 0.026953906695871145 GNGT2, ADM, GNG11 35 70 9277 11.359591836734694 0.9982341577907143 0.20148907322762719 0.1815995938961447
98
+ REACTOME_PATHWAY R-MMU-9855142~Cellular responses to mechanical stimuli 3 6.382978723404255 0.026953906695871145 GNGT2, ADM, GNG11 35 70 9277 11.359591836734694 0.9982341577907143 0.20148907322762719 0.1815995938961447
99
+ REACTOME_PATHWAY R-MMU-9009391~Extra-nuclear estrogen signaling 3 6.382978723404255 0.027672319069888714 GNGT2, CAV1, GNG11 35 71 9277 11.199597585513079 0.9985122336239464 0.20148907322762719 0.1815995938961447
100
+ GOTERM_BP_DIRECT GO:0010812~negative regulation of cell-substrate adhesion 2 4.25531914893617 0.029018094958165423 COL1A1, PTPRZ1 47 19 29712 66.54423292273236 0.9999999369231642 0.6272372833264988 0.6183086387239863
101
+ KEGG_PATHWAY mmu04512:ECM-receptor interaction 3 6.382978723404255 0.03145983615597811 COL1A1, COL1A2, COL6A2 31 89 9565 10.400507430228345 0.8932827941129542 0.22021885309184677 0.19819696778266208
102
+ GOTERM_BP_DIRECT GO:0010976~positive regulation of neuron projection development 3 6.382978723404255 0.03336511698960931 PTPRZ1, SERPINF1, PTN 47 185 29712 10.251408855664174 0.9999999949560944 0.6560667894799646 0.6467277604482212
103
+ GOTERM_CC_DIRECT GO:0043209~myelin sheath 3 6.382978723404255 0.03500646466076349 ACTA2, NCAM1, THY1 47 190 29722 9.984994400895856 0.9895498216097234 0.22404137382888634 0.1977865253333137
104
+ GOTERM_BP_DIRECT GO:1900006~positive regulation of dendrite development 2 4.25531914893617 0.035021358869037254 PTPRZ1, PTN 47 23 29712 54.97132284921369 0.9999999980792735 0.6560667894799646 0.6467277604482212
105
+ GOTERM_BP_DIRECT GO:0007413~axonal fasciculation 2 4.25531914893617 0.035021358869037254 PTPRZ1, NCAM1 47 23 29712 54.97132284921369 0.9999999980792735 0.6560667894799646 0.6467277604482212
106
+ GOTERM_BP_DIRECT GO:0002026~regulation of the force of heart contraction 2 4.25531914893617 0.035021358869037254 CAV1, ADM 47 23 29712 54.97132284921369 0.9999999980792735 0.6560667894799646 0.6467277604482212
107
+ GOTERM_MF_DIRECT GO:0050998~nitric-oxide synthase binding 2 4.25531914893617 0.035090027971754874 ACTA2, CAV1 44 24 28924 54.78030303030303 0.9896650180202876 0.3065780981312424 0.28741696699803976
108
+ GOTERM_BP_DIRECT GO:0032496~response to lipopolysaccharide 3 6.382978723404255 0.03638100432685884 AKAP12, VCAM1, ADM 47 194 29712 9.775828032463258 0.9999999991316126 0.6595524010224086 0.6501637547445096
109
+ GOTERM_MF_DIRECT GO:0031681~G-protein beta-subunit binding 2 4.25531914893617 0.036525707166659205 GNGT2, GNG11 44 25 28924 52.589090909090906 0.9914584473706006 0.3065780981312424 0.28741696699803976
110
+ GOTERM_CC_DIRECT GO:0016460~myosin II complex 2 4.25531914893617 0.03799680254416647 MYH11, MYL9 47 25 29722 50.59063829787234 0.9929758466043639 0.23159955836444324 0.20445898511861008
111
+ GOTERM_MF_DIRECT GO:0005516~calmodulin binding 3 6.382978723404255 0.0383222622664053 AKAP12, CNN1, MYH11 44 208 28924 9.481206293706293 0.9932735797026461 0.3065780981312424 0.28741696699803976
112
+ REACTOME_PATHWAY R-MMU-2022923~Dermatan sulfate biosynthesis 2 4.25531914893617 0.03960501750302037 VCAN, BGN 35 11 9277 48.19220779220779 0.9999152045147619 0.2796354266122347 0.2520319295646751
113
+ GOTERM_CC_DIRECT GO:0030027~lamellipodium 3 6.382978723404255 0.04089080075110019 ACTA2, APBB1IP, PTPRZ1 47 207 29722 9.164970706136293 0.9952234840173367 0.23791011346094654 0.2100300220397419
114
+ REACTOME_PATHWAY R-MMU-75892~Platelet Adhesion to exposed collagen 2 4.25531914893617 0.04312902240333435 COL1A1, COL1A2 35 12 9277 44.17619047619047 0.999963860285541 0.2871160634279115 0.2587741344200061
115
+ REACTOME_PATHWAY R-MMU-430116~GP1b-IX-V activation signalling 2 4.25531914893617 0.04312902240333435 COL1A1, COL1A2 35 12 9277 44.17619047619047 0.999963860285541 0.2871160634279115 0.2587741344200061
116
+ GOTERM_BP_DIRECT GO:0001666~response to hypoxia 3 6.382978723404255 0.04454169753211946 VCAM1, CAV1, ADM 47 217 29712 8.739680360819687 0.9999999999927688 0.7420384315816297 0.7314756069327809
117
+ KEGG_PATHWAY mmu05146:Amoebiasis 3 6.382978723404255 0.04475473166569457 COL1A1, COL3A1, COL1A2 31 108 9565 8.57078853046595 0.9594451629558901 0.2848028378726018 0.2563225540853416
118
+ GOTERM_CC_DIRECT GO:0016514~SWI/SNF complex 2 4.25531914893617 0.04542486433193445 ACTA2, BCL11B 47 30 29722 42.15886524822695 0.9973956755508281 0.25279924497772216 0.2231743334568953
119
+ GOTERM_BP_DIRECT GO:0034113~heterotypic cell-cell adhesion 2 4.25531914893617 0.045439818870130586 VCAM1, THY1 47 30 29712 42.144680851063825 0.9999999999957414 0.7420384315816297 0.7314756069327809
120
+ GOTERM_BP_DIRECT GO:0031641~regulation of myelination 2 4.25531914893617 0.045439818870130586 PTPRZ1, PTN 47 30 29712 42.144680851063825 0.9999999999957414 0.7420384315816297 0.7314756069327809
121
+ REACTOME_PATHWAY R-MMU-4420097~VEGFA-VEGFR2 Pathway 3 6.382978723404255 0.047986780957221205 ACTA2, CAV1, TRIB3 35 96 9277 8.283035714285715 0.9999889038818522 0.3105811100842372 0.2799228889171237
122
+ GOTERM_BP_DIRECT GO:0031032~actomyosin structure organization 2 4.25531914893617 0.04839625590584158 CNN1, MYH11 47 32 29712 39.51063829787234 0.9999999999992573 0.7420384315816297 0.7314756069327809
123
+ GOTERM_CC_DIRECT GO:0005911~cell-cell junction 3 6.382978723404255 0.04978098308079648 FLRT2, SH3KBP1, NCAM1 47 231 29722 8.212765957446807 0.9985497733648512 0.26336646882698656 0.23250321076132405
124
+ GOTERM_BP_DIRECT GO:0035987~endodermal cell differentiation 2 4.25531914893617 0.04987111345690221 COL12A1, COL8A1 47 33 29712 38.313346228239844 0.9999999999996898 0.7420384315816297 0.7314756069327809
125
+ REACTOME_PATHWAY R-MMU-2024101~CS/DS degradation 2 4.25531914893617 0.050139420245921884 VCAN, BGN 35 14 9277 37.86530612244898 0.9999934372096564 0.3157428356026973 0.2845750878822593
126
+ GOTERM_BP_DIRECT GO:0001935~endothelial cell proliferation 2 4.25531914893617 0.05134373479969049 CAV1, COL8A1 47 34 29712 37.18648310387985 0.9999999999998704 0.7420384315816297 0.7314756069327809
127
+ GOTERM_CC_DIRECT GO:0005834~heterotrimeric G-protein complex 2 4.25531914893617 0.052796808120785466 GNGT2, GNG11 47 35 29722 36.136170212765954 0.9990345638277794 0.26336646882698656 0.23250321076132405
128
+ GOTERM_BP_DIRECT GO:1904706~negative regulation of vascular associated smooth muscle cell proliferation 2 4.25531914893617 0.05281412324424411 CNN1, CAV1 47 35 29712 36.12401215805471 0.9999999999999459 0.7420384315816297 0.7314756069327809
129
+ GOTERM_BP_DIRECT GO:0001937~negative regulation of endothelial cell proliferation 2 4.25531914893617 0.05281412324424411 SPARC, CAV1 47 35 29712 36.12401215805471 0.9999999999999459 0.7420384315816297 0.7314756069327809
130
+ GOTERM_BP_DIRECT GO:0048714~positive regulation of oligodendrocyte differentiation 2 4.25531914893617 0.05281412324424411 PTPRZ1, PTN 47 35 29712 36.12401215805471 0.9999999999999459 0.7420384315816297 0.7314756069327809
131
+ KEGG_PATHWAY mmu04670:Leukocyte transendothelial migration 3 6.382978723404255 0.053248636904474234 VCAM1, THY1, MYL9 31 119 9565 7.7785307671455675 0.978297201653411 0.310617048609433 0.2795553437484897
132
+ REACTOME_PATHWAY R-MMU-1280218~Adaptive Immune System 7 14.893617021276595 0.05332719632891561 COL1A1, COL3A1, VCAM1, COL1A2, SH3KBP1, TRIB3, KIF21B 35 753 9277 2.464010624169987 0.9999969913132601 0.3269799143325615 0.29470292708084944
133
+ GOTERM_CC_DIRECT GO:0009897~external side of plasma membrane 4 8.51063829787234 0.053496313980481644 VCAM1, CD248, NCAM1, THY1 47 551 29722 4.590802023400394 0.9991216742227382 0.26336646882698656 0.23250321076132405
134
+ GOTERM_MF_DIRECT GO:0030246~carbohydrate binding 3 6.382978723404255 0.05405303258732842 VCAN, CD248, PTN 44 252 28924 7.825757575757574 0.9991853913375791 0.40698753948106103 0.38155081826349474
135
+ REACTOME_PATHWAY R-MMU-194138~Signaling by VEGF 3 6.382978723404255 0.05535634863677548 ACTA2, CAV1, TRIB3 35 104 9277 7.645879120879121 0.9999981711488538 0.33071869826586375 0.29807264650571413
136
+ GOTERM_BP_DIRECT GO:0043113~receptor clustering 2 4.25531914893617 0.05867341417023918 PTN, THY1 47 39 29712 32.4189852700491 0.9999999999999983 0.804255091796937 0.7928066207393294
137
+ GOTERM_BP_DIRECT GO:0060325~face morphogenesis 2 4.25531914893617 0.06304459867195403 COL1A1, CRISPLD2 47 42 29712 30.103343465045594 0.9999999999999999 0.8435967727056706 0.8315882777205365
138
+ REACTOME_PATHWAY R-MMU-8964315~G beta:gamma signalling through BTK 2 4.25531914893617 0.06401097702134163 GNGT2, GNG11 35 18 9277 29.45079365079365 0.9999997838201198 0.3513482431393882 0.31666579853764604
139
+ REACTOME_PATHWAY R-MMU-2243919~Crosslinking of collagen fibrils 2 4.25531914893617 0.06401097702134163 COL1A1, COL1A2 35 18 9277 29.45079365079365 0.9999997838201198 0.3513482431393882 0.31666579853764604
140
+ GOTERM_BP_DIRECT GO:0030335~positive regulation of cell migration 3 6.382978723404255 0.06473817961805624 COL1A1, PTPRZ1, CAV1 47 268 29712 7.076532232454746 1.0 0.8456168204255533 0.833579570312734
141
+ REACTOME_PATHWAY R-MMU-8957275~Post-translational protein phosphorylation 3 6.382978723404255 0.0660933225286726 VCAN, TIMP1, FBN1 35 115 9277 6.914534161490684 0.9999998710541089 0.3513482431393882 0.31666579853764604
142
+ GOTERM_BP_DIRECT GO:0008284~positive regulation of cell population proliferation 4 8.51063829787234 0.06620487562050595 ACTA2, ADM, TIMP1, PTN 47 602 29712 4.200466530006361 1.0 0.8456168204255533 0.833579570312734
143
+ REACTOME_PATHWAY R-MMU-392851~Prostacyclin signalling through prostacyclin receptor 2 4.25531914893617 0.06744802495190409 GNGT2, GNG11 35 19 9277 27.900751879699246 0.9999999079239411 0.3513482431393882 0.31666579853764604
144
+ REACTOME_PATHWAY R-MMU-201556~Signaling by ALK 2 4.25531914893617 0.06744802495190409 PTPRZ1, PTN 35 19 9277 27.900751879699246 0.9999999079239411 0.3513482431393882 0.31666579853764604
145
+ KEGG_PATHWAY mmu04371:Apelin signaling pathway 3 6.382978723404255 0.07001366206124844 ACTA2, GNGT2, GNG11 31 139 9565 6.6593177071246235 0.9937861638310146 0.3769966418682609 0.33929697768143474
146
+ GOTERM_CC_DIRECT GO:0045211~postsynaptic membrane 3 6.382978723404255 0.07069360841698624 FLRT2, PTPRZ1, NCAM1 47 282 29722 6.727478497057492 0.999915987812827 0.3351400695323792 0.2958658426340535
147
+ REACTOME_PATHWAY R-MMU-8964616~G beta:gamma signalling through CDC42 2 4.25531914893617 0.07087282157747317 GNGT2, GNG11 35 20 9277 26.505714285714284 0.9999999607862784 0.3513482431393882 0.31666579853764604
148
+ REACTOME_PATHWAY R-MMU-418217~G beta:gamma signalling through PLC beta 2 4.25531914893617 0.07087282157747317 GNGT2, GNG11 35 20 9277 26.505714285714284 0.9999999607862784 0.3513482431393882 0.31666579853764604
149
+ REACTOME_PATHWAY R-MMU-2022870~Chondroitin sulfate biosynthesis 2 4.25531914893617 0.07087282157747317 VCAN, BGN 35 20 9277 26.505714285714284 0.9999999607862784 0.3513482431393882 0.31666579853764604
150
+ GOTERM_BP_DIRECT GO:0001525~angiogenesis 3 6.382978723404255 0.07335901502654828 CAV1, COL8A1, THY1 47 288 29712 6.585106382978723 1.0 0.9161725876648918 0.9031309849935055
151
+ REACTOME_PATHWAY R-MMU-500657~Presynaptic function of Kainate receptors 2 4.25531914893617 0.07428540924757544 GNGT2, GNG11 35 21 9277 25.243537414965985 0.9999999833010456 0.35333368400170256 0.31845525167535427
152
+ REACTOME_PATHWAY R-MMU-9006936~Signaling by TGFB family members 3 6.382978723404255 0.074306225390916 GREM2, TIMP1, FBN1 35 123 9277 6.464808362369338 0.9999999833879362 0.35333368400170256 0.31845525167535427
153
+ REACTOME_PATHWAY R-MMU-392170~ADP signalling through P2Y purinoceptor 12 2 4.25531914893617 0.07768583017184705 GNGT2, GNG11 35 22 9277 24.096103896103894 0.9999999928894963 0.36201596860080726 0.32628048672175763
154
+ GOTERM_CC_DIRECT GO:0030016~myofibril 2 4.25531914893617 0.08030567201598654 MYH11, MYL9 47 54 29722 23.42159180457053 0.9999777991790781 0.3671116435016527 0.3240907477788028
155
+ REACTOME_PATHWAY R-MMU-5627123~RHO GTPases activate PAKs 2 4.25531914893617 0.0810741264198134 MYH11, MYL9 35 23 9277 23.048447204968944 0.9999999969725895 0.3632744510733947 0.3274147413107849
156
+ REACTOME_PATHWAY R-MMU-400042~Adrenaline,noradrenaline inhibits insulin secretion 2 4.25531914893617 0.0810741264198134 GNGT2, GNG11 35 23 9277 23.048447204968944 0.9999999969725895 0.3632744510733947 0.3274147413107849
157
+ GOTERM_MF_DIRECT GO:0046332~SMAD binding 2 4.25531914893617 0.08138174072253183 COL3A1, COL1A2 44 57 28924 23.065390749601278 0.9999808887356442 0.5787146006935596 0.5425449381502122
158
+ GOTERM_BP_DIRECT GO:0031623~receptor internalization 2 4.25531914893617 0.08318230175034744 CAV1, ADM 47 56 29712 22.577507598784194 1.0 1.0 0.9875222816399287
159
+ GOTERM_CC_DIRECT GO:0043025~neuronal cell body 4 8.51063829787234 0.08437953214468305 AKAP12, PTPRZ1, SERPINF1, NCAM1 47 668 29722 3.7867244234934385 0.999987422827345 0.37243379705239416 0.3287892114603167
160
+ REACTOME_PATHWAY R-MMU-428930~Thromboxane signalling through TP receptor 2 4.25531914893617 0.08445033991921927 GNGT2, GNG11 35 24 9277 22.088095238095235 0.9999999987111508 0.36438757779959424 0.3284179885747416
161
+ REACTOME_PATHWAY R-MMU-202040~G-protein activation 2 4.25531914893617 0.08445033991921927 GNGT2, GNG11 35 24 9277 22.088095238095235 0.9999999987111508 0.36438757779959424 0.3284179885747416
162
+ GOTERM_BP_DIRECT GO:0016477~cell migration 3 6.382978723404255 0.08462237292548197 SH3KBP1, CD248, IGFBP6 47 313 29712 6.05913941948202 1.0 1.0 0.9875222816399287
163
+ REACTOME_PATHWAY R-MMU-418592~ADP signalling through P2Y purinoceptor 1 2 4.25531914893617 0.08781451245013751 GNGT2, GNG11 35 25 9277 21.20457142857143 0.9999999994513533 0.36537109644432214 0.32930442168801566
164
+ REACTOME_PATHWAY R-MMU-392451~G beta:gamma signalling through PI3Kgamma 2 4.25531914893617 0.08781451245013751 GNGT2, GNG11 35 25 9277 21.20457142857143 0.9999999994513533 0.36537109644432214 0.32930442168801566
165
+ GOTERM_BP_DIRECT GO:0030514~negative regulation of BMP signaling pathway 2 4.25531914893617 0.09027122324048699 GREM2, CAV1 47 61 29712 20.72689222183467 1.0 1.0 0.9875222816399287
166
+ REACTOME_PATHWAY R-MMU-1971475~A tetrasaccharide linker sequence is required for GAG synthesis 2 4.25531914893617 0.09116668566564533 VCAN, BGN 35 26 9277 20.389010989010988 0.9999999997664697 0.36816123724365857 0.33181914086338327
167
+ REACTOME_PATHWAY R-MMU-418555~G alpha (s) signalling events 3 6.382978723404255 0.09164528652417252 GNGT2, ADM, GNG11 35 139 9277 5.720657759506681 0.9999999997933335 0.36816123724365857 0.33181914086338327
168
+ GOTERM_CC_DIRECT GO:0030133~transport vesicle 2 4.25531914893617 0.09165311839877811 CRISPLD2, BGN 47 62 29722 20.399450926561425 0.999995468714926 0.3910533051681199 0.3452267459687309
169
+ GOTERM_BP_DIRECT GO:0006469~negative regulation of protein kinase activity 2 4.25531914893617 0.09168255924097848 TRIB3, THY1 47 62 29712 20.392587508579275 1.0 1.0 0.9875222816399287
170
+ REACTOME_PATHWAY R-MMU-9634597~GPER1 signaling 2 4.25531914893617 0.09450690106454322 GNGT2, GNG11 35 27 9277 19.633862433862433 0.9999999999006075 0.3698855141266195 0.333373210157039
171
+ GOTERM_BP_DIRECT GO:0045597~positive regulation of cell differentiation 2 4.25531914893617 0.09870717283967474 ACTA2, PTN 47 67 29712 18.870752619879326 1.0 1.0 0.9875222816399287
analysis/final_results.csv ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model,seed,fold,combination,train_auc,val_auc,precision,recall,f1,accuracy,specificity,n_samples,tn,fp,fn,tp,model_path,selection_criteria,run_id,timestamp
2
+ ATAC,0,1,all_samples,0.9283870967741935,0.831578947368421,0.8947368421052632,0.8947368421052632,0.8947368421052632,0.8490566037735849,0.7333333333333333,53,11,4,4,34,ckp/ATAC_seed0/best_ATAC_seed0_fold1_AUC_0.832.pth,all_samples,ATAC_seed0_fold1_all_samples,2025-10-13 01:52:06
3
+ ATAC,0,1,common_samples,0.9283870967741935,0.8888888888888888,0.92,0.9583333333333334,0.9387755102040817,0.9090909090909091,0.7777777777777778,33,7,2,1,23,ckp/ATAC_seed0/best_ATAC_seed0_fold1_AUC_0.832.pth,all_samples,ATAC_seed0_fold1_common_samples,2025-10-13 01:52:06
4
+ ATAC,0,2,all_samples,0.8974193548387097,0.9666666666666667,0.9743589743589743,1.0,0.9870129870129869,0.9811320754716981,0.9333333333333333,53,14,1,0,38,ckp/ATAC_seed0/best_ATAC_seed0_fold2_AUC_0.967.pth,all_samples,ATAC_seed0_fold2_all_samples,2025-10-13 01:52:06
5
+ ATAC,0,2,common_samples,0.8974193548387097,0.949074074074074,0.96,1.0,0.9795918367346939,0.9696969696969697,0.8888888888888888,33,8,1,0,24,ckp/ATAC_seed0/best_ATAC_seed0_fold2_AUC_0.967.pth,all_samples,ATAC_seed0_fold2_common_samples,2025-10-13 01:52:06
6
+ ATAC,0,3,all_samples,0.9127130604711757,0.7905405405405406,0.7954545454545454,0.9459459459459459,0.8641975308641975,0.7924528301886793,0.4375,53,7,9,2,35,ckp/ATAC_seed0/best_ATAC_seed0_fold3_AUC_0.791.pth,all_samples,ATAC_seed0_fold3_all_samples,2025-10-13 01:52:06
7
+ ATAC,0,3,common_samples,0.9127130604711757,0.7922705314009663,0.8148148148148148,0.9565217391304348,0.8800000000000001,0.8125,0.4444444444444444,32,4,5,1,22,ckp/ATAC_seed0/best_ATAC_seed0_fold3_AUC_0.791.pth,all_samples,ATAC_seed0_fold3_common_samples,2025-10-13 01:52:06
8
+ ATAC,0,4,all_samples,0.8872000868526762,0.8817567567567568,0.9,0.972972972972973,0.935064935064935,0.9056603773584906,0.75,53,12,4,1,36,ckp/ATAC_seed0/best_ATAC_seed0_fold4_AUC_0.882.pth,all_samples,ATAC_seed0_fold4_all_samples,2025-10-13 01:52:06
9
+ ATAC,0,4,common_samples,0.8872000868526762,0.8194444444444444,0.8518518518518519,0.9583333333333334,0.9019607843137256,0.8484848484848485,0.5555555555555556,33,5,4,1,23,ckp/ATAC_seed0/best_ATAC_seed0_fold4_AUC_0.882.pth,all_samples,ATAC_seed0_fold4_common_samples,2025-10-13 01:52:06
10
+ ATAC,0,5,all_samples,0.896236559139785,0.9280701754385965,0.7169811320754716,1.0,0.8351648351648352,0.7169811320754716,0.0,53,0,15,0,38,ckp/ATAC_seed0/best_ATAC_seed0_fold5_AUC_0.928.pth,all_samples,ATAC_seed0_fold5_all_samples,2025-10-13 01:52:06
11
+ ATAC,0,5,common_samples,0.896236559139785,0.9166666666666667,0.75,1.0,0.8571428571428571,0.75,0.0,32,0,8,0,24,ckp/ATAC_seed0/best_ATAC_seed0_fold5_AUC_0.928.pth,all_samples,ATAC_seed0_fold5_common_samples,2025-10-13 01:52:06
12
+ ATAC,6,1,all_samples,0.890752688172043,0.9140350877192983,0.7169811320754716,1.0,0.8351648351648352,0.7169811320754716,0.0,53,0,15,0,38,ckp/ATAC_seed6/best_ATAC_seed6_fold1_AUC_0.914.pth,all_samples,ATAC_seed6_fold1_all_samples,2025-10-13 01:52:06
13
+ ATAC,6,1,common_samples,0.890752688172043,0.8842592592592593,0.7272727272727273,1.0,0.8421052631578948,0.7272727272727273,0.0,33,0,9,0,24,ckp/ATAC_seed6/best_ATAC_seed6_fold1_AUC_0.914.pth,all_samples,ATAC_seed6_fold1_common_samples,2025-10-13 01:52:06
14
+ ATAC,6,2,all_samples,0.8852688172043011,0.9456140350877194,0.8444444444444444,1.0,0.9156626506024096,0.8679245283018868,0.5333333333333333,53,8,7,0,38,ckp/ATAC_seed6/best_ATAC_seed6_fold2_AUC_0.946.pth,all_samples,ATAC_seed6_fold2_all_samples,2025-10-13 01:52:06
15
+ ATAC,6,2,common_samples,0.8852688172043011,0.9166666666666667,0.8571428571428571,1.0,0.923076923076923,0.8787878787878788,0.5555555555555556,33,5,4,0,24,ckp/ATAC_seed6/best_ATAC_seed6_fold2_AUC_0.946.pth,all_samples,ATAC_seed6_fold2_common_samples,2025-10-13 01:52:06
16
+ ATAC,6,3,all_samples,0.9215068939311692,0.8006756756756757,0.8333333333333334,0.9459459459459459,0.8860759493670887,0.8301886792452831,0.5625,53,9,7,2,35,ckp/ATAC_seed6/best_ATAC_seed6_fold3_AUC_0.801.pth,all_samples,ATAC_seed6_fold3_all_samples,2025-10-13 01:52:06
17
+ ATAC,6,3,common_samples,0.9215068939311692,0.7198067632850242,0.8148148148148148,0.9565217391304348,0.8800000000000001,0.8125,0.4444444444444444,32,4,5,1,22,ckp/ATAC_seed6/best_ATAC_seed6_fold3_AUC_0.801.pth,all_samples,ATAC_seed6_fold3_common_samples,2025-10-13 01:52:06
18
+ ATAC,6,4,all_samples,0.9110845727933992,0.9070945945945946,0.875,0.9459459459459459,0.9090909090909091,0.8679245283018868,0.6875,53,11,5,2,35,ckp/ATAC_seed6/best_ATAC_seed6_fold4_AUC_0.907.pth,all_samples,ATAC_seed6_fold4_all_samples,2025-10-13 01:52:06
19
+ ATAC,6,4,common_samples,0.9110845727933992,0.925925925925926,0.88,0.9166666666666666,0.8979591836734694,0.8484848484848485,0.6666666666666666,33,6,3,2,22,ckp/ATAC_seed6/best_ATAC_seed6_fold4_AUC_0.907.pth,all_samples,ATAC_seed6_fold4_common_samples,2025-10-13 01:52:06
20
+ ATAC,6,5,all_samples,0.9196774193548387,0.8192982456140351,0.9210526315789473,0.9210526315789473,0.9210526315789473,0.8867924528301887,0.8,53,12,3,3,35,ckp/ATAC_seed6/best_ATAC_seed6_fold5_AUC_0.819.pth,all_samples,ATAC_seed6_fold5_all_samples,2025-10-13 01:52:06
21
+ ATAC,6,5,common_samples,0.9196774193548387,0.796875,0.92,0.9583333333333334,0.9387755102040817,0.90625,0.75,32,6,2,1,23,ckp/ATAC_seed6/best_ATAC_seed6_fold5_AUC_0.819.pth,all_samples,ATAC_seed6_fold5_common_samples,2025-10-13 01:52:06
22
+ ATAC,42,1,all_samples,0.9063440860215053,0.8157894736842106,0.7169811320754716,1.0,0.8351648351648352,0.7169811320754716,0.0,53,0,15,0,38,ckp/ATAC_seed42/best_ATAC_seed42_fold1_AUC_0.816.pth,all_samples,ATAC_seed42_fold1_all_samples,2025-10-13 01:52:06
23
+ ATAC,42,1,common_samples,0.9063440860215053,0.8055555555555556,0.7272727272727273,1.0,0.8421052631578948,0.7272727272727273,0.0,33,0,9,0,24,ckp/ATAC_seed42/best_ATAC_seed42_fold1_AUC_0.816.pth,all_samples,ATAC_seed42_fold1_common_samples,2025-10-13 01:52:06
24
+ ATAC,42,2,all_samples,0.92,0.8789473684210527,0.9230769230769231,0.9473684210526315,0.935064935064935,0.9056603773584906,0.8,53,12,3,2,36,ckp/ATAC_seed42/best_ATAC_seed42_fold2_AUC_0.879.pth,all_samples,ATAC_seed42_fold2_all_samples,2025-10-13 01:52:06
25
+ ATAC,42,2,common_samples,0.92,0.875,0.9583333333333334,0.9583333333333334,0.9583333333333334,0.9393939393939394,0.8888888888888888,33,8,1,1,23,ckp/ATAC_seed42/best_ATAC_seed42_fold2_AUC_0.879.pth,all_samples,ATAC_seed42_fold2_common_samples,2025-10-13 01:52:06
26
+ ATAC,42,3,all_samples,0.906741938985995,0.8902027027027027,0.8409090909090909,1.0,0.9135802469135803,0.8679245283018868,0.5625,53,9,7,0,37,ckp/ATAC_seed42/best_ATAC_seed42_fold3_AUC_0.890.pth,all_samples,ATAC_seed42_fold3_all_samples,2025-10-13 01:52:06
27
+ ATAC,42,3,common_samples,0.906741938985995,0.8985507246376812,0.7931034482758621,1.0,0.8846153846153846,0.8125,0.3333333333333333,32,3,6,0,23,ckp/ATAC_seed42/best_ATAC_seed42_fold3_AUC_0.890.pth,all_samples,ATAC_seed42_fold3_common_samples,2025-10-13 01:52:06
28
+ ATAC,42,4,all_samples,0.9086961241993269,0.8226351351351351,0.8536585365853658,0.9459459459459459,0.8974358974358975,0.8490566037735849,0.625,53,10,6,2,35,ckp/ATAC_seed42/best_ATAC_seed42_fold4_AUC_0.823.pth,all_samples,ATAC_seed42_fold4_all_samples,2025-10-13 01:52:06
29
+ ATAC,42,4,common_samples,0.9086961241993269,0.7592592592592593,0.8846153846153846,0.9583333333333334,0.9199999999999999,0.8787878787878788,0.6666666666666666,33,6,3,1,23,ckp/ATAC_seed42/best_ATAC_seed42_fold4_AUC_0.823.pth,all_samples,ATAC_seed42_fold4_common_samples,2025-10-13 01:52:06
30
+ ATAC,42,5,all_samples,0.8983870967741936,0.980701754385965,0.925,0.9736842105263158,0.9487179487179489,0.9245283018867925,0.8,53,12,3,1,37,ckp/ATAC_seed42/best_ATAC_seed42_fold5_AUC_0.981.pth,all_samples,ATAC_seed42_fold5_all_samples,2025-10-13 01:52:06
31
+ ATAC,42,5,common_samples,0.8983870967741936,0.9791666666666667,0.8888888888888888,1.0,0.9411764705882353,0.90625,0.625,32,5,3,0,24,ckp/ATAC_seed42/best_ATAC_seed42_fold5_AUC_0.981.pth,all_samples,ATAC_seed42_fold5_common_samples,2025-10-13 01:52:06
32
+ ATAC,123,1,all_samples,0.8980645161290323,0.8649122807017544,0.9024390243902439,0.9736842105263158,0.9367088607594938,0.9056603773584906,0.7333333333333333,53,11,4,1,37,ckp/ATAC_seed123/best_ATAC_seed123_fold1_AUC_0.865.pth,all_samples,ATAC_seed123_fold1_all_samples,2025-10-13 01:52:06
33
+ ATAC,123,1,common_samples,0.8980645161290323,0.8935185185185185,0.9230769230769231,1.0,0.9600000000000001,0.9393939393939394,0.7777777777777778,33,7,2,0,24,ckp/ATAC_seed123/best_ATAC_seed123_fold1_AUC_0.865.pth,all_samples,ATAC_seed123_fold1_common_samples,2025-10-13 01:52:06
34
+ ATAC,123,2,all_samples,0.8958064516129032,0.9842105263157895,0.925,0.9736842105263158,0.9487179487179489,0.9245283018867925,0.8,53,12,3,1,37,ckp/ATAC_seed123/best_ATAC_seed123_fold2_AUC_0.984.pth,all_samples,ATAC_seed123_fold2_all_samples,2025-10-13 01:52:06
35
+ ATAC,123,2,common_samples,0.8958064516129032,0.9953703703703703,0.9230769230769231,1.0,0.9600000000000001,0.9393939393939394,0.7777777777777778,33,7,2,0,24,ckp/ATAC_seed123/best_ATAC_seed123_fold2_AUC_0.984.pth,all_samples,ATAC_seed123_fold2_common_samples,2025-10-13 01:52:06
36
+ ATAC,123,3,all_samples,0.9157529041363587,0.8395270270270271,0.8536585365853658,0.9459459459459459,0.8974358974358975,0.8490566037735849,0.625,53,10,6,2,35,ckp/ATAC_seed123/best_ATAC_seed123_fold3_AUC_0.840.pth,all_samples,ATAC_seed123_fold3_all_samples,2025-10-13 01:52:06
37
+ ATAC,123,3,common_samples,0.9157529041363587,0.7777777777777778,0.8148148148148148,0.9565217391304348,0.8800000000000001,0.8125,0.4444444444444444,32,4,5,1,22,ckp/ATAC_seed123/best_ATAC_seed123_fold3_AUC_0.840.pth,all_samples,ATAC_seed123_fold3_common_samples,2025-10-13 01:52:06
38
+ ATAC,123,4,all_samples,0.8875257843882316,0.9138513513513513,0.9210526315789473,0.9459459459459459,0.9333333333333332,0.9056603773584906,0.8125,53,13,3,2,35,ckp/ATAC_seed123/best_ATAC_seed123_fold4_AUC_0.914.pth,all_samples,ATAC_seed123_fold4_all_samples,2025-10-13 01:52:06
39
+ ATAC,123,4,common_samples,0.8875257843882316,0.9212962962962963,0.9230769230769231,1.0,0.9600000000000001,0.9393939393939394,0.7777777777777778,33,7,2,0,24,ckp/ATAC_seed123/best_ATAC_seed123_fold4_AUC_0.914.pth,all_samples,ATAC_seed123_fold4_common_samples,2025-10-13 01:52:06
40
+ ATAC,123,5,all_samples,0.9141935483870969,0.775438596491228,0.7169811320754716,1.0,0.8351648351648352,0.7169811320754716,0.0,53,0,15,0,38,ckp/ATAC_seed123/best_ATAC_seed123_fold5_AUC_0.775.pth,all_samples,ATAC_seed123_fold5_all_samples,2025-10-13 01:52:06
41
+ ATAC,123,5,common_samples,0.9141935483870969,0.7708333333333334,0.75,1.0,0.8571428571428571,0.75,0.0,32,0,8,0,24,ckp/ATAC_seed123/best_ATAC_seed123_fold5_AUC_0.775.pth,all_samples,ATAC_seed123_fold5_common_samples,2025-10-13 01:52:06
42
+ ATAC,1000,1,all_samples,0.8870967741935484,0.8596491228070176,0.8780487804878049,0.9473684210526315,0.9113924050632912,0.8679245283018868,0.6666666666666666,53,10,5,2,36,ckp/ATAC_seed1000/best_ATAC_seed1000_fold1_AUC_0.860.pth,all_samples,ATAC_seed1000_fold1_all_samples,2025-10-13 01:52:06
43
+ ATAC,1000,1,common_samples,0.8870967741935484,0.888888888888889,0.8846153846153846,0.9583333333333334,0.9199999999999999,0.8787878787878788,0.6666666666666666,33,6,3,1,23,ckp/ATAC_seed1000/best_ATAC_seed1000_fold1_AUC_0.860.pth,all_samples,ATAC_seed1000_fold1_common_samples,2025-10-13 01:52:06
44
+ ATAC,1000,2,all_samples,0.9063440860215054,0.8894736842105264,0.9024390243902439,0.9736842105263158,0.9367088607594938,0.9056603773584906,0.7333333333333333,53,11,4,1,37,ckp/ATAC_seed1000/best_ATAC_seed1000_fold2_AUC_0.889.pth,all_samples,ATAC_seed1000_fold2_all_samples,2025-10-13 01:52:06
45
+ ATAC,1000,2,common_samples,0.9063440860215054,0.8518518518518519,0.8888888888888888,1.0,0.9411764705882353,0.9090909090909091,0.6666666666666666,33,6,3,0,24,ckp/ATAC_seed1000/best_ATAC_seed1000_fold2_AUC_0.889.pth,all_samples,ATAC_seed1000_fold2_common_samples,2025-10-13 01:52:06
46
+ ATAC,1000,3,all_samples,0.9130387580067312,0.972972972972973,0.9722222222222222,0.9459459459459459,0.9589041095890412,0.9433962264150944,0.9375,53,15,1,2,35,ckp/ATAC_seed1000/best_ATAC_seed1000_fold3_AUC_0.973.pth,all_samples,ATAC_seed1000_fold3_all_samples,2025-10-13 01:52:06
47
+ ATAC,1000,3,common_samples,0.9130387580067312,0.9420289855072463,0.9545454545454546,0.9130434782608695,0.9333333333333332,0.90625,0.8888888888888888,32,8,1,2,21,ckp/ATAC_seed1000/best_ATAC_seed1000_fold3_AUC_0.973.pth,all_samples,ATAC_seed1000_fold3_common_samples,2025-10-13 01:52:06
48
+ ATAC,1000,4,all_samples,0.9325806101400499,0.8496621621621622,0.6981132075471698,1.0,0.8222222222222222,0.6981132075471698,0.0,53,0,16,0,37,ckp/ATAC_seed1000/best_ATAC_seed1000_fold4_AUC_0.850.pth,all_samples,ATAC_seed1000_fold4_all_samples,2025-10-13 01:52:06
49
+ ATAC,1000,4,common_samples,0.9325806101400499,0.8796296296296297,0.7272727272727273,1.0,0.8421052631578948,0.7272727272727273,0.0,33,0,9,0,24,ckp/ATAC_seed1000/best_ATAC_seed1000_fold4_AUC_0.850.pth,all_samples,ATAC_seed1000_fold4_common_samples,2025-10-13 01:52:06
50
+ ATAC,1000,5,all_samples,0.9166666666666666,0.7964912280701755,0.875,0.9210526315789473,0.8974358974358975,0.8490566037735849,0.6666666666666666,53,10,5,3,35,ckp/ATAC_seed1000/best_ATAC_seed1000_fold5_AUC_0.796.pth,all_samples,ATAC_seed1000_fold5_all_samples,2025-10-13 01:52:06
51
+ ATAC,1000,5,common_samples,0.9166666666666666,0.75,0.8888888888888888,1.0,0.9411764705882353,0.90625,0.625,32,5,3,0,24,ckp/ATAC_seed1000/best_ATAC_seed1000_fold5_AUC_0.796.pth,all_samples,ATAC_seed1000_fold5_common_samples,2025-10-13 01:52:06
52
+ Flux,0,1,all_samples,0.8590538365883988,0.8308255269320842,0.8503401360544217,0.8928571428571429,0.8710801393728222,0.8159203980099502,0.639344262295082,402,78,44,30,250,ckp/Flux_seed0/best_Flux_seed0_fold1_AUC_0.831.pth,all_samples,Flux_seed0_fold1_all_samples,2025-10-13 01:52:06
53
+ Flux,0,1,common_samples,0.8590538365883988,0.9114583333333333,0.88,0.9166666666666666,0.8979591836734694,0.84375,0.625,32,5,3,2,22,ckp/Flux_seed0/best_Flux_seed0_fold1_AUC_0.831.pth,all_samples,Flux_seed0_fold1_common_samples,2025-10-13 01:52:06
54
+ Flux,0,2,all_samples,0.8484016308656146,0.8173208613806568,0.8661710037174721,0.8351254480286738,0.8503649635036497,0.7960199004975125,0.7073170731707317,402,87,36,46,233,ckp/Flux_seed0/best_Flux_seed0_fold2_AUC_0.817.pth,all_samples,Flux_seed0_fold2_all_samples,2025-10-13 01:52:06
55
+ Flux,0,2,common_samples,0.8484016308656146,0.7053140096618358,0.8518518518518519,1.0,0.92,0.875,0.5555555555555556,32,5,4,0,23,ckp/Flux_seed0/best_Flux_seed0_fold2_AUC_0.817.pth,all_samples,Flux_seed0_fold2_common_samples,2025-10-13 01:52:06
56
+ Flux,0,3,all_samples,0.8441450496418064,0.8383891365795377,0.8875502008032129,0.7921146953405018,0.8371212121212123,0.7860696517412935,0.7723577235772358,402,95,28,58,221,ckp/Flux_seed0/best_Flux_seed0_fold3_AUC_0.838.pth,all_samples,Flux_seed0_fold3_all_samples,2025-10-13 01:52:06
57
+ Flux,0,3,common_samples,0.8441450496418064,0.9351851851851852,0.9545454545454546,0.875,0.9130434782608695,0.8787878787878788,0.8888888888888888,33,8,1,3,21,ckp/Flux_seed0/best_Flux_seed0_fold3_AUC_0.838.pth,all_samples,Flux_seed0_fold3_common_samples,2025-10-13 01:52:06
58
+ Flux,0,4,all_samples,0.8507207717464783,0.8417650860802632,0.7575757575757576,0.985663082437276,0.8566978193146417,0.770573566084788,0.2786885245901639,401,34,88,4,275,ckp/Flux_seed0/best_Flux_seed0_fold4_AUC_0.842.pth,all_samples,Flux_seed0_fold4_all_samples,2025-10-13 01:52:06
59
+ Flux,0,4,common_samples,0.8507207717464783,0.5138888888888888,0.7666666666666667,0.9583333333333334,0.8518518518518519,0.7575757575757576,0.2222222222222222,33,2,7,1,23,ckp/Flux_seed0/best_Flux_seed0_fold4_AUC_0.842.pth,all_samples,Flux_seed0_fold4_common_samples,2025-10-13 01:52:06
60
+ Flux,0,5,all_samples,0.8503663237900353,0.8269874845760621,0.8811475409836066,0.7706093189964157,0.8221797323135754,0.7680798004987531,0.7622950819672131,401,93,29,64,215,ckp/Flux_seed0/best_Flux_seed0_fold5_AUC_0.827.pth,all_samples,Flux_seed0_fold5_all_samples,2025-10-13 01:52:06
61
+ Flux,0,5,common_samples,0.8503663237900353,0.8842592592592593,0.9230769230769231,1.0,0.9600000000000001,0.9393939393939394,0.7777777777777778,33,7,2,0,24,ckp/Flux_seed0/best_Flux_seed0_fold5_AUC_0.827.pth,all_samples,Flux_seed0_fold5_common_samples,2025-10-13 01:52:06
62
+ Flux,6,1,all_samples,0.8451174017994295,0.8394613583138173,0.8727272727272727,0.8571428571428571,0.8648648648648648,0.8134328358208955,0.7131147540983607,402,87,35,40,240,ckp/Flux_seed6/best_Flux_seed6_fold1_AUC_0.839.pth,all_samples,Flux_seed6_fold1_all_samples,2025-10-13 01:52:06
63
+ Flux,6,1,common_samples,0.8451174017994295,0.7708333333333334,0.9166666666666666,0.9166666666666666,0.9166666666666666,0.875,0.75,32,6,2,2,22,ckp/Flux_seed6/best_Flux_seed6_fold1_AUC_0.839.pth,all_samples,Flux_seed6_fold1_common_samples,2025-10-13 01:52:06
64
+ Flux,6,2,all_samples,0.8480199116461893,0.8381560159687618,0.8581818181818182,0.8458781362007168,0.851985559566787,0.7960199004975125,0.6829268292682927,402,84,39,43,236,ckp/Flux_seed6/best_Flux_seed6_fold2_AUC_0.838.pth,all_samples,Flux_seed6_fold2_all_samples,2025-10-13 01:52:06
65
+ Flux,6,2,common_samples,0.8480199116461893,0.9420289855072465,0.9545454545454546,0.9130434782608695,0.9333333333333332,0.90625,0.8888888888888888,32,8,1,2,21,ckp/Flux_seed6/best_Flux_seed6_fold2_AUC_0.838.pth,all_samples,Flux_seed6_fold2_common_samples,2025-10-13 01:52:06
66
+ Flux,6,3,all_samples,0.8609187258450458,0.8010606987790309,0.8680851063829788,0.7311827956989247,0.7937743190661478,0.736318407960199,0.7479674796747967,402,92,31,75,204,ckp/Flux_seed6/best_Flux_seed6_fold3_AUC_0.801.pth,all_samples,Flux_seed6_fold3_all_samples,2025-10-13 01:52:06
67
+ Flux,6,3,common_samples,0.8609187258450458,0.8425925925925927,0.88,0.9166666666666666,0.8979591836734694,0.8484848484848485,0.6666666666666666,33,6,3,2,22,ckp/Flux_seed6/best_Flux_seed6_fold3_AUC_0.801.pth,all_samples,Flux_seed6_fold3_common_samples,2025-10-13 01:52:06
68
+ Flux,6,4,all_samples,0.836520928872892,0.8746988659733239,0.8701754385964913,0.8888888888888888,0.8794326241134752,0.830423940149626,0.6967213114754098,401,85,37,31,248,ckp/Flux_seed6/best_Flux_seed6_fold4_AUC_0.875.pth,all_samples,Flux_seed6_fold4_all_samples,2025-10-13 01:52:06
69
+ Flux,6,4,common_samples,0.836520928872892,0.8472222222222222,0.8518518518518519,0.9583333333333334,0.9019607843137256,0.8484848484848485,0.5555555555555556,33,5,4,1,23,ckp/Flux_seed6/best_Flux_seed6_fold4_AUC_0.875.pth,all_samples,Flux_seed6_fold4_common_samples,2025-10-13 01:52:06
70
+ Flux,6,5,all_samples,0.857159300604754,0.8207297726070863,0.8715953307392996,0.8028673835125448,0.8358208955223881,0.7805486284289277,0.7295081967213115,401,89,33,55,224,ckp/Flux_seed6/best_Flux_seed6_fold5_AUC_0.821.pth,all_samples,Flux_seed6_fold5_all_samples,2025-10-13 01:52:06
71
+ Flux,6,5,common_samples,0.857159300604754,0.7037037037037037,0.8461538461538461,0.9166666666666666,0.8799999999999999,0.8181818181818182,0.5555555555555556,33,5,4,2,22,ckp/Flux_seed6/best_Flux_seed6_fold5_AUC_0.821.pth,all_samples,Flux_seed6_fold5_common_samples,2025-10-13 01:52:06
72
+ Flux,42,1,all_samples,0.8687861165971764,0.8190573770491805,0.8692307692307693,0.8071428571428572,0.837037037037037,0.7810945273631841,0.7213114754098361,402,88,34,54,226,ckp/Flux_seed42/best_Flux_seed42_fold1_AUC_0.819.pth,all_samples,Flux_seed42_fold1_all_samples,2025-10-13 01:52:06
73
+ Flux,42,1,common_samples,0.8687861165971764,0.5885416666666666,0.84,0.875,0.8571428571428572,0.78125,0.5,32,4,4,3,21,ckp/Flux_seed42/best_Flux_seed42_fold1_AUC_0.819.pth,all_samples,Flux_seed42_fold1_common_samples,2025-10-13 01:52:06
74
+ Flux,42,2,all_samples,0.8456517878556535,0.8228866159629339,0.8316498316498316,0.8853046594982079,0.857638888888889,0.7960199004975125,0.5934959349593496,402,73,50,32,247,ckp/Flux_seed42/best_Flux_seed42_fold2_AUC_0.823.pth,all_samples,Flux_seed42_fold2_all_samples,2025-10-13 01:52:06
75
+ Flux,42,2,common_samples,0.8456517878556535,0.8695652173913044,0.8214285714285714,1.0,0.9019607843137255,0.84375,0.4444444444444444,32,4,5,0,23,ckp/Flux_seed42/best_Flux_seed42_fold2_AUC_0.823.pth,all_samples,Flux_seed42_fold2_common_samples,2025-10-13 01:52:06
76
+ Flux,42,3,all_samples,0.846706321526584,0.8512107701722178,0.9,0.7741935483870968,0.8323699421965317,0.7835820895522388,0.8048780487804879,402,99,24,63,216,ckp/Flux_seed42/best_Flux_seed42_fold3_AUC_0.851.pth,all_samples,Flux_seed42_fold3_all_samples,2025-10-13 01:52:06
77
+ Flux,42,3,common_samples,0.846706321526584,0.8287037037037037,0.8636363636363636,0.7916666666666666,0.8260869565217391,0.7575757575757576,0.6666666666666666,33,6,3,5,19,ckp/Flux_seed42/best_Flux_seed42_fold3_AUC_0.851.pth,all_samples,Flux_seed42_fold3_common_samples,2025-10-13 01:52:06
78
+ Flux,42,4,all_samples,0.8438583669815285,0.8358305423350374,0.85,0.8530465949820788,0.851520572450805,0.7930174563591023,0.6557377049180327,401,80,42,41,238,ckp/Flux_seed42/best_Flux_seed42_fold4_AUC_0.836.pth,all_samples,Flux_seed42_fold4_all_samples,2025-10-13 01:52:06
79
+ Flux,42,4,common_samples,0.8438583669815285,0.9259259259259259,0.9166666666666666,0.9166666666666666,0.9166666666666666,0.8787878787878788,0.7777777777777778,33,7,2,2,22,ckp/Flux_seed42/best_Flux_seed42_fold4_AUC_0.836.pth,all_samples,Flux_seed42_fold4_common_samples,2025-10-13 01:52:06
80
+ Flux,42,5,all_samples,0.8543931449034403,0.8448792526000352,0.879245283018868,0.8351254480286738,0.8566176470588235,0.8054862842892768,0.7377049180327869,401,90,32,46,233,ckp/Flux_seed42/best_Flux_seed42_fold5_AUC_0.845.pth,all_samples,Flux_seed42_fold5_all_samples,2025-10-13 01:52:06
81
+ Flux,42,5,common_samples,0.8543931449034403,0.8472222222222222,0.8846153846153846,0.9583333333333334,0.9199999999999999,0.8787878787878788,0.6666666666666666,33,6,3,1,23,ckp/Flux_seed42/best_Flux_seed42_fold5_AUC_0.845.pth,all_samples,Flux_seed42_fold5_common_samples,2025-10-13 01:52:06
82
+ Flux,123,1,all_samples,0.8501755540926047,0.8135831381733022,0.8148148148148148,0.8642857142857143,0.8388214904679375,0.7686567164179104,0.5491803278688525,402,67,55,38,242,ckp/Flux_seed123/best_Flux_seed123_fold1_AUC_0.814.pth,all_samples,Flux_seed123_fold1_all_samples,2025-10-13 01:52:06
83
+ Flux,123,1,common_samples,0.8501755540926047,0.75,0.8214285714285714,0.9583333333333334,0.8846153846153847,0.8125,0.375,32,3,5,1,23,ckp/Flux_seed123/best_Flux_seed123_fold1_AUC_0.814.pth,all_samples,Flux_seed123_fold1_common_samples,2025-10-13 01:52:06
84
+ Flux,123,2,all_samples,0.844562469219883,0.8737943293411429,0.8680555555555556,0.8960573476702509,0.8818342151675486,0.8333333333333334,0.6910569105691057,402,85,38,29,250,ckp/Flux_seed123/best_Flux_seed123_fold2_AUC_0.874.pth,all_samples,Flux_seed123_fold2_all_samples,2025-10-13 01:52:06
85
+ Flux,123,2,common_samples,0.844562469219883,0.8695652173913044,0.875,0.9130434782608695,0.8936170212765957,0.84375,0.6666666666666666,32,6,3,2,21,ckp/Flux_seed123/best_Flux_seed123_fold2_AUC_0.874.pth,all_samples,Flux_seed123_fold2_common_samples,2025-10-13 01:52:06
86
+ Flux,123,3,all_samples,0.8554007319488917,0.8134160911501589,0.8252427184466019,0.9139784946236559,0.8673469387755103,0.8059701492537313,0.5609756097560976,402,69,54,24,255,ckp/Flux_seed123/best_Flux_seed123_fold3_AUC_0.813.pth,all_samples,Flux_seed123_fold3_all_samples,2025-10-13 01:52:06
87
+ Flux,123,3,common_samples,0.8554007319488917,0.8611111111111112,0.8888888888888888,1.0,0.9411764705882353,0.9090909090909091,0.6666666666666666,33,6,3,0,24,ckp/Flux_seed123/best_Flux_seed123_fold3_AUC_0.813.pth,all_samples,Flux_seed123_fold3_common_samples,2025-10-13 01:52:06
88
+ Flux,123,4,all_samples,0.8516598761259202,0.84382161114049,0.8745098039215686,0.7992831541218638,0.8352059925093633,0.7805486284289277,0.7377049180327869,401,90,32,56,223,ckp/Flux_seed123/best_Flux_seed123_fold4_AUC_0.844.pth,all_samples,Flux_seed123_fold4_all_samples,2025-10-13 01:52:06
89
+ Flux,123,4,common_samples,0.8516598761259202,0.625,0.8518518518518519,0.9583333333333334,0.9019607843137256,0.8484848484848485,0.5555555555555556,33,5,4,1,23,ckp/Flux_seed123/best_Flux_seed123_fold4_AUC_0.844.pth,all_samples,Flux_seed123_fold4_common_samples,2025-10-13 01:52:06
90
+ Flux,123,5,all_samples,0.84918970273875,0.8277513367412893,0.8614232209737828,0.8243727598566308,0.8424908424908425,0.7855361596009975,0.6967213114754098,401,85,37,49,230,ckp/Flux_seed123/best_Flux_seed123_fold5_AUC_0.828.pth,all_samples,Flux_seed123_fold5_all_samples,2025-10-13 01:52:06
91
+ Flux,123,5,common_samples,0.84918970273875,0.875,0.9166666666666666,0.9166666666666666,0.9166666666666666,0.8787878787878788,0.7777777777777778,33,7,2,2,22,ckp/Flux_seed123/best_Flux_seed123_fold5_AUC_0.828.pth,all_samples,Flux_seed123_fold5_common_samples,2025-10-13 01:52:06
92
+ Flux,1000,1,all_samples,0.8517207958452198,0.8416861826697892,0.8355263157894737,0.9071428571428571,0.8698630136986301,0.8109452736318408,0.5901639344262295,402,72,50,26,254,ckp/Flux_seed1000/best_Flux_seed1000_fold1_AUC_0.842.pth,all_samples,Flux_seed1000_fold1_all_samples,2025-10-13 01:52:06
93
+ Flux,1000,1,common_samples,0.8517207958452198,0.8645833333333334,0.8888888888888888,1.0,0.9411764705882353,0.90625,0.625,32,5,3,0,24,ckp/Flux_seed1000/best_Flux_seed1000_fold1_AUC_0.842.pth,all_samples,Flux_seed1000_fold1_common_samples,2025-10-13 01:52:06
94
+ Flux,1000,2,all_samples,0.8500182163368504,0.8088702392400268,0.8526315789473684,0.8709677419354839,0.8617021276595744,0.8059701492537313,0.6585365853658537,402,81,42,36,243,ckp/Flux_seed1000/best_Flux_seed1000_fold2_AUC_0.809.pth,all_samples,Flux_seed1000_fold2_all_samples,2025-10-13 01:52:06
95
+ Flux,1000,2,common_samples,0.8500182163368504,0.8599033816425121,0.9166666666666666,0.9565217391304348,0.9361702127659574,0.90625,0.7777777777777778,32,7,2,1,22,ckp/Flux_seed1000/best_Flux_seed1000_fold2_AUC_0.809.pth,all_samples,Flux_seed1000_fold2_common_samples,2025-10-13 01:52:06
96
+ Flux,1000,3,all_samples,0.8456462954927839,0.8495789258967859,0.8769230769230769,0.8172043010752689,0.8460111317254174,0.7935323383084577,0.7398373983739838,402,91,32,51,228,ckp/Flux_seed1000/best_Flux_seed1000_fold3_AUC_0.850.pth,all_samples,Flux_seed1000_fold3_all_samples,2025-10-13 01:52:06
97
+ Flux,1000,3,common_samples,0.8456462954927839,0.8981481481481481,0.8846153846153846,0.9583333333333334,0.9199999999999999,0.8787878787878788,0.6666666666666666,33,6,3,1,23,ckp/Flux_seed1000/best_Flux_seed1000_fold3_AUC_0.850.pth,all_samples,Flux_seed1000_fold3_common_samples,2025-10-13 01:52:06
98
+ Flux,1000,4,all_samples,0.8509071309813093,0.826987484576062,0.8392857142857143,0.8422939068100358,0.8407871198568874,0.7780548628428927,0.6311475409836066,401,77,45,44,235,ckp/Flux_seed1000/best_Flux_seed1000_fold4_AUC_0.827.pth,all_samples,Flux_seed1000_fold4_all_samples,2025-10-13 01:52:06
99
+ Flux,1000,4,common_samples,0.8509071309813093,0.7222222222222222,0.8260869565217391,0.7916666666666666,0.8085106382978724,0.7272727272727273,0.5555555555555556,33,5,4,5,19,ckp/Flux_seed1000/best_Flux_seed1000_fold4_AUC_0.827.pth,all_samples,Flux_seed1000_fold4_common_samples,2025-10-13 01:52:06
100
+ Flux,1000,5,all_samples,0.8567189812361828,0.8422351489511721,0.8478260869565217,0.8387096774193549,0.8432432432432434,0.7830423940149626,0.6557377049180327,401,80,42,45,234,ckp/Flux_seed1000/best_Flux_seed1000_fold5_AUC_0.842.pth,all_samples,Flux_seed1000_fold5_all_samples,2025-10-13 01:52:06
101
+ Flux,1000,5,common_samples,0.8567189812361828,0.7592592592592593,0.8461538461538461,0.9166666666666666,0.8799999999999999,0.8181818181818182,0.5555555555555556,33,5,4,2,22,ckp/Flux_seed1000/best_Flux_seed1000_fold5_AUC_0.842.pth,all_samples,Flux_seed1000_fold5_common_samples,2025-10-13 01:52:06
102
+ RNA,0,1,all_samples,0.9977031672884208,0.9213407494145199,0.8877887788778878,0.9607142857142857,0.9228130360205833,0.8880597014925373,0.7213114754098361,402,88,34,11,269,ckp/RNA_seed0/best_RNA_seed0_fold1_AUC_0.921.pth,all_samples,RNA_seed0_fold1_all_samples,2025-10-13 01:52:06
103
+ RNA,0,1,common_samples,0.9977031672884208,0.9270833333333333,0.92,0.9583333333333334,0.9387755102040817,0.90625,0.75,32,6,2,1,23,ckp/RNA_seed0/best_RNA_seed0_fold1_AUC_0.921.pth,all_samples,RNA_seed0_fold1_common_samples,2025-10-13 01:52:06
104
+ RNA,0,2,all_samples,0.9974844978058011,0.8986799545414809,0.8640776699029126,0.956989247311828,0.9081632653061223,0.8656716417910447,0.6585365853658537,402,81,42,12,267,ckp/RNA_seed0/best_RNA_seed0_fold2_AUC_0.899.pth,all_samples,RNA_seed0_fold2_all_samples,2025-10-13 01:52:06
105
+ RNA,0,2,common_samples,0.9974844978058011,0.748792270531401,0.8214285714285714,1.0,0.9019607843137255,0.84375,0.4444444444444444,32,4,5,0,23,ckp/RNA_seed0/best_RNA_seed0_fold2_AUC_0.899.pth,all_samples,RNA_seed0_fold2_common_samples,2025-10-13 01:52:06
106
+ RNA,0,3,all_samples,0.9918017330235641,0.9314334003555089,0.9190140845070423,0.9354838709677419,0.9271758436944938,0.8980099502487562,0.8130081300813008,402,100,23,18,261,ckp/RNA_seed0/best_RNA_seed0_fold3_AUC_0.931.pth,all_samples,RNA_seed0_fold3_all_samples,2025-10-13 01:52:06
107
+ RNA,0,3,common_samples,0.9918017330235641,0.9490740740740741,0.9583333333333334,0.9583333333333334,0.9583333333333334,0.9393939393939394,0.8888888888888888,33,8,1,1,23,ckp/RNA_seed0/best_RNA_seed0_fold3_AUC_0.931.pth,all_samples,RNA_seed0_fold3_common_samples,2025-10-13 01:52:06
108
+ RNA,0,4,all_samples,0.990707616976961,0.9351019448851285,0.8973509933774835,0.9713261648745519,0.9328743545611016,0.9027431421446384,0.7459016393442623,401,91,31,8,271,ckp/RNA_seed0/best_RNA_seed0_fold4_AUC_0.935.pth,all_samples,RNA_seed0_fold4_all_samples,2025-10-13 01:52:06
109
+ RNA,0,4,common_samples,0.990707616976961,0.6481481481481481,0.8518518518518519,0.9583333333333334,0.9019607843137256,0.8484848484848485,0.5555555555555556,33,5,4,1,23,ckp/RNA_seed0/best_RNA_seed0_fold4_AUC_0.935.pth,all_samples,RNA_seed0_fold4_common_samples,2025-10-13 01:52:06
110
+ RNA,0,5,all_samples,0.9906966546690297,0.9159175039661555,0.8770226537216829,0.9713261648745519,0.9217687074829931,0.885286783042394,0.6885245901639344,401,84,38,8,271,ckp/RNA_seed0/best_RNA_seed0_fold5_AUC_0.916.pth,all_samples,RNA_seed0_fold5_all_samples,2025-10-13 01:52:06
111
+ RNA,0,5,common_samples,0.9906966546690297,0.9351851851851851,0.8888888888888888,1.0,0.9411764705882353,0.9090909090909091,0.6666666666666666,33,6,3,0,24,ckp/RNA_seed0/best_RNA_seed0_fold5_AUC_0.916.pth,all_samples,RNA_seed0_fold5_common_samples,2025-10-13 01:52:06
112
+ RNA,6,1,all_samples,0.9952088362226611,0.9352751756440281,0.9381818181818182,0.9214285714285714,0.9297297297297297,0.9029850746268657,0.860655737704918,402,105,17,22,258,ckp/RNA_seed6/best_RNA_seed6_fold1_AUC_0.935.pth,all_samples,RNA_seed6_fold1_all_samples,2025-10-13 01:52:06
113
+ RNA,6,1,common_samples,0.9952088362226611,0.890625,0.92,0.9583333333333334,0.9387755102040817,0.90625,0.75,32,6,2,1,23,ckp/RNA_seed6/best_RNA_seed6_fold1_AUC_0.935.pth,all_samples,RNA_seed6_fold1_common_samples,2025-10-13 01:52:06
114
+ RNA,6,2,all_samples,0.9930741304216488,0.9105982457674038,0.8969072164948454,0.9354838709677419,0.9157894736842105,0.8805970149253731,0.7560975609756098,402,93,30,18,261,ckp/RNA_seed6/best_RNA_seed6_fold2_AUC_0.911.pth,all_samples,RNA_seed6_fold2_all_samples,2025-10-13 01:52:06
115
+ RNA,6,2,common_samples,0.9930741304216488,0.9420289855072463,0.92,1.0,0.9583333333333334,0.9375,0.7777777777777778,32,7,2,0,23,ckp/RNA_seed6/best_RNA_seed6_fold2_AUC_0.911.pth,all_samples,RNA_seed6_fold2_common_samples,2025-10-13 01:52:06
116
+ RNA,6,3,all_samples,0.9984328457945892,0.9058775533991898,0.8758169934640523,0.9605734767025089,0.9162393162393162,0.8781094527363185,0.6910569105691057,402,85,38,11,268,ckp/RNA_seed6/best_RNA_seed6_fold3_AUC_0.906.pth,all_samples,RNA_seed6_fold3_all_samples,2025-10-13 01:52:06
117
+ RNA,6,3,common_samples,0.9984328457945892,0.9166666666666667,0.8571428571428571,1.0,0.923076923076923,0.8787878787878788,0.5555555555555556,33,5,4,0,24,ckp/RNA_seed6/best_RNA_seed6_fold3_AUC_0.906.pth,all_samples,RNA_seed6_fold3_common_samples,2025-10-13 01:52:06
118
+ RNA,6,4,all_samples,0.9914932490453657,0.9473529584581937,0.9206896551724137,0.956989247311828,0.9384885764499121,0.912718204488778,0.8114754098360656,401,99,23,12,267,ckp/RNA_seed6/best_RNA_seed6_fold4_AUC_0.947.pth,all_samples,RNA_seed6_fold4_all_samples,2025-10-13 01:52:06
119
+ RNA,6,4,common_samples,0.9914932490453657,0.7916666666666666,0.8518518518518519,0.9583333333333334,0.9019607843137256,0.8484848484848485,0.5555555555555556,33,5,4,1,23,ckp/RNA_seed6/best_RNA_seed6_fold4_AUC_0.947.pth,all_samples,RNA_seed6_fold4_common_samples,2025-10-13 01:52:06
120
+ RNA,6,5,all_samples,0.9894177187437195,0.9125389270814972,0.9106529209621993,0.9498207885304659,0.9298245614035087,0.9002493765586035,0.7868852459016393,401,96,26,14,265,ckp/RNA_seed6/best_RNA_seed6_fold5_AUC_0.913.pth,all_samples,RNA_seed6_fold5_all_samples,2025-10-13 01:52:06
121
+ RNA,6,5,common_samples,0.9894177187437195,0.7407407407407408,0.88,0.9166666666666666,0.8979591836734694,0.8484848484848485,0.6666666666666666,33,6,3,2,22,ckp/RNA_seed6/best_RNA_seed6_fold5_AUC_0.913.pth,all_samples,RNA_seed6_fold5_common_samples,2025-10-13 01:52:06
122
+ RNA,42,1,all_samples,0.9967787652695487,0.9374414519906323,0.9290780141843972,0.9357142857142857,0.9323843416370108,0.9054726368159204,0.8360655737704918,402,102,20,18,262,ckp/RNA_seed42/best_RNA_seed42_fold1_AUC_0.937.pth,all_samples,RNA_seed42_fold1_all_samples,2025-10-13 01:52:06
123
+ RNA,42,1,common_samples,0.9967787652695487,0.7864583333333333,0.8888888888888888,1.0,0.9411764705882353,0.90625,0.625,32,5,3,0,24,ckp/RNA_seed42/best_RNA_seed42_fold1_AUC_0.937.pth,all_samples,RNA_seed42_fold1_common_samples,2025-10-13 01:52:06
124
+ RNA,42,2,all_samples,0.9959978982558086,0.9107148060727919,0.8786885245901639,0.9605734767025089,0.9178082191780821,0.8805970149253731,0.6991869918699187,402,86,37,11,268,ckp/RNA_seed42/best_RNA_seed42_fold2_AUC_0.911.pth,all_samples,RNA_seed42_fold2_all_samples,2025-10-13 01:52:06
125
+ RNA,42,2,common_samples,0.9959978982558086,0.7729468599033817,0.88,0.9565217391304348,0.9166666666666666,0.875,0.6666666666666666,32,6,3,1,22,ckp/RNA_seed42/best_RNA_seed42_fold2_AUC_0.911.pth,all_samples,RNA_seed42_fold2_common_samples,2025-10-13 01:52:06
126
+ RNA,42,3,all_samples,0.9824317619683164,0.9190197278316868,0.8881578947368421,0.967741935483871,0.9262435677530019,0.8930348258706468,0.7235772357723578,402,89,34,9,270,ckp/RNA_seed42/best_RNA_seed42_fold3_AUC_0.919.pth,all_samples,RNA_seed42_fold3_all_samples,2025-10-13 01:52:06
127
+ RNA,42,3,common_samples,0.9824317619683164,0.7824074074074074,0.8888888888888888,1.0,0.9411764705882353,0.9090909090909091,0.6666666666666666,33,6,3,0,24,ckp/RNA_seed42/best_RNA_seed42_fold3_AUC_0.919.pth,all_samples,RNA_seed42_fold3_common_samples,2025-10-13 01:52:06
128
+ RNA,42,4,all_samples,0.993795333710924,0.9258769610435396,0.9,0.967741935483871,0.9326424870466321,0.9027431421446384,0.7540983606557377,401,92,30,9,270,ckp/RNA_seed42/best_RNA_seed42_fold4_AUC_0.926.pth,all_samples,RNA_seed42_fold4_all_samples,2025-10-13 01:52:06
129
+ RNA,42,4,common_samples,0.993795333710924,0.9722222222222222,0.9230769230769231,1.0,0.9600000000000001,0.9393939393939394,0.7777777777777778,33,7,2,0,24,ckp/RNA_seed42/best_RNA_seed42_fold4_AUC_0.926.pth,all_samples,RNA_seed42_fold4_common_samples,2025-10-13 01:52:06
130
+ RNA,42,5,all_samples,0.983532786435971,0.9294024325753569,0.9190140845070423,0.9354838709677419,0.9271758436944938,0.8977556109725686,0.8114754098360656,401,99,23,18,261,ckp/RNA_seed42/best_RNA_seed42_fold5_AUC_0.929.pth,all_samples,RNA_seed42_fold5_all_samples,2025-10-13 01:52:06
131
+ RNA,42,5,common_samples,0.983532786435971,0.8611111111111112,0.8888888888888888,1.0,0.9411764705882353,0.9090909090909091,0.6666666666666666,33,6,3,0,24,ckp/RNA_seed42/best_RNA_seed42_fold5_AUC_0.929.pth,all_samples,RNA_seed42_fold5_common_samples,2025-10-13 01:52:06
132
+ RNA,123,1,all_samples,0.9946181698485846,0.9142271662763466,0.8881578947368421,0.9642857142857143,0.9246575342465754,0.8905472636815921,0.7213114754098361,402,88,34,10,270,ckp/RNA_seed123/best_RNA_seed123_fold1_AUC_0.914.pth,all_samples,RNA_seed123_fold1_all_samples,2025-10-13 01:52:06
133
+ RNA,123,1,common_samples,0.9946181698485846,0.8958333333333333,0.92,0.9583333333333334,0.9387755102040817,0.90625,0.75,32,6,2,1,23,ckp/RNA_seed123/best_RNA_seed123_fold1_AUC_0.914.pth,all_samples,RNA_seed123_fold1_common_samples,2025-10-13 01:52:06
134
+ RNA,123,2,all_samples,0.9941085254287247,0.9536381385319229,0.9432624113475178,0.953405017921147,0.948306595365419,0.927860696517413,0.8699186991869918,402,107,16,13,266,ckp/RNA_seed123/best_RNA_seed123_fold2_AUC_0.954.pth,all_samples,RNA_seed123_fold2_all_samples,2025-10-13 01:52:06
135
+ RNA,123,2,common_samples,0.9941085254287247,0.9903381642512077,0.92,1.0,0.9583333333333334,0.9375,0.7777777777777778,32,7,2,0,23,ckp/RNA_seed123/best_RNA_seed123_fold2_AUC_0.954.pth,all_samples,RNA_seed123_fold2_common_samples,2025-10-13 01:52:06
136
+ RNA,123,3,all_samples,0.9964592567368408,0.9091412419500539,0.865814696485623,0.9713261648745519,0.9155405405405406,0.8756218905472637,0.6585365853658537,402,81,42,8,271,ckp/RNA_seed123/best_RNA_seed123_fold3_AUC_0.909.pth,all_samples,RNA_seed123_fold3_all_samples,2025-10-13 01:52:06
137
+ RNA,123,3,common_samples,0.9964592567368408,0.8101851851851852,0.8888888888888888,1.0,0.9411764705882353,0.9090909090909091,0.6666666666666666,33,6,3,0,24,ckp/RNA_seed123/best_RNA_seed123_fold3_AUC_0.909.pth,all_samples,RNA_seed123_fold3_common_samples,2025-10-13 01:52:06
138
+ RNA,123,4,all_samples,0.9955977198399504,0.9346025030847877,0.9054054054054054,0.9605734767025089,0.9321739130434782,0.9027431421446384,0.7704918032786885,401,94,28,11,268,ckp/RNA_seed123/best_RNA_seed123_fold4_AUC_0.935.pth,all_samples,RNA_seed123_fold4_all_samples,2025-10-13 01:52:06
139
+ RNA,123,4,common_samples,0.9955977198399504,0.7453703703703705,0.8275862068965517,1.0,0.9056603773584906,0.8484848484848485,0.4444444444444444,33,4,5,0,24,ckp/RNA_seed123/best_RNA_seed123_fold4_AUC_0.935.pth,all_samples,RNA_seed123_fold4_common_samples,2025-10-13 01:52:06
140
+ RNA,123,5,all_samples,0.992043191493249,0.9150067571537693,0.92,0.9068100358422939,0.9133574007220215,0.8802992518703242,0.819672131147541,401,100,22,26,253,ckp/RNA_seed123/best_RNA_seed123_fold5_AUC_0.915.pth,all_samples,RNA_seed123_fold5_all_samples,2025-10-13 01:52:06
141
+ RNA,123,5,common_samples,0.992043191493249,0.9074074074074074,0.9230769230769231,1.0,0.9600000000000001,0.9393939393939394,0.7777777777777778,33,7,2,0,24,ckp/RNA_seed123/best_RNA_seed123_fold5_AUC_0.915.pth,all_samples,RNA_seed123_fold5_common_samples,2025-10-13 01:52:06
142
+ RNA,1000,1,all_samples,0.9847469095164948,0.93711943793911,0.9146757679180887,0.9571428571428572,0.9354275741710297,0.9079601990049752,0.7950819672131147,402,97,25,12,268,ckp/RNA_seed1000/best_RNA_seed1000_fold1_AUC_0.937.pth,all_samples,RNA_seed1000_fold1_all_samples,2025-10-13 01:52:06
143
+ RNA,1000,1,common_samples,0.9847469095164948,0.8020833333333333,0.8846153846153846,0.9583333333333334,0.9199999999999999,0.875,0.625,32,5,3,1,23,ckp/RNA_seed1000/best_RNA_seed1000_fold1_AUC_0.937.pth,all_samples,RNA_seed1000_fold1_common_samples,2025-10-13 01:52:06
144
+ RNA,1000,2,all_samples,0.9944197593246591,0.9213509339394469,0.8918918918918919,0.946236559139785,0.9182608695652175,0.8830845771144279,0.7398373983739838,402,91,32,15,264,ckp/RNA_seed1000/best_RNA_seed1000_fold2_AUC_0.921.pth,all_samples,RNA_seed1000_fold2_all_samples,2025-10-13 01:52:06
145
+ RNA,1000,2,common_samples,0.9944197593246591,0.9323671497584541,0.92,1.0,0.9583333333333334,0.9375,0.7777777777777778,32,7,2,0,23,ckp/RNA_seed1000/best_RNA_seed1000_fold2_AUC_0.921.pth,all_samples,RNA_seed1000_fold2_common_samples,2025-10-13 01:52:06
146
+ RNA,1000,3,all_samples,0.9929423137127823,0.8946586240055949,0.869281045751634,0.953405017921147,0.9094017094017094,0.8681592039800995,0.6747967479674797,402,83,40,13,266,ckp/RNA_seed1000/best_RNA_seed1000_fold3_AUC_0.895.pth,all_samples,RNA_seed1000_fold3_all_samples,2025-10-13 01:52:06
147
+ RNA,1000,3,common_samples,0.9929423137127823,0.8796296296296297,0.8846153846153846,0.9583333333333334,0.9199999999999999,0.8787878787878788,0.6666666666666666,33,6,3,1,23,ckp/RNA_seed1000/best_RNA_seed1000_fold3_AUC_0.895.pth,all_samples,RNA_seed1000_fold3_common_samples,2025-10-13 01:52:06
148
+ RNA,1000,4,all_samples,0.9947234757824348,0.937951701040014,0.9228070175438596,0.942652329749104,0.9326241134751773,0.9052369077306733,0.819672131147541,401,100,22,16,263,ckp/RNA_seed1000/best_RNA_seed1000_fold4_AUC_0.938.pth,all_samples,RNA_seed1000_fold4_all_samples,2025-10-13 01:52:06
149
+ RNA,1000,4,common_samples,0.9947234757824348,0.7685185185185186,0.8571428571428571,1.0,0.923076923076923,0.8787878787878788,0.5555555555555556,33,5,4,0,24,ckp/RNA_seed1000/best_RNA_seed1000_fold4_AUC_0.938.pth,all_samples,RNA_seed1000_fold4_common_samples,2025-10-13 01:52:06
150
+ RNA,1000,5,all_samples,0.9877313503736319,0.9158881250367237,0.9078014184397163,0.9175627240143369,0.9126559714795008,0.8778054862842892,0.7868852459016393,401,96,26,23,256,ckp/RNA_seed1000/best_RNA_seed1000_fold5_AUC_0.916.pth,all_samples,RNA_seed1000_fold5_all_samples,2025-10-13 01:52:06
151
+ RNA,1000,5,common_samples,0.9877313503736319,0.925925925925926,0.8888888888888888,1.0,0.9411764705882353,0.9090909090909091,0.6666666666666666,33,6,3,0,24,ckp/RNA_seed1000/best_RNA_seed1000_fold5_AUC_0.916.pth,all_samples,RNA_seed1000_fold5_common_samples,2025-10-13 01:52:06
152
+ Multi,0,1,all_samples,0.9959861712864237,0.9038052721088435,0.9078947368421053,0.9387755102040817,0.923076923076923,0.8909952606635071,0.78125,422,100,28,18,276,ckp/Multi_seed0/multi_seed0_fold1.pth,all_samples,Multi_seed0_fold1_all_samples,2025-10-13 01:52:06
153
+ Multi,0,1,common_samples,0.9959861712864237,0.8802083333333334,0.9230769230769231,1.0,0.9600000000000001,0.9375,0.75,32,6,2,0,24,ckp/Multi_seed0/multi_seed0_fold1.pth,all_samples,Multi_seed0_fold1_common_samples,2025-10-13 01:52:06
154
+ Multi,0,2,all_samples,0.9977147657221472,0.919226393629124,0.9303135888501742,0.9112627986348123,0.9206896551724137,0.8909952606635071,0.8449612403100775,422,109,20,26,267,ckp/Multi_seed0/multi_seed0_fold2.pth,all_samples,Multi_seed0_fold2_all_samples,2025-10-13 01:52:06
155
+ Multi,0,2,common_samples,0.9977147657221472,0.8985507246376812,0.9166666666666666,0.9565217391304348,0.9361702127659574,0.90625,0.7777777777777778,32,7,2,1,22,ckp/Multi_seed0/multi_seed0_fold2.pth,all_samples,Multi_seed0_fold2_common_samples,2025-10-13 01:52:06
156
+ Multi,0,3,all_samples,0.9947706900404738,0.9427555321390937,0.9249146757679181,0.928082191780822,0.9264957264957264,0.8981042654028436,0.8307692307692308,422,108,22,21,271,ckp/Multi_seed0/multi_seed0_fold3.pth,all_samples,Multi_seed0_fold3_all_samples,2025-10-13 01:52:06
157
+ Multi,0,3,common_samples,0.9947706900404738,0.949074074074074,0.9583333333333334,0.9583333333333334,0.9583333333333334,0.9393939393939394,0.8888888888888888,33,8,1,1,23,ckp/Multi_seed0/multi_seed0_fold3.pth,all_samples,Multi_seed0_fold3_common_samples,2025-10-13 01:52:06
158
+ Multi,0,4,all_samples,0.9942133304230494,0.9401275233484139,0.93,0.9522184300341296,0.9409780775716695,0.9170616113744076,0.8372093023255814,422,108,21,14,279,ckp/Multi_seed0/multi_seed0_fold4.pth,all_samples,Multi_seed0_fold4_all_samples,2025-10-13 01:52:06
159
+ Multi,0,4,common_samples,0.9942133304230494,0.9490740740740742,0.8888888888888888,1.0,0.9411764705882353,0.9090909090909091,0.6666666666666666,33,6,3,0,24,ckp/Multi_seed0/multi_seed0_fold4.pth,all_samples,Multi_seed0_fold4_common_samples,2025-10-13 01:52:06
160
+ Multi,0,5,all_samples,0.9918892372410509,0.8986692065507845,0.871875,0.9522184300341296,0.9102773246329526,0.8696682464454977,0.6821705426356589,422,88,41,14,279,ckp/Multi_seed0/multi_seed0_fold5.pth,all_samples,Multi_seed0_fold5_all_samples,2025-10-13 01:52:06
161
+ Multi,0,5,common_samples,0.9918892372410509,0.8101851851851852,0.8846153846153846,0.9583333333333334,0.9199999999999999,0.8787878787878788,0.6666666666666666,33,6,3,1,23,ckp/Multi_seed0/multi_seed0_fold5.pth,all_samples,Multi_seed0_fold5_common_samples,2025-10-13 01:52:06
162
+ Multi,6,1,all_samples,0.979498089714853,0.9367559523809523,0.9188311688311688,0.9625850340136054,0.9401993355481727,0.9146919431279621,0.8046875,422,103,25,11,283,ckp/Multi_seed6/best_Multi_seed6_fold1_AUC_0.937.pth,all_samples,Multi_seed6_fold1_all_samples,2025-10-13 01:52:06
163
+ Multi,6,1,common_samples,0.979498089714853,0.84375,0.92,0.9583333333333334,0.9387755102040817,0.90625,0.75,32,6,2,1,23,ckp/Multi_seed6/best_Multi_seed6_fold1_AUC_0.937.pth,all_samples,Multi_seed6_fold1_common_samples,2025-10-13 01:52:06
164
+ Multi,6,2,all_samples,0.9946854247691616,0.9230362198058046,0.8814102564102564,0.9385665529010239,0.9090909090909091,0.8696682464454977,0.7131782945736435,422,92,37,18,275,ckp/Multi_seed6/best_Multi_seed6_fold2_AUC_0.923.pth,all_samples,Multi_seed6_fold2_all_samples,2025-10-13 01:52:06
165
+ Multi,6,2,common_samples,0.9946854247691616,0.9710144927536232,0.92,1.0,0.9583333333333334,0.9375,0.7777777777777778,32,7,2,0,23,ckp/Multi_seed6/best_Multi_seed6_fold2_AUC_0.923.pth,all_samples,Multi_seed6_fold2_common_samples,2025-10-13 01:52:06
166
+ Multi,6,3,all_samples,0.9967289913010371,0.9115121180189673,0.8706624605678234,0.9452054794520548,0.9064039408866994,0.8649289099526066,0.6846153846153846,422,89,41,16,276,ckp/Multi_seed6/best_Multi_seed6_fold3_AUC_0.912.pth,all_samples,Multi_seed6_fold3_all_samples,2025-10-13 01:52:06
167
+ Multi,6,3,common_samples,0.9967289913010371,0.9398148148148148,0.9230769230769231,1.0,0.9600000000000001,0.9393939393939394,0.7777777777777778,33,7,2,0,24,ckp/Multi_seed6/best_Multi_seed6_fold3_AUC_0.912.pth,all_samples,Multi_seed6_fold3_common_samples,2025-10-13 01:52:06
168
+ Multi,6,4,all_samples,0.9915419874593222,0.9445987776807683,0.9038461538461539,0.962457337883959,0.9322314049586776,0.9028436018957346,0.7674418604651163,422,99,30,11,282,ckp/Multi_seed6/best_Multi_seed6_fold4_AUC_0.945.pth,all_samples,Multi_seed6_fold4_all_samples,2025-10-13 01:52:06
169
+ Multi,6,4,common_samples,0.9915419874593222,0.875,0.8846153846153846,0.9583333333333334,0.9199999999999999,0.8787878787878788,0.6666666666666666,33,6,3,1,23,ckp/Multi_seed6/best_Multi_seed6_fold4_AUC_0.945.pth,all_samples,Multi_seed6_fold4_common_samples,2025-10-13 01:52:06
170
+ Multi,6,5,all_samples,0.9950029102838849,0.9193851363864857,0.9347826086956522,0.8805460750853242,0.9068541300527242,0.8744075829383886,0.8604651162790697,422,111,18,35,258,ckp/Multi_seed6/best_Multi_seed6_fold5_AUC_0.919.pth,all_samples,Multi_seed6_fold5_all_samples,2025-10-13 01:52:06
171
+ Multi,6,5,common_samples,0.9950029102838849,0.9537037037037037,0.9166666666666666,0.9166666666666666,0.9166666666666666,0.8787878787878788,0.7777777777777778,33,7,2,2,22,ckp/Multi_seed6/best_Multi_seed6_fold5_AUC_0.919.pth,all_samples,Multi_seed6_fold5_common_samples,2025-10-13 01:52:06
172
+ Multi,42,1,all_samples,0.9918781910351219,0.9233896683673469,0.9009584664536742,0.9591836734693877,0.9291598023064251,0.8981042654028436,0.7578125,422,97,31,12,282,ckp/Multi_seed42/best_Multi_seed42_fold1_AUC_0.923.pth,all_samples,Multi_seed42_fold1_all_samples,2025-10-13 01:52:06
173
+ Multi,42,1,common_samples,0.9918781910351219,0.9166666666666666,0.9230769230769231,1.0,0.9600000000000001,0.9375,0.75,32,6,2,0,24,ckp/Multi_seed42/best_Multi_seed42_fold1_AUC_0.923.pth,all_samples,Multi_seed42_fold1_common_samples,2025-10-13 01:52:06
174
+ Multi,42,2,all_samples,0.989594081540863,0.9052570309812948,0.9081632653061225,0.9112627986348123,0.909710391822828,0.8744075829383886,0.7906976744186046,422,102,27,26,267,ckp/Multi_seed42/best_Multi_seed42_fold2_AUC_0.905.pth,all_samples,Multi_seed42_fold2_all_samples,2025-10-13 01:52:06
175
+ Multi,42,2,common_samples,0.989594081540863,0.9227053140096619,0.9130434782608695,0.9130434782608695,0.9130434782608695,0.875,0.7777777777777778,32,7,2,2,21,ckp/Multi_seed42/best_Multi_seed42_fold2_AUC_0.905.pth,all_samples,Multi_seed42_fold2_common_samples,2025-10-13 01:52:06
176
+ Multi,42,3,all_samples,0.9926667163277298,0.9284246575342465,0.898360655737705,0.9383561643835616,0.9179229480737019,0.8838862559241706,0.7615384615384615,422,99,31,18,274,ckp/Multi_seed42/best_Multi_seed42_fold3_AUC_0.928.pth,all_samples,Multi_seed42_fold3_all_samples,2025-10-13 01:52:06
177
+ Multi,42,3,common_samples,0.9926667163277298,0.9212962962962963,0.8846153846153846,0.9583333333333334,0.9199999999999999,0.8787878787878788,0.6666666666666666,33,6,3,1,23,ckp/Multi_seed42/best_Multi_seed42_fold3_AUC_0.928.pth,all_samples,Multi_seed42_fold3_common_samples,2025-10-13 01:52:06
178
+ Multi,42,4,all_samples,0.9914667500066143,0.9289361589544144,0.9201388888888888,0.9044368600682594,0.9122203098106714,0.8791469194312796,0.8217054263565892,422,106,23,28,265,ckp/Multi_seed42/best_Multi_seed42_fold4_AUC_0.929.pth,all_samples,Multi_seed42_fold4_all_samples,2025-10-13 01:52:06
179
+ Multi,42,4,common_samples,0.9914667500066143,0.949074074074074,0.8888888888888888,1.0,0.9411764705882353,0.9090909090909091,0.6666666666666666,33,6,3,0,24,ckp/Multi_seed42/best_Multi_seed42_fold4_AUC_0.929.pth,all_samples,Multi_seed42_fold4_common_samples,2025-10-13 01:52:06
180
+ Multi,42,5,all_samples,0.9991550255311269,0.9331428420244993,0.9105960264900662,0.9385665529010239,0.9243697478991597,0.8933649289099526,0.7906976744186046,422,102,27,18,275,ckp/Multi_seed42/best_Multi_seed42_fold5_AUC_0.933.pth,all_samples,Multi_seed42_fold5_all_samples,2025-10-13 01:52:06
181
+ Multi,42,5,common_samples,0.9991550255311269,0.9722222222222222,0.8888888888888888,1.0,0.9411764705882353,0.9090909090909091,0.6666666666666666,33,6,3,0,24,ckp/Multi_seed42/best_Multi_seed42_fold5_AUC_0.933.pth,all_samples,Multi_seed42_fold5_common_samples,2025-10-13 01:52:06
182
+ Multi,123,1,all_samples,0.9849753967165891,0.9181813350340136,0.8711656441717791,0.9659863945578231,0.9161290322580644,0.8767772511848341,0.671875,422,86,42,10,284,ckp/Multi_seed123/best_Multi_seed123_fold1_AUC_0.918.pth,all_samples,Multi_seed123_fold1_all_samples,2025-10-13 01:52:06
183
+ Multi,123,1,common_samples,0.9849753967165891,0.75,0.875,0.875,0.875,0.8125,0.625,32,5,3,3,21,ckp/Multi_seed123/best_Multi_seed123_fold1_AUC_0.918.pth,all_samples,Multi_seed123_fold1_common_samples,2025-10-13 01:52:06
184
+ Multi,123,2,all_samples,0.9950343281212795,0.9552080852977749,0.9096774193548387,0.962457337883959,0.9353233830845771,0.9075829383886256,0.7829457364341085,422,101,28,11,282,ckp/Multi_seed123/best_Multi_seed123_fold2_AUC_0.955.pth,all_samples,Multi_seed123_fold2_all_samples,2025-10-13 01:52:06
185
+ Multi,123,2,common_samples,0.9950343281212795,0.9903381642512077,0.92,1.0,0.9583333333333334,0.9375,0.7777777777777778,32,7,2,0,23,ckp/Multi_seed123/best_Multi_seed123_fold2_AUC_0.955.pth,all_samples,Multi_seed123_fold2_common_samples,2025-10-13 01:52:06
186
+ Multi,123,3,all_samples,0.9933106547811189,0.9123551106427819,0.9,0.9246575342465754,0.9121621621621622,0.8767772511848341,0.7692307692307693,422,100,30,22,270,ckp/Multi_seed123/best_Multi_seed123_fold3_AUC_0.912.pth,all_samples,Multi_seed123_fold3_all_samples,2025-10-13 01:52:06
187
+ Multi,123,3,common_samples,0.9933106547811189,0.8611111111111112,0.92,0.9583333333333334,0.9387755102040817,0.9090909090909091,0.7777777777777778,33,7,2,1,23,ckp/Multi_seed123/best_Multi_seed123_fold3_AUC_0.912.pth,all_samples,Multi_seed123_fold3_common_samples,2025-10-13 01:52:06
188
+ Multi,123,4,all_samples,0.9793932058099849,0.9063682302828268,0.8892405063291139,0.9590443686006825,0.922824302134647,0.8886255924170616,0.7286821705426356,422,94,35,12,281,ckp/Multi_seed123/best_Multi_seed123_fold4_AUC_0.906.pth,all_samples,Multi_seed123_fold4_all_samples,2025-10-13 01:52:06
189
+ Multi,123,4,common_samples,0.9793932058099849,0.8935185185185185,0.8275862068965517,1.0,0.9056603773584906,0.8484848484848485,0.4444444444444444,33,4,5,0,24,ckp/Multi_seed123/best_Multi_seed123_fold4_AUC_0.906.pth,all_samples,Multi_seed123_fold4_common_samples,2025-10-13 01:52:06
190
+ Multi,123,5,all_samples,0.9993914860967801,0.9235918194565705,0.9161073825503355,0.931740614334471,0.9238578680203046,0.8933649289099526,0.8062015503875969,422,104,25,20,273,ckp/Multi_seed123/best_Multi_seed123_fold5_AUC_0.924.pth,all_samples,Multi_seed123_fold5_all_samples,2025-10-13 01:52:06
191
+ Multi,123,5,common_samples,0.9993914860967801,0.9537037037037036,0.9230769230769231,1.0,0.9600000000000001,0.9393939393939394,0.7777777777777778,33,7,2,0,24,ckp/Multi_seed123/best_Multi_seed123_fold5_AUC_0.924.pth,all_samples,Multi_seed123_fold5_common_samples,2025-10-13 01:52:06
192
+ Multi,1000,1,all_samples,0.9992913857950106,0.9387223639455782,0.9254237288135593,0.9285714285714286,0.9269949066213922,0.8981042654028436,0.828125,422,106,22,21,273,ckp/Multi_seed1000/best_Multi_seed1000_fold1_AUC_0.939.pth,all_samples,Multi_seed1000_fold1_all_samples,2025-10-13 01:52:06
193
+ Multi,1000,1,common_samples,0.9992913857950106,0.875,0.92,0.9583333333333334,0.9387755102040817,0.90625,0.75,32,6,2,1,23,ckp/Multi_seed1000/best_Multi_seed1000_fold1_AUC_0.939.pth,all_samples,Multi_seed1000_fold1_common_samples,2025-10-13 01:52:06
194
+ Multi,1000,2,all_samples,0.9862505622139324,0.9068444585549118,0.871875,0.9522184300341296,0.9102773246329526,0.8696682464454977,0.6821705426356589,422,88,41,14,279,ckp/Multi_seed1000/best_Multi_seed1000_fold2_AUC_0.907.pth,all_samples,Multi_seed1000_fold2_all_samples,2025-10-13 01:52:06
195
+ Multi,1000,2,common_samples,0.9862505622139324,0.9855072463768116,0.9166666666666666,0.9565217391304348,0.9361702127659574,0.90625,0.7777777777777778,32,7,2,1,22,ckp/Multi_seed1000/best_Multi_seed1000_fold2_AUC_0.907.pth,all_samples,Multi_seed1000_fold2_common_samples,2025-10-13 01:52:06
196
+ Multi,1000,3,all_samples,0.9946299836946175,0.8979847207586934,0.8996655518394648,0.9212328767123288,0.910321489001692,0.8744075829383886,0.7692307692307693,422,100,30,23,269,ckp/Multi_seed1000/best_Multi_seed1000_fold3_AUC_0.898.pth,all_samples,Multi_seed1000_fold3_all_samples,2025-10-13 01:52:06
197
+ Multi,1000,3,common_samples,0.9946299836946175,0.9351851851851851,0.92,0.9583333333333334,0.9387755102040817,0.9090909090909091,0.7777777777777778,33,7,2,1,23,ckp/Multi_seed1000/best_Multi_seed1000_fold3_AUC_0.898.pth,all_samples,Multi_seed1000_fold3_common_samples,2025-10-13 01:52:06
198
+ Multi,1000,4,all_samples,0.9942736857422546,0.9377463819879884,0.9261744966442953,0.9419795221843004,0.934010152284264,0.9075829383886256,0.8294573643410853,422,107,22,17,276,ckp/Multi_seed1000/best_Multi_seed1000_fold4_AUC_0.938.pth,all_samples,Multi_seed1000_fold4_all_samples,2025-10-13 01:52:06
199
+ Multi,1000,4,common_samples,0.9942736857422546,0.962962962962963,0.9230769230769231,1.0,0.9600000000000001,0.9393939393939394,0.7777777777777778,33,7,2,0,24,ckp/Multi_seed1000/best_Multi_seed1000_fold4_AUC_0.938.pth,all_samples,Multi_seed1000_fold4_common_samples,2025-10-13 01:52:06
200
+ Multi,1000,5,all_samples,0.997084755403868,0.9168187951424714,0.9090909090909091,0.9215017064846417,0.9152542372881356,0.8815165876777251,0.7906976744186046,422,102,27,23,270,ckp/Multi_seed1000/best_Multi_seed1000_fold5_AUC_0.917.pth,all_samples,Multi_seed1000_fold5_all_samples,2025-10-13 01:52:06
201
+ Multi,1000,5,common_samples,0.997084755403868,0.9398148148148149,0.9230769230769231,1.0,0.9600000000000001,0.9393939393939394,0.7777777777777778,33,7,2,0,24,ckp/Multi_seed1000/best_Multi_seed1000_fold5_AUC_0.917.pth,all_samples,Multi_seed1000_fold5_common_samples,2025-10-13 01:52:06
analysis/re_all_48.tsv ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Category Term Count % PValue Genes List Total Pop Hits Pop Total Fold Enrichment Bonferroni Benjamini FDR
2
+ GOTERM_CC_DIRECT GO:0005615~extracellular space 16 34.04255319148936 4.663616024540596E-8 "FGB, SERPINB11, IGFBP4, IGFBP2, APOA1, NDNF, MSLN, KRT18, S100A16, S100A13, TIMP3, DPEP1, APOE, ANGPTL4, APOB, RAMP1" 47 1809 29722 5.593215953330275 6.0626825979515075E-6 6.062700831902774E-6 5.269886107730873E-6
3
+ GOTERM_BP_DIRECT GO:0071402~cellular response to lipoprotein particle stimulus 3 6.382978723404255 1.3431633526269892E-5 "APOA1, APOE, APOB" 46 4 29712 484.4347826086956 0.006680019 0.003278001 0.003231925
4
+ GOTERM_BP_DIRECT GO:0070328~triglyceride homeostasis 4 8.510638298 1.7157082277104606E-5 "HNF4A, APOA1, ANGPTL4, APOE" 46 33 29712 78.29249011857706 0.008524913 0.003278001 0.003231925
5
+ GOTERM_BP_DIRECT GO:0042632~cholesterol homeostasis 5 10.638297872340425 2.2737647712678613E-5 "HNF4A, EPHX2, APOA1, APOE, APOB" 46 109 29712 29.629038691663343 0.011282089659063299 0.003278001 0.003231925
6
+ GOTERM_BP_DIRECT GO:0006629~lipid metabolic process 6 12.76595744680851 2.632932918372177E-5 "HNF4A, MGST2, DPEP1, ANGPTL4, APOE, APOB" 46 232 29712 16.70464767616192 0.013052574785678228 0.003278001 0.003231925
7
+ GOTERM_BP_DIRECT GO:0042158~lipoprotein biosynthetic process 3 6.382978723404255 3.3514330785070164E-5 "APOA1, APOE, APOB" 46 6 29712 322.9565217391304 0.016584862706917503 0.003338027 0.003291107
8
+ REACTOME_PATHWAY R-MMU-381426~Regulation of IGF transport and uptake by IGFBPs 6 12.76595744680851 4.499995756510954E-5 "IGFBP4, IGFBP2, APOA1, APOE, APOB, MSLN" 32 121 9277 14.37551652892562 0.006682715 0.006749994 0.006164994
9
+ GOTERM_CC_DIRECT GO:0034363~intermediate-density lipoprotein particle 3 6.382978723404255 4.8967094457451444E-5 "APOA1, APOE, APOB" 47 7 29722 271.02127659574467 0.006345659 0.003182861 0.002766641
10
+ REACTOME_PATHWAY R-MMU-8963899~Plasma lipoprotein remodeling 4 8.510638298 9.360819206260126E-5 "APOA1, ANGPTL4, APOE, APOB" 32 27 9277 42.949074074074076 0.013851447007621642 0.007020614 0.006412161
11
+ GOTERM_CC_DIRECT GO:0016324~apical plasma membrane 7 14.893617021276595 9.972581619651291E-5 "UPK1B, IGFBP2, DPEP1, CD9, SORBS2, ATP1B1, EZR" 47 486 29722 9.108396812888538 0.012881318986070145 0.004321452 0.003756339
12
+ KEGG_PATHWAY mmu04979:Cholesterol metabolism 4 8.510638298 1.6730513704172898E-4 "APOA1, ANGPTL4, APOE, APOB" 22 50 9565 34.78181818181818 0.016592712395294584 0.016730514 0.016563208567131168
13
+ GOTERM_CC_DIRECT GO:0042627~chylomicron 3 6.382978723404255 1.8080415559233024E-4 "APOA1, APOE, APOB" 47 13 29722 145.9345335515548 0.023232536 0.005876135 0.005107717
14
+ GOTERM_CC_DIRECT GO:0034362~low-density lipoprotein particle 3 6.382978723404255 2.4291054284262555E-4 "APOA1, APOE, APOB" 47 15 29722 126.47659574468085 0.031088698 0.006315674 0.005489778
15
+ REACTOME_PATHWAY R-MMU-975634~Retinoid metabolism and transport 4 8.510638298 3.5513433187466735E-4 "RBP1, APOA1, APOE, APOB" 32 42 9277 27.610119047619047 0.0515483 0.010937765361186881 0.009989826
16
+ GOTERM_CC_DIRECT GO:0005902~microvillus 4 8.510638298 4.213662153132071E-4 "FOXA1, STARD10, CALML4, EZR" 47 94 29722 26.909913988229967 0.053315266 0.009129601 0.00793573
17
+ REACTOME_PATHWAY R-MMU-6806667~Metabolism of fat-soluble vitamins 4 8.510638298 4.6536661052986125E-4 "RBP1, APOA1, APOE, APOB" 32 46 9277 25.209239130434785 0.067005305 0.010937765361186881 0.009989826
18
+ REACTOME_PATHWAY R-MMU-3000480~Scavenging by Class A Receptors 3 6.382978723404255 4.782798164994853E-4 "APOA1, APOE, APOB" 32 10 9277 86.971875 0.06879957 0.010937765361186881 0.009989826
19
+ REACTOME_PATHWAY R-MMU-8957275~Post-translational protein phosphorylation 5 10.638297872340425 5.445284931620396E-4 "IGFBP4, APOA1, APOE, APOB, MSLN" 32 115 9277 12.60461956521739 0.077950937 0.010937765361186881 0.009989826
20
+ REACTOME_PATHWAY R-MMU-8963888~Chylomicron assembly 3 6.382978723404255 5.83347485929967E-4 "APOA1, APOE, APOB" 32 11 9277 79.06534091 0.083271695 0.010937765361186881 0.009989826
21
+ REACTOME_PATHWAY R-MMU-8963901~Chylomicron remodeling 3 6.382978723404255 5.83347485929967E-4 "APOA1, APOE, APOB" 32 11 9277 79.06534091 0.083271695 0.010937765361186881 0.009989826
22
+ GOTERM_MF_DIRECT GO:0120020~cholesterol transfer activity 3 6.382978723404255 6.391569962462111E-4 "APOA1, APOE, APOB" 46 24 28924 78.59782608695652 0.10356616276729458 0.064222692 0.060091407
23
+ GOTERM_BP_DIRECT GO:0006869~lipid transport 4 8.510638298 7.119332909555852E-4 "STARD10, APOA1, APOE, APOB" 46 115 29712 22.466540642722116 0.29909381494470433 0.059090463 0.058259874309865387
24
+ GOTERM_MF_DIRECT GO:0005319~lipid transporter activity 3 6.382978723404255 7.511425919989355E-4 "APOA1, APOE, APOB" 46 26 28924 72.55183946488295 0.12058085135426433 0.064222692 0.060091407
25
+ GOTERM_CC_DIRECT GO:0034361~very-low-density lipoprotein particle 3 6.382978723404255 8.02467227887457E-4 "APOA1, APOE, APOB" 47 27 29722 70.26477541371158 0.099101445 0.014287577651110425 0.012419202112119061
26
+ GOTERM_CC_DIRECT GO:0071944~cell periphery 4 8.510638298 8.792355477606416E-4 "KRT19, KRT18, KRT8, EZR" 47 121 29722 20.905222437137333 0.10805508185429236 0.014287577651110425 0.012419202112119061
27
+ GOTERM_CC_DIRECT GO:0034364~high-density lipoprotein particle 3 6.382978723404255 9.915764026191033E-4 "APOA1, APOE, APOB" 47 30 29722 63.238297872340425 0.12099868401909286 0.014322770260053715 0.012449792610662074
28
+ GOTERM_BP_DIRECT GO:0042157~lipoprotein metabolic process 3 6.382978723404255 0.001014208 "APOA1, APOE, APOB" 46 31 29712 62.507713884992995 0.3973066691945585 0.071532922 0.070527439
29
+ GOTERM_BP_DIRECT GO:0033344~cholesterol efflux 3 6.382978723404255 0.001220065 "APOA1, APOE, APOB" 46 34 29712 56.99232737 0.4562056575077009 0.071532922 0.070527439
30
+ GOTERM_BP_DIRECT GO:2000352~negative regulation of endothelial cell apoptotic process 3 6.382978723404255 0.001292764 "FGB, NDNF, ANGPTL4" 46 35 29712 55.36397516 0.475603075 0.071532922 0.070527439
31
+ REACTOME_PATHWAY "R-MMU-174824~Plasma lipoprotein assembly, remodeling, and clearance" 4 8.510638298 0.001340879 "APOA1, ANGPTL4, APOE, APOB" 32 66 9277 17.570075757575758 0.18120790705085665 0.022347988 0.020411162549310708
32
+ GOTERM_CC_DIRECT GO:0042383~sarcolemma 4 8.510638298 0.001509937 "KRT19, KRT8, ANXA8, ATP1B1" 47 146 29722 17.325561060915184 0.17834954313428197 0.018671911 0.016230199410954965
33
+ GOTERM_CC_DIRECT GO:0005856~cytoskeleton 6 12.76595744680851 0.00158742 "ACTA1, KRT19, KRT18, SORBS2, FRMD4B, EZR" 47 552 29722 6.873728029602219 0.186596997 0.018671911 0.016230199410954965
34
+ GOTERM_CC_DIRECT GO:0005829~cytosol 16 34.04255319148936 0.001723561 "BEX4, GSS, EPHX2, STARD10, APOA1, KRT18, S100A16, HNF4A, RBP1, S100A13, PIR, ANXA8, APOE, 2200002D01RIK, APOB, EZR" 47 4363 29722 2.319075787204783 0.2008896600123118 0.018671911 0.016230199410954965
35
+ GOTERM_MF_DIRECT GO:0042803~protein homodimerization activity 7 14.893617021276595 0.001762493 "S100A16, HNF4A, GSS, EPHX2, S100A13, APOA1, APOE" 46 834 28924 5.277551871546241 0.26040470565960694 0.10046207797952855 0.093999605
36
+ REACTOME_PATHWAY R-MMU-8963898~Plasma lipoprotein assembly 3 6.382978723404255 0.001783723 "APOA1, APOE, APOB" 32 19 9277 45.77467105263158 0.23357015038480022 0.026755851 0.024437010768742114
37
+ GOTERM_BP_DIRECT GO:0055088~lipid homeostasis 3 6.382978723404255 0.003279104 "HNF4A, RBP1, APOE" 46 56 29712 34.60248447204969 0.8058184128002348 0.14845398213697608 0.14636727957681778
38
+ GOTERM_BP_DIRECT GO:0031623~receptor internalization 3 6.382978723404255 0.003279104 "CD9, EZR, RAMP1" 46 56 29712 34.60248447204969 0.8058184128002348 0.14845398213697608 0.14636727957681778
39
+ REACTOME_PATHWAY R-MMU-2187338~Visual phototransduction 4 8.510638298 0.003364668 "RBP1, APOA1, APOE, APOB" 32 91 9277 12.743131868131869 0.3947906556263795 0.045881838 0.041905411759735486
40
+ GOTERM_MF_DIRECT GO:0005102~signaling receptor binding 5 10.638297872340425 0.00384581 "FGB, BEX1, HNF4A, APOA1, APOE" 46 413 28924 7.612380251 0.4825805306247958 0.16440839053758885 0.15383241219891353
41
+ KEGG_PATHWAY mmu05418:Fluid shear stress and atherosclerosis 4 8.510638298 0.003935318 "GSTA4, DUSP1, MGST2, CALML4" 22 148 9565 11.75061425061425 0.32585374062731987 0.19676590780176212 0.1947982487237445
42
+ GOTERM_CC_DIRECT GO:0034365~discoidal high-density lipoprotein particle 2 4.255319149 0.004635999 "APOA1, APOE" 47 3 29722 421.58865248226954 0.4534236118352485 0.046359989 0.040297529
43
+ GOTERM_MF_DIRECT GO:0008289~lipid binding 4 8.510638298 0.005375197 "STARD10, S100A13, APOA1, APOE" 46 228 28924 11.031273836765827 0.6021331379269204 0.1838317259373572 0.17200629327472017
44
+ REACTOME_PATHWAY R-MMU-8964043~Plasma lipoprotein clearance 3 6.382978723404255 0.006343615 "APOA1, APOE, APOB" 32 36 9277 24.158854166666668 0.6125658275230705 0.079295184 0.072422935
45
+ GOTERM_BP_DIRECT GO:0055090~acylglycerol homeostasis 2 4.255319149 0.007550301 "APOA1, APOE" 46 5 29712 258.3652173913043 0.9772205273239976 0.26898569617640505 0.2652047727361745
46
+ GOTERM_BP_DIRECT GO:1902995~positive regulation of phospholipid efflux 2 4.255319149 0.007550301 "APOA1, APOE" 46 5 29712 258.3652173913043 0.9772205273239976 0.26898569617640505 0.2652047727361745
47
+ GOTERM_BP_DIRECT GO:0007596~blood coagulation 3 6.382978723404255 0.007561847 "HNF4A, ANXA8, ANGPTL4" 46 86 29712 22.53185035389282 0.9773523846967987 0.26898569617640505 0.2652047727361745
48
+ GOTERM_MF_DIRECT GO:0060228~phosphatidylcholine-sterol O-acyltransferase activator activity 2 4.255319149 0.007755374 "APOA1, APOE" 46 5 28924 251.51304347826084 0.7358764214212566 0.19401925415601676 0.18153848342083442
49
+ GOTERM_MF_DIRECT GO:0005515~protein binding 17 36.17021276595745 0.007942309 "BEX1, IGFBP4, TMEM176A, KRT8, APOA1, SORBS2, FRMD4B, MSLN, ATP1B1, PLAC8, ACTA1, KRT19, KRT18, HNF4A, CD9, APOE, EZR" 46 5596 28924 1.9101687540790004 0.7442505105038177 0.19401925415601676 0.18153848342083442
50
+ REACTOME_PATHWAY R-MMU-114608~Platelet degranulation 4 8.510638298 0.008332442 "FGB, TIMP3, APOA1, CD9" 32 126 9277 9.203373015873016 0.7125583342810163 0.096143567 0.087811124
51
+ GOTERM_BP_DIRECT GO:0060706~cell differentiation involved in embryonic placenta development 2 4.255319149 0.009053658 "KRT19, KRT8" 46 6 29712 215.30434782608694 0.9893089785197537 0.281795117 0.27783414098302806
52
+ GOTERM_BP_DIRECT GO:0034380~high-density lipoprotein particle assembly 2 4.255319149 0.009053658 "APOA1, APOE" 46 6 29712 215.30434782608694 0.9893089785197537 0.281795117 0.27783414098302806
53
+ GOTERM_CC_DIRECT GO:0009986~cell surface 6 12.76595744680851 0.009063279 "FGB, APOA1, CD9, APOE, MSLN, RAMP1" 47 833 29722 4.554979438583944 0.6938248720271083 0.084159018 0.073153608
54
+ REACTOME_PATHWAY R-MMU-76005~Response to elevated platelet cytosolic Ca2+ 4 8.510638298 0.009268231 "FGB, TIMP3, APOA1, CD9" 32 131 9277 8.852099236641221 0.7502776882727377 0.09930247 0.090696256
55
+ GOTERM_BP_DIRECT GO:0008203~cholesterol metabolic process 3 6.382978723404255 0.010299088947230067 "APOA1, APOE, APOB" 46 101 29712 19.185535944898838 0.9942920382543627 0.29201582770897955 0.28791118756045975
56
+ GOTERM_BP_DIRECT GO:0042159~lipoprotein catabolic process 2 4.255319149 0.010554789 "APOE, APOB" 46 7 29712 184.54658385093165 0.9949825428698913 0.29201582770897955 0.28791118756045975
57
+ GOTERM_CC_DIRECT GO:0005882~intermediate filament 3 6.382978723404255 0.010736695583574613 "KRT19, KRT18, KRT8" 47 101 29722 18.783652833368443 0.7542197600415235 0.093051362 0.080883107
58
+ KEGG_PATHWAY mmu00480:Glutathione metabolism 3 6.382978723404255 0.010985222690159977 "GSTA4, GSS, MGST2" 22 73 9565 17.867372353673723 0.6686567991786101 0.2892295889407606 0.286337293
59
+ KEGG_PATHWAY mmu04971:Gastric acid secretion 3 6.382978723404255 0.011569183557630425 "CALML4, ATP1B1, EZR" 22 75 9565 17.39090909090909 0.6876599324114097 0.2892295889407606 0.286337293
60
+ GOTERM_BP_DIRECT GO:0010628~positive regulation of gene expression 5 10.638297872340425 0.011780649958481128 "ACTA1, HNF4A, EPHX2, APOB, EZR" 46 586 29712 5.511203442647277 0.9972971411699633 0.29339524474205136 0.2892712151974844
61
+ GOTERM_BP_DIRECT GO:0032489~regulation of Cdc42 protein signal transduction 2 4.255319149 0.012053696023619767 "APOA1, APOE" 46 8 29712 161.47826086956522 0.9976452919411164 0.29339524474205136 0.2892712151974844
62
+ GOTERM_MF_DIRECT GO:0031995~insulin-like growth factor II binding 2 4.255319149 0.012380337288475646 "IGFBP4, IGFBP2" 46 8 28924 157.19565217391303 0.8811926104424386 0.2234078151622103 0.20903655219855938
63
+ REACTOME_PATHWAY R-MMU-3000471~Scavenging by Class B Receptors 2 4.255319149 0.013301681639626462 "APOA1, APOB" 32 4 9277 144.953125 0.8640203820754289 0.13301681639626461 0.12148869230858834
64
+ GOTERM_BP_DIRECT GO:1905920~positive regulation of CoA-transferase activity 2 4.255319149 0.013550383 "APOA1, APOE" 46 9 29712 143.53623188405797 0.9988949564325077 0.29339524474205136 0.2892712151974844
65
+ GOTERM_BP_DIRECT GO:1903753~negative regulation of p38MAPK cascade 2 4.255319149 0.013550383 "DUSP1, EZR" 46 9 29712 143.53623188405797 0.9988949564325077 0.29339524474205136 0.2892712151974844
66
+ GOTERM_BP_DIRECT GO:0043567~regulation of insulin-like growth factor receptor signaling pathway 2 4.255319149 0.013550383 "IGFBP4, IGFBP2" 46 9 29712 143.53623188405797 0.9988949564325077 0.29339524474205136 0.2892712151974844
67
+ GOTERM_MF_DIRECT GO:0071813~lipoprotein particle binding 2 4.255319149 0.013917302457922998 "APOA1, APOE" 46 9 28924 139.7294685990338 0.9089707687621291 0.2234078151622103 0.20903655219855938
68
+ GOTERM_MF_DIRECT GO:0042802~identical protein binding 9 19.148936170212767 0.014445321808808755 "S100A16, GSS, MGST2, APOA1, SORBS2, ANGPTL4, APOE, EZR, IFI27L2B" 46 2094 28924 2.7025040488351815 0.9169377358336714 0.2234078151622103 0.20903655219855938
69
+ GOTERM_MF_DIRECT GO:0019904~protein domain specific binding 4 8.510638298 0.015141379454204905 "FOXA1, HNF4A, SORBS2, EZR" 46 334 28924 7.530330643061703 0.9263902562792856 0.2234078151622103 0.20903655219855938
70
+ GOTERM_CC_DIRECT GO:0044297~cell body 3 6.382978723404255 0.015853359283249914 "ACTA1, RBP1, EZR" 47 124 29722 15.299588194921068 0.8747505373027604 0.12880854417640555 0.11196434993795251
71
+ KEGG_PATHWAY mmu03320:PPAR signaling pathway 3 6.382978723404255 0.016028100085975338 "ACSL1, APOA1, ANGPTL4" 22 89 9565 14.655260469867212 0.8012671728700713 0.3205620017195068 0.31735638170231173
72
+ GOTERM_MF_DIRECT GO:0005543~phospholipid binding 3 6.382978723404255 0.016248545272126637 "APOA1, APOE, APOB" 46 125 28924 15.090782608695651 0.93927028 0.2234078151622103 0.20903655219855938
73
+ GOTERM_BP_DIRECT GO:0006750~glutathione biosynthetic process 2 4.255319149 0.016537108 "GSS, MGST2" 46 11 29712 117.43873517786561 0.9997566502791231 0.3232287937051377 0.3186854170867924
74
+ GOTERM_BP_DIRECT GO:0090205~positive regulation of cholesterol metabolic process 2 4.255319149 0.016537108 "APOA1, APOE" 46 11 29712 117.43873517786561 0.9997566502791231 0.3232287937051377 0.3186854170867924
75
+ REACTOME_PATHWAY R-MMU-8964026~Chylomicron clearance 2 4.255319149 0.016600252530351765 "APOE, APOB" 32 5 9277 115.96249999999999 0.9174373218398605 0.1556273674720478 0.142139662
76
+ GOTERM_BP_DIRECT GO:0007219~Notch signaling pathway 3 6.382978723404255 0.016875399 "FOXA1, KRT19, SORBS2" 46 131 29712 14.791901759044142 0.9997950380142201 0.3232287937051377 0.3186854170867924
77
+ GOTERM_CC_DIRECT GO:1903561~extracellular vesicle 2 4.255319149 0.016896115321687888 "APOA1, APOE" 47 11 29722 114.97872340425532 0.8908751686921086 0.12920558775408386 0.11230947243239597
78
+ GOTERM_MF_DIRECT GO:0005520~insulin-like growth factor binding 2 4.255319149 0.01698422 "IGFBP4, IGFBP2" 46 11 28924 114.32411067193675 0.9465627982585717 0.2234078151622103 0.20903655219855938
79
+ GOTERM_BP_DIRECT GO:0043066~negative regulation of apoptotic process 5 10.638297872340425 0.018037932391729884 "PLAC8, KRT18, DUSP1, DPEP1, ANGPTL4" 46 666 29712 4.849197023110066 0.9998864306543682 0.3326996418919067 0.3280231409014583
80
+ GOTERM_CC_DIRECT GO:0005576~extracellular region 8 17.02127659574468 0.018787582756394895 "IGFBP4, IGFBP2, TIMP3, APOA1, NDNF, ANGPTL4, APOE, APOB" 47 1780 29722 2.842170690891704 0.9150438890190896 0.13568809768507425 0.11794426952625683
81
+ GOTERM_BP_DIRECT GO:0034374~low-density lipoprotein particle remodeling 2 4.255319149 0.019514991027873532 "APOE, APOB" 46 13 29712 99.37123745819397 0.9999464156436694 0.3470880547100364 0.342209307
82
+ GOTERM_MF_DIRECT GO:0031994~insulin-like growth factor I binding 2 4.255319149 0.02004181 "IGFBP4, IGFBP2" 46 13 28924 96.73578595317726 0.9686317330025632 0.24479639195869982 0.22904925563387118
83
+ GOTERM_MF_DIRECT GO:0044877~protein-containing complex binding 5 10.638297872340425 0.021788815279992987 "KRT19, HNF4A, KRT8, APOE, EZR" 46 687 28924 4.576292639706347 0.9768804842486836 0.24839249419192005 0.23241402965325852
84
+ GOTERM_BP_DIRECT GO:0033700~phospholipid efflux 2 4.255319149 0.022484057 "APOA1, APOE" 46 15 29712 86.12173913043478 0.9999882022037042 0.386105525 0.38067833891367864
85
+ REACTOME_PATHWAY R-MMU-9709957~Sensory Perception 4 8.510638298 0.022959227 "RBP1, APOA1, APOE, APOB" 32 184 9277 6.302309782608696 0.9685956035032179 0.1967871190120001 0.1797322353642934
86
+ GOTERM_CC_DIRECT GO:0030018~Z disc 3 6.382978723404255 0.023215134 "KRT19, KRT8, SORBS2" 47 152 29722 12.48124300111982 0.9528089266310974 0.15884039360273036 0.13806895751621948
87
+ REACTOME_PATHWAY R-MMU-196854~Metabolism of vitamins and cofactors 4 8.510638298 0.023614454 "RBP1, APOA1, APOE, APOB" 32 186 9277 6.234543010752688 0.9715828910914607 0.1967871190120001 0.1797322353642934
88
+ GOTERM_CC_DIRECT GO:0016010~dystrophin-associated glycoprotein complex 2 4.255319149 0.024483538975750146 "KRT19, KRT8" 47 16 29722 79.04787234042553 0.960143581 0.15914300334237597 0.13833199521298833
89
+ GOTERM_BP_DIRECT GO:0090181~regulation of cholesterol metabolic process 2 4.255319149 0.025444330723180475 "EPHX2, APOE" 46 17 29712 75.98976982097187 0.9999974027158138 0.41397828215724164 0.40815931031969005
90
+ GOTERM_MF_DIRECT GO:0015643~toxic substance binding 2 4.255319149 0.026129118460428674 "GSTA4, EPHX2" 46 17 28924 73.97442455242967 0.9891922296548761 0.27925495354583146 0.26129118460428674
91
+ GOTERM_BP_DIRECT GO:0034375~high-density lipoprotein particle remodeling 2 4.255319149 0.026921178849089584 "APOA1, APOE" 46 18 29712 71.76811594202898 0.9999987813972694 0.41397828215724164 0.40815931031969005
92
+ GOTERM_BP_DIRECT GO:0055091~phospholipid homeostasis 2 4.255319149 0.026921178849089584 "HNF4A, APOA1" 46 18 29712 71.76811594202898 0.9999987813972694 0.41397828215724164 0.40815931031969005
93
+ GOTERM_CC_DIRECT GO:0005737~cytoplasm 17 36.17021276595745 0.027089245251105277 "BEX4, SERPINB11, BEX1, DUSP1, KRT8, FRMD4B, CALML4, SPINT2, ACTA1, KRT18, S100A16, HNF4A, S100A13, PIR, ANXA8, APOB, EZR" 47 6408 29722 1.6776701994846868 0.9718492126874074 0.16769532774493742 0.14576593873213792
94
+ GOTERM_BP_DIRECT GO:0072659~protein localization to plasma membrane 3 6.382978723404255 0.027432296 "ATP1B1, EZR, RAMP1" 46 170 29712 11.398465473145778 0.9999990624333748 0.41397828215724164 0.40815931031969005
95
+ GOTERM_BP_DIRECT GO:0043691~reverse cholesterol transport 2 4.255319149 0.029868313177000345 "APOA1, APOE" 46 20 29712 64.59130434782608 0.9999997317657274 0.4374829400631227 0.43133358146785794
96
+ GOTERM_CC_DIRECT GO:0043034~costamere 2 4.255319149 0.030512205131596002 "KRT19, KRT8" 47 20 29722 63.238297872340425 0.9821965167408635 0.18029939395943093 0.15672178090319766
97
+ GOTERM_MF_DIRECT GO:0043295~glutathione binding 2 4.255319149 0.030670321 "GSS, MGST2" 46 20 28924 62.87826086956521 0.9951400898727204 0.30100541018029503 0.28164248905758604
98
+ GOTERM_BP_DIRECT GO:0097284~hepatocyte apoptotic process 2 4.255319149 0.031338606 "KRT18, KRT8" 46 21 29712 61.51552795031056 0.999999874 0.4459035898618767 0.43963586871923993
99
+ GOTERM_MF_DIRECT GO:0008201~heparin binding 3 6.382978723404255 0.031684780018978426 "NDNF, APOE, APOB" 46 179 28924 10.538256011658975 0.9959368239172737 0.30100541018029503 0.28164248905758604
100
+ REACTOME_PATHWAY R-MMU-8964058~HDL remodeling 2 4.255319149 0.032933683 "APOA1, APOE" 32 10 9277 57.981249999999996 0.9931925837731957 0.2576843899493848 0.23535174282043808
101
+ KEGG_PATHWAY mmu04915:Estrogen signaling pathway 3 6.382978723404255 0.034381445 "KRT19, KRT18, CALML4" 22 134 9565 9.733717775 0.9697615004494204 0.5730240852231167 0.5672938443708855
102
+ GOTERM_CC_DIRECT GO:0005886~plasma membrane 16 34.04255319148936 0.03515941 "MGST2, SORBS2, SPINT2, AIG1, MSLN, ATP1B1, KRT19, S100A16, S100A13, DPEP1, CD9, ANXA8, APOE, 2200002D01RIK, EZR, RAMP1" 47 6054 29722 1.6713127947760933 0.990467133 0.19872709854747708 0.17273970873742236
103
+ REACTOME_PATHWAY R-MMU-2173782~Binding and Uptake of Ligands by Scavenger Receptors 3 6.382978723404255 0.035346598 "APOA1, APOE, APOB" 32 89 9277 9.772120786516854 0.9953083704498519 0.2576843899493848 0.23535174282043808
104
+ REACTOME_PATHWAY R-MMU-6809371~Formation of the cornified envelope 3 6.382978723404255 0.036075815 "KRT19, KRT18, KRT8" 32 90 9277 9.663541666666665 0.9958083145176094 0.2576843899493848 0.23535174282043808
105
+ GOTERM_BP_DIRECT GO:0030301~cholesterol transport 2 4.255319149 0.037198019 "APOA1, APOB" 46 25 29712 51.673043478260865 0.999999994 0.5145725915032304 0.5073396434298918
106
+ GOTERM_MF_DIRECT GO:0061629~RNA polymerase II-specific DNA-binding transcription factor binding 3 6.382978723404255 0.039129978779947616 "FOXA1, BEX1, HNF4A" 46 201 28924 9.384815055158988 0.9989144393685045 0.35216980901952855 0.3295156107785062
107
+ REACTOME_PATHWAY R-MMU-9707616~Heme signaling 2 4.255319149 0.042607394659846705 "APOA1, APOB" 32 13 9277 44.60096153846154 0.9984779803905748 0.2905049635898639 0.26532786674540904
108
+ GOTERM_CC_DIRECT GO:0048471~perinuclear region of cytoplasm 5 10.638297872340425 0.042648795 "KRT18, S100A16, S100A13, SORBS2, EZR" 47 854 29722 3.7024764562260204 0.9965384748157866 0.22850752460083787 0.19862577138380522
109
+ GOTERM_BP_DIRECT GO:0048844~artery morphogenesis 2 4.255319149 0.043022771126108805 "APOE, APOB" 46 29 29712 44.54572713643178 0.9999999997049468 0.5790632438054645 0.5709238006194439
110
+ GOTERM_CC_DIRECT GO:0016327~apicolateral plasma membrane 2 4.255319149 0.043943755 "KRT19, KRT8" 47 29 29722 43.61261922230374 0.9970969964780505 0.22850752460083787 0.19862577138380522
111
+ GOTERM_MF_DIRECT GO:0050750~low-density lipoprotein particle receptor binding 2 4.255319149 0.044170053263193135 "APOE, APOB" 46 29 28924 43.36431784107946 0.9995583398030792 0.37765395540030133 0.3533604261055451
112
+ REACTOME_PATHWAY R-MMU-109582~Hemostasis 6 12.76595744680851 0.047050720844276105 "FGB, TIMP3, APOA1, CD9, APOB, ATP1B1" 32 601 9277 2.8942387687188025 0.9992389764514836 0.30685252724527895 0.2802586415506881
113
+ GOTERM_CC_DIRECT GO:0005635~nuclear envelope 3 6.382978723404255 0.048634109 "MGST2, APOE, IFI27L2B" 47 228 29722 8.320828667413213 0.9984682603589135 0.2431705471484874 0.21137132175214673
114
+ GOTERM_MF_DIRECT GO:0004364~glutathione transferase activity 2 4.255319149 0.050110762 "GSTA4, MGST2" 46 33 28924 38.108036890645586 0.9998479181489063 0.4080447774347403 0.38179628297987395
115
+ GOTERM_BP_DIRECT GO:0010875~positive regulation of cholesterol efflux 2 4.255319149 0.050255276 "APOA1, APOE" 46 34 29712 37.994884910485936 0.9999999999933037 0.6586086111263479 0.6493510603675439
116
+ GOTERM_CC_DIRECT GO:0031528~microvillus membrane 2 4.255319149 0.051326889 "DPEP1, EZR" 47 34 29722 37.19899874843554 0.9989403636102837 0.24712946488060694 0.21481253485775834
117
+ GOTERM_BP_DIRECT GO:0015908~fatty acid transport 2 4.255319149 0.051695349432729076 "ACSL1, RBP1" 46 35 29712 36.90931677018633 0.9999999999968595 0.6601098466025406 0.6508311941402558
118
+ REACTOME_PATHWAY "R-MMU-76002~Platelet activation, signaling and aggregation" 4 8.510638298 0.052913301576377775 "FGB, TIMP3, APOA1, CD9" 32 256 9277 4.529785156 0.9996965613979936 0.3307081348523611 0.30204676316515644
119
+ GOTERM_BP_DIRECT GO:0019915~lipid storage 2 4.255319149 0.053133288 "RBP1, APOA1" 46 36 29712 35.88405797101449 0.9999999999985272 0.6615094382679285 0.6522111128304277
120
+ KEGG_PATHWAY mmu04977:Vitamin digestion and absorption 2 4.255319149 0.055614442714328305 "APOA1, APOB" 22 26 9565 33.44405594405594 0.9967270929710323 0.7530944537961819 0.745563509
121
+ GOTERM_BP_DIRECT GO:0043651~linoleic acid metabolic process 2 4.255319149 0.056002773 "ACSL1, EPHX2" 46 38 29712 33.995423340961096 0.9999999999996761 0.6802288021130457 0.6706673530873604
122
+ GOTERM_MF_DIRECT GO:0140678~molecular function inhibitor activity 2 4.255319149 0.056015364 "BEX4, BEX1" 46 37 28924 33.98824912 0.9999476396717799 0.435392145 0.40738446277442064
123
+ GOTERM_BP_DIRECT GO:0032870~cellular response to hormone stimulus 2 4.255319149 0.057434325 "IGFBP2, RAMP1" 46 39 29712 33.12374581939799 0.9999999999998481 0.6810069952673832 0.671434608
124
+ GOTERM_CC_DIRECT GO:0031012~extracellular matrix 3 6.382978723404255 0.060120618 "TIMP3, NDNF, APOE" 47 257 29722 7.381902475370477 0.9996842263232665 0.27913144230531395 0.2426296383115421
125
+ GOTERM_BP_DIRECT GO:0043407~negative regulation of MAP kinase activity 2 4.255319149 0.060291064 "DUSP1, APOE" 46 41 29712 31.507953340402967 0.9999999999999666 0.6982546459975671 0.688439822
126
+ GOTERM_BP_DIRECT GO:0019216~regulation of lipid metabolic process 2 4.255319149 0.064560305 "HNF4A, ANGPTL4" 46 44 29712 29.359683794466402 0.9999999999999966 0.7307052741402671 0.7204343164716287
127
+ GOTERM_BP_DIRECT GO:0045214~sarcomere organization 2 4.255319149 0.067395924 "KRT19, KRT8" 46 46 29712 28.083175803402646 0.9999999999999992 0.7458482279235058 0.735364417
128
+ REACTOME_PATHWAY R-MMU-5686938~Regulation of TLR by endogenous ligand 2 4.255319149 0.067948365 "FGB, APOB" 32 21 9277 27.610119047619047 0.9999720387926998 0.4076901901820616 0.37235704
129
+ GOTERM_BP_DIRECT GO:0001525~angiogenesis 3 6.382978723404255 0.07060693 "NDNF, ANGPTL4, RAMP1" 46 288 29712 6.728260869565218 0.9999999999999999 0.7643967642661483 0.7536522314350982
130
+ GOTERM_MF_DIRECT GO:0005504~fatty acid binding 2 4.255319149 0.070620302 "HNF4A, RBP1" 46 47 28924 26.756706753006476 0.9999963605648977 0.5250465940313758 0.49127166692994223
131
+ REACTOME_PATHWAY R-MMU-5423646~Aflatoxin activation and detoxification 2 4.255319149 0.074181462 "MGST2, DPEP1" 32 23 9277 25.209239130434785 0.9999897114296403 0.42796997150625177 0.3908792406423766
132
+ GOTERM_BP_DIRECT GO:0033209~tumor necrosis factor-mediated signaling pathway 2 4.255319149 0.077254559 "KRT18, KRT8" 46 53 29712 24.37407711 1 0.8185695796217634 0.8070635815146302
133
+ GOTERM_BP_DIRECT GO:0098869~cellular oxidant detoxification 2 4.255319149 0.080052545 "MGST2, APOE" 46 55 29712 23.487747035573122 1 0.8305451583344289 0.8188708287996076
134
+ KEGG_PATHWAY mmu05417:Lipid and atherosclerosis 3 6.382978723404255 0.081151305 "APOA1, CALML4, APOB" 22 217 9565 6.0106828655215745 0.9997889427455483 0.7530944537961819 0.745563509
135
+ GOTERM_BP_DIRECT GO:0097191~extrinsic apoptotic signaling pathway 2 4.255319149 0.082842236 "KRT18, KRT8" 46 57 29712 22.663615560640732 1 0.8419476214133508 0.8301130162930828
136
+ KEGG_PATHWAY mmu04216:Ferroptosis 2 4.255319149 0.086346434 "ACSL1, GSS" 22 41 9565 21.208425720620845 0.9998802831246382 0.7530944537961819 0.745563509
137
+ KEGG_PATHWAY mmu05207:Chemical carcinogenesis - receptor activation 3 6.382978723404255 0.088337907 "GSTA4, EPHX2, MGST2" 22 228 9565 5.720693779904305 0.9999037524423499 0.7530944537961819 0.745563509
138
+ GOTERM_BP_DIRECT GO:0031397~negative regulation of protein ubiquitination 2 4.255319149 0.088396824 "BEX4, BEX1" 46 61 29712 21.177476835352817 1 0.8804323699067076 0.8680568145064125
139
+ KEGG_PATHWAY mmu05208:Chemical carcinogenesis - reactive oxygen species 3 6.382978723404255 0.089000791 "GSTA4, EPHX2, MGST2" 22 229 9565 5.695712584358872 0.9999105047499052 0.7530944537961819 0.745563509
140
+ KEGG_PATHWAY mmu04975:Fat digestion and absorption 2 4.255319149 0.090371334 "APOA1, APOB" 22 43 9565 20.221987315010573 0.9999230136372668 0.7530944537961819 0.745563509
141
+ GOTERM_BP_DIRECT GO:0032526~response to retinoic acid 2 4.255319149 0.092541166 "IGFBP2, RBP1" 46 64 29712 20.184782608695652 1 0.9036372722225943 0.8909355434965738
142
+ GOTERM_CC_DIRECT GO:0062023~collagen-containing extracellular matrix 3 6.382978723404255 0.092985917 "FGB, S100A13, ANGPTL4" 47 331 29722 5.731567782991578 0.9999969109571304 0.4079055725078437 0.35456407456451033
143
+ GOTERM_CC_DIRECT GO:0005654~nucleoplasm 10 21.27659574468085 0.094132055 "PLAC8, BEX4, FOXA1, HNF4A, RBP1, KRT8, S100A13, PIR, DPEP1, 2200002D01RIK" 47 3572 29722 1.7703890781730243 0.9999973791875169 0.4079055725078437 0.35456407456451033
144
+ GOTERM_BP_DIRECT GO:0009612~response to mechanical stimulus 2 4.255319149 0.095293821 "ACTA1, IGFBP2" 46 66 29712 19.573122529644266 1 0.9083058114261483 0.895538461
145
+ GOTERM_BP_DIRECT GO:0051592~response to calcium ion 2 4.255319149 0.096667084 "FGB, S100A16" 46 67 29712 19.280986372485398 1 0.9083058114261483 0.895538461
ckp/Multi_seed0/multi_seed0_fold1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25af2f56d064c28a72cd02cbc8dfbbb3e9d5925ed624d781a94e0edfe7720777
3
+ size 6992494
ckp/Multi_seed0/multi_seed0_fold2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e45bb566c1dbca12289c587f48283766e437c483aef5593258a3645d4a10d11
3
+ size 6992494
ckp/Multi_seed0/multi_seed0_fold3.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20bf5b58bbc7a6ffe5e81e270b8773562b4f965eb97850c34ee78f00680021ff
3
+ size 6992494
ckp/Multi_seed0/multi_seed0_fold4.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8c10ffbe33a618aa58f56dd335b0849ebb221594a2774cdbdd43dfc98684352
3
+ size 6992494
ckp/Multi_seed0/multi_seed0_fold5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d4b2c284762c7f7ed955e5d72b423ffd2affa245fe7ac43d0619d374278d152
3
+ size 6992494
config.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+
2
+ MLM_RNA_CKP = "ckp/MLM/MLM_RNA_ValLoss0.4277.pth"
3
+ MLM_ATAC_CKP = "ckp/MLM/MLM_ATAC_ValLoss0.0019.pth"
4
+ MLM_FLUX_CKP = "ckp/MLM/MLM_Flux_ValLoss0.1001.pth"
5
+ SEED = 6
data/__init__.py ADDED
File without changes
data/create_dataset.py ADDED
@@ -0,0 +1,210 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from torch.utils.data import DataLoader, TensorDataset, random_split
3
+ from torch.utils.data.dataset import Dataset
4
+ from anndata import AnnData
5
+ import pandas as pd
6
+ import random
7
+ import numpy as np
8
+
9
def get_mlm_loaders(train_data, val_data, batch_size=32, batch_key='batch_no', data_dtype=torch.float32):
    """
    Build train/validation DataLoaders of (features, batch-index) pairs for MLM pre-training.

    Accepts either two AnnData objects (features from ``.X``, batch indices
    from ``.obs[batch_key]``) or two ``(pd.DataFrame, batch-index list)``
    tuples. Anything else raises ValueError.
    """
    if isinstance(train_data, AnnData) and isinstance(val_data, AnnData):
        # Densify the (sparse) expression matrices before wrapping in tensors.
        X_train = torch.tensor(train_data.X.toarray().copy(), dtype=data_dtype)
        b_train = torch.tensor(train_data.obs[batch_key], dtype=torch.int32)
        X_val = torch.tensor(val_data.X.toarray().copy(), dtype=data_dtype)
        b_val = torch.tensor(val_data.obs[batch_key], dtype=torch.int32)
    elif (isinstance(train_data, tuple) and isinstance(train_data[0], (pd.DataFrame))
          and isinstance(val_data, (tuple)) and isinstance(val_data[0], (pd.DataFrame))):
        X_train = torch.tensor(train_data[0].values, dtype=data_dtype)
        b_train = torch.tensor(train_data[1], dtype=torch.int32)
        X_val = torch.tensor(val_data[0].values, dtype=data_dtype)
        b_val = torch.tensor(val_data[1], dtype=torch.int32)
    else:
        raise ValueError("Data must be an AnnData object or a tuple of (pd.DataFrame, list).")

    # Only the training split is shuffled.
    mlm_train_loader = DataLoader(TensorDataset(X_train, b_train), batch_size=batch_size, shuffle=True)
    mlm_val_loader = DataLoader(TensorDataset(X_val, b_val), batch_size=batch_size, shuffle=False)
    return mlm_train_loader, mlm_val_loader
38
+
39
+
40
def get_cls_dataset(data, batch_key='batch_no', label_key='label',
                    pct_key='pct', filter_pcts=50.0,
                    data_dtype=torch.float32):
    """
    Build a TensorDataset of (features, batch, label) for the fate classifier.

    Labels are binarised as reprogramming=1, dead-end=0; samples whose
    dominant-fate percentage is not strictly above ``filter_pcts`` are dropped.

    Returns:
        (TensorDataset, kept pct tensor, list of feature names)
    """
    label_to_int = {'reprogramming': 1, 'dead-end': 0}

    if isinstance(data, AnnData):
        X = torch.tensor(data.X.toarray().copy(), dtype=data_dtype)
        y = torch.tensor([label_to_int[i] for i in list(data.obs[label_key])], dtype=torch.float32)
        b = torch.tensor(data.obs[batch_key], dtype=torch.int32)
        pcts = torch.tensor(data.obs[pct_key], dtype=torch.float32)
        feature_names = data.var_names.tolist()
    elif isinstance(data, tuple) and isinstance(data[0], pd.DataFrame):
        X = torch.tensor(data[0].values, dtype=data_dtype)
        y = torch.tensor([label_to_int[i] for i in list(data[1])], dtype=torch.float32)
        b = torch.tensor(data[2], dtype=torch.int32)
        pcts = torch.tensor(data[3], dtype=torch.float32)
        feature_names = data[0].columns.tolist()
    else:
        raise ValueError("Data must be an AnnData object or a tuple of (pd.DataFrame, list, list, list).")

    # Keep only confidently-labelled samples (dominant fate above threshold).
    keep = pcts > filter_pcts
    X, y, b, pcts = X[keep], y[keep], b[keep], pcts[keep]

    return TensorDataset(X, b, y), pcts, feature_names
73
+
74
def get_pair_modalities(adata_rna, adata_atac, flux_df, include_unused_atacs=False, seed=42):
    """
    Pair RNA, ATAC and Flux data based on clone IDs.

    Each RNA cell is matched with one not-yet-used ATAC cell of the same
    clone; cells with no available ATAC sibling get a zero-filled ATAC row.
    Args:
        adata_rna (AnnData): RNA data.
        adata_atac (AnnData): ATAC data. NOTE: its ``.obs`` gains an 'index'
            column as a side effect of this function.
        flux_df (pd.DataFrame): Flux data, indexed by the RNA cell barcodes.
        include_unused_atacs (bool): Include ATAC samples that do not have a paired RNA sample
            (their RNA and Flux parts are zero-filled).
        seed (int): Seed for the pseudo-random sibling choice.
    Returns:
        tuple:
            - rna_data (pd.DataFrame): RNA data matched by clone IDs, with rows representing samples and columns representing gene expressions.
            - atac_data (pd.DataFrame): ATAC data matched by clone IDs, with rows representing samples and columns representing chromatin accessibility features.
            - flux_data (pd.DataFrame): Flux data matched by clone IDs, with rows representing samples and columns representing flux measurements.

        np.array: labels. np.array of labels.
        np.array: batch indices. np.array of batch indices.
        pd.DataFrame: indices. A DataFrame where each row contains the indices of matched RNA and ATAC samples.
                      If no match is found for one modality, the corresponding value is None.
        np.array: pcts. Array of dominant fate percentages for each paired sample.
    """

    # Create a dictionary to map ATAC clone IDs to their indices
    atac_clone_to_indices = {clone_id: [] for clone_id in adata_atac.obs['clone_id'].unique()}
    adata_atac.obs['index'] = adata_atac.obs.index  # NOTE: mutates the caller's adata_atac.obs
    grouped = adata_atac.obs.groupby('clone_id')['index'].apply(list)
    atac_clone_to_indices.update(grouped)

    rna_data, atac_data, flux_data, labels, batch_ind, indices, pcts = [], [], [], [], [], [], []

    # ATAC cells already consumed by an earlier RNA cell (each used at most once).
    used_atac_indices = set()

    for rna_index, row in adata_rna.obs.iterrows():
        clone_id = row['clone_id']
        # Candidate ATAC siblings: same clone, not yet paired.
        sibling_atac_indices = [idx for idx in atac_clone_to_indices.get(clone_id, []) if idx not in used_atac_indices]

        if sibling_atac_indices:
            # NOTE(review): re-seeding inside the loop resets the RNG state for
            # every pick, so the choice depends only on the candidate list.
            # Presumably intended for reproducibility — confirm before changing.
            random.seed(seed)
            atac_index = random.choice(sibling_atac_indices)
            # atac_index = sibling_atac_indices[0]

            used_atac_indices.add(atac_index)

            # Densify single-row expression vectors (sparse-aware).
            rna_sample = adata_rna[rna_index].X.toarray().flatten() if hasattr(adata_rna[rna_index].X, 'toarray') else adata_rna[rna_index].X
            atac_sample = adata_atac[atac_index].X.toarray().flatten() if hasattr(adata_atac[atac_index].X, 'toarray') else adata_atac[atac_index].X
        else:
            rna_sample = adata_rna[rna_index].X.toarray().flatten() if hasattr(adata_rna[rna_index].X, 'toarray') else adata_rna[rna_index].X
            atac_sample = np.zeros(adata_atac.shape[1]) # Fill with zeros if no ATAC pair is found

        # Flux is always keyed by the RNA barcode; raises KeyError if missing.
        flux_sample = flux_df.loc[rna_index].values

        label = row['label']
        bt = row['batch_no']
        pct = row['pct']

        rna_data.append(rna_sample)
        atac_data.append(atac_sample)
        flux_data.append(flux_sample)
        labels.append(label)
        batch_ind.append(bt)
        pcts.append(pct)
        indices.append((rna_index, atac_index) if sibling_atac_indices else (rna_index, None))


    if include_unused_atacs:
        # Append the leftover ATAC cells with zero-filled RNA and Flux rows.
        all_atac_indices = set(adata_atac.obs.index)
        unused_atac_indices = sorted(list(all_atac_indices - used_atac_indices))
        unused_atac_samples = adata_atac[list(unused_atac_indices)]

        for atac_index in unused_atac_indices:
            atac_sample = unused_atac_samples[atac_index].X.toarray().flatten() if hasattr(unused_atac_samples[atac_index].X, 'toarray') else unused_atac_samples[atac_index].X
            rna_sample = np.zeros(adata_rna.shape[1]) # Fill with zeros for RNA
            flux_sample = np.zeros(flux_df.shape[1]) # Fill with zeros for flux

            label = adata_atac.obs.loc[atac_index, 'label']
            bt = adata_atac.obs.loc[atac_index, 'batch_no']
            pct = adata_atac.obs.loc[atac_index, 'pct']

            rna_data.append(rna_sample)
            atac_data.append(atac_sample)
            flux_data.append(flux_sample)
            labels.append(label)
            batch_ind.append(bt)
            pcts.append(pct)
            indices.append((None, atac_index))

    # All three frames share the same (rna_index, atac_index) tuple index.
    rna_data = pd.DataFrame(rna_data, columns=adata_rna.var_names, index=indices)
    atac_data = pd.DataFrame(atac_data, columns=adata_atac.var_names, index=indices)
    flux_data = pd.DataFrame(flux_data, columns=flux_df.columns, index=indices)

    X_i = (rna_data, atac_data, flux_data)
    y_i = np.array(labels)
    b_i = np.array(batch_ind)
    indices = pd.DataFrame(np.array(indices), columns=["RNA", "ATAC"])
    pcts = np.array(pcts)

    return X_i, y_i, b_i, indices, pcts
170
+
171
class MultiModalDataset(Dataset):
    """
    Torch dataset pairing RNA, ATAC and Flux modalities per sample.

    Each item is ``((rna, atac, flux), batch_no, label)``; RNA is stored as
    int32 counts, ATAC/Flux as float32.

    Args:
        X (tuple): (RNA, ATAC, Flux) data, either all DataFrames or array-likes.
        batch_no (list): Batch index per sample.
        labels (list): Label per sample.
        df_indics: Optional table of (RNA, ATAC) source indices.
        pcts: Optional dominant-fate percentages per sample.
        label_names: Optional human-readable label names.
    """

    def __init__(self, X, batch_no, labels, df_indics=None, pcts=None, label_names=None):
        rna, atac, flux = X
        if isinstance(rna, pd.DataFrame):
            # Unwrap DataFrames to their underlying numpy arrays.
            rna, atac, flux = rna.values, atac.values, flux.values
        self.rna_data = torch.tensor(rna, dtype=torch.int32)
        self.atac_data = torch.tensor(atac, dtype=torch.float32)
        self.flux_data = torch.tensor(flux, dtype=torch.float32)
        self.batch_no = torch.tensor(batch_no, dtype=torch.int32)
        self.labels = torch.tensor(labels, dtype=torch.float32)
        self.df_indics = df_indics
        self.pcts = pcts
        self.label_names = label_names

    def __len__(self):
        return len(self.labels)

    def get_df_indices(self):
        return self.df_indics

    def get_pcts(self):
        return self.pcts

    def get_label_names(self):
        return self.label_names

    def __getitem__(self, idx):
        return (
            (self.rna_data[idx], self.atac_data[idx], self.flux_data[idx]),
            self.batch_no[idx],
            self.labels[idx],
        )
210
+
data/datasets/atac_labelled.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07fb45d79bff16f7fad9d7f9009053629be37d9bfc71dec9bd65a26fc7e74660
3
+ size 2869395
data/datasets/clones.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d64e578c90f4033771f50132897c5f28c6dc7da73bd249cca000adb345bf0a3
3
+ size 5572281
data/datasets/flux_labelled_11nov.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b439720848f7b97ee08662603d8cf0cddb9502b973a151280d63b30427075bc3
3
+ size 4726079
data/datasets/metabolic_model_metadata.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1cc3192a8fe075f24e2435a3af0d963928af4d32b6576e179e687089a451b257
3
+ size 16995
data/datasets/rna_labelled.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3190a03dd53b4022952e647836021d5752bc5d6c1139eb3fa1f68c6a6b407b8
3
+ size 425798344
data/datasets/rna_labelled_all.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:437cc1a740d127f89c8e17f380e35881cb71b8189a14b68000ff9e54c0d531ab
3
+ size 326163366
data/load_data.py ADDED
@@ -0,0 +1,291 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import anndata as ad
3
+ import pandas as pd
4
+ from sklearn.preprocessing import StandardScaler
5
+ from . import preprocess_data
6
+
7
def load_clones(data_path):
    """
    Load clone metadata keyed by cell barcode and normalise its column names.

    Returns a DataFrame indexed by ``cell.bc`` with columns: assay,
    day3_day21, cell_type, label, pct, clone_id, clone_size, cells_RNA,
    cells_ATAC, cells_RNA_D3, cells_ATAC_D3.
    """
    keep_cols = ["assay", 'state/fate', 'cell_type',
                 'most_dominant_fate', 'most_dominant_fate_pct',
                 "clone_id", "clone.size (RNA & ATAC)", 'clone.size (RNA)', 'clone.size (ATAC)',
                 '# of D3 cells (RNA)', '# of D3 cells (ATAC)']
    # Map the raw CSV headers to the snake_case names used downstream.
    rename_map = {"clone.size (RNA & ATAC)": "clone_size",
                  "clone.size (RNA)": "cells_RNA",
                  'clone.size (ATAC)': "cells_ATAC",
                  '# of D3 cells (ATAC)': "cells_ATAC_D3",
                  '# of D3 cells (RNA)': "cells_RNA_D3",
                  'most_dominant_fate': 'label',
                  'most_dominant_fate_pct': 'pct',
                  'state/fate': 'day3_day21'}
    df_clone = pd.read_csv(data_path, index_col=["cell.bc"])
    return df_clone[keep_cols].rename(columns=rename_map)
22
+
23
def add_clone_info(adata, clone_path, split=False):
    """
    Adds clone information to the given AnnData object.
    Parameters:
        adata (AnnData): The AnnData object to which clone information will be added.
        clone_path (str): The file path to the clone data.
        split (bool): Whether to split the data into labelled and unlabelled. Default is False.
    Returns:
        AnnData: The modified AnnData object with clone information added
            (only cells present in the clone table are kept).
        When ``split`` is True, returns (adata_labelled, adata_unlabelled)
        instead, where labelled means label is 'reprogramming' or 'dead-end'.
    """

    df_clone = load_clones(clone_path)
    # Inner join: drops cells without clone metadata.
    filtered_obs = adata.obs.join(df_clone, how='inner')

    if split:
        # Labelled = cells with a definitive fate call; everything else is unlabelled.
        filtered_obs = filtered_obs[(filtered_obs.label=='reprogramming') | (filtered_obs.label=='dead-end')]
        adata_labelled = adata[filtered_obs.index].copy()
        adata_labelled.obs = filtered_obs
        adata_unlabelled = adata[~adata.obs.index.isin(adata_labelled.obs.index)].copy()
        return adata_labelled, adata_unlabelled

    # NOTE(review): this assigns .obs on an AnnData view (no .copy() as in the
    # split branch) — anndata resolves it by copying implicitly; confirm intended.
    adata = adata[filtered_obs.index]
    adata.obs = filtered_obs
    return adata
47
+
48
def load_rna(data_path, return_raw=True, clone_info=False, clone_path=None):
    """
    Load RNA data from a given file path.
    Parameters:
    - data_path (str): The file path to the RNA data (.h5ad).
    - return_raw (bool): Whether to restore the raw counts into ``.X``. Default is True.
    - clone_info (bool): Whether to add clone information or not. Default is False.
    - clone_path (str): The file path to the clone information. Required if clone_info is True.
    Returns:
    - adata_RNA (AnnData): Annotated data object containing the loaded RNA data,
      with a 'batch_no' obs column and Seurat-style metadata columns
      removed/renamed to this project's conventions.
    """

    # Load RNA data
    adata_RNA = ad.read_h5ad(data_path)
    # Barcodes use '-' everywhere else in the project (clone table, ATAC, flux).
    adata_RNA.obs.index = adata_RNA.obs.index.str.replace('_', '-')

    # Restore raw counts if necessary
    if return_raw:
        adata_RNA.X = adata_RNA.raw.X.copy() # Copy raw counts to the expression matrix
    
    # Add batch information: replicate tag in the barcode ('r1'/'r2'), else 0.
    adata_RNA.obs['batch_no'] = adata_RNA.obs.index.to_series().apply(lambda idx: 1 if 'r1' in idx else (2 if 'r2' in idx else 0))

    # Add clone information
    if clone_info:
        if clone_path is None:
            raise ValueError("clone_path must be provided if add_clone_info is True.")
        else:
            adata_RNA = add_clone_info(adata_RNA, clone_path)

    # Remove unwanted columns (Seurat/label-transfer leftovers); only the ones
    # actually present are dropped.
    columns_to_remove = ['orig.ident', 'old_ident', 'cc_score_diff', 'snn_res_0_8',
                         'seurat_clusters',
                         'predicted__cca_co_id', 'prediction_score_fib_1', 'prediction_score_fib_0',
                         'prediction_score_fib_2',
                         'prediction_score_early_0', 'prediction_score_transition_0',
                         'prediction_score_transition_1',
                         'prediction_score_early_1', 'prediction_score_early_2', 'prediction_score_iep_1',
                         'prediction_score_transition_2', 'prediction_score_iep_2', 'prediction_score_dead_end_1',
                         'prediction_score_dead_end_0', 'prediction_score_iep_0', 'prediction_score_dead_end_2',
                         'prediction_score_max', 'snn_res_0_2', 'cellranger_ident', 'metadata_fate_coarse_rev1',
                         'md_fate_rev1', 'md_fate_coarse_rev1', 'metadata_fate_rev1', 'day3_day21', 'sample_id',
                         'replicate_id', 'cell_type', 'assay']
    intersection = set(columns_to_remove).intersection(adata_RNA.obs.columns)
    if intersection:
        adata_RNA.obs.drop(intersection, axis=1, inplace=True)

    # Rename columns to scanpy-style names.
    columns_to_rename = {'S.Score': 'S_score',
                         'G2M.Score': 'G2M_score',
                         'nCount_RNA': 'total_counts',
                         'nFeature_RNA': 'n_genes_by_counts',
                         'Phase': 'phase',
                         'percent.mt': 'pct_counts_mt',
                         }
    intersection = set(columns_to_rename.keys()).intersection(adata_RNA.obs.columns)
    if intersection:
        adata_RNA.obs.rename(columns=columns_to_rename, inplace=True)

    return adata_RNA
108
+
109
def load_atac(data_path, clone_info=False, clone_path=None):
    """
    Load ATAC data from a given file path.
    Parameters:
    - data_path (str): The file path to the ATAC data (.h5ad).
    - clone_info (bool): Whether to add clone information or not. Default is False.
    - clone_path (str): The file path to the clone information. Required if clone_info is True.
    Returns:
    - adata_atac (AnnData): Annotated data object containing the loaded ATAC data.
      NOTE: when ``clone_info`` is True this returns a
      (adata_atac_labelled, adata_atac_unlabelled) tuple instead.
    """
    adata_atac = ad.read_h5ad(data_path)
    # Drop the Crebzf_122 motif feature explicitly.
    adata_atac = adata_atac[:,adata_atac.var['name'] != "Crebzf_122"]
    adata_atac.obs.index = adata_atac.obs.index.str.replace('_', '-')

    # Materialise the view before mutating obs below.
    adata_atac = adata_atac.copy()
    # Replicate tag in the barcode ('r1'/'r2') defines the batch, else 0.
    adata_atac.obs['batch_no'] = adata_atac.obs.index.to_series().apply(lambda idx: 1 if 'r1' in idx else (2 if 'r2' in idx else 0))

    # ArchR QC leftovers we do not need downstream.
    columns_to_remove = ['BlacklistRatio', 'CellNames', 'DoubletEnrichment',
                         'DoubletScore', 'NucleosomeRatio', 'PassQC', 'PromoterRatio',
                         'ReadsInBlacklist', 'ReadsInPromoter', 'ReadsInTSS', 'TSSEnrichment',
                         'nDiFrags', 'nFrags', 'nMonoFrags', 'nMultiFrags',
                         'origin']

    intersection = set(columns_to_remove).intersection(adata_atac.obs.columns)
    if intersection:
        adata_atac.obs.drop(intersection, axis=1, inplace=True)

    if clone_info:
        if clone_path is None:
            raise ValueError("clone_path must be provided if add_clone_info is True.")
        else:
            adata_atac_labelled, adata_atac_unlabelled = add_clone_info(adata_atac, clone_path, split=True)
            return adata_atac_labelled, adata_atac_unlabelled
    else:
        # warning that without clone info, the data will be returned as a single object
        print("Warning: Clone information not provided. Returning a single object.")

    return adata_atac
147
+
148
def concat_fluxes(directory, prefix):
    """
    Concatenate every ``<prefix>*.csv`` in ``directory`` into one DataFrame.

    Each file is read with its first column as index and the frames are
    stacked row-wise. Filenames are sorted so the row order is deterministic:
    ``os.listdir`` makes no ordering guarantee, so unsorted iteration made the
    concatenation order platform-dependent.

    Returns an empty DataFrame when no file matches.
    """
    df_list = []
    # sorted() pins a reproducible concatenation order.
    for filename in sorted(os.listdir(directory)):
        if filename.startswith(prefix) and filename.endswith('.csv'):
            file_path = os.path.join(directory, filename)
            df_list.append(pd.read_csv(file_path, index_col=0))

    if df_list:
        return pd.concat(df_list, axis=0)
    return pd.DataFrame()
162
+
163
def load_flux(data_path, prefix='flux_un', clone_info=False, clone_path=None, scale=True, flux_metadata_path=None):
    """
    Load Flux data from a given file path.
    Parameters:
    - data_path (str): The file path to the labelled Flux CSV; unlabelled
      fluxes are concatenated from ``<prefix>*.csv`` files in the same directory.
    - prefix (str): The prefix of the unlabelled Flux files. Default is 'flux_un'.
    - clone_info (bool): Whether to add clone information or not. Default is False.
    - clone_path (str): The file path to the clone information. Required if clone_info is True.
    - scale (bool): Standardise fluxes and shift them to be non-negative.
    - flux_metadata_path (str): Optional path to the reaction metadata CSV used
      to rename flux columns to human-readable reaction names.
    Returns:
    - adata_Flux_labelled (pd.DataFrame): Labelled Flux data.
    - adata_Flux_unlabelled (pd.DataFrame): Unlabelled Flux data.
    When ``clone_info`` is True, additionally returns:
    - bi_labelled / bi_unlabelled: batch indices derived from barcodes.
    - labels, pcts: fate labels and dominant-fate percentages.
    """

    adata_Flux_labelled = pd.read_csv(data_path, index_col=0)
    directory = os.path.dirname(data_path)
    adata_Flux_unlabelled = concat_fluxes(directory, prefix)

    # Barcodes use '-' everywhere else in the project.
    adata_Flux_labelled.index = adata_Flux_labelled.index.str.replace('_', '-')
    if not adata_Flux_unlabelled.empty:
        adata_Flux_unlabelled.index = adata_Flux_unlabelled.index.str.replace('_', '-')
    else:
        # Keep schema consistent when unlabeled files are not shipped.
        adata_Flux_unlabelled = pd.DataFrame(columns=adata_Flux_labelled.columns)

    if scale:
        std_sc = StandardScaler()
        if not adata_Flux_unlabelled.empty:
            # Scaler is fit on the (larger) unlabelled set and applied to both;
            # each matrix is then shifted by its own minimum to be non-negative.
            scaled_unl = std_sc.fit_transform(adata_Flux_unlabelled.values)
            scaled_unl += abs(scaled_unl.min())
            adata_Flux_unlabelled = pd.DataFrame(
                scaled_unl,
                index=adata_Flux_unlabelled.index,
                columns=adata_Flux_unlabelled.columns,
            )
            scaled_la = std_sc.transform(adata_Flux_labelled.values)
            scaled_la += abs(scaled_la.min())
        else:
            # Fallback for minimal/portable app packages: scale from labelled only.
            scaled_la = std_sc.fit_transform(adata_Flux_labelled.values)
            scaled_la += abs(scaled_la.min())

        adata_Flux_labelled = pd.DataFrame(
            scaled_la,
            index=adata_Flux_labelled.index,
            columns=adata_Flux_labelled.columns,
        )
    # Rename flux columns (reaction IDs in 'X') to reaction names; raises
    # IndexError if a column is missing from the metadata table.
    if flux_metadata_path is not None:
        md = pd.read_csv(flux_metadata_path)[['X', 'rxnName']]
    else:
        md = pd.read_csv("data/datasets/flux/metabolic_model_metadata.csv")[['X', 'rxnName']]
    dict_rename = {}
    for col in adata_Flux_labelled.columns:
        reaction = md[md['X'] == col]['rxnName'].str.replace(" -> ", "→").values
        dict_rename[col] = reaction[0]
    adata_Flux_labelled = adata_Flux_labelled.rename(columns=dict_rename)
    adata_Flux_unlabelled = adata_Flux_unlabelled.rename(columns=dict_rename)


    if clone_info:
        if clone_path is None:
            raise ValueError("clone_path must be provided if add_clone_info is True.")
        else:
            df_clone = load_clones(clone_path)
            filtered_obs = adata_Flux_labelled.join(df_clone, how='inner')
            labels = filtered_obs['label']
            pcts = filtered_obs['pct']
            # NOTE(review): batch indices are computed from the PRE-filter index
            # while the labelled frame is subset below — lengths may differ;
            # confirm callers align these by position or index.
            bi_labelled = adata_Flux_labelled.index.map(lambda x: 2 if 'r2' in x else 1 if 'r1' in x else 0)
            bi_unlabelled = adata_Flux_unlabelled.index.map(lambda x: 2 if 'r2' in x else 1 if 'r1' in x else 0)
            adata_Flux_labelled = adata_Flux_labelled.loc[filtered_obs.index]
            return adata_Flux_labelled, adata_Flux_unlabelled, bi_labelled, bi_unlabelled, labels, pcts
    else:
        print("Warning: Clone information not provided. Returning raw data.")
        return adata_Flux_labelled, adata_Flux_unlabelled
239
+
240
+
241
def load_processed_rna(verbose=True, return_raw=True, return_all_features=False):
    """
    Load, filter and DEG-select the labelled/unlabelled RNA datasets.

    Pipeline: load both splits -> filter cells/genes -> compute DEGs on the
    labelled split -> restrict both splits to genes present in both splits
    AND in the DEG list -> optionally restore raw counts for those genes.

    Returns (adata_RNA_labelled, adata_RNA_unlabelled, deg_list), plus the
    unfiltered labelled AnnData when ``return_all_features`` is True.
    """

    if verbose:
        print('Loading RNA data...')
    # Load RNA data labelled
    adata_RNA_labelled = load_rna("data/datasets/rna/all_rna_d3_labelled.h5ad",
                                  return_raw=True,
                                  clone_info=True,
                                  clone_path="data/datasets/clone/clones.csv")
    # Load RNA data unlabelled
    adata_RNA_unlabelled = load_rna("data/datasets/rna/all_rna_d3_unlabelled.h5ad",
                                    return_raw=True,
                                    clone_info=False)

    if verbose:
        print('Filtering RNA data...')
    adata_RNA_labelled = preprocess_data.filter_rna_cells_genes(adata_RNA_labelled.copy())
    adata_RNA_unlabelled = preprocess_data.filter_rna_cells_genes(adata_RNA_unlabelled.copy())

    if verbose:
        print('Feature Selection by DEGs...')
    deg_list = preprocess_data.get_degs(adata_RNA_labelled, method='t-test')

    if verbose:
        print('Filtering Genes...')
    # Keep only genes shared by both splits and present in the DEG list.
    genes_intersection = set(adata_RNA_labelled.var_names).intersection(set(adata_RNA_unlabelled.var_names)).intersection(set(deg_list.gene))
    adata_RNA_labelled_all = adata_RNA_labelled.copy()
    adata_RNA_labelled = adata_RNA_labelled[:, list(genes_intersection)]
    adata_RNA_unlabelled = adata_RNA_unlabelled[:, list(genes_intersection)]


    if return_raw:
        # Map selected genes back to their positions in the raw matrix.
        # NOTE(review): gene_indices come from the LABELLED raw var_names but are
        # also applied to the unlabelled raw matrix — assumes both .raw objects
        # share the same gene ordering; confirm.
        gene_indices = [adata_RNA_labelled.raw.var_names.get_loc(gene) for gene in adata_RNA_labelled.var_names]
        adata_RNA_labelled.X = adata_RNA_labelled.raw.X[:, gene_indices].toarray().copy()
        adata_RNA_unlabelled.X = adata_RNA_unlabelled.raw.X[:, gene_indices].copy()

    if return_all_features:
        return adata_RNA_labelled, adata_RNA_unlabelled, deg_list, adata_RNA_labelled_all
    return adata_RNA_labelled, adata_RNA_unlabelled, deg_list
280
+
281
if __name__ == '__main__':
    # Smoke test: load the labelled/unlabelled ATAC splits and print their metadata.
    adata_ATAC_labelled, adata_ATAC_unlabelled = load_atac("data/datasets/atac/all_atac_d3_motif.h5ad",
                                                           clone_info=True,
                                                           clone_path="data/datasets/clone/clones.csv")
    print(adata_ATAC_labelled.obs.columns, adata_ATAC_labelled.obs.shape, adata_ATAC_labelled.obs.index[:10])
    print(adata_ATAC_unlabelled.obs.columns, adata_ATAC_unlabelled.obs.shape, adata_ATAC_unlabelled.obs.index[:10])
    print("Data loaded successfully!")
288
+
289
+
290
+
291
+
data/preprocess_data.py ADDED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import scanpy as sc
2
+ import pandas as pd
3
+ import numpy as np
4
+ from scipy.stats import ttest_ind
5
+ from statsmodels.stats.multitest import multipletests
6
+
7
def filter_rna_cells_genes(adata, min_genes=100, min_cells=10):
    """
    Filter cells and genes in RNA data.
    Both filters operate in place on ``adata``; the same object is returned
    for call-chaining convenience.
    Parameters:
    - adata (AnnData): Annotated data object containing the RNA data.
    - min_genes (int): The minimum number of genes to keep a cell. Default is 100.
    - min_cells (int): The minimum number of cells to keep a gene. Default is 10.
    Returns:
    - adata (AnnData): The same object, filtered in place.
    """
    # Cells first, then genes — gene counts are recomputed on the kept cells.
    sc.pp.filter_cells(adata, min_genes=min_genes)
    sc.pp.filter_genes(adata, min_cells=min_cells)
    return adata
20
+
21
def get_degs(adata, method='t-test', p_val=0.05,
             batch_remove=True, batch_key='batch_no', label_key='label',
             reference='dead-end', target='reprogramming'):
    """
    Get differentially expressed genes (DEGs) from the RNA data.

    Normalises and log1p-transforms ``adata`` IN PLACE (and optionally removes
    batch effects with ComBat), then ranks genes of ``target`` vs ``reference``
    with ``scanpy.tl.rank_genes_groups``.

    Returns a DataFrame with per-gene group means/stds, pval, pval_adj,
    log_fc, group and pval_adj_log, filtered to pval_adj < ``p_val`` and
    log_fc < -1 or 1 < log_fc < 7.
    """

    sc.pp.normalize_total(adata, target_sum=1e4, exclude_highly_expressed=False)
    sc.pp.log1p(adata)
    if batch_remove:
        sc.pp.combat(adata, key=batch_key)

    # Rank ALL genes so downstream filtering can see the full distribution.
    sc.tl.rank_genes_groups(adata, groupby=label_key, method=method, n_genes=adata.shape[1], use_raw=False, reference=reference)

    de_results = adata.uns['rank_genes_groups']
    gene_list = list(pd.DataFrame(de_results['names'])[target])

    # Compute mean and std for each gene in both groups.
    # These are Series indexed by gene names (from adata.var_names).
    group_a_mean_expression = adata[adata.obs[label_key] == reference].to_df().mean()
    group_a_std_expression = adata[adata.obs[label_key] == reference].to_df().std()
    group_b_mean_expression = adata[adata.obs[label_key] == target].to_df().mean()
    group_b_std_expression = adata[adata.obs[label_key] == target].to_df().std()

    # Reorder (or reindex) the computed series so that they match the order in gene_list.
    group_a_mean_expression = group_a_mean_expression.reindex(gene_list)
    group_a_std_expression = group_a_std_expression.reindex(gene_list)
    group_b_mean_expression = group_b_mean_expression.reindex(gene_list)
    group_b_std_expression = group_b_std_expression.reindex(gene_list)

    # Create the DEG DataFrame.
    df = pd.DataFrame({
        'gene': gene_list,
        'mean_exp_de': group_a_mean_expression.values, # 'dead-end' (reference)
        'mean_exp_re': group_b_mean_expression.values, # 'reprogramming' (target)
        'std_exp_de': group_a_std_expression.values,
        'std_exp_re': group_b_std_expression.values,
        'pval': de_results['pvals'][target],
        'pval_adj': de_results['pvals_adj'][target],
        'log_fc': de_results['logfoldchanges'][target],
    })

    # Sign of the fold change decides which group the gene characterises.
    df['group'] = df.apply(lambda row: reference if row['log_fc'] < 0 else target, axis=1)

    df.sort_values(by='pval_adj', inplace=True)
    df.reset_index(drop=True, inplace=True)
    df['pval_adj_log'] = -np.log10(df['pval_adj'])

    # Keep significant genes with |log_fc| > 1; the upper bound of 7 drops
    # extreme fold changes (presumably near-zero-reference artefacts — confirm).
    df = df[(df.pval_adj < p_val) & ((df.log_fc < -1) | ((df.log_fc > 1) & (df.log_fc < 7)))]
    return df
71
+
72
def get_flux_degs(adata_Flux_labelled, labels):
    """
    Differential-flux analysis between dead-end and reprogramming cells.

    For every flux column, compares the two groups with a two-sample t-test,
    computes the log2 fold change (reprogramming vs dead-end), BH-adjusts the
    p-values, and returns a DataFrame sorted by adjusted p-value.
    """
    dead_end = adata_Flux_labelled[labels.values == "dead-end"]
    reprogramming = adata_Flux_labelled[labels.values == "reprogramming"]

    features = []
    log_fold_changes = []
    p_values = []
    mean_des, mean_res = [], []
    std_des, std_res = [], []

    for feature in adata_Flux_labelled.columns:
        de_col = dead_end[feature]
        re_col = reprogramming[feature]

        # Epsilon guards the log2 against zero group means.
        lfc = np.log2(re_col.mean() + 1e-10) - np.log2(de_col.mean() + 1e-10)
        _, p_value = ttest_ind(de_col, re_col, nan_policy="omit")

        features.append(feature)
        mean_des.append(de_col.mean())
        mean_res.append(re_col.mean())
        std_des.append(de_col.std())
        std_res.append(re_col.std())
        log_fold_changes.append(lfc)
        p_values.append(p_value)

    # Benjamini-Hochberg correction across all flux features.
    adjusted_p_values = multipletests(p_values, method="fdr_bh")[1]

    df_flux_degs = pd.DataFrame({
        "feature": features,
        "mean_de": mean_des,
        "mean_re": mean_res,
        "mean_diff": np.array(mean_res) - np.array(mean_des),
        "std_de": std_des,
        "std_re": std_res,
        "log_fc": log_fold_changes,
        "pval": p_values,
        "pval_adj": adjusted_p_values,
        'pval_adj_log': -np.log10(adjusted_p_values)
    })
    # The group with the larger mean flux "owns" the feature.
    df_flux_degs['group'] = df_flux_degs.apply(
        lambda row: 'dead-end' if row['mean_de'] > row['mean_re'] else 'reprogramming', axis=1)
    return df_flux_degs.sort_values(by="pval_adj").reset_index(drop=True)
117
+
118
def get_atac_degs(adata, method='t-test', label_key='label',
                  reference='dead-end', target='reprogramming'):
    """
    Get differentially expressed genes (DEGs) from the ATAC data.

    Ranks all features of ``target`` vs ``reference`` with
    ``scanpy.tl.rank_genes_groups`` (writes into ``adata.uns``), then builds a
    per-feature table of group means/stds, p-values, shifted log2 fold
    changes and group assignment, sorted by adjusted p-value.
    """

    sc.tl.rank_genes_groups(adata, groupby=label_key, method=method,
                            n_genes=adata.shape[1], use_raw=False, reference=reference)

    # Per-group summary statistics, indexed by feature name.
    group_a_mean_expression = adata[adata.obs[label_key] == reference].to_df().mean()
    group_a_std_expression = adata[adata.obs[label_key] == reference].to_df().std()
    group_b_mean_expression = adata[adata.obs[label_key] == target].to_df().mean()
    group_b_std_expression = adata[adata.obs[label_key] == target].to_df().std()
    de_results = adata.uns['rank_genes_groups']
    features = list(pd.DataFrame(de_results['names'])[target])

    # Reindex the mean and std Series to this feature list
    mean_de = group_a_mean_expression.reindex(features)
    mean_re = group_b_mean_expression.reindex(features)
    std_de = group_a_std_expression.reindex(features)
    std_re = group_b_std_expression.reindex(features)

    min_val = min(mean_de.min(), mean_re.min())
    # Determine a shift value so that the smallest value becomes a small positive number.
    shift = 0
    if min_val <= 0:
        shift = abs(min_val) + 1e-10
    # NOTE(review): this dict mixes lists ('feature', 'pval', ...) with Series
    # indexed by feature name ('log_fc', means, stds); pandas aligns the Series
    # by index — assumes feature names are unique, confirm for the motif matrix.
    df = pd.DataFrame({
        'feature': list(pd.DataFrame(de_results['names'])[target]),
        'pval': de_results['pvals'][target],
        'pval_adj': de_results['pvals_adj'][target],
        'log_fc': np.log2(mean_re + shift) - np.log2(mean_de + shift),
        'mean_de': mean_de,
        'mean_re': mean_re,
        'mean_diff': mean_re - mean_de,
        'std_de': std_de,
        'std_re': std_re,

    })

    # The group with the larger mean accessibility "owns" the feature.
    df['group'] = df.apply(lambda row: 'dead-end' if row['mean_de'] > row['mean_re'] else 'reprogramming', axis=1)

    df.sort_values(by='pval_adj', inplace=True)
    df.reset_index(drop=True, inplace=True)
    df['pval_adj_log'] = -np.log10(df['pval_adj'])
    return df
interpretation/__init__.py ADDED
File without changes
interpretation/attentions.py ADDED
@@ -0,0 +1,244 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import torch
3
+ from torch.utils.data import DataLoader, Subset
4
+ from utils.helpers import create_multimodal_model
5
+ from models import SingleTransformer
6
+ from scipy.sparse import csr_matrix
7
+
8
def filter_idx(dataset, idx):
    """
    Keep only the indices whose sample has at least one non-zero value in
    every modality (RNA, ATAC and Flux).

    Args:
        dataset: Dataset object exposing rna_data, atac_data and flux_data.
        idx: Iterable of sample indices to filter.
    Returns:
        List of indices for which no modality row is all zeros.
    """
    modalities = (dataset.rna_data, dataset.atac_data, dataset.flux_data)
    keep = None
    for mod in modalities:
        # rows with at least one non-zero entry in this modality
        mod_mask = (mod != 0).any(axis=1)
        keep = mod_mask if keep is None else keep & mod_mask
    return [i for i in idx if keep[i]]
25
+
26
+
27
def analyze_cls_attention(id, fold_results, dataset, model_config, device, indices,
                          average_heads=True, return_flow_attention=False):
    """
    Extracts the attention weights of the validation set of each fold.

    Args:
        id: The type of data to use. Must be one of 'RNA', 'ATAC', 'Flux', 'Multi'.
        fold_results: List of dictionaries containing the results of each fold
            (must provide 'val_idx' and 'best_model_path').
        dataset: Dataset object containing the data.
        model_config: Dictionary containing the model configuration.
        device: Device to run the model on.
        indices: Collection of sample indices to restrict the analysis to.
        average_heads: Whether to average the attention weights across heads. Defaults to True.
        return_flow_attention: If True, return the per-layer attention dict used for
            attention rollout/flow analysis instead of the pooled CLS attention.
    Returns:
        If return_flow_attention is False: numpy array of shape (n_samples, seq_len)
        or (n_samples, num_heads, seq_len).
        Otherwise: dict with keys 'rna', 'atac', 'flux', 'cls', each a list of
        per-layer tensors concatenated across all validation batches.
    """
    if id not in ['RNA', 'ATAC', 'Flux', 'Multi']:
        raise ValueError("id must be one of 'RNA', 'ATAC', 'Flux', 'Multi'")

    all_attention_weights = []
    # Build the membership set once: testing `i in indices` against a plain list
    # inside the per-fold filter would be O(len(val_idx) * len(indices)).
    indices_set = set(indices)

    for fold in fold_results:

        val_idx = fold['val_idx']
        # keep only validation samples that belong to the requested indices
        val_idx = [i for i in val_idx if i in indices_set]

        if id == 'Multi':
            # multimodal model needs all three modalities to be non-empty
            val_idx = filter_idx(dataset, val_idx)

        if len(val_idx) == 0:
            print('No samples of the specified type in the validation set. Skipping...')
            continue

        val_ds = Subset(dataset, val_idx)
        val_loader = DataLoader(val_ds, batch_size=32, shuffle=False)

        if id == 'Multi':
            model = create_multimodal_model(model_config, device, use_mlm=False)
        else:
            model = SingleTransformer(id=id, **model_config).to(device)

        model_path = fold['best_model_path']
        # Load weights on CPU first so CUDA checkpoints also restore on CPU/MPS hosts.
        state_dict = torch.load(model_path, map_location='cpu')
        model.load_state_dict(state_dict)
        model.eval()

        with torch.no_grad():
            for batch in val_loader:
                x, b, _ = batch
                if isinstance(x, list):
                    rna = x[0].to(device)
                    atac = x[1].to(device)
                    flux = x[2].to(device)
                    x = (rna, atac, flux)
                else:
                    x = x.to(device)
                b = b.to(device)

                _, _, attention_weights = model(x, b, return_attention=True, return_flow_attention=return_flow_attention)

                if not return_flow_attention:
                    if average_heads:
                        # (batch, num_heads, 1, seq_len) -> (batch, seq_len)
                        attention_weights = attention_weights.squeeze(-2).mean(dim=1)
                    else:
                        # (batch, num_heads, 1, seq_len) -> (batch, num_heads, seq_len)
                        attention_weights = attention_weights.squeeze(-2)

                all_attention_weights.append(attention_weights)

    if not return_flow_attention:
        return np.concatenate(all_attention_weights, axis=0)  # (n_samples, seq_len) or (n_samples, num_heads, seq_len)
    else:
        att_w = {'rna': [], 'atac': [], 'flux': [], 'cls': []}
        # Each batch produced a dict of per-layer attention lists; concatenate
        # the batches layer by layer for every key.
        num_layers_mlm = len(all_attention_weights[0]['rna'])
        num_layers_cls = len(all_attention_weights[0]['cls']) if isinstance(all_attention_weights[0]['cls'], list) else 1

        for key in all_attention_weights[0].keys():
            key_all_attentions = []
            for batch_row in all_attention_weights:
                modality_batch_attention_layers = batch_row[key]
                if isinstance(modality_batch_attention_layers, list):
                    # move every layer tensor to CPU in place before collecting
                    for i, modality_attention_layers in enumerate(modality_batch_attention_layers):
                        modality_batch_attention_layers[i] = modality_attention_layers.cpu()
                    key_all_attentions.append(modality_batch_attention_layers)
                else:
                    key_all_attentions.append([modality_batch_attention_layers.cpu()])
            # key_all_attentions is [[layer0_b0, layer1_b0, ...], [layer0_b1, ...], ...];
            # concatenate across batches for each layer index.
            num_layers = num_layers_cls if key == 'cls' else num_layers_mlm
            att_w[key] = [torch.cat([layer[i] for layer in key_all_attentions], axis=0) for i in range(num_layers)]
        return att_w
120
+
121
+
122
def compute_attention_rollout(attention_weights):
    """
    Compute the attention rollout for a batch of samples.

    Args:
        attention_weights: list (length = num_layers) of tensors of shape
            (batch, num_heads, seq_len, seq_len). Each layer is averaged over
            its heads before being folded into the rollout.

    Returns:
        Tensor of shape (batch, seq_len, seq_len) representing the effective
        attention from the input token (typically CLS) to all tokens.
    """
    first_layer = attention_weights[0]
    batch_size, _, seq_len, _ = first_layer.shape

    # Start from the identity matrix, one copy per sample in the batch.
    rollout = torch.eye(seq_len, device=first_layer.device).repeat(batch_size, 1, 1)

    for layer_attn in attention_weights:
        head_avg = layer_attn.mean(dim=1)  # average over heads
        # fold this layer into the rollout with batched matrix multiplication
        rollout = torch.bmm(head_avg, rollout)

    return rollout
154
def multimodal_attention_rollout(all_attention_weights):
    """
    Combine the per-modality attention rollouts with the cross-modal CLS
    attention into a single token-importance vector per sample.

    Args:
        all_attention_weights: dict with keys 'rna', 'atac', 'flux' (each a
            list of per-layer attention tensors) and 'cls' (list whose first
            element is the CLS cross-attention tensor).
    Returns:
        Tensor of shape (samples, tokens) of combined attention weights.
    """
    order = ('rna', 'atac', 'flux')
    rollouts = {m: compute_attention_rollout(all_attention_weights[m]) for m in order}

    # (batch, heads, 1, total_len) -> (batch, total_len): head average, drop query dim
    cls_attention = all_attention_weights['cls'][0].mean(dim=1).squeeze(1)

    # slice the CLS attention into one chunk per modality
    sizes = [rollouts[m].size(1) for m in order]
    cls_parts = cls_attention.split(sizes, dim=1)

    pieces = [part.unsqueeze(1) @ rollouts[m] for part, m in zip(cls_parts, order)]
    return torch.cat(pieces, dim=2).squeeze(1)  # remove head dimension [samples, tokens]
172
+
173
def print_top_features(attention_weights, feature_names, top_n=5, modality=None):
    """Print the top_n features with the highest mean attention weight."""
    print(f"\nTop {top_n} attended features ({modality} samples):")
    avg_attention = attention_weights.mean(axis=0)
    # torch tensors expose .numpy(); plain ndarrays are used as-is
    if hasattr(avg_attention, 'numpy'):
        avg_attention = avg_attention.numpy()
    for i in avg_attention.argsort()[-top_n:][::-1]:
        print(f"{feature_names[i]}: {avg_attention[i]:.4f}")
179
+
180
def get_top_features(attention_weights, feature_names, top_n=100, modality=None):
    """
    Return (feature_name, mean_attention) pairs sorted by descending mean
    attention. A falsy top_n returns the full ranking.
    """
    avg_attention = attention_weights.mean(axis=0)
    # torch tensors expose .numpy(); plain ndarrays are used as-is
    if hasattr(avg_attention, 'numpy'):
        avg_attention = avg_attention.numpy()

    order = avg_attention.argsort()[::-1]
    if top_n:
        order = order[:top_n]

    return [(feature_names[i], avg_attention[i]) for i in order]
191
+
192
from scipy.sparse.csgraph import maximum_flow

def compute_attention_flow(attention_weights):
    """
    Approximate token-to-token attention flow by solving a max-flow problem on
    the layered attention graph (one node per (layer, token)).

    Args:
        attention_weights: list (one per layer) of attention tensors whose last
            dimension is the token axis; each layer is head-averaged with
            .mean(dim=1) before being used as edge capacities.
    Returns:
        Tensor of shape (num_tokens, num_tokens) where entry (i, j) is the
        maximum flow from token i in the first layer to token j in the last.
    """
    num_layers = len(attention_weights)
    num_tokens = attention_weights[0].size(-1)

    # Adjacency matrix of the layered flow network.
    adj_matrix = np.zeros((num_layers * num_tokens, num_layers * num_tokens))

    for i in range(num_layers - 1):
        layer_attention = attention_weights[i].mean(dim=1).cpu().numpy()  # average over heads
        start_idx = i * num_tokens
        end_idx = (i + 1) * num_tokens
        adj_matrix[start_idx:end_idx, end_idx:(end_idx + num_tokens)] = layer_attention

    # Residual (skip) connections between consecutive layers.
    for i in range(num_layers - 1):
        start_idx = i * num_tokens
        end_idx = (i + 1) * num_tokens
        adj_matrix[start_idx:end_idx, end_idx:(end_idx + num_tokens)] += np.eye(num_tokens)

    # scipy's maximum_flow requires integer capacities; scale the float
    # attention values onto a fixed-point grid and scale the flows back.
    capacity_scale = 10 ** 6
    graph = csr_matrix(np.round(adj_matrix * capacity_scale).astype(np.int64))

    flows = np.zeros((num_tokens, num_tokens))
    for i in range(num_tokens):
        source = i
        for j in range(num_tokens):
            sink = (num_layers - 1) * num_tokens + j
            # maximum_flow returns a MaximumFlowResult object (not a tuple);
            # the previous `_, flow = maximum_flow(...)` unpacking raised.
            result = maximum_flow(graph, source, sink)
            flows[i, j] = result.flow_value / capacity_scale

    return torch.tensor(flows, device=attention_weights[0].device)
221
+
222
def multimodal_attention_flow(all_attention_weights):
    """
    Combine the per-modality attention flows, row-normalized and weighted by
    the CLS cross-modal attention, into one vector per sample.

    Args:
        all_attention_weights: dict with keys 'rna', 'atac', 'flux' (each a
            list of per-layer attention tensors) and 'cls' (list whose first
            element is the CLS cross-attention tensor).
    Returns:
        Tensor of shape (samples, tokens) of combined flow weights.
    """
    order = ('rna', 'atac', 'flux')
    flows = {m: compute_attention_flow(all_attention_weights[m]) for m in order}

    # (batch, heads, 1, total_len) -> (batch, total_len): head average, drop query dim
    cls_attention = all_attention_weights['cls'][0].mean(dim=1).squeeze(1)

    # slice the CLS attention into one chunk per modality
    cls_parts = cls_attention.split([flows[m].size(1) for m in order], dim=1)

    weighted = []
    for part, m in zip(cls_parts, order):
        normalized = flows[m] / flows[m].sum(dim=1, keepdim=True)  # row-normalize
        weighted.append(part.unsqueeze(1) @ normalized)

    return torch.cat(weighted, dim=2).squeeze(1)
interpretation/latentspace.py ADDED
@@ -0,0 +1,219 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from torch.utils.data import DataLoader, Subset
3
+ import numpy as np
4
+ from tqdm import tqdm
5
+ import matplotlib.pyplot as plt
6
+ from models import SingleTransformer
7
+ from utils.helpers import create_multimodal_model
8
+ from data.create_dataset import MultiModalDataset
9
+ from .attentions import filter_idx
10
+
11
def get_latent_space(id, fold_results, labelled_dataset,
                     model_config, device, batch_size=32, common_samples=True):
    """
    Collect latent representations, true labels and rounded predictions over
    the validation split of every cross-validation fold.

    Args:
        id: Model type; one of 'RNA', 'ATAC', 'Flux', 'Multi'.
        fold_results: List of fold dicts providing 'best_model_path' and 'val_idx'.
        labelled_dataset: Dataset yielding (inputs, batch_no, label) tuples.
        model_config: Keyword configuration for the model constructor.
        device: Torch device to run inference on.
        batch_size: Mini-batch size used during inference.
        common_samples: If True, keep only samples with non-zero data in every
            modality (see filter_idx).
    Returns:
        (latent_space, labels, preds): three row-aligned numpy arrays; preds
        are rounded to the nearest integer class.
    """
    if id not in ['RNA', 'ATAC', 'Flux', 'Multi']:
        raise ValueError("id must be one of 'RNA', 'ATAC', 'Flux', 'Multi'")

    latent_space = []
    labels = []
    preds = []
    for fold in fold_results:
        model_path = fold['best_model_path']
        val_idx = fold['val_idx']
        if common_samples:
            # restrict to samples present (non-zero) in all three modalities
            val_idx = filter_idx(labelled_dataset, val_idx)
        val_ds = Subset(labelled_dataset, val_idx)
        val_loader = DataLoader(val_ds, batch_size=batch_size, shuffle=False)
        if id=='Multi':
            model = create_multimodal_model(model_config, device, use_mlm=False)
        else:
            model = SingleTransformer(id=id, **model_config).to(device)

        # Load weights to CPU first, then move to target device (handles CUDA->MPS/CPU transfer)
        state_dict = torch.load(model_path, map_location='cpu')
        model.load_state_dict(state_dict)
        model = model.to(device)
        model.eval()
        with torch.no_grad():
            for batch in val_loader:
                x, b, y = batch
                if isinstance(x, list):
                    # multimodal input: move each modality to the device separately
                    rna= x[0].to(device)
                    atac = x[1].to(device)
                    flux = x[2].to(device)
                    x = (rna, atac, flux)
                else:
                    x = x.to(device)
                b = b.to(device)

                ls, pred = model.get_latent_space(x, b)
                latent_space.append(ls.cpu().numpy())
                labels.append(y.numpy())
                preds.append(pred.cpu().numpy())
    latent_space = np.concatenate(latent_space)
    labels = np.concatenate(labels)
    preds = np.concatenate(preds)
    # round probabilities to hard class predictions
    preds = np.round(preds)
    return latent_space, labels, preds
58
+
59
def get_latent_space_cached(models, fold_results, dataset, device, batch_size=64, common_samples=True):
    """
    Compute latent space using preloaded models.

    Same contract as get_latent_space, but reuses already-instantiated models
    (one per fold) instead of deserializing checkpoints on every call.
    """
    z_chunks, y_chunks, p_chunks = [], [], []

    for model, fold in zip(models, fold_results):
        fold_idx = fold['val_idx']
        if common_samples:
            # restrict to samples present (non-zero) in all three modalities
            fold_idx = filter_idx(dataset, fold_idx)
        loader = DataLoader(Subset(dataset, fold_idx), batch_size=batch_size, shuffle=False)

        model.eval()
        with torch.no_grad():
            for inputs, batch_no, targets in loader:
                if isinstance(inputs, list):
                    # multimodal input: move each modality to the device
                    inputs = tuple(t.to(device) for t in inputs)
                else:
                    inputs = inputs.to(device)
                batch_no = batch_no.to(device)

                z, p = model.get_latent_space(inputs, batch_no)
                z_chunks.append(z.cpu().numpy())
                y_chunks.append(targets.numpy())
                p_chunks.append(p.cpu().numpy())

    latent_space = np.concatenate(z_chunks)
    labels = np.concatenate(y_chunks)
    preds = np.round(np.concatenate(p_chunks))  # hard class predictions
    return latent_space, labels, preds
95
+
96
def measure_shift(original_latent, perturbed_latent):
    """Mean Euclidean distance between matched rows of two latent matrices."""
    deltas = original_latent - perturbed_latent
    return np.linalg.norm(deltas, axis=1).mean()
98
+
99
def perturb_feature(data, feature_idx, perturbation_type='additive', scale=0.1, min_samples_threshold=10):
    """
    Return a perturbed copy of `data` where one feature column is modified.

    Supported perturbation types:
        'shuffle'        - permute the non-zero entries of the column (keeps sparsity pattern)
        'shuffle_all'    - permute the whole column, zeros included
        'additive'       - add Gaussian noise scaled by the column std (non-zero rows only)
        'multiplicative' - scale non-zero rows by a random factor around 1

    Args:
        data: 2-D tensor (samples x features); float or int32.
        feature_idx: Index of the column to perturb.
        perturbation_type: One of the modes above.
        scale: Magnitude of the 'additive'/'multiplicative' perturbation.
        min_samples_threshold: Minimum number of non-zero rows required.
    Returns:
        (perturbed_data, insufficient_flag): the perturbed copy and False, or
        (None, True) when the column has too few non-zero rows.
    """
    perturbed = data.clone()
    nz_mask = data[:, feature_idx] != 0

    # Not enough informative rows -> tell the caller to skip this feature.
    if nz_mask.sum() < min_samples_threshold:
        return None, True

    device = perturbed.device
    if perturbation_type == 'shuffle':
        # Shuffle only non-zero values (preserves the sparsity pattern).
        values = perturbed[nz_mask, feature_idx].clone()
        perm = torch.randperm(values.size(0), device=device)
        perturbed[nz_mask, feature_idx] = values[perm]

    elif perturbation_type == 'shuffle_all':
        # Shuffle the entire column, zeros included.
        perm = torch.randperm(perturbed.size(0), device=device)
        perturbed[:, feature_idx] = data[perm, feature_idx]

    elif perturbation_type == 'additive':
        column = perturbed[:, feature_idx].float()
        noise = torch.randn_like(column) * scale * torch.std(column)
        noise = noise.to(device)
        if data.dtype == torch.int32:
            # truncate float noise so the integer column stays int32
            perturbed[nz_mask, feature_idx] += noise[nz_mask].to(torch.int32)
        else:
            perturbed[nz_mask, feature_idx] += noise[nz_mask]

    elif perturbation_type == 'multiplicative':
        factor = 1 + scale * (torch.rand(perturbed.shape[0], device=device) - 0.5)
        if data.dtype == torch.int32:
            scaled = perturbed[nz_mask, feature_idx].float() * factor[nz_mask]
            perturbed[nz_mask, feature_idx] = scaled.to(torch.int32)
        else:
            perturbed[nz_mask, feature_idx] *= factor[nz_mask]

    return perturbed, False
137
+
138
def analyze_feature_importance_multi(id, model_config, fold_results, dataset, feature_names,
            device, analyse_features='all', perturbation_scale=0.1, min_samples_threshold=10, common_samples=True):
    """
    Rank features by how much perturbing each one shifts the latent space.

    For every feature of the requested modalities, the feature column is
    perturbed (see perturb_feature), the latent space is recomputed with the
    preloaded fold models, and the mean per-sample shift relative to the
    unperturbed latent space is recorded as that feature's importance.

    Args:
        id: Model type; one of 'RNA', 'ATAC', 'Flux', 'Multi'.
        model_config: Keyword configuration for the model constructor.
        fold_results: List of fold dicts providing 'best_model_path' and 'val_idx'.
        dataset: MultiModalDataset exposing rna_data, atac_data, flux_data,
            batch_no and labels.
        feature_names: Flat list of feature names spanning all modalities.
        device: Torch device to run inference on.
        analyse_features: Which modalities to analyse ('all', 'RNA', 'ATAC', 'Flux').
        perturbation_scale: Scale passed to perturb_feature.
        min_samples_threshold: Minimum non-zero rows a feature needs to be analysed.
        common_samples: If True, restrict to samples non-zero in all modalities.
    Returns:
        List of (feature_name, shift) tuples sorted by descending shift;
        skipped features are included with a shift of 0.0.
    """
    if analyse_features not in ['all', 'RNA', 'ATAC', 'Flux']:
        raise ValueError("analyse_features must be one of 'all', 'RNA', 'ATAC', 'Flux'")

    # Load every fold's best checkpoint once, so the perturbation loop below
    # can reuse the models instead of deserializing per feature.
    models = []
    for fold in fold_results:
        model_path = fold['best_model_path']
        if id == 'Multi':
            model = create_multimodal_model(model_config, device, use_mlm=False)
        else:
            model = SingleTransformer(id=id, **model_config).to(device)
        # Load weights to CPU first, then move to target device (handles CUDA->MPS/CPU transfer)
        state_dict = torch.load(model_path, map_location='cpu')
        model.load_state_dict(state_dict)
        model = model.to(device)
        model.eval()
        models.append(model)

    # Compute the original latent space once using the cached models
    original_latent, _, _ = get_latent_space_cached(models, fold_results, dataset, device, batch_size=64, common_samples=common_samples)

    feature_shifts = []
    skipped_features = []  # Track features skipped due to insufficient samples
    # Unpack multi-modal data
    X, b, y = (dataset.rna_data, dataset.atac_data, dataset.flux_data), dataset.batch_no, dataset.labels
    rna_input, atac_input, flux_input = X[0], X[1], X[2]
    # Offsets into the flat feature_names list. NOTE(review): the "+ 1" looks
    # like it accounts for an extra (CLS?) token per modality in feature_names
    # — confirm against how feature_names is constructed by the caller.
    atac_start = rna_input.shape[1] + 1
    flux_start = atac_start + atac_input.shape[1] + 1
    print("atac start", atac_start, "flux start", flux_start)
    perturb_type = 'shuffle'
    if analyse_features in ['RNA', 'all']:
        print("Analyzing RNA features")
        print("Permuting RNA features with", perturb_type)
        for i in tqdm(range(rna_input.shape[1])):
            # Choose perturbation type based on the mean value
            #if rna_input[:, i].float().mean() < 10 else 'multiplicative'
            perturbed_rna, insufficient_samples = perturb_feature(rna_input, i, perturb_type, scale=perturbation_scale, min_samples_threshold=min_samples_threshold)
            if insufficient_samples:
                skipped_features.append((feature_names[i], "RNA", (rna_input[:, i] != 0).sum().item()))
                feature_shifts.append((feature_names[i], 0.0))  # Add with 0 importance
            else:
                perturbed_dataset = MultiModalDataset((perturbed_rna, atac_input, flux_input), b, y)
                perturbed_latent, _, _ = get_latent_space_cached(models, fold_results, perturbed_dataset, device, batch_size=64, common_samples=common_samples)
                shift = measure_shift(original_latent, perturbed_latent)
                feature_shifts.append((feature_names[i], shift))

    if analyse_features in ['ATAC', 'all']:
        print("Analyzing ATAC features")
        print("Permuting ATAC features with", perturb_type)
        for i in tqdm(range(atac_input.shape[1])):
            perturbed_atac, insufficient_samples = perturb_feature(atac_input, i, perturb_type, perturbation_scale, min_samples_threshold=min_samples_threshold)
            if insufficient_samples:
                skipped_features.append((feature_names[atac_start + i], "ATAC", (atac_input[:, i] != 0).sum().item()))
                feature_shifts.append((feature_names[atac_start + i], 0.0))  # Add with 0 importance
            else:
                perturbed_dataset = MultiModalDataset((rna_input, perturbed_atac, flux_input), b, y)
                perturbed_latent, _, _ = get_latent_space_cached(models, fold_results, perturbed_dataset, device, batch_size=64, common_samples=common_samples)
                shift = measure_shift(original_latent, perturbed_latent)
                feature_shifts.append((feature_names[atac_start + i], shift))

    if analyse_features in ['Flux', 'all']:
        print("Permuting Flux features with", perturb_type)
        print("Analyzing Flux features")
        # NOTE(review): flux uses 'shuffle_all' (zeros included) unlike the
        # other modalities — presumably because flux values can legitimately
        # be zero; confirm this is intentional.
        for i in tqdm(range(flux_input.shape[1])):
            perturbed_flux, insufficient_samples = perturb_feature(flux_input, i, 'shuffle_all', perturbation_scale, min_samples_threshold=min_samples_threshold)
            if insufficient_samples:
                skipped_features.append((feature_names[flux_start + i], "Flux", (flux_input[:, i] != 0).sum().item()))
                feature_shifts.append((feature_names[flux_start + i], 0.0))  # Add with 0 importance
            else:
                perturbed_dataset = MultiModalDataset((rna_input, atac_input, perturbed_flux), b, y)
                perturbed_latent, _, _ = get_latent_space_cached(models, fold_results, perturbed_dataset, device, batch_size=64, common_samples=common_samples)
                shift = measure_shift(original_latent, perturbed_latent)
                feature_shifts.append((feature_names[flux_start + i], shift))

    # Log skipped features
    if skipped_features:
        print(f"\nSkipped {len(skipped_features)} features due to insufficient samples (< {min_samples_threshold}):")
        for feature_name, modality, sample_count in skipped_features:
            print(f"  {feature_name} ({modality}): {sample_count} samples")

    return sorted(feature_shifts, key=lambda x: x[1], reverse=True)
interpretation/metrics.py ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import torch
3
+ from torch.utils.data import DataLoader, Subset
4
+ from sklearn.metrics import confusion_matrix
5
+ from models import SingleTransformer
6
+ from utils.helpers import create_multimodal_model
7
+
8
+
9
def compute_confusion_matrices(id, model_config, fold_results, dataset, device):
    """
    Compute a confusion matrix for each cross-validation fold and aggregate them.

    Args:
        id (str): Model type; one of 'RNA', 'ATAC', 'Flux', 'Multi'.
        model_config (dict): Model configuration passed to the constructor.
        fold_results (list): Fold dicts providing 'best_model_path' and 'val_idx'.
        dataset: Dataset yielding (inputs, batch_no, label) tuples.
        device (str): Device to run inference on.
    Returns:
        list: One 2x2 confusion matrix per fold, with the aggregated matrix
        appended as the last element.
    """
    if id not in ['RNA', 'ATAC', 'Flux', 'Multi']:
        raise ValueError("id must be one of 'RNA', 'ATAC', 'Flux', 'Multi'")
    # Initialize an empty confusion matrix for aggregation
    agg_cm = np.zeros((2, 2), dtype=int)
    cms = []

    for i, fold in enumerate(fold_results, 1):
        model_path = fold['best_model_path']
        # Load weights to CPU first (handles CUDA checkpoints on CPU/MPS hosts;
        # consistent with the other interpretation modules).
        state_dict = torch.load(model_path, map_location='cpu')
        val_subset = Subset(dataset, fold['val_idx'])
        cls_valid_loader = DataLoader(val_subset, batch_size=32, shuffle=False)

        if id == 'Multi':
            model = create_multimodal_model(model_config, device, use_mlm=False)
        else:
            model = SingleTransformer(id, **model_config).to(device)

        model.load_state_dict(state_dict, strict=True)
        model.eval()

        val_preds, val_labels = [], []
        with torch.no_grad():
            for inputs, bi, y in cls_valid_loader:
                if isinstance(inputs, list):
                    # multimodal input: move each modality to the device
                    rna = inputs[0].to(device)
                    atac = inputs[1].to(device)
                    flux = inputs[2].to(device)
                    inputs = (rna, atac, flux)
                else:
                    inputs = inputs.to(device)
                bi, y = bi.to(device), y.to(device)

                preds, _ = model(inputs, bi)
                preds = preds.cpu().numpy()
                val_preds.append(preds)
                val_labels.append(y.cpu().numpy())

        val_preds = np.concatenate(val_preds).ravel()
        val_labels = np.concatenate(val_labels).ravel()

        binary_preds = (val_preds >= 0.5).astype(int)
        # Pin labels so the matrix is always 2x2 even when a fold's validation
        # split contains a single class (otherwise agg_cm += cm would fail).
        cm = confusion_matrix(val_labels, binary_preds, labels=[0, 1])
        agg_cm += cm
        cms.append(cm)

    cms.append(agg_cm)
    return cms
70
+
71
+
72
def compute_metrics_from_confusion_matrix(cm):
    """
    Compute classification metrics from per-fold confusion matrices.

    Args:
        cm (list): Per-fold confusion matrices with the aggregated matrix
            appended as the last element (it is excluded from the statistics).
    Returns:
        dict: {'average': {...}, 'std': {...}} with precision, recall, f1 and
        accuracy averaged (and their spread) across folds.
    """
    per_fold = []
    for fold_cm in cm[:-1]:  # last entry is the aggregate; skip it
        tn, fp, fn, tp = fold_cm.ravel()
        precision = tp / (tp + fp) if tp + fp > 0 else 0
        recall = tp / (tp + fn) if tp + fn > 0 else 0
        f1 = 2 * (precision * recall) / (precision + recall) if precision + recall > 0 else 0
        total = tp + tn + fp + fn
        accuracy = (tp + tn) / total if total > 0 else 0
        per_fold.append({
            'precision': precision,
            'recall': recall,
            'f1': f1,
            'accuracy': accuracy,
        })

    metric_names = ('precision', 'recall', 'f1', 'accuracy')
    avg_metrics = {name: np.mean([m[name] for m in per_fold]) for name in metric_names}
    std_metrics = {name: np.std([m[name] for m in per_fold]) for name in metric_names}

    return {
        'average': avg_metrics,
        'std': std_metrics,
    }
interpretation/predictions.py ADDED
@@ -0,0 +1,359 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Validation Results Analysis
3
+ This module provides functions to create comprehensive DataFrames containing
4
+ sample-level predictions, labels, and metadata from cross-validation results.
5
+ """
6
+
7
+ import pandas as pd
8
+ import numpy as np
9
+ import torch
10
+ from torch.utils.data import DataLoader, Subset
11
+ from utils.helpers import create_multimodal_model
12
+ from models import SingleTransformer
13
+
14
+ def get_sample_predictions_dataframe(
15
+ model_type,
16
+ multimodal_dataset,
17
+ fold_results,
18
+ model_config,
19
+ device='cpu',
20
+ batch_size=32,
21
+ adata_rna=None,
22
+ adata_atac=None,
23
+ threshold=0.5
24
+ ):
25
+ """
26
+ Creates a comprehensive DataFrame with sample-level predictions and metadata.
27
+
28
+ Parameters
29
+ ----------
30
+ model_type : str
31
+ Type of model: 'Multi', 'RNA', 'ATAC', or 'Flux'
32
+ multimodal_dataset : MultiModalDataset
33
+ The multimodal dataset containing all samples
34
+ fold_results : list
35
+ List of fold result dictionaries from cross-validation
36
+ model_config : dict
37
+ Model configuration dictionary
38
+ device : str, optional
39
+ Device to run predictions on ('cpu', 'cuda', 'mps')
40
+ batch_size : int, optional
41
+ Batch size for predictions
42
+ adata_rna : AnnData, optional
43
+ RNA AnnData object for additional metadata
44
+ adata_atac : AnnData, optional
45
+ ATAC AnnData object for additional metadata
46
+ threshold : float, optional
47
+ Classification threshold for binary predictions (default: 0.5)
48
+
49
+ Returns
50
+ -------
51
+ pd.DataFrame
52
+ DataFrame with columns:
53
+ - ind: Sample index in the dataset
54
+ - fold: Fold number
55
+ - label_numeric: Actual label (0 or 1)
56
+ - label: Actual label name ('dead-end' or 'reprogramming')
57
+ - predicted_value: Predicted probability [0, 1]
58
+ - predicted_class_numeric: Predicted class (0 or 1)
59
+ - predicted_class: Predicted class name ('dead-end' or 'reprogramming')
60
+ - correct: Whether prediction matches label
61
+ - abs_error: Absolute error of prediction
62
+ - modality: Available modalities for this sample (e.g., 'RAF', 'A', 'RF')
63
+ - batch_no: Batch number
64
+ - pct: Percentage metadata (if available)
65
+ - clone_size: Clone size (if available)
66
+ - clone_id: Clone ID (if available)
67
+ - (additional RNA/ATAC metadata if adata objects provided)
68
+ """
69
+
70
+ # Collect all predictions across folds
71
+ all_predictions = []
72
+ all_labels = []
73
+ all_indices = []
74
+ all_folds = []
75
+
76
+ print(f"Processing {len(fold_results)} folds...")
77
+
78
+ for fold_idx, fold in enumerate(fold_results):
79
+ model_path = fold['best_model_path']
80
+ val_idx = fold['val_idx']
81
+
82
+ # Create validation subset
83
+ val_subset = Subset(multimodal_dataset, val_idx)
84
+ val_loader = DataLoader(val_subset, batch_size=batch_size, shuffle=False)
85
+
86
+ # Load model
87
+ if model_type == 'Multi':
88
+ model = create_multimodal_model(model_config, device, use_mlm=False)
89
+ else:
90
+ model = SingleTransformer(id=model_type, **model_config).to(device)
91
+
92
+ # Load weights
93
+ state_dict = torch.load(model_path, map_location='cpu')
94
+ model.load_state_dict(state_dict)
95
+ model = model.to(device)
96
+ model.eval()
97
+
98
+ # Get predictions
99
+ fold_preds = []
100
+ fold_labels = []
101
+
102
+ with torch.no_grad():
103
+ for batch in val_loader:
104
+ x, b, y = batch
105
+
106
+ if isinstance(x, list):
107
+ rna = x[0].to(device)
108
+ atac = x[1].to(device)
109
+ flux = x[2].to(device)
110
+ x = (rna, atac, flux)
111
+ else:
112
+ x = x.to(device)
113
+
114
+ b = b.to(device)
115
+
116
+ # Get predictions
117
+ preds, _ = model(x, b)
118
+ preds = preds.squeeze()
119
+
120
+ if preds.dim() == 0:
121
+ preds = preds.unsqueeze(0)
122
+ if y.dim() == 0:
123
+ y = y.unsqueeze(0)
124
+
125
+ fold_preds.extend(preds.cpu().numpy())
126
+ fold_labels.extend(y.numpy())
127
+
128
+ # Store results
129
+ all_predictions.extend(fold_preds)
130
+ all_labels.extend(fold_labels)
131
+ all_indices.extend(val_idx)
132
+ all_folds.extend([fold_idx + 1] * len(val_idx))
133
+
134
+ print(f" Fold {fold_idx + 1}: {len(val_idx)} samples processed")
135
+
136
+ # Convert to arrays
137
+ all_predictions = np.array(all_predictions)
138
+ all_labels = np.array(all_labels)
139
+ all_indices = np.array(all_indices)
140
+ all_folds = np.array(all_folds)
141
+
142
+ # Determine modality availability for each sample
143
+ modalities = _get_modality_info(multimodal_dataset, all_indices)
144
+
145
+ # Get additional metadata
146
+ df_indices = multimodal_dataset.df_indics if hasattr(multimodal_dataset, 'df_indics') else None
147
+ pcts = multimodal_dataset.pcts if hasattr(multimodal_dataset, 'pcts') else None
148
+ label_names = multimodal_dataset.label_names if hasattr(multimodal_dataset, 'label_names') else None
149
+
150
+ # Build base dataframe
151
+ samples_data = []
152
+
153
+ for i, (idx, pred, label, fold) in enumerate(zip(all_indices, all_predictions, all_labels, all_folds)):
154
+ # Compute error
155
+ abs_error = abs(label - pred)
156
+
157
+ # Determine if correct
158
+ pred_class = int(pred >= threshold)
159
+ is_correct = pred_class == int(label)
160
+
161
+ # Get batch number
162
+ batch_no = int(multimodal_dataset.batch_no[idx].item())
163
+
164
+ # Base sample info
165
+ sample_info = {
166
+ 'ind': idx,
167
+ 'fold': fold,
168
+ 'label_numeric': int(label),
169
+ 'label': 'reprogramming' if label == 1 else 'dead-end',
170
+ 'predicted_value': float(pred),
171
+ 'predicted_class_numeric': pred_class,
172
+ 'predicted_class': 'reprogramming' if pred_class == 1 else 'dead-end',
173
+ 'correct': int(is_correct),
174
+ 'abs_error': float(abs_error),
175
+ 'modality': modalities[i],
176
+ 'batch_no': batch_no,
177
+ }
178
+
179
+ # Add percentage if available
180
+ if pcts is not None:
181
+ sample_info['pct'] = float(pcts[idx])
182
+
183
+ # Add additional metadata from AnnData objects if available
184
+ if df_indices is not None and (adata_rna is not None or adata_atac is not None):
185
+ rna_id = df_indices.iloc[idx, 0] if df_indices.shape[1] > 0 else None
186
+ atac_id = df_indices.iloc[idx, 1] if df_indices.shape[1] > 1 else None
187
+
188
+ # Try to get metadata from RNA or ATAC
189
+ metadata_added = False
190
+
191
+ if adata_rna is not None and rna_id is not None and rna_id in adata_rna.obs.index:
192
+ obs = adata_rna.obs.loc[rna_id]
193
+ _add_obs_metadata(sample_info, obs)
194
+ metadata_added = True
195
+
196
+ if not metadata_added and adata_atac is not None and atac_id is not None and atac_id in adata_atac.obs.index:
197
+ obs = adata_atac.obs.loc[atac_id]
198
+ _add_obs_metadata(sample_info, obs)
199
+
200
+ samples_data.append(sample_info)
201
+
202
+ # Create DataFrame
203
+ df_samples = pd.DataFrame(samples_data)
204
+
205
+ # Sort by index for easier analysis
206
+ df_samples = df_samples.sort_values('ind').reset_index(drop=True)
207
+
208
+ print(f"\nTotal samples: {len(df_samples)}")
209
+ print(f"Correct predictions: {df_samples['correct'].sum()} ({100 * df_samples['correct'].mean():.2f}%)")
210
+ print(f"Mean absolute error: {df_samples['abs_error'].mean():.4f}")
211
+
212
+ return df_samples
213
+
214
+
215
+ def _get_modality_info(dataset, indices):
216
+ """
217
+ Determine which modalities are available for each sample.
218
+
219
+ Returns a list of modality strings:
220
+ - 'RAF': RNA, ATAC, Flux all available
221
+ - 'RA': RNA and ATAC available
222
+ - 'RF': RNA and Flux available
223
+ - 'AF': ATAC and Flux available
224
+ - 'R': Only RNA available
225
+ - 'A': Only ATAC available
226
+ - 'F': Only Flux available
227
+ """
228
+ modalities = []
229
+
230
+ for idx in indices:
231
+ # Check if each modality has data
232
+ has_rna = (dataset.rna_data[idx] != 0).any().item()
233
+ has_atac = (dataset.atac_data[idx] != 0).any().item()
234
+ has_flux = (dataset.flux_data[idx] != 0).any().item()
235
+
236
+ # Build modality string
237
+ modality = ''
238
+ if has_rna:
239
+ modality += 'R'
240
+ if has_atac:
241
+ modality += 'A'
242
+ if has_flux:
243
+ modality += 'F'
244
+
245
+ modalities.append(modality if modality else 'None')
246
+
247
+ return modalities
248
+
249
+
250
+ def _add_obs_metadata(sample_info, obs):
251
+ """Add metadata from AnnData obs to sample_info dictionary."""
252
+ metadata_fields = [
253
+ 'clone_size', 'clone_id', 'cells_RNA', 'cells_ATAC',
254
+ 'cells_RNA_D3', 'cells_ATAC_D3', 'n_genes', 'phase',
255
+ 'G2M_score', 'pct_counts_mt', 'total_counts'
256
+ ]
257
+
258
+ for field in metadata_fields:
259
+ if field in obs:
260
+ value = obs[field]
261
+ # Handle different data types
262
+ if pd.notna(value):
263
+ if isinstance(value, (int, float, np.integer, np.floating)):
264
+ sample_info[field] = value
265
+ else:
266
+ sample_info[field] = str(value)
267
+
268
+
269
def summarize_by_modality(df_samples):
    """
    Summarize prediction performance by modality.

    Parameters
    ----------
    df_samples : pd.DataFrame
        DataFrame from get_sample_predictions_dataframe

    Returns
    -------
    pd.DataFrame
        Per-modality sample count, accuracy, mean absolute error and
        prediction mean/std, sorted by sample count (descending).
    """
    agg_spec = {
        'ind': 'count',
        'correct': 'mean',
        'abs_error': 'mean',
        'predicted_value': ['mean', 'std'],
    }
    summary = df_samples.groupby('modality').agg(agg_spec).round(4)

    # Flatten the MultiIndex columns produced by the list-valued agg.
    summary.columns = ['n_samples', 'accuracy', 'mean_abs_error', 'mean_pred', 'std_pred']
    return summary.reset_index().sort_values('n_samples', ascending=False)
295
+
296
+
297
def summarize_by_fold(df_samples):
    """
    Summarize prediction performance by cross-validation fold.

    Parameters
    ----------
    df_samples : pd.DataFrame
        DataFrame from get_sample_predictions_dataframe

    Returns
    -------
    pd.DataFrame
        Per-fold sample count, accuracy, mean absolute error and
        prediction mean/std.
    """
    agg_spec = {
        'ind': 'count',
        'correct': 'mean',
        'abs_error': 'mean',
        'predicted_value': ['mean', 'std'],
    }
    summary = df_samples.groupby('fold').agg(agg_spec).round(4)

    # Flatten the MultiIndex columns produced by the list-valued agg.
    summary.columns = ['n_samples', 'accuracy', 'mean_abs_error', 'mean_pred', 'std_pred']
    return summary.reset_index()
322
def get_misclassified_samples(df_samples):
    """
    Return a copy of the rows whose prediction was wrong.

    Parameters
    ----------
    df_samples : pd.DataFrame
        DataFrame from get_sample_predictions_dataframe

    Returns
    -------
    pd.DataFrame
        Only the samples with ``correct == 0``.
    """
    wrong_mask = df_samples['correct'] == 0
    return df_samples.loc[wrong_mask].copy()
337
def get_samples_by_modality(df_samples, modality):
    """
    Return a copy of the rows matching a given modality string.

    Parameters
    ----------
    df_samples : pd.DataFrame
        DataFrame from get_sample_predictions_dataframe
    modality : str
        Modality string (e.g. 'RAF', 'A', 'RF')

    Returns
    -------
    pd.DataFrame
        Only the samples whose 'modality' column equals ``modality``.
    """
    selected = df_samples['modality'] == modality
    return df_samples.loc[selected].copy()
354
+
355
+
356
+ if __name__ == "__main__":
357
+ # Example usage
358
+ print("This module provides functions to analyze validation results.")
359
+ print("Main function: get_sample_predictions_dataframe()")
interpretation/shapvalues.py ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from utils.helpers import create_multimodal_model
3
+ from models import SingleTransformer
4
+ from utils.helpers import get_all_modalities_available_samples
5
+ from data import create_dataset
6
+ import shap
7
+
8
def filter_ds(dataset, indices):
    """Build a new MultiModalDataset restricted to the given sample indices."""
    modalities = tuple(
        tensor[indices]
        for tensor in (dataset.rna_data, dataset.atac_data, dataset.flux_data)
    )
    return create_dataset.MultiModalDataset(modalities,
                                            dataset.batch_no[indices],
                                            dataset.labels[indices])
16
+
17
def get_background_data(id, dataset, samples=100, return_other_samples=False):
    """
    Get a label-balanced background dataset for SHAP explanations.

    Args:
        id: One of 'RNA', 'ATAC', 'Flux', 'Multi'. Only 'Multi' is implemented.
        dataset: MultiModalDataset object
        samples: Total number of background samples (split evenly across labels)
        return_other_samples: If True, return the remaining samples as well
    Returns:
        bg_ds: MultiModalDataset object with background samples
        background_indices: Indices of background samples
        other_ds: MultiModalDataset object with other samples (only if requested)
        other_indices: Indices of other samples (only if requested)
    Raises:
        ValueError: If id is not a known modality, or not yet implemented.
    """
    if id not in ['RNA', 'ATAC', 'Flux', 'Multi']:
        raise ValueError("id must be one of 'RNA', 'ATAC', 'Flux', 'Multi'")
    if id != 'Multi':
        # Single-modality backgrounds are not supported yet.
        raise ValueError("Not Implemented")

    # SHAP needs all three modalities, so restrict to fully-observed samples.
    dataset = get_all_modalities_available_samples(dataset)
    labels = dataset.labels
    unique_labels = torch.unique(labels)

    # Take the same number of samples from each label (first occurrences).
    samples_per_label = samples // len(unique_labels)
    background_indices = torch.cat([
        torch.where(labels == label)[0][:samples_per_label]
        for label in unique_labels
    ])

    def _subset(idx):
        # Slice every modality plus batch/labels down to the chosen indices.
        return create_dataset.MultiModalDataset(
            (dataset.rna_data[idx], dataset.atac_data[idx], dataset.flux_data[idx]),
            dataset.batch_no[idx],
            dataset.labels[idx],
        )

    bg_ds = _subset(background_indices)

    if return_other_samples:
        # Boolean mask avoids the O(n * m) per-element tensor membership test.
        keep = torch.ones(len(labels), dtype=torch.bool)
        keep[background_indices] = False
        other_indices = torch.where(keep)[0]
        return bg_ds, background_indices, _subset(other_indices), other_indices
    return bg_ds, background_indices
63
+
64
class ShapWrapper(torch.nn.Module):
    """Adapter that lets SHAP drive the multimodal model through one flat tensor.

    SHAP explainers pass a single 2-D input, so the wrapper receives the
    concatenation [RNA | ATAC | Flux | batch_no] and re-splits it before
    forwarding to the wrapped model.
    """
    def __init__(self, model):
        super().__init__()
        self.model = model
        # Freeze eval mode so dropout/batch-norm don't perturb attributions.
        self.model.eval()

    def forward(self, x):
        # NOTE(review): x[:, :-2] drops the last TWO columns, but only the
        # final column (batch_no) is consumed below — this looks like an
        # off-by-one that silently discards the last Flux feature; confirm
        # whether x[:, :-1] was intended.
        # (x[:, -1] is already 1-D, so the squeeze(-1) is a no-op for batch > 1.)
        inputs, b = x[:,:-2], x[:,-1].squeeze(-1).long()
        # Re-split into (RNA, ATAC, Flux) by fixed feature widths.
        # NOTE(review): widths 944 / 883 here differ from the 945 / 884
        # dividers used in interpretation/visualization.py — verify which
        # feature counts are correct.
        inputs = (inputs[:,:944].long(), inputs[:,944:944+883].float(), inputs[:,944+883:].float())
        preds, _ = self.model(inputs, b)
        # Model emits logits; SHAP is given probabilities instead.
        preds = torch.sigmoid(preds)
        # print(preds.shape)
        return preds
77
+
78
def compute_shap_values(id, fold_results, dataset, model_config, device):
    """Compute SHAP attributions for each cross-validation fold.

    For every fold: load the fold's best checkpoint, wrap it so SHAP can feed
    one flat [RNA | ATAC | Flux | batch_no] tensor, and run a GradientExplainer
    on the fold's validation samples against a label-balanced background set.

    Returns a list with one shap explanation object per processed fold.
    """

    if id not in ['RNA', 'ATAC', 'Flux', 'Multi']:
        raise ValueError("id must be one of 'RNA', 'ATAC', 'Flux', 'Multi'")

    all_shap_values = []

    # NOTE(review): bg_ds / other_idx are only bound when id == 'Multi'; any
    # other id passes the check above but hits a NameError in the loop below —
    # confirm whether single-modality SHAP is intentionally unsupported here.
    if id == 'Multi':
        bg_ds, bg_idx, other_ds, other_idx = get_background_data(id, dataset, samples=50, return_other_samples=True)
        print("total background samples: ", len(bg_idx), "total test samples: ", len(other_idx))

    for fold in fold_results:
        val_idx = fold['val_idx']
        # filter val_idx if is in indices
        # Drop validation samples that were used as SHAP background.
        # NOTE(review): `i in other_idx` scans a tensor per element (O(n*m));
        # also other_idx indexes the modality-filtered dataset built inside
        # get_background_data, while val_idx indexes `dataset` — confirm the
        # two index spaces actually coincide.
        val_idx = [i for i in val_idx if i in other_idx]

        if len(val_idx) == 0:
            print('No samples of the specified type in the validation set. Skipping...')
            continue
        else:
            print(f'fold {fold["fold"]} -> {len(val_idx)} samples')

        val_ds = filter_ds(dataset, val_idx)
        # NOTE(review): val_loader is created but never used below — dead code?
        val_loader = torch.utils.data.DataLoader(val_ds, batch_size=32, shuffle=False)

        if id=='Multi':
            model = create_multimodal_model(model_config, device, use_mlm=False)
        else:
            model = SingleTransformer(id=id, **model_config).to(device)

        # NOTE(review): torch.load without map_location assumes the checkpoint's
        # original device is available on this machine.
        model_path = fold['best_model_path']
        model.load_state_dict(torch.load(model_path))
        model.eval()
        wrapped_model = ShapWrapper(model).to(device)

        # Flatten the background set into [RNA | ATAC | Flux | batch_no] columns,
        # matching the layout ShapWrapper.forward expects.
        bg_x = torch.cat([bg_ds.rna_data, bg_ds.atac_data, bg_ds.flux_data], dim=1).to(device)
        bg_b = bg_ds.batch_no.to(device)
        bgx = torch.cat([bg_x, bg_b[...,None]], dim=-1)
        explainer = shap.GradientExplainer(wrapped_model, bgx)

        inputs, batch_indices = (val_ds.rna_data, val_ds.atac_data, val_ds.flux_data), val_ds.batch_no

        # Same flat layout for the samples being explained.
        inputs = torch.cat([inputs[0], inputs[1], inputs[2]], dim=1).to(device)
        batch_indices = batch_indices.to(device)
        bgv = torch.cat([inputs, batch_indices[...,None]], dim=-1)
        shap_values = explainer(bgv)
        all_shap_values.append(shap_values)

    return all_shap_values
interpretation/similarity.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from utils.helpers import get_token_embeddings
3
+
4
def compute_similarity_matrix(model, dataset, device):
    """
    Compute a token-by-token cosine-similarity matrix for the dataset.

    Token embeddings are extracted for every sample, averaged over the
    sample axis, L2-normalised, and compared pairwise via a dot product.

    Args:
        model (torch.nn.Module): Model.
        dataset (torch.utils.data.Dataset): Dataset.
        device (str): Device to use.
    Returns:
        np.ndarray: (seq_len, seq_len) cosine-similarity matrix.
    """
    token_embeddings = get_token_embeddings(model, dataset, device)  # (n_samples, seq_len, d_model)

    # One representative embedding per token position.
    per_token = token_embeddings.mean(dim=0)  # (seq_len, d_model)

    # Unit-normalise rows so the matrix product below yields cosine similarity.
    per_token = per_token / per_token.norm(dim=1, keepdim=True)

    return torch.mm(per_token, per_token.T).cpu().numpy()
interpretation/visualization.py ADDED
@@ -0,0 +1,560 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import numpy as np
3
+ import matplotlib.pyplot as plt
4
+ import seaborn as sns
5
+ from sklearn.metrics import roc_curve, roc_auc_score
6
+ from scipy import stats
7
+ from scipy.stats import ttest_rel
8
+ import pandas as pd
9
+
10
def plot_conf_matrix_mlm_vs_nomlm(cms_mlm, cms_nomlm, m_type, only_agg=True, suptitle="Confusion Matrix Comparison"):
    """Plot confusion matrices for the MLM vs no-MLM models side by side.

    Parameters
    ----------
    cms_mlm, cms_nomlm : list
        Per-fold confusion matrices; the last entry of each list is the
        aggregated matrix over all folds.
    m_type : str
        Model/modality tag used in the output file name.
    only_agg : bool
        If True, plot only the aggregated matrices; otherwise one row per fold.
    suptitle : str
        Figure title.

    Fix: tight_layout() is now applied *before* savefig so the saved PDF
    reflects the adjusted layout (previously the file was written first).
    """
    labels = ['Dead-end', 'Reprogramming']

    if only_agg:
        # Aggregated matrices are stored last in each list.
        f = plt.figure(figsize=(12, 5))
        plt.suptitle(suptitle, fontsize=16)

        plt.subplot(1, 2, 1)
        sns.heatmap(cms_mlm[-1], annot=True, cmap='Blues', fmt='g', xticklabels=labels, yticklabels=labels)
        plt.xlabel('Predicted')
        plt.ylabel('Actual')
        plt.title('Confusion Matrix - MLM (Aggregated)')

        plt.subplot(1, 2, 2)
        sns.heatmap(cms_nomlm[-1], annot=True, cmap='Blues', fmt='g', xticklabels=labels, yticklabels=labels)
        plt.xlabel('Predicted')
        plt.ylabel('Actual')
        plt.title('Confusion Matrix - No MLM (Aggregated)')

        plt.tight_layout()
        f.savefig(f'./figures/confusion_matrices_{m_type}.pdf', bbox_inches='tight')
        plt.show()

    else:
        # One row of (MLM, No MLM) matrices per fold.
        n_folds = len(cms_mlm)
        f = plt.figure(figsize=(15, 2 * n_folds))  # scale height with fold count
        plt.suptitle(suptitle, fontsize=16)

        for i in range(n_folds):
            # First column: MLM.
            plt.subplot(n_folds, 2, i*2 + 1)
            sns.heatmap(cms_mlm[i], annot=True, cmap='Blues', fmt='g', xticklabels=labels, yticklabels=labels)
            plt.xlabel('Predicted')
            plt.ylabel('Actual')
            plt.title(f'Confusion Matrix - MLM (Fold {i+1})')

            # Second column: No MLM.
            plt.subplot(n_folds, 2, i*2 + 2)
            sns.heatmap(cms_nomlm[i], annot=True, cmap='Blues', fmt='g', xticklabels=labels, yticklabels=labels)
            plt.xlabel('Predicted')
            plt.ylabel('Actual')
            plt.title(f'Confusion Matrix - No MLM (Fold {i+1})')

        plt.tight_layout(rect=[0, 0, 1, 0.96])  # leave room for the suptitle
        f.savefig(f'./figures/confusion_matrices_folds_{m_type}.pdf', bbox_inches='tight')
        plt.show()
64
+
65
def plot_training_vs_validation_losses(train_losses, val_losses, title="Losses"):
    """Plot training and validation loss curves in two side-by-side panels.

    Parameters
    ----------
    train_losses, val_losses : sequence of float
        Per-epoch losses (assumed to be the same length).
    title : str
        Figure title. The figure is saved to ./figures/losses.pdf.

    Fix: tight_layout() is now applied *before* savefig so the saved PDF
    reflects the adjusted layout.
    """
    epochs = len(train_losses)
    f = plt.figure(figsize=(10, 3))
    plt.suptitle(title)

    plt.subplot(1, 2, 1)
    plt.plot(range(1, epochs+1), train_losses)
    plt.xlabel('Epoch')
    plt.ylabel('Train Loss')
    plt.title('Train Loss')

    plt.subplot(1, 2, 2)
    plt.plot(range(1, epochs+1), val_losses)
    plt.xlabel('Epoch')
    plt.ylabel('Validation Loss')
    plt.title('Validation Loss')

    plt.tight_layout()
    f.savefig('./figures/losses.pdf', bbox_inches='tight')
    plt.show()
84
+
85
def plot_roc_auc_curve(val_preds, val_labels, m_type, aggregate=False):
    """Plot ROC curve(s): one pooled curve over all folds, or one per fold.

    The figure is saved to ./figures/roc_curve_{m_type}.pdf.
    """
    f = plt.figure()

    if aggregate:
        # Pool predictions and labels from every fold into a single curve.
        y_true = np.concatenate(val_labels).ravel()
        y_score = np.concatenate(val_preds).ravel()
        auc = roc_auc_score(y_true, y_score)
        fpr, tpr, _ = roc_curve(y_true, y_score)
        plt.plot(fpr, tpr, label=f'Aggregated AUC: {auc:.4f}')
        plot_title = 'ROC Curve (Aggregated)'
    else:
        # One curve (and AUC) per fold.
        for fold_no, (y_true, y_score) in enumerate(zip(val_labels, val_preds), 1):
            auc = roc_auc_score(y_true, y_score)
            fpr, tpr, _ = roc_curve(y_true, y_score)
            plt.plot(fpr, tpr, label=f'Fold {fold_no} AUC: {auc:.4f}')
        plot_title = 'ROC Curve (Each Fold)'

    plt.plot([0, 1], [0, 1], linestyle='--')  # chance-level diagonal
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title(plot_title)
    plt.legend()
    f.savefig(f'./figures/roc_curve_{m_type}.pdf', bbox_inches='tight')
    plt.show()
120
+
121
+
122
def plot_auc_boxplot_comparison(fold_results1, fold_results2, title="AUC Comparison"):
    """Box-plot comparison of per-fold train/validation AUCs for two models.

    Parameters
    ----------
    fold_results1, fold_results2 : list of dict
        Per-fold results (with 'train_auc' and 'best_val_auc' keys) for the
        MLM and no-MLM models respectively.
    title : str
        Figure title. The figure is saved to ./figures/auc_comparison.pdf.

    A paired t-test p-value (same folds, two training regimes) is shown in
    each subplot title.

    Fix: tight_layout() is now applied *before* savefig so the saved PDF
    reflects the adjusted layout.
    """
    train_auc_scores_mlm = [fold['train_auc'] for fold in fold_results1]
    train_auc_scores_nomlm = [fold['train_auc'] for fold in fold_results2]
    val_auc_scores_mlm = [fold['best_val_auc'] for fold in fold_results1]
    val_auc_scores_nomlm = [fold['best_val_auc'] for fold in fold_results2]

    # Paired tests: each fold yields one matched (MLM, no-MLM) pair.
    train_p_value = ttest_rel(train_auc_scores_mlm, train_auc_scores_nomlm).pvalue
    val_p_value = ttest_rel(val_auc_scores_mlm, val_auc_scores_nomlm).pvalue

    df_train = pd.DataFrame({
        'Fold': [f'Fold {i+1}' for i in range(len(val_auc_scores_mlm))],
        'with MLM': train_auc_scores_mlm,
        'without MLM': train_auc_scores_nomlm,
    })

    df_valid = pd.DataFrame({
        'Fold': [f'Fold {i+1}' for i in range(len(val_auc_scores_mlm))],
        'with MLM': val_auc_scores_mlm,
        'without MLM': val_auc_scores_nomlm
    })

    f = plt.figure(figsize=(12, 8))
    plt.suptitle(title)

    plt.subplot(1, 2, 1)
    sns.boxplot(data=df_train, palette=["#1f77b4", "#ff7f0e"])  # Custom colors
    plt.title(f'Train AUC Comparison (p-value = {train_p_value:.4f})')
    plt.ylabel('AUC')
    plt.ylim(0.5, 1)

    plt.subplot(1, 2, 2)
    sns.boxplot(data=df_valid, palette=["#2ca02c", "#d62728"])  # Custom colors
    plt.title(f'Validation AUC Comparison (p-value = {val_p_value:.4f})')
    plt.ylabel('AUC')
    plt.ylim(0.5, 1)

    plt.tight_layout()
    f.savefig('./figures/auc_comparison.pdf', bbox_inches='tight')
    plt.show()
162
+
163
def plot_loss_comparison_mlm_vs_nomlm(fold_results1, fold_results2, title="Loss Comparison"):
    """Overlay per-fold train/validation loss curves for two models
    (with vs without MLM pre-training) on a single axis."""

    f = plt.figure(figsize=(12, 8))

    for i, fold_mlm in enumerate(fold_results1):
        fold_nomlm = fold_results2[i]
        history_mlm = fold_mlm['metrics']
        history_nomlm = fold_nomlm['metrics']
        xs = range(1, len(history_mlm['train_loss']) + 1)

        # Solid markers = with pre-training; dashed = without.
        series = (
            (history_mlm['train_loss'], 'o-', 'Train Loss w/ Pre-Training'),
            (history_mlm['val_loss'], 'x-', 'Validation Loss w/ Pre-Training'),
            (history_nomlm['train_loss'], 'o--', 'Train Loss w/o Pre-Training'),
            (history_nomlm['val_loss'], 'x--', 'Validation Loss w/o Pre-Training'),
        )
        for values, fmt, prefix in series:
            plt.plot(xs, values, fmt, label=f'{prefix} - Fold {fold_mlm["fold"]}', alpha=0.5)

    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.title(title)
    plt.legend(loc='upper right', bbox_to_anchor=(1.3, 1))
    f.savefig('./figures/loss_comparison.pdf', bbox_inches='tight')
    plt.show()
186
+
187
def plot_fold_losses(fold_results, title="Losses"):
    """Plot train and validation loss curves for every fold on one axis."""

    f = plt.figure(figsize=(12, 8))

    for fold in fold_results:
        history = fold['metrics']
        xs = range(1, len(history['train_loss']) + 1)

        plt.plot(xs, history['train_loss'], 'o-', label=f'Train Loss - Fold {fold["fold"]}', alpha=0.5)
        plt.plot(xs, history['val_loss'], 'x-', label=f'Validation Loss - Fold {fold["fold"]}', alpha=0.5)

    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.title(title)
    plt.legend(loc='upper right', bbox_to_anchor=(1.3, 1))
    f.savefig('./figures/fold_losses.pdf', bbox_inches='tight')
    plt.show()
206
+
207
def plot_data_distribution(adata_RNA, adata_ATAC, adata_Flux, title="Data Distribution"):
    """Plot value histograms for the three modalities side by side.

    Parameters
    ----------
    adata_RNA, adata_ATAC : AnnData
        Objects whose sparse .X matrix holds the per-cell feature values.
    adata_Flux : pd.DataFrame
        Flux values (dense .values is used directly).
    title : str
        Figure title. The figure is saved to ./figures/data_distribution.pdf.

    Fix: tight_layout() is now applied *before* savefig so the saved PDF
    reflects the adjusted layout.
    """
    fig, axes = plt.subplots(1, 3, figsize=(15, 5))
    plt.suptitle(title)

    # (data, color, title template, x-label) per panel; the variance precision
    # differs per modality on purpose (flux values are much smaller).
    panels = (
        (adata_RNA.X.toarray().flatten(), 'skyblue',
         'RNA Distribution, var:{:.2f}, mean:{:.2f}', 'Expression level'),
        (adata_ATAC.X.toarray().flatten(), 'lightgreen',
         'ATAC Distribution, var:{:.3f}, mean:{:.2f}', 'Accessibility level'),
        (adata_Flux.values.flatten(), 'salmon',
         'Fluxomic Distribution, var:{:.5f}, mean:{:.2f}', 'Flux value'),
    )

    for ax, (data, color, title_fmt, xlabel) in zip(axes, panels):
        sns.histplot(data, bins=100, ax=ax, color=color)
        ax.set_title(title_fmt.format(data.var(), data.mean()))
        ax.set_xlabel(xlabel)
        ax.set_ylabel('Frequency')

    plt.tight_layout()
    fig.savefig('./figures/data_distribution.pdf', bbox_inches='tight')
    plt.show()
236
+
237
+
238
def plot_att_weights(all_attention, dead_end_attention, reprogramming_attention,
                    feature_names=None, print_top_features=False, top_n=5, scale_weights=False, fix_scale=False,
                    use_mean_contribution=False):
    """Visualize sample-averaged attention weights as three 1-row heatmaps.

    Panels: all samples, dead-end samples, reprogramming samples. Red dashed
    lines mark the RNA/ATAC/Flux feature boundaries, and a per-modality
    aggregate (of the UNSCALED weights) is annotated under each panel.

    Parameters
    ----------
    all_attention, dead_end_attention, reprogramming_attention :
        Per-sample attention weights, shape (n_samples, n_features).
    feature_names : sequence, optional
        Feature labels; required when print_top_features is True.
    print_top_features : bool
        If True, also print the top_n highest-attention features per group.
    top_n : int
        Number of top features to report.
    scale_weights : bool
        Min-max scale each averaged vector to [0, 1] before plotting.
    fix_scale : bool
        Use the all-samples min/max as the shared color range instead.
    use_mean_contribution : bool or str
        Aggregation for the per-modality annotation: False/'sum', True/'mean',
        'median', 'trimmed_mean' or 'active_mean'.

    Returns
    -------
    matplotlib.figure.Figure
    """

    print(all_attention.shape, "all_attention.shape")
    print(dead_end_attention.shape, "dead_end_attention.shape")
    print(reprogramming_attention.shape, "reprogramming_attention.shape")

    def minmax_scale(arr):
        # Rescale to [0, 1]; a constant array maps to all zeros.
        arr = np.asarray(arr)
        min_val = arr.min()
        max_val = arr.max()
        if max_val - min_val == 0:
            return np.zeros_like(arr) # avoid divide by zero
        return (arr - min_val) / (max_val - min_val)

    avg_all_attention = all_attention.mean(axis=0) # Average attention weights across samples
    avg_dead_end_attention = dead_end_attention.mean(axis=0)
    avg_reprogramming_attention = reprogramming_attention.mean(axis=0)

    # Store original unscaled versions for modality contribution calculation
    avg_all_attention_orig = avg_all_attention.copy() if hasattr(avg_all_attention, 'copy') else np.array(avg_all_attention)
    avg_dead_end_attention_orig = avg_dead_end_attention.copy() if hasattr(avg_dead_end_attention, 'copy') else np.array(avg_dead_end_attention)
    avg_reprogramming_attention_orig = avg_reprogramming_attention.copy() if hasattr(avg_reprogramming_attention, 'copy') else np.array(avg_reprogramming_attention)

    if scale_weights:
        avg_all_attention = minmax_scale(avg_all_attention)
        avg_dead_end_attention = minmax_scale(avg_dead_end_attention)
        avg_reprogramming_attention = minmax_scale(avg_reprogramming_attention)
        vmin, vmax = 0.0, 1.0
    elif fix_scale: # fix scale of all attention weights to the same range
        vmin, vmax = avg_all_attention.min(), avg_all_attention.max()
    else:
        vmin, vmax = None, None

    # Visualize average attention weights
    f = plt.figure(figsize=(15, 3))

    # Feature-axis boundaries between RNA | ATAC | Flux.
    # NOTE(review): these dividers (945 / 945+884) differ from the 944 / 883
    # widths used in interpretation/shapvalues.py (ShapWrapper) — confirm
    # which feature counts are correct.
    divider1 = 945
    divider2 = 945 + 884

    def add_modality_labels(ax, attention_weights, attention_weights_orig, use_mean=False):
        # Annotate one panel with a per-modality aggregate computed from the
        # UNSCALED averaged weights (so the numbers are comparable across panels).
        rna_weights = attention_weights_orig[:divider1]
        atac_weights = attention_weights_orig[divider1:divider2]
        flux_weights = attention_weights_orig[divider2:]

        # Calculate metric based on method
        if use_mean is False or use_mean == 'sum':
            # Sum of all attention weights (original behavior)
            rna_metric = rna_weights.sum()
            atac_metric = atac_weights.sum()
            flux_metric = flux_weights.sum()

        elif use_mean is True or use_mean == 'mean':
            # Mean attention per feature
            rna_metric = rna_weights.mean()
            atac_metric = atac_weights.mean()
            flux_metric = flux_weights.mean()

        elif use_mean == 'median':
            # Median attention per feature (robust to zeros and outliers)
            rna_metric = np.median(rna_weights)
            atac_metric = np.median(atac_weights)
            flux_metric = np.median(flux_weights)

        elif use_mean == 'trimmed_mean':
            # Trimmed mean: exclude lowest 25% and highest 5%
            rna_metric = stats.trim_mean(rna_weights, proportiontocut=0.15) # removes 15% from each tail
            atac_metric = stats.trim_mean(atac_weights, proportiontocut=0.15)
            flux_metric = stats.trim_mean(flux_weights, proportiontocut=0.15)

        elif use_mean == 'active_mean':
            # Mean of only "active" features (attention > threshold)
            threshold = np.percentile(attention_weights_orig, 25) # bottom 25% considered inactive

            rna_active = rna_weights[rna_weights > threshold]
            atac_active = atac_weights[atac_weights > threshold]
            flux_active = flux_weights[flux_weights > threshold]

            rna_metric = rna_active.mean() if len(rna_active) > 0 else 0
            atac_metric = atac_active.mean() if len(atac_active) > 0 else 0
            flux_metric = flux_active.mean() if len(flux_active) > 0 else 0

        else:
            raise ValueError(f"Invalid use_mean value: {use_mean}")

        # # Normalize to percentages
        # print(rna_metric, atac_metric, flux_metric, "rna_metric, atac_metric, flux_metric")
        # total_metric = rna_metric + atac_metric + flux_metric
        # rna_pct = (rna_metric / total_metric * 100) if total_metric > 0 else 0
        # atac_pct = (atac_metric / total_metric * 100) if total_metric > 0 else 0
        # flux_pct = (flux_metric / total_metric * 100) if total_metric > 0 else 0

        # Calculate center positions for each modality
        n_rna = divider1
        n_atac = divider2 - divider1
        n_flux = len(attention_weights) - divider2
        rna_center = n_rna / 2
        atac_center = divider1 + n_atac / 2
        flux_center = divider2 + n_flux / 2
        # NOTE(review): the annotation text always reads "Sum"/"Mean" even when
        # use_mean selects median/trimmed/active aggregation — the displayed
        # numbers are then that metric and metric/n, not a true sum and mean.
        rna_metric_mean = rna_metric / n_rna
        atac_metric_mean = atac_metric / n_atac
        flux_metric_mean = flux_metric / n_flux

        ax.text(rna_center, -0.3, f'Sum: {rna_metric:.3f}\nMean: {rna_metric_mean:.3f}', ha='center', va='bottom', fontsize=10, fontweight='bold')
        ax.text(atac_center, -0.3, f'Sum: {atac_metric:.3f}\nMean: {atac_metric_mean:.3f}', ha='center', va='bottom', fontsize=10, fontweight='bold')
        ax.text(flux_center, -0.3, f'Sum: {flux_metric:.3f}\nMean: {flux_metric_mean:.3f}', ha='center', va='bottom', fontsize=10, fontweight='bold')

    # Panel 1: all samples.
    plt.subplot(1, 3, 1)
    ax1 = plt.gca()
    sns.heatmap(avg_all_attention.reshape(1, -1), cmap='viridis', yticklabels=['All'], vmin=vmin, vmax=vmax, ax=ax1)
    plt.title('Avg Att. W. (All Samples)')
    plt.xlabel('Features')
    plt.xticks([])
    plt.axvline(x=divider1, color='red', linestyle='--', linewidth=2)
    plt.axvline(x=divider2, color='red', linestyle='--', linewidth=2)
    add_modality_labels(ax1, avg_all_attention, avg_all_attention_orig, use_mean=use_mean_contribution)

    # Panel 2: dead-end samples.
    plt.subplot(1, 3, 2)
    ax2 = plt.gca()
    sns.heatmap(avg_dead_end_attention.reshape(1, -1), cmap='viridis', yticklabels=['Dead-end'], vmin=vmin, vmax=vmax, ax=ax2)
    plt.title('Avg Att. W. (Dead-end Samples)')
    plt.xlabel('Features')
    plt.xticks([])
    plt.axvline(x=divider1, color='red', linestyle='--', linewidth=2)
    plt.axvline(x=divider2, color='red', linestyle='--', linewidth=2)
    add_modality_labels(ax2, avg_dead_end_attention, avg_dead_end_attention_orig, use_mean=use_mean_contribution)

    # Panel 3: reprogramming samples.
    plt.subplot(1, 3, 3)
    ax3 = plt.gca()
    sns.heatmap(avg_reprogramming_attention.reshape(1, -1), cmap='viridis', yticklabels=['Reprogramming'], vmin=vmin, vmax=vmax, ax=ax3)
    plt.title('Avg Att. W. (Reprogramming Samples)')
    plt.xlabel('Features')
    plt.xticks([])
    plt.axvline(x=divider1, color='red', linestyle='--', linewidth=2)
    plt.axvline(x=divider2, color='red', linestyle='--', linewidth=2)
    add_modality_labels(ax3, avg_reprogramming_attention, avg_reprogramming_attention_orig, use_mean=use_mean_contribution)

    # f.savefig('./figures/attention_weights.pdf', bbox_inches='tight')
    plt.tight_layout()
    plt.show()

    if print_top_features:
        def get_top_features(attention_weights, feature_names, top_n=top_n):
            # Average over samples (handles both torch tensors and ndarrays),
            # then pick the top_n indices by attention.
            avg_attention = attention_weights.mean(axis=0).numpy() if hasattr(attention_weights, 'numpy') else attention_weights.mean(axis=0)
            print(avg_attention.shape, len(feature_names))
            top_indices = avg_attention.argsort()[-top_n:][::-1]
            print(top_indices)
            return [(feature_names[i], avg_attention[i]) for i in top_indices]

        top_all = get_top_features(all_attention, feature_names)
        top_dead_end = get_top_features(dead_end_attention, feature_names)
        top_reprogramming = get_top_features(reprogramming_attention, feature_names)

        print(f"Top {top_n} attended features (All samples):")
        for feature, weight in top_all:
            print(f"{feature}: {weight:.4f}", end=", ")

        print(f"\nTop {top_n} attended features (Dead-end samples):")
        for feature, weight in top_dead_end:
            print(f"{feature}: {weight:.4f}", end=", ")

        print(f"\nTop {top_n} attended features (Reprogramming samples):")
        for feature, weight in top_reprogramming:
            print(f"{feature}: {weight:.4f}", end=", ")
    return f
405
+
406
def plot_att_weights_distribution(
    all_attention, dead_end_attention, reprogramming_attention,
    feature_names=None, plot_type='violin', top_n=5, print_means=False
):
    """
    Plot the distribution of attention weights per modality (RNA / ATAC / Flux)
    for the three sample groups (All, Dead-end, Reprogramming).

    Args:
        all_attention: (n_samples, n_features) attention weights over all samples.
        dead_end_attention: same layout, dead-end samples only.
        reprogramming_attention: same layout, reprogramming samples only.
        feature_names: unused here; kept for signature compatibility with the
            sibling attention-plotting helpers.
        plot_type (str): 'violin' or 'box'.
        top_n: unused here; kept for signature compatibility.
        print_means (bool): if True, print mean/std per modality and condition.

    Returns:
        matplotlib.figure.Figure: the created figure.

    Raises:
        ValueError: if plot_type is not 'violin' or 'box'.
    """
    # Modality boundaries along the feature axis. The +1 over the raw modality
    # sizes (944 RNA, 883 ATAC) presumably accounts for the appended
    # batch-embedding token of each encoder -- TODO confirm against the model.
    # (The original code assigned 944 / 944+883 and immediately overwrote the
    # values; the dead assignments are removed here.)
    divider1 = 945          # RNA features occupy [0, divider1)
    divider2 = 945 + 884    # ATAC occupies [divider1, divider2); Flux the rest

    def prepare_modality_data(attention_weights, condition_name):
        """Extract attention weights by modality as flat arrays."""
        rna_weights = attention_weights[:, :divider1].flatten()
        atac_weights = attention_weights[:, divider1:divider2].flatten()
        flux_weights = attention_weights[:, divider2:].flatten()
        return {
            'RNA': rna_weights,
            'ATAC': atac_weights,
            'Flux': flux_weights,
            'condition': condition_name,
        }

    all_data = prepare_modality_data(all_attention, 'All')
    de_data = prepare_modality_data(dead_end_attention, 'Dead-end')
    re_data = prepare_modality_data(reprogramming_attention, 'Reprogramming')

    if plot_type in ['violin', 'box']:
        # Long-format DataFrame: one row per (condition, modality, weight).
        data_list = []
        for condition_data in [all_data, de_data, re_data]:
            condition = condition_data['condition']
            for modality in ['RNA', 'ATAC', 'Flux']:
                weights = condition_data[modality]
                for weight in weights:
                    data_list.append({
                        'Condition': condition,
                        'Modality': modality,
                        'Attention Weight': weight
                    })
        df = pd.DataFrame(data_list)

        # One subplot per condition.
        f, axes = plt.subplots(1, 3, figsize=(18, 5))

        conditions = ['All', 'Dead-end', 'Reprogramming']
        colors = ['#1f77b4', '#ff7f0e', '#2ca02c']  # RNA, ATAC, Flux colors

        if print_means:
            print("Mean attention weight values per modality and per condition:")

        for idx, (ax, condition) in enumerate(zip(axes, conditions)):
            condition_df = df[df['Condition'] == condition]

            if plot_type == 'violin':
                sns.violinplot(data=condition_df, x='Modality', y='Attention Weight',
                               palette=colors, ax=ax)
            else:  # box
                sns.boxplot(data=condition_df, x='Modality', y='Attention Weight',
                            palette=colors, ax=ax)

            ax.set_title(f'{condition} Samples', fontsize=12, fontweight='bold')
            ax.set_xlabel('Modality', fontsize=11)
            ax.set_ylabel('Attention Weight', fontsize=11)
            ax.grid(axis='y', alpha=0.3)

            # Overlay each modality's mean as a dashed red segment.
            for i, modality in enumerate(['RNA', 'ATAC', 'Flux']):
                mod_data = condition_df[condition_df['Modality'] == modality]['Attention Weight']
                mean_val = mod_data.mean()
                std_val = mod_data.std()
                ax.hlines(mean_val, i - 0.4, i + 0.4, colors='red', linestyles='--',
                          linewidth=2, alpha=0.7, label='Mean' if i == 0 else '')
                if print_means:
                    print(f"{condition} - {modality}: mean={mean_val:.8f}, std={std_val:.8f}")

            if idx == 0:
                ax.legend()

    else:
        # Only these two styles are implemented ('hist' was advertised by the
        # original message but never handled).
        raise ValueError(f"plot_type must be 'violin' or 'box', got '{plot_type}'")

    plt.tight_layout()
    plt.show()

    return f
491
+
492
def plot_att_heads(all_attention_heads, dead_end_attention_heads, reprogramming_attention_heads, stacked=False):
    """
    Visualize per-head attention weights for the three sample groups.

    Args:
        all_attention_heads: (n_samples, n_heads, n_features) weights, all samples.
        dead_end_attention_heads: same layout, dead-end samples.
        reprogramming_attention_heads: same layout, reprogramming samples.
        stacked (bool): if True, draw one heatmap per group with all heads
            stacked as rows; otherwise one row of heatmaps per head.

    Side effects:
        Saves the figure under ./figures/ and shows it.
    """
    # Second dimension is assumed to be the head axis.
    n_heads = all_attention_heads.shape[1]

    # (weights, label) per sample group, in fixed column order.
    groups = [
        (all_attention_heads, 'All Samples'),
        (dead_end_attention_heads, 'Dead-end Samples'),
        (reprogramming_attention_heads, 'Reprogramming Samples'),
    ]

    if stacked:
        f = plt.figure(figsize=(15, 10))
        for col, (weights, label) in enumerate(groups, start=1):
            plt.subplot(1, 3, col)
            # Average over samples, then lay heads out as rows.
            head_matrix = weights.mean(axis=0).reshape(n_heads, -1)
            sns.heatmap(head_matrix, cmap='viridis',
                        yticklabels=[f'Head {i+1}' for i in range(n_heads)])
            plt.title(f'Stacked Attention Weights ({label})')
            plt.xlabel('Features')
            plt.ylabel('Heads')
            plt.xticks(rotation=90)
        f.savefig('./figures/attention_heads_stacked.pdf', bbox_inches='tight')
    else:
        f = plt.figure(figsize=(15, 15))
        # Grid: one row per head, one column per sample group.
        for head in range(n_heads):
            for col, (weights, label) in enumerate(groups, start=1):
                plt.subplot(n_heads, 3, 3 * head + col)
                sns.heatmap(weights[:, head, :].mean(axis=0).reshape(1, -1),
                            cmap='viridis', yticklabels=[f'Head {head+1}'])
                plt.title(f'Head {head+1} Attention ({label})')
                plt.xlabel('Features')
                plt.xticks(rotation=90)
        f.savefig('./figures/attention_heads.pdf', bbox_inches='tight')

    plt.tight_layout()
    plt.show()
560
+
models/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .transformers import SingleTransformer, MultiModalTransformer
models/transformers.py ADDED
@@ -0,0 +1,339 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from torch import nn
3
+ import math
4
+
5
+
6
class CustomTransformerEncoderLayer(nn.TransformerEncoderLayer):
    """
    TransformerEncoderLayer variant whose forward pass also returns the
    per-head self-attention weights (``average_attn_weights=False``).

    Note: because forward returns a tuple, layers of this class must be
    iterated manually rather than run through ``nn.TransformerEncoder``'s
    own forward.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def forward(self, src, src_mask=None, src_key_padding_mask=None):
        # Self-attention, keeping the unaveraged per-head weights.
        attn_out, head_weights = self.self_attn(
            src, src, src,
            attn_mask=src_mask,
            key_padding_mask=src_key_padding_mask,
            average_attn_weights=False,
            need_weights=True
        )
        # Post-norm residual around the attention sub-layer.
        hidden = self.norm1(src + self.dropout1(attn_out))
        # Feed-forward sub-layer with its own residual + norm.
        ff_out = self.linear2(self.dropout(self.activation(self.linear1(hidden))))
        hidden = self.norm2(hidden + self.dropout2(ff_out))
        return hidden, head_weights
24
+
25
class SingleTransformer(nn.Module):

    """
    Transformer-based model for each modality.
    Args:
        model_type (str): One of 'RNA', 'ATAC', 'Flux'. 'RNA' uses fixed
            sinusoidal count embeddings; the others use a linear projection.
        vocab_size (int): Vocabulary size. (set 1 if projection is used.)
        seq_len (int): Sequence length.
        n_encoder_layers (int): Number of transformer encoder layers.
        n_heads (int): Number of attention heads.
        n_batches (int): Number of experimental batches (for batch embeddings).
        d_model (int): Dimension of the token embeddings.
        d_ff (int): Dimension of the feedforward layer.
        dropout_rate (float, optional): Dropout rate. Defaults to 0.0.
    Attributes:
        count_embedding_fix (torch.Tensor): Fixed count embeddings (RNA only).
        count_embedding_proj (nn.Linear): Count projection (ATAC/Flux only).
        id_embeddings (nn.Parameter): Learned per-position ID embeddings.
        batch_embedding (nn.Embedding): Batch embeddings.
        layer_norm (nn.LayerNorm): Layer normalization.
        cls_token (nn.Parameter): CLS token.
        encoder (nn.TransformerEncoder): Transformer encoder.
        mask_output_layer (nn.Linear): Mask output layer (MLM head).
        cls_attention (nn.MultiheadAttention): Multihead attention for CLS token.
        cls_norm1 (nn.LayerNorm): Layer normalization for CLS token.
        cls_norm2 (nn.LayerNorm): Layer normalization for CLS token.
        cls_ffn (nn.Sequential): Feedforward network for CLS token.
        cls_output_layer (nn.Linear): Output layer for CLS token.
        pretrained (bool): Flag indicating if pretrained weights are frozen.
    Methods:
        forward(x, batch_indices, masked_lm=False, return_attention=False, return_embeddings=False):
            Forward pass of the module.
        freeze_pretrained_weights():
            Freeze the pretrained weights.
        unfreeze_pretrained_weights():
            Unfreeze the pretrained weights.
        create_count_embeddings(max_count, embed_size):
            Create count embeddings.
        get_latent_space(inputs, batch_indices, batch_size=32):
            Get the latent space representation and predictions.
    """
    def __init__(self, model_type, vocab_size, seq_len,
                 n_encoder_layers, n_heads, n_batches,
                 d_model, d_ff,
                 dropout_rate=0.0):
        super(SingleTransformer, self).__init__()

        if model_type not in ['RNA', 'ATAC', 'Flux']:
            raise ValueError("model_type must be one of 'RNA', 'ATAC', 'Flux'")

        self.model_type = model_type

        if self.model_type == 'RNA':
            # RNA counts are discrete: look up a fixed sinusoidal embedding per count.
            # NOTE: plain tensor attribute, not a registered buffer -- moved to the
            # input device manually in forward().
            self.count_embedding_fix = self.create_count_embeddings(vocab_size, d_model)
        else:
            # Continuous values (ATAC/Flux): project each scalar to d_model.
            self.count_embedding_proj = nn.Linear(1, d_model)

        # Learned per-position ("feature identity") embeddings.
        self.id_embeddings = nn.Parameter(torch.zeros(1, seq_len, d_model))
        nn.init.normal_(self.id_embeddings, mean=0.0, std=0.02)
        self.batch_embedding = nn.Embedding(n_batches, d_model)

        self.layer_norm = nn.LayerNorm(d_model)
        self.token_layer_norm = nn.LayerNorm(d_model)
        self.batch_layer_norm = nn.LayerNorm(d_model)
        # self.alpha = nn.Parameter(torch.tensor(1.0))
        # self.beta = nn.Parameter(torch.tensor(1.0))

        self.cls_token = nn.Parameter(torch.zeros(1, 1, d_model))
        nn.init.normal_(self.cls_token, mean=0.0, std=0.02)

        # encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=n_heads, dim_feedforward=d_ff, dropout=dropout_rate, batch_first=True)
        # Custom layer so per-head attention weights can be collected per layer.
        encoder_layer = CustomTransformerEncoderLayer(
            d_model=d_model,
            nhead=n_heads,
            dim_feedforward=d_ff,
            dropout=dropout_rate,
            batch_first=True
        )
        self.encoder = nn.TransformerEncoder(encoder_layer, num_layers=n_encoder_layers)

        # MLM head: predicts the original token id (or value) per position.
        self.mask_output_layer = nn.Linear(d_model, vocab_size)

        # CLS pooling block: one attention hop from a learned CLS query over all tokens.
        self.cls_attention = nn.MultiheadAttention(embed_dim=d_model, num_heads=n_heads, batch_first=True)
        self.cls_norm1 = nn.LayerNorm(d_model)
        self.cls_norm2 = nn.LayerNorm(d_model)
        self.cls_ffn = nn.Sequential(
            nn.Linear(d_model, d_ff),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(d_ff, d_model)
        )
        self.dropout = nn.Dropout(dropout_rate)
        self.cls_output_layer = nn.Linear(d_model, 1)

    def forward(self, x, batch_indices, masked_lm=False, return_attention=False, return_embeddings=False, return_flow_attention=False):
        """
        Forward pass. Return arity depends on the flags:
          - return_embeddings: (tokens, attention_flow)
          - masked_lm: MLM logits only
          - return_flow_attention: (preds, cls_output, cls_attention, attention_flow)
          - return_attention: (preds, cls_output, cls_attention)
          - default: (preds, cls_output)
        """
        # [batch_dim, seq_dim, embed_dim]

        if self.model_type == 'RNA':
            # count_embedding_fix is not a registered buffer, so move it explicitly.
            self.count_embedding_fix = self.count_embedding_fix.to(x.device)
            x = x.long()
            x = self.count_embedding_fix[x]
        else:
            x = x.unsqueeze(-1).float()
            x = self.count_embedding_proj(x)

        # Add positional/identity embeddings (sliced in case seq is shorter).
        x = x + self.id_embeddings[:, :x.size(1), :]

        batch_embeddings = self.batch_embedding(batch_indices).unsqueeze(1)#.expand(-1, x.size(1), -1) # repeat for the token dim

        # token_embeddings = self.token_layer_norm(x)
        # batch_embeddings = self.batch_layer_norm(batch_embeddings)
        # x = token_embeddings + batch_embeddings
        # print(batch_embeddings.shape, x.shape)
        # print(torch.max(batch_embeddings.flatten()), torch.max(token_embeddings.flatten()))
        # print(torch.min(batch_embeddings.flatten()), torch.min(token_embeddings.flatten()))
        # print("===")
        # Batch embedding is appended as one extra token (seq_len + 1 total).
        x = torch.cat((x, batch_embeddings), dim=1) #x + batch_embeddings #

        x = self.layer_norm(x)

        # Run encoder layers manually so per-layer attention can be captured.
        attention_flow = []
        for layer in self.encoder.layers:
            x, attn_weights = layer(x)
            if return_flow_attention:
                attention_flow.append(attn_weights)

        other_tokens = x #self.encoder(x)

        if return_embeddings:
            return other_tokens, attention_flow

        if masked_lm:
            # exclude the batch embeddings
            other_tokens = other_tokens[:, :-1, :]
            return self.mask_output_layer(other_tokens)

        cls_token = self.cls_token.expand(x.size(0), -1, -1) # repeat for the batch dim
        attended_cls, attention_weights = self.cls_attention(cls_token, other_tokens, other_tokens, need_weights=True, average_attn_weights=False)
        attended_cls = attended_cls.squeeze(1)

        # Transformer-style residual + FFN on the pooled CLS representation.
        cls_output = self.cls_norm1(cls_token.squeeze(1) + self.dropout(attended_cls))
        cls_output = self.cls_norm2(cls_output + self.dropout(self.cls_ffn(cls_output)))

        preds = self.cls_output_layer(cls_output)
        # Scalar probability-like output in (0, 1).
        preds = torch.sigmoid(preds)

        if return_flow_attention:
            return preds, cls_output, attention_weights, attention_flow
        elif return_attention:
            return preds, cls_output, attention_weights
        else:
            return preds, cls_output

    def freeze_pretrained_weights(self):
        # Freeze everything except the CLS pooling/classification head.
        for name, param in self.named_parameters():
            if not any(x in name for x in ['cls_attention', 'cls_norm', 'cls_ffn', 'cls_token', 'cls_ff_dim', 'cls_output_layer']):
                param.requires_grad = False
        self.pretrained = True

    def unfreeze_pretrained_weights(self):
        for param in self.parameters():
            param.requires_grad = True
        self.pretrained = False

    def create_count_embeddings(self, max_count, embed_size):
        # Standard sinusoidal embedding table over integer counts 0..max_count.
        embeddings = torch.zeros(max_count + 1, embed_size)
        for i in range(max_count + 1):
            embeddings[i] = torch.tensor([math.sin(i / (10000 ** (2 * (j // 2) / embed_size)))
                                          if j % 2 == 0 else math.cos(i / (10000 ** (2 * (j // 2) / embed_size)))
                                          for j in range(embed_size)])
        return embeddings

    def get_latent_space(self, inputs, batch_indices, batch_size=32):
        """
        Get the latent space representation and predictions.
        Args:
            inputs (torch.Tensor): Input tensor.
            batch_indices (torch.Tensor): Batch indices tensor.
            batch_size (int, optional): Batch size. Defaults to 32.
        Returns:
            torch.Tensor: Latent space representation.
            torch.Tensor: Predictions.
        """
        self.eval()
        latent_space_list, preds_list = [], []
        with torch.no_grad():
            # Mini-batched inference to bound memory use.
            for i in range(0, inputs.shape[0], batch_size):
                inputs_batch = inputs[i:i + batch_size].float()
                batch_indices_batch = batch_indices[i:i + batch_size].int()
                preds, reduced_dim = self(inputs_batch, batch_indices_batch)
                latent_space_list.append(reduced_dim)
                preds_list.append(preds)
        latent_space = torch.cat(latent_space_list, dim=0)
        preds = torch.cat(preds_list, dim=0)
        return latent_space, preds
220
+
221
+
222
class MultiModalTransformer(nn.Module):
    """
    Fuse three pretrained SingleTransformer encoders (RNA, ATAC, Flux) by
    concatenating their token sequences and pooling with a shared CLS
    attention head into a single scalar prediction per sample.

    Args:
        rna_model / atac_model / flux_model: SingleTransformer instances.
        d_model (int): Shared token embedding dimension across modalities.
        n_heads_cls (int): Attention heads for the fusion CLS pooling.
        d_ff_cls (int): Hidden size of the CLS feed-forward block.
        dropout_rate (float): Dropout rate. Defaults to 0.0.
    """
    def __init__(self, rna_model, atac_model, flux_model, d_model, n_heads_cls, d_ff_cls, dropout_rate=0.0):
        super(MultiModalTransformer, self).__init__()

        self.rna_model = rna_model
        self.atac_model = atac_model
        self.flux_model = flux_model

        self.cls_token = nn.Parameter(torch.zeros(1, 1, d_model))
        nn.init.normal_(self.cls_token, mean=0.0, std=0.02)
        # self.modality_embeddings = nn.Embedding(3, d_model)
        self.layer_norm = nn.LayerNorm(d_model)

        # Fusion pooling block: CLS query attends over all modality tokens.
        self.cls_attention = nn.MultiheadAttention(embed_dim=d_model, num_heads=n_heads_cls, dropout=dropout_rate, batch_first=True)
        self.cls_norm1 = nn.LayerNorm(d_model)
        self.cls_norm2 = nn.LayerNorm(d_model)
        self.cls_ffn = nn.Sequential(
            nn.Linear(d_model, d_ff_cls),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(d_ff_cls, d_model))
        self.cls_output_layer = nn.Linear(d_model, 1)

        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x, batch_indices, return_attention=False, return_embeddings=False, return_flow_attention=False):
        """
        Args:
            x: tuple/list of (rna_input, atac_input, flux_input) tensors.
            batch_indices: per-sample batch indices shared by all encoders.
        Returns (depending on flags): concatenated tokens, or
        (preds, cls_output[, attention dict or weights]).
        """
        rna_input, atac_input, flux_input = x[0], x[1], x[2]

        # Each encoder returns (tokens, per-layer attention). Shapes in the
        # trailing comments are examples from the authors' setup.
        rna_tokens, rna_attention = self.rna_model(rna_input, batch_indices, return_embeddings=True, return_flow_attention=return_flow_attention) # [32, 944, 128]
        atac_tokens, atac_attention = self.atac_model(atac_input, batch_indices, return_embeddings=True, return_flow_attention=return_flow_attention) # [32, 883, 128]
        flux_tokens, flux_attention = self.flux_model(flux_input, batch_indices, return_embeddings=True, return_flow_attention=return_flow_attention) # [32, 168, 128]
        # rna_tokens += self.modality_embeddings(torch.tensor([0]).to(rna_tokens.device))
        # atac_tokens += self.modality_embeddings(torch.tensor([1]).to(atac_tokens.device))
        # flux_tokens += self.modality_embeddings(torch.tensor([2]).to(flux_tokens.device))
        # Concatenate modality tokens along the sequence axis: RNA | ATAC | Flux.
        other_tokens = torch.cat((rna_tokens, atac_tokens, flux_tokens), dim=-2) # [32, 1995, 128]

        if return_embeddings:
            return other_tokens

        # create mask: a modality is "present" for a sample iff its input row
        # is not entirely zero; absent modalities are masked out of attention.
        rna_mask = (rna_input.sum(dim=1) != 0).float() # [32]
        # b1 = rna_mask.sum()
        atac_mask = (atac_input.sum(dim=1) != 0).float() # [32]
        # b2 = atac_mask.sum()
        flux_mask = (flux_input.sum(dim=1) != 0).float() # [32]

        rna_mask = rna_mask.unsqueeze(-1).expand(-1, rna_tokens.size(1)) # [32, 944]
        atac_mask = atac_mask.unsqueeze(-1).expand(-1, atac_tokens.size(1)) # [32, 883]
        flux_mask = flux_mask.unsqueeze(-1).expand(-1, flux_tokens.size(1)) # [32, 168]
        other_tokens_mask = torch.cat((rna_mask, atac_mask, flux_mask), dim=1) # [32, 1995]

        other_tokens = self.layer_norm(other_tokens)
        cls_token = self.cls_token.expand(other_tokens.size(0), -1, -1) # [32, 1, 128]
        # key_padding_mask: True positions are ignored, hence the (1 - mask).
        attended_cls, attention_weights = self.cls_attention(cls_token, other_tokens, other_tokens,
                                                             key_padding_mask=(1 - other_tokens_mask).bool(),
                                                             need_weights=True, average_attn_weights=False)

        attended_cls = attended_cls.squeeze(1)
        # Residual + FFN refinement of the pooled representation.
        cls_output = self.cls_norm1(cls_token.squeeze(1) + self.dropout(attended_cls))
        cls_output = self.cls_norm2(cls_output + self.dropout(self.cls_ffn(cls_output)))

        preds = self.cls_output_layer(cls_output)

        preds = torch.sigmoid(preds)

        if return_flow_attention:
            return preds, cls_output, {
                'rna': rna_attention,
                'atac': atac_attention,
                'flux': flux_attention,
                'cls': attention_weights
            }
        elif return_attention:
            return preds, cls_output, attention_weights
        else:
            return preds, cls_output

    def freeze_pretrained_weights(self):
        # Freeze encoders and everything except the fusion CLS head.
        self.rna_model.freeze_pretrained_weights()
        self.atac_model.freeze_pretrained_weights()
        self.flux_model.freeze_pretrained_weights()
        for name, param in self.named_parameters():
            if not any(x in name for x in ['cls_attention', 'cls_norm', 'cls_ffn', 'cls_token', 'cls_output_layer']):
                param.requires_grad = False

    def unfreeze_pretrained_weights(self):
        self.rna_model.unfreeze_pretrained_weights()
        self.atac_model.unfreeze_pretrained_weights()
        self.flux_model.unfreeze_pretrained_weights()
        for param in self.parameters():
            param.requires_grad = True

    def get_latent_space(self, X, batch_indices, batch_size=32):
        """
        Mini-batched inference returning (latent_space, preds) for all samples.
        X is a tuple of (rna, atac, flux) tensors aligned on the sample axis.
        """
        self.eval()
        latent_space_list, preds_list = [], []
        rna_input, atac_input, flux_input = X[0], X[1], X[2]
        with torch.no_grad():
            for i in range(0, rna_input.shape[0], batch_size):
                rna_input_batch = rna_input[i:i + batch_size].float()
                atac_input_batch = atac_input[i:i + batch_size].float()
                flux_input_batch = flux_input[i:i + batch_size].float()
                batch_indices_batch = batch_indices[i:i + batch_size].int()
                preds, reduced_dim = self((rna_input_batch, atac_input_batch, flux_input_batch), batch_indices_batch)
                latent_space_list.append(reduced_dim)
                preds_list.append(preds)
        latent_space = torch.cat(latent_space_list, dim=0)
        preds = torch.cat(preds_list, dim=0)
        return latent_space, preds
330
+
331
+
332
if __name__=='__main__':
    # Smoke test: an ATAC-type model (vocab_size=1 -> linear count projection).
    # Fixed from the original, which passed nonexistent kwargs
    # (d_tokens=508, d_batch=4) and called .shape on tuple returns.
    model = SingleTransformer(model_type='ATAC', vocab_size=1, seq_len=883,
                              n_encoder_layers=2, n_heads=2, n_batches=3,
                              d_model=508, d_ff=128)
    x = torch.rand(32, 883)
    batch_indices = torch.randint(1, 3, (32,))
    # masked_lm returns the MLM logits tensor directly.
    print(model(x, batch_indices, masked_lm=True).shape)
    # return_attention returns (preds, cls_output, attention_weights).
    print(model(x, batch_indices, return_attention=True)[0].shape)
    # return_embeddings returns (tokens, attention_flow); take the tokens.
    print(model(x, batch_indices, return_embeddings=True)[0].shape)
    # Default call returns (preds, cls_output).
    print(model(x, batch_indices)[0].shape)
notebooks/analysis_plots.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
objects/degs.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53d798eebf646f4c238db5a5a41e23e4c1ea47a950d0ed412e9ec4bae0bda3f3
3
+ size 185265
objects/fi_shift_atac.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db1ce30c3708df8397ffa064d0c4d9a8ff4b9514d02c8f64d721411f21a69b98
3
+ size 25453
objects/fi_shift_flux.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5d75180a0d4ecf5ef7aadc4ca10ed7023c742fec6b0326c3f289341803874b0
3
+ size 7687
objects/fi_shift_rna.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0231c7701e898c975488279721546846fa919a471666ceabaaf90d8778050e46
3
+ size 24332
objects/fold_results_multi.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb4c67a606b1af107afb9061830971126b82c3cf7e2b78c431f668a380102dc5
3
+ size 50371
objects/mutlimodal_dataset.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09ffeeee89cfc06b4d0858434ba60c3eab008ba70bf5ea27101a6ff6c1ec2376
3
+ size 33966477
utils/__init__.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ from . import helpers
2
+ from . import losses
3
+ from .losses import MLMLoss
4
+ from .helpers import create_masked_input, create_multimodal_model, get_max, get_token_embeddings
utils/helpers.py ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from torch.utils.data import TensorDataset, DataLoader
3
+ import numpy as np
4
+ from models import SingleTransformer, MultiModalTransformer
5
+ import config
6
+ from data import create_dataset
7
+
8
def create_masked_input(input_tensor, mask_token, mask_prob=0.20):
    """
    Creates a masked input tensor by randomly replacing elements with a mask token.
    Args:
        input_tensor (torch.Tensor): The input tensor to be masked.
        mask_token: The token to be used for masking.
        mask_prob (float, optional): The probability of masking an element. Defaults to 0.20.
    Returns:
        torch.Tensor: The masked input tensor.
        torch.Tensor: A boolean mask indicating which elements were masked.
    """
    # Independent Bernoulli(mask_prob) draw per element.
    selected = torch.rand(input_tensor.shape) < mask_prob
    # Work on a copy so the caller's tensor is left untouched.
    corrupted = input_tensor.clone()
    corrupted[selected] = mask_token
    return corrupted, selected
24
+
25
def get_max(adata):
    """
    Get the maximum value in the data.
    Args:
        adata (list): A list of AnnData objects (each with a sparse-like .X
            exposing toarray()).
    Returns:
        The maximum value across all matrices in the list.
    Raises:
        AssertionError: if adata is not a list.
        ValueError: if adata is empty (from max() on an empty sequence).
    """
    assert isinstance(adata, list), "adata must be a list of AnnData objects."
    # The original built a torch tensor from a copy of each dense matrix only
    # to flatten it through numpy; take each matrix's max directly instead.
    return max(np.asarray(a.X.toarray()).max() for a in adata)
39
+
40
def get_token_embeddings(model, dataset, device):
    """
    Get the token embeddings for the dataset.
    Args:
        model (torch.nn.Module): Model (must accept return_embeddings=True).
        dataset (torch.utils.data.Dataset): Dataset yielding (inputs, batch_idx)
            or (inputs, batch_idx, label) tuples.
        device (str): Device to use.
    Returns:
        torch.Tensor: Embeddings of shape (n_samples, seq_len, d_model).
    """
    model.eval()
    loader = DataLoader(dataset, batch_size=32, shuffle=False)
    collected = []
    with torch.no_grad():
        for batch in loader:
            # Accept datasets with or without labels.
            if len(batch) == 3:
                inputs, batch_idx, _ = batch
            elif len(batch) == 2:
                inputs, batch_idx = batch
            if isinstance(inputs, list):
                # Multimodal case: move each of the three modality tensors.
                inputs = (inputs[0].to(device), inputs[1].to(device), inputs[2].to(device))
            else:
                inputs = inputs.to(device)
            batch_idx = batch_idx.to(device)

            out = model(inputs, batch_idx, return_embeddings=True)
            collected.append(out.cpu().detach())

    # Concatenate embeddings across batches.
    return torch.cat(collected, dim=0)
74
+
75
def get_all_modalities_available_samples(dataset):
    """
    Filter a multimodal dataset down to samples that have all three modalities.

    A modality counts as "available" for a sample when its feature row is not
    entirely zero.

    Args:
        dataset: MultiModalDataset exposing rna_data/atac_data/flux_data,
            batch_no and labels aligned on the sample axis.

    Returns:
        MultiModalDataset: a new dataset containing only complete samples.
    """
    rna = dataset.rna_data
    atac = dataset.atac_data
    flux = dataset.flux_data
    # Keep samples whose rows are non-zero in every modality.
    mask = (rna != 0).any(axis=1) & (atac != 0).any(axis=1) & (flux != 0).any(axis=1)
    new_ds = create_dataset.MultiModalDataset((rna[mask], atac[mask], flux[mask]),
                                              dataset.batch_no[mask],
                                              dataset.labels[mask])
    return new_ds
85
+
86
def separate_dataset(ds):
    """
    Separate a dataset into two groups based on the labels.
    Args:
        ds (TensorDataset): Dataset with tensors (X, batch, label).
    Returns:
        TensorDataset: Dataset with label 0.
        TensorDataset: Dataset with label 1.
    """
    X, b, y = ds.tensors

    # Boolean masks select the two label groups; order within each is preserved.
    neg = (y == 0)
    pos = (y == 1)

    dataset_0 = TensorDataset(X[neg], b[neg], y[neg])  # label == 0
    dataset_1 = TensorDataset(X[pos], b[pos], y[pos])  # label == 1

    return dataset_0, dataset_1
110
+
111
def create_multimodal_model(model_config, device, use_mlm=False):
    """
    Create a multimodal model.
    Args:
        model_config (dict): Model configuration with 'RNA', 'ATAC', 'Flux',
            'Share' and 'Multi' sections.
        device (str): Device to use.
        use_mlm (bool, optional): Whether to load MLM-pretraining checkpoints.
            Defaults to False.
    Returns:
        MultiModalTransformer: Multimodal model.
    """
    shared_cfg = model_config['Share']
    multi_cfg = model_config['Multi']

    # One single-modality encoder per data type; each merges its own section
    # with the shared settings.
    rna_model = SingleTransformer("RNA", **model_config['RNA'], **shared_cfg).to(device)
    atac_model = SingleTransformer("ATAC", **model_config['ATAC'], **shared_cfg).to(device)
    flux_model = SingleTransformer("Flux", **model_config['Flux'], **shared_cfg).to(device)

    if use_mlm:
        # strict=False: MLM checkpoints omit the CLS-head parameters.
        rna_model.load_state_dict(torch.load(config.MLM_RNA_CKP), strict=False)
        atac_model.load_state_dict(torch.load(config.MLM_ATAC_CKP), strict=False)
        flux_model.load_state_dict(torch.load(config.MLM_FLUX_CKP), strict=False)
        # print("Loaded MLM pretraining weights.: \n RNA: {}, ATAC: {}, Flux: {}".format(config.MLM_RNA_CKP, config.MLM_ATAC_CKP, config.MLM_FLUX_CKP))

    return MultiModalTransformer(rna_model, atac_model, flux_model, **multi_cfg).to(device)
utils/losses.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from torch import nn
2
+
3
class MLMLoss(nn.Module):
    """
    Masked Language Modeling loss.

    Computes an element-wise loss (MSE for continuous values, cross-entropy
    for discrete tokens) and averages it over the masked positions only.
    """
    def __init__(self, mse_based=False):
        super(MLMLoss, self).__init__()
        self.mse_based = mse_based
        # reduction='none' keeps per-position losses so the mask can be
        # applied before averaging.
        if mse_based:
            self.loss_fn = nn.MSELoss(reduction='none')
        else:
            self.loss_fn = nn.CrossEntropyLoss(reduction='none')

    def forward(self, predictions, targets, mask):
        if self.mse_based:
            # (batch, seq, 1) -> (batch, seq) to match the targets.
            preds = predictions.squeeze(-1)
            tgts = targets
        else:
            # CrossEntropyLoss expects (batch, classes, seq) and integer targets.
            preds = predictions.permute(0, 2, 1)
            tgts = targets.long()

        per_position = self.loss_fn(preds, tgts) * mask.float()
        # Mean over masked positions only.
        return per_position.sum() / mask.sum()
+ return masked_loss.sum() / mask.sum()