svincoff committed on
Commit
f936712
·
1 Parent(s): a887ffc
configs/data_task/fimo/pre_fimo.yaml CHANGED
@@ -2,5 +2,7 @@ name: pre_fimo
2
  type: fimo
3
 
4
  input_csv: dpacman/data_files/processed/remap/remap2022_crm_macs2_hg38_v1_0_clean.tsv
5
- output_csv: dpacman/data_files/processed/fimo/remap2022_crm_fimo_input.tsv
6
- window_total: 500
 
 
 
2
  type: fimo
3
 
4
  input_csv: dpacman/data_files/processed/remap/remap2022_crm_macs2_hg38_v1_0_clean.tsv
5
+ output_csv: dpacman/data_files/processed/fimo/remap2022_crm_fimo_input.csv
6
+ window_total: 500
7
+
8
+ save_example_files: true
configs/data_task/fimo/run_fimo.yaml CHANGED
@@ -1,6 +1,8 @@
1
  name: post_fimo
2
  type: fimo
3
 
 
 
4
  paths:
5
  input_csv: dpacman/data_files/processed/fimo/remap2022_crm_fimo_input.tsv
6
  output_csv: dpacman/data_files/processed/fimo/remap2022_crm_fimo_output.csv
 
1
  name: post_fimo
2
  type: fimo
3
 
4
+ debug: false
5
+
6
  paths:
7
  input_csv: dpacman/data_files/processed/fimo/remap2022_crm_fimo_input.tsv
8
  output_csv: dpacman/data_files/processed/fimo/remap2022_crm_fimo_output.csv
dpacman/data_tasks/fimo/pre_fimo.py CHANGED
@@ -10,7 +10,6 @@ from pathlib import Path
10
  root = rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True)
11
  logger = logging.getLogger(__name__)
12
 
13
-
14
  def main(cfg: DictConfig):
15
  # 1) load
16
  input_path = Path(root) / cfg.data_task.input_csv
@@ -48,7 +47,7 @@ def main(cfg: DictConfig):
48
  "chromEnd", # original ChIPEnd
49
  "contextEnd",
50
  "score", # original score column
51
- "TF",
52
  ]
53
  ].rename(
54
  columns={
@@ -63,9 +62,26 @@ def main(cfg: DictConfig):
63
  output_path = Path(root) / cfg.data_task.output_csv
64
  os.makedirs(output_path.parent, exist_ok=True)
65
 
66
- # 8) write tsv
67
- out.to_csv(output_path, sep="\t", index=False)
68
- print(f"Wrote {len(out)} rows to {output_path}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
 
70
 
71
  if __name__ == "__main__":
 
10
  root = rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True)
11
  logger = logging.getLogger(__name__)
12
 
 
13
  def main(cfg: DictConfig):
14
  # 1) load
15
  input_path = Path(root) / cfg.data_task.input_csv
 
47
  "chromEnd", # original ChIPEnd
48
  "contextEnd",
49
  "score", # original score column
50
+ "TR",
51
  ]
52
  ].rename(
53
  columns={
 
62
  output_path = Path(root) / cfg.data_task.output_csv
63
  os.makedirs(output_path.parent, exist_ok=True)
64
 
65
+ # 8) write csv
66
+ out.to_csv(output_path, index=False)
67
+ logger.info(f"Wrote {len(out)} rows to {output_path}")
68
+
69
+ # 9) write example csv if necessary
70
+ if cfg.data_task.save_example_files:
71
+ example_dir = output_path.parent / "examples"
72
+ os.makedirs(example_dir, exist_ok=True)
73
+ output_csv_name = cfg.data_task.output_csv.split("/")[-1]
74
+ example_savepath = os.path.join(
75
+ example_dir, "example500_" + output_csv_name
76
+ )
77
+
78
+ if not (os.path.exists(example_savepath)):
79
+ out.sample(n=500, random_state=42).reset_index(drop=True).to_csv(
80
+ example_savepath, sep="\t", index=False
81
+ )
82
+ logger.info(
83
+ f"Saved example FIMO input file with 500 rows to: {example_savepath}"
84
+ )
85
 
86
 
87
  if __name__ == "__main__":
environment.yaml CHANGED
@@ -25,7 +25,7 @@ dependencies:
25
  - dask[complete]
26
  - pip>=23
27
  - pip:
28
- - rootutils
29
  - hydra-core==1.3.2 # Hydra for config management
30
  - hydra-colorlog==1.2.0 # Allow colorful logging in Hydra
31
  - omegaconf==2.3.0 # Required by hydra-core
@@ -34,5 +34,6 @@ dependencies:
34
  - pymex==0.9.31
35
  - gitpython==3.1.44
36
  - black==25.1.0 # code formatter
 
37
  - matplotlib==3.10.3
38
  - -e .
 
25
  - dask[complete]
26
  - pip>=23
27
  - pip:
28
+ - rootutils==1.0.7
29
  - hydra-core==1.3.2 # Hydra for config management
30
  - hydra-colorlog==1.2.0 # Allow colorful logging in Hydra
31
  - omegaconf==2.3.0 # Required by hydra-core
 
34
  - pymex==0.9.31
35
  - gitpython==3.1.44
36
  - black==25.1.0 # code formatter
37
+ - tqdm==4.67.1
38
  - matplotlib==3.10.3
39
  - -e .