"""Hydra entry point that dispatches a preprocessing data task by type and name."""

import rootutils
import hydra
from omegaconf import DictConfig
import logging

# NOTE: this runs before the `dpacman` imports below, as in the original file.
# `pythonpath=True` presumably makes the project root importable — confirm
# against the rootutils documentation before reordering these statements.
root = rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True)

logger = logging.getLogger(__name__)

# import your processing entry points here
from dpacman.data_tasks.download.genome import main as download_genome_main
from dpacman.data_tasks.download.remap import main as download_remap_main
from dpacman.data_tasks.clean.remap import main as clean_remap_main
from dpacman.data_tasks.fimo.pre_fimo import main as pre_fimo_main
from dpacman.data_tasks.fimo.run_fimo import main as run_fimo_main
from dpacman.data_tasks.fimo.post_fimo import main as post_fimo_main
from dpacman.data_tasks.cluster.remap import main as cluster_remap_main
from dpacman.data_tasks.split.remap import main as split_remap_main
from dpacman.data_tasks.embeddings.dna import main as embed_dna_main
from dpacman.data_tasks.embeddings.protein import main as embed_protein_main

# Dispatch table: task type -> task name -> pipeline entry point.
# Task names are matched after lower-casing; task types are matched as given.
_PIPELINES = {
    "download": {
        "genome": download_genome_main,
        "remap": download_remap_main,
    },
    "clean": {
        "remap": clean_remap_main,
    },
    "fimo": {
        "pre_fimo": pre_fimo_main,
        "run_fimo": run_fimo_main,
        "post_fimo": post_fimo_main,
    },
    "cluster": {
        "remap": cluster_remap_main,
    },
    "split": {
        "remap": split_remap_main,
    },
    "embeddings": {
        "dna": embed_dna_main,
        "protein": embed_protein_main,
    },
}


@hydra.main(
    config_path=str(root / "configs"), config_name="preprocess", version_base="1.3"
)
def main(cfg: DictConfig):
    """Run the data task selected by ``cfg.data_task``.

    Reads ``cfg.data_task.type`` and the lower-cased ``cfg.data_task.name``,
    logs the selection, and calls the matching pipeline entry point with the
    full ``cfg``.

    Args:
        cfg: Composed Hydra configuration; must provide ``data_task.type`` and
            ``data_task.name``.

    Raises:
        ValueError: If the task type is unknown, or if no pipeline is
            registered for the task name under that type.
    """
    task_type = cfg.data_task.type
    task_name = cfg.data_task.name.lower()

    logger.info(f"Running {task_type} task: {task_name}")

    pipelines_for_type = _PIPELINES.get(task_type)
    if pipelines_for_type is None:
        raise ValueError(f"Unknown task type: {task_type}")

    pipeline = pipelines_for_type.get(task_name)
    if pipeline is None:
        # Name the actual task type in the message; the previous if/elif chain
        # reported "clean" for the fimo, cluster, split and embeddings branches.
        raise ValueError(f"No {task_type} pipeline defined for: {task_name}")

    pipeline(cfg)


if __name__ == "__main__":
    main()