# Configuration for the "remap" data-split step.
# NOTE(review): key meanings below are inferred from key names and values;
# confirm against the code that consumes this file.

name: remap
type: split

# presumably an upper bound on protein sequence length — TODO confirm units
# and whether the bound is inclusive.
max_protein_length: 1998

# MMseqs cluster assignment tables, one per sequence type.
cluster_output_paths:
  dna: dpacman/data_files/processed/mmseqs/outputs/fimo_hits_only/dna_full/mmseqs_cluster.tsv
  protein: dpacman/data_files/processed/mmseqs/outputs/fimo_hits_only/protein/mmseqs_cluster.tsv

# Input table to be split (the file name carries the suffix "seed0").
input_data_path: dpacman/data_files/processed/fimo/post_fimo/fimo_hits_only/remap2022_crm_fimo_output_q_processed_seed0.parquet

# Directory that receives the generated splits.
split_out_dir: dpacman/data_files/processed/splits

# JSON map; per the file name, DNA sequence id -> DNA sequence.
dna_map_path: dpacman/data_files/processed/fimo/post_fimo/fimo_hits_only/maps/dna_seqid_to_dna_sequence.json

split_by: dna  # protein, dna, or both

# Identifiers listed explicitly for the test split.
# NOTE(review): presumably forces these ids into the test set — confirm.
test_trs: ["trseq23", "trseq26", "trseq17"]
test_dnas: null

# NOTE(review): "rc" presumably means reverse-complement augmentation —
# confirm.
augment_rc: true

# Split fractions; the three values sum to 1.0.
test_ratio: 0.10
val_ratio: 0.10
train_ratio: 0.80

# NOTE(review): presumably makes an empty split an error — confirm.
require_nonempty: true

# Left unset here; defaults, if any, are defined by the consumer.
ratio_tolerance: null
bigM: null

seed: 0