# Split configuration named "remap" (type: split).
name: remap
type: split

# NOTE(review): presumably an upper bound on protein sequence length;
# confirm the unit and whether it is inclusive against the consumer.
max_protein_length: 1998

# MMseqs cluster tables (mmseqs_cluster.tsv), one per sequence kind.
cluster_output_paths:
  dna: dpacman/data_files/processed/mmseqs/outputs/fimo_hits_only/dna_full/mmseqs_cluster.tsv
  protein: dpacman/data_files/processed/mmseqs/outputs/fimo_hits_only/protein/mmseqs_cluster.tsv

# Input table, output directory for the splits, and the DNA id -> sequence map.
input_data_path: dpacman/data_files/processed/fimo/post_fimo/fimo_hits_only/remap2022_crm_fimo_output_q_processed_seed0.parquet
split_out_dir: dpacman/data_files/processed/splits
dna_map_path: dpacman/data_files/processed/fimo/post_fimo/fimo_hits_only/maps/dna_seqid_to_dna_sequence.json

split_by: dna  # protein, dna, or both

# NOTE(review): presumably identifiers forced into the test split; a null
# value leaves the list unset. Confirm against the consumer.
test_trs: ["trseq23", "trseq26", "trseq17"]
test_dnas: null

# NOTE(review): "rc" presumably means reverse complement; confirm.
augment_rc: true

# Split fractions; the three values sum to 1.0.
test_ratio: 0.10
val_ratio: 0.10
train_ratio: 0.80

require_nonempty: true

# Unset here (null); the effective values are decided by the consumer.
ratio_tolerance: null
bigM: null

seed: 0