File size: 785 Bytes
80b6a2c
 
 
 
 
 
 
 
 
 
 
 
29899b4
 
9da03b7
 
 
29899b4
80b6a2c
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
# Split configuration named "remap".
# NOTE(review): the per-key notes below are inferred from key names and path
# components only; confirm them against the code that consumes this file.
name: remap
type: split

# Presumably an upper bound on protein sequence length -- TODO confirm
# whether the bound is inclusive and what happens to longer sequences.
max_protein_length: 1998

# Per-modality cluster tables (TSV); the paths point at mmseqs outputs.
cluster_output_paths:
  dna: dpacman/data_files/processed/mmseqs/outputs/fimo_hits_only/dna_full/mmseqs_cluster.tsv
  protein: dpacman/data_files/processed/mmseqs/outputs/fimo_hits_only/protein/mmseqs_cluster.tsv

# Input table (parquet) and the directory for split outputs.
input_data_path: dpacman/data_files/processed/fimo/post_fimo/fimo_hits_only/remap2022_crm_fimo_output_q_processed_seed0.parquet
split_out_dir: dpacman/data_files/processed/splits

# JSON map; the filename suggests DNA sequence id -> DNA sequence.
dna_map_path: dpacman/data_files/processed/fimo/post_fimo/fimo_hits_only/maps/dna_seqid_to_dna_sequence.json

# Accepted values: protein, dna, or both.
split_by: dna
# Presumably ids forced into the test split -- verify against the consumer.
test_trs:
  - 'trseq23'
  - 'trseq26'
  - 'trseq17'
test_dnas: null
# Presumably toggles reverse-complement augmentation -- TODO confirm.
augment_rc: true

# Split fractions; the three values sum to 1.0.
test_ratio: 0.10
val_ratio: 0.10
train_ratio: 0.80

# NOTE(review): null here presumably means "use the consumer's default" for
# ratio_tolerance and bigM -- confirm.
require_nonempty: true
ratio_tolerance: null
bigM: null

# Presumably the random seed for the split -- confirm.
seed: 0