# DPACMAN/configs/data_task/split/remap.yaml
# Residue captured from the repository file viewer (not part of the config):
#   svincoff's picture
#   added dropout and overfit prevention
#   9da03b7
#   raw
#   history blame contribute delete
#   785 Bytes
# Split configuration named "remap".
# NOTE(review): the meaning of each key is inferred from its name only; confirm
# against the code that consumes this config before relying on these comments.
name: remap
type: split

# NOTE(review): presumably an upper bound on protein sequence length — confirm
# whether the bound is inclusive.
max_protein_length: 1998

# Cluster TSV locations, keyed by sequence type. The two entries must be nested
# under this key; left at column 0 they become unrelated top-level keys and
# this key resolves to null.
cluster_output_paths:
  dna: dpacman/data_files/processed/mmseqs/outputs/fimo_hits_only/dna_full/mmseqs_cluster.tsv
  protein: dpacman/data_files/processed/mmseqs/outputs/fimo_hits_only/protein/mmseqs_cluster.tsv

# Input and output locations.
input_data_path: dpacman/data_files/processed/fimo/post_fimo/fimo_hits_only/remap2022_crm_fimo_output_q_processed_seed0.parquet
split_out_dir: dpacman/data_files/processed/splits
dna_map_path: dpacman/data_files/processed/fimo/post_fimo/fimo_hits_only/maps/dna_seqid_to_dna_sequence.json

# Accepted values: protein, dna, or both.
split_by: dna

# NOTE(review): presumably identifiers pinned to the test split; null appears
# to mean "none specified" — confirm.
test_trs: ["trseq23", "trseq26", "trseq17"]
test_dnas: null

augment_rc: true

# The three ratios below sum to 1.0.
test_ratio: 0.10
val_ratio: 0.10
train_ratio: 0.80

require_nonempty: true

# NOTE(review): null presumably defers to the consumer's defaults — confirm.
ratio_tolerance: null
bigM: null

seed: 0