bart-autoencoder-c4 / args.json
Bary's picture
upload model
c16cbc9
raw
history blame contribute delete
774 Bytes
{
"dataset_name": "c4",
"max_seq_len": 256,
"enc_dec_model": "facebook/bart-base",
"train_batch_size": 32,
"eval_batch_size": 16,
"num_encoder_latents": 128,
"num_decoder_latents": 128,
"dim_ae": 768,
"num_layers": 3,
"l2_normalize_latents": true,
"output_dir": "saved_latent_models/c4/2024-09-22_22-29-54",
"save_dir": "saved_latent_models",
"learning_rate": 0.0001,
"num_train_steps": 50000,
"lr_schedule": "linear",
"lr_warmup_steps": 1000,
"optimizer": "adamw",
"adam_beta1": 0.9,
"adam_beta2": 0.999,
"adam_weight_decay": 0.01,
"eval_every": 1000,
"mixed_precision": "no",
"wandb_name": "bart-c4-l2norm-128-768",
"lm_mode": "freeze",
"eval": false,
"resume_training": false,
"resume_dir": null,
"num_devices": 1
}