{
  "model": {
    "vocab_size": 50257,
    "max_seq_len": 256,
    "embed_dim": 384,
    "depth": 4,
    "edim": 16,
    "feat_dim": 96,
    "hidden": 384,
    "num_heads": 8,
    "num_blocks": 8,
    "dropout": 0.1,
    "params": 54107168
  },
  "training": {
    "batch_size": 12,
    "seq_len": 256,
    "lr": 0.0003,
    "weight_decay": 0.1,
    "num_epochs": 14,
    "grad_clip": 1.0,
    "ce_weight": 1.0,
    "validity_weight": 0.1
  },
  "data": {
    "train_tokens": 304222,
    "val_tokens": 33803,
    "vocab_size": 50257
  },
  "run_name": "run_1770236129"
}