model / checkpoints /metadata_000000212992.json
eoinf's picture
Upload folder using huggingface_hub
d338dd2 verified
raw
history blame contribute delete
990 Bytes
{"step": 13, "tokens_seen": 212992, "config": {"model_name": "pile_llama_replace_17367_L8_1024", "n_layers": 12, "d_model": 512, "d_mlp": 2048, "d_head": 64, "n_heads": 8, "attn_only": false, "layer_norm_eps": 1e-05, "init_range": 0.02, "n_ctx": 1024, "d_vocab": 32000, "dataset_name": "eoinf/PL_Replace17367_L2_alldataset", "tokenizer_name": "", "seed": 10, "data_seed": 10, "device": "cuda", "use_bfloat16_matmul": false, "batch_size_per_device": 16, "n_devices": 1, "batches_per_step": 1, "max_tokens": 200000000, "lr_hidden": 0.002, "lr_vector": 0.001, "lr_schedule": "constant_with_warmup", "warmup_tokens": 30000000, "weight_decay": 0.05, "grad_norm_clip": 1.0, "train_loss_moving_average_beta": 0.99, "log_interval": 25, "save_checkpoints": true, "checkpoint_interval": 500, "checkpoint_interval_ratio": 1.1, "save_log_checkpoints": true, "use_wandb": true, "batch_size": 16, "tokens_per_step": 16384, "warmup_steps": 1831, "max_steps": 12207}, "train_loss_ewma": 10.481672404723923}