Buckets:
| { | |
| "best_global_step": 200, | |
| "best_metric": 0.0, | |
| "best_model_checkpoint": "cache/tiny/checkpoints/checkpoint-200", | |
| "epoch": 10.309278350515465, | |
| "eval_steps": 200, | |
| "global_step": 2000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.25773195876288657, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.800000000000001e-06, | |
| "loss": 3.6191610717773437, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.5154639175257731, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.9800000000000004e-05, | |
| "loss": 3.6191610717773437, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.7731958762886598, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.98e-05, | |
| "loss": 3.6191610717773437, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 1.0309278350515463, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.9800000000000005e-05, | |
| "loss": 3.607529602050781, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 1.0309278350515463, | |
| "eval_loss": 3.965736150741577, | |
| "eval_runtime": 1.0875, | |
| "eval_samples_per_second": 941.638, | |
| "eval_steps_per_second": 0.92, | |
| "eval_teacher_cosine_loss": 1.0, | |
| "eval_teacher_cosine_mean": 0.0, | |
| "eval_teacher_cosine_p10": 0.0, | |
| "eval_teacher_cosine_p50": 0.0, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 1.2886597938144329, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.9800000000000004e-05, | |
| "loss": 3.6191610717773437, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 1.5463917525773194, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5.9800000000000003e-05, | |
| "loss": 3.6191610717773437, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 1.8041237113402062, | |
| "grad_norm": 0.0, | |
| "learning_rate": 6.98e-05, | |
| "loss": 3.6191610717773437, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 2.0618556701030926, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.98e-05, | |
| "loss": 3.607529602050781, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 2.0618556701030926, | |
| "eval_loss": 3.965736150741577, | |
| "eval_runtime": 0.7607, | |
| "eval_samples_per_second": 1346.156, | |
| "eval_steps_per_second": 1.315, | |
| "eval_teacher_cosine_loss": 1.0, | |
| "eval_teacher_cosine_mean": 0.0, | |
| "eval_teacher_cosine_p10": 0.0, | |
| "eval_teacher_cosine_p50": 0.0, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 2.319587628865979, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.98e-05, | |
| "loss": 3.6191610717773437, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 2.5773195876288657, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.98e-05, | |
| "loss": 3.6191610717773437, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 2.8350515463917527, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.731947483588622e-05, | |
| "loss": 3.6191610717773437, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 3.0927835051546393, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.458424507658644e-05, | |
| "loss": 3.607529602050781, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 3.0927835051546393, | |
| "eval_loss": 3.965736150741577, | |
| "eval_runtime": 0.7515, | |
| "eval_samples_per_second": 1362.605, | |
| "eval_steps_per_second": 1.331, | |
| "eval_teacher_cosine_loss": 1.0, | |
| "eval_teacher_cosine_mean": 0.0, | |
| "eval_teacher_cosine_p10": 0.0, | |
| "eval_teacher_cosine_p50": 0.0, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 3.350515463917526, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.184901531728666e-05, | |
| "loss": 3.6191610717773437, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 3.6082474226804124, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.911378555798688e-05, | |
| "loss": 3.6191610717773437, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 3.865979381443299, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.63785557986871e-05, | |
| "loss": 3.6191610717773437, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 4.123711340206185, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.364332603938731e-05, | |
| "loss": 3.607529602050781, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 4.123711340206185, | |
| "eval_loss": 3.965736150741577, | |
| "eval_runtime": 0.7515, | |
| "eval_samples_per_second": 1362.612, | |
| "eval_steps_per_second": 1.331, | |
| "eval_teacher_cosine_loss": 1.0, | |
| "eval_teacher_cosine_mean": 0.0, | |
| "eval_teacher_cosine_p10": 0.0, | |
| "eval_teacher_cosine_p50": 0.0, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 4.381443298969073, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.090809628008753e-05, | |
| "loss": 3.6191610717773437, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 4.639175257731958, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.817286652078775e-05, | |
| "loss": 3.6191610717773437, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 4.896907216494846, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.543763676148797e-05, | |
| "loss": 3.6191610717773437, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 5.154639175257732, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.270240700218819e-05, | |
| "loss": 3.607529602050781, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 5.154639175257732, | |
| "eval_loss": 3.965736150741577, | |
| "eval_runtime": 0.7666, | |
| "eval_samples_per_second": 1335.826, | |
| "eval_steps_per_second": 1.305, | |
| "eval_teacher_cosine_loss": 1.0, | |
| "eval_teacher_cosine_mean": 0.0, | |
| "eval_teacher_cosine_p10": 0.0, | |
| "eval_teacher_cosine_p50": 0.0, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 5.412371134020619, | |
| "grad_norm": 0.0, | |
| "learning_rate": 6.99671772428884e-05, | |
| "loss": 3.6191610717773437, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 5.670103092783505, | |
| "grad_norm": 0.0, | |
| "learning_rate": 6.723194748358862e-05, | |
| "loss": 3.6191610717773437, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 5.927835051546392, | |
| "grad_norm": 0.0, | |
| "learning_rate": 6.449671772428884e-05, | |
| "loss": 3.6191610717773437, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 6.185567010309279, | |
| "grad_norm": 0.0, | |
| "learning_rate": 6.176148796498906e-05, | |
| "loss": 3.607529602050781, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 6.185567010309279, | |
| "eval_loss": 3.965736150741577, | |
| "eval_runtime": 0.7699, | |
| "eval_samples_per_second": 1330.067, | |
| "eval_steps_per_second": 1.299, | |
| "eval_teacher_cosine_loss": 1.0, | |
| "eval_teacher_cosine_mean": 0.0, | |
| "eval_teacher_cosine_p10": 0.0, | |
| "eval_teacher_cosine_p50": 0.0, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 6.443298969072165, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5.902625820568928e-05, | |
| "loss": 3.6191610717773437, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 6.701030927835052, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5.6291028446389504e-05, | |
| "loss": 3.6191610717773437, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 6.958762886597938, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5.355579868708972e-05, | |
| "loss": 3.6191610717773437, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 7.216494845360825, | |
| "grad_norm": 0.0, | |
| "learning_rate": 5.0820568927789934e-05, | |
| "loss": 3.607529602050781, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 7.216494845360825, | |
| "eval_loss": 3.965736150741577, | |
| "eval_runtime": 0.759, | |
| "eval_samples_per_second": 1349.227, | |
| "eval_steps_per_second": 1.318, | |
| "eval_teacher_cosine_loss": 1.0, | |
| "eval_teacher_cosine_mean": 0.0, | |
| "eval_teacher_cosine_p10": 0.0, | |
| "eval_teacher_cosine_p50": 0.0, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 7.474226804123711, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.808533916849016e-05, | |
| "loss": 3.6191610717773437, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 7.731958762886598, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.535010940919037e-05, | |
| "loss": 3.6191610717773437, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 7.989690721649485, | |
| "grad_norm": 0.0, | |
| "learning_rate": 4.2614879649890596e-05, | |
| "loss": 3.6191610717773437, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 8.24742268041237, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.987964989059081e-05, | |
| "loss": 3.607529602050781, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 8.24742268041237, | |
| "eval_loss": 3.965736150741577, | |
| "eval_runtime": 0.7563, | |
| "eval_samples_per_second": 1353.992, | |
| "eval_steps_per_second": 1.322, | |
| "eval_teacher_cosine_loss": 1.0, | |
| "eval_teacher_cosine_mean": 0.0, | |
| "eval_teacher_cosine_p10": 0.0, | |
| "eval_teacher_cosine_p50": 0.0, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 8.505154639175258, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.714442013129103e-05, | |
| "loss": 3.6191610717773437, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 8.762886597938145, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.440919037199125e-05, | |
| "loss": 3.6191610717773437, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 9.02061855670103, | |
| "grad_norm": 0.0, | |
| "learning_rate": 3.167396061269147e-05, | |
| "loss": 3.607529602050781, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 9.278350515463918, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.8938730853391688e-05, | |
| "loss": 3.6191610717773437, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 9.278350515463918, | |
| "eval_loss": 3.965736150741577, | |
| "eval_runtime": 0.755, | |
| "eval_samples_per_second": 1356.302, | |
| "eval_steps_per_second": 1.325, | |
| "eval_teacher_cosine_loss": 1.0, | |
| "eval_teacher_cosine_mean": 0.0, | |
| "eval_teacher_cosine_p10": 0.0, | |
| "eval_teacher_cosine_p50": 0.0, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 9.536082474226804, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.6203501094091903e-05, | |
| "loss": 3.6191610717773437, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 9.793814432989691, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.3468271334792125e-05, | |
| "loss": 3.6191610717773437, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 10.051546391752577, | |
| "grad_norm": 0.0, | |
| "learning_rate": 2.0733041575492343e-05, | |
| "loss": 3.607529602050781, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 10.309278350515465, | |
| "grad_norm": 0.0, | |
| "learning_rate": 1.799781181619256e-05, | |
| "loss": 3.6191610717773437, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 10.309278350515465, | |
| "eval_loss": 3.965736150741577, | |
| "eval_runtime": 0.7731, | |
| "eval_samples_per_second": 1324.472, | |
| "eval_steps_per_second": 1.293, | |
| "eval_teacher_cosine_loss": 1.0, | |
| "eval_teacher_cosine_mean": 0.0, | |
| "eval_teacher_cosine_p10": 0.0, | |
| "eval_teacher_cosine_p50": 0.0, | |
| "step": 2000 | |
| } | |
| ], | |
| "logging_steps": 50, | |
| "max_steps": 2328, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 12, | |
| "save_steps": 200, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 512, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
Xet Storage Details
- Size:
- 11.2 kB
- Xet hash:
- 4c00e302fb92795c1f70b52cc05712eafc588452ab48283b67bbd891469f4820
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.