| { |
| "best_metric": 0.1180819422006607, |
| "best_model_checkpoint": "CXR-Classifier/checkpoint-1224", |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 1224, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.05, |
| "grad_norm": 3.592426061630249, |
| "learning_rate": 8.130081300813009e-06, |
| "loss": 0.5972, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 2.819566249847412, |
| "learning_rate": 1.6260162601626018e-05, |
| "loss": 0.4976, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 5.789632320404053, |
| "learning_rate": 2.4390243902439026e-05, |
| "loss": 0.3321, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 5.508607864379883, |
| "learning_rate": 3.2520325203252037e-05, |
| "loss": 0.415, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 7.562315464019775, |
| "learning_rate": 4.065040650406504e-05, |
| "loss": 0.2412, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 4.414723873138428, |
| "learning_rate": 4.878048780487805e-05, |
| "loss": 0.3456, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 2.0423896312713623, |
| "learning_rate": 4.922797456857402e-05, |
| "loss": 0.2415, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 2.041806221008301, |
| "learning_rate": 4.83197093551317e-05, |
| "loss": 0.3546, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 1.6937503814697266, |
| "learning_rate": 4.741144414168938e-05, |
| "loss": 0.3947, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 6.492763996124268, |
| "learning_rate": 4.650317892824705e-05, |
| "loss": 0.3063, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 1.9708950519561768, |
| "learning_rate": 4.559491371480473e-05, |
| "loss": 0.3115, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 12.533012390136719, |
| "learning_rate": 4.46866485013624e-05, |
| "loss": 0.5087, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 8.02456283569336, |
| "learning_rate": 4.377838328792008e-05, |
| "loss": 0.2745, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 1.0878229141235352, |
| "learning_rate": 4.287011807447775e-05, |
| "loss": 0.1905, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 7.465769290924072, |
| "learning_rate": 4.196185286103542e-05, |
| "loss": 0.2509, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 15.646003723144531, |
| "learning_rate": 4.10535876475931e-05, |
| "loss": 0.4353, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 3.2481565475463867, |
| "learning_rate": 4.014532243415077e-05, |
| "loss": 0.3478, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 2.395519733428955, |
| "learning_rate": 3.923705722070845e-05, |
| "loss": 0.2199, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 8.089118003845215, |
| "learning_rate": 3.832879200726612e-05, |
| "loss": 0.2715, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 8.150867462158203, |
| "learning_rate": 3.74205267938238e-05, |
| "loss": 0.2074, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.9387254901960784, |
| "eval_auc": 0.9766835240883684, |
| "eval_f1": 0.957841483979764, |
| "eval_loss": 0.2350389063358307, |
| "eval_precision": 0.961082910321489, |
| "eval_recall": 0.9546218487394958, |
| "eval_runtime": 246.6561, |
| "eval_samples_per_second": 3.308, |
| "eval_steps_per_second": 0.207, |
| "step": 408 |
| }, |
| { |
| "epoch": 1.03, |
| "grad_norm": 6.791078090667725, |
| "learning_rate": 3.651226158038147e-05, |
| "loss": 0.1235, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.08, |
| "grad_norm": 5.592333793640137, |
| "learning_rate": 3.560399636693915e-05, |
| "loss": 0.4199, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.13, |
| "grad_norm": 0.2713923752307892, |
| "learning_rate": 3.469573115349682e-05, |
| "loss": 0.3119, |
| "step": 460 |
| }, |
| { |
| "epoch": 1.18, |
| "grad_norm": 5.907072067260742, |
| "learning_rate": 3.37874659400545e-05, |
| "loss": 0.2118, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.23, |
| "grad_norm": 0.9097113013267517, |
| "learning_rate": 3.287920072661217e-05, |
| "loss": 0.2174, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.27, |
| "grad_norm": 6.9212141036987305, |
| "learning_rate": 3.197093551316985e-05, |
| "loss": 0.2448, |
| "step": 520 |
| }, |
| { |
| "epoch": 1.32, |
| "grad_norm": 6.113616466522217, |
| "learning_rate": 3.106267029972752e-05, |
| "loss": 0.1619, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.37, |
| "grad_norm": 0.9741531014442444, |
| "learning_rate": 3.0154405086285197e-05, |
| "loss": 0.3296, |
| "step": 560 |
| }, |
| { |
| "epoch": 1.42, |
| "grad_norm": 1.604313611984253, |
| "learning_rate": 2.924613987284287e-05, |
| "loss": 0.1598, |
| "step": 580 |
| }, |
| { |
| "epoch": 1.47, |
| "grad_norm": 5.160298824310303, |
| "learning_rate": 2.8337874659400547e-05, |
| "loss": 0.2605, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.52, |
| "grad_norm": 7.961933135986328, |
| "learning_rate": 2.7429609445958222e-05, |
| "loss": 0.295, |
| "step": 620 |
| }, |
| { |
| "epoch": 1.57, |
| "grad_norm": 3.545825719833374, |
| "learning_rate": 2.6521344232515894e-05, |
| "loss": 0.2613, |
| "step": 640 |
| }, |
| { |
| "epoch": 1.62, |
| "grad_norm": 0.7656643390655518, |
| "learning_rate": 2.5613079019073572e-05, |
| "loss": 0.1684, |
| "step": 660 |
| }, |
| { |
| "epoch": 1.67, |
| "grad_norm": 14.269344329833984, |
| "learning_rate": 2.4704813805631247e-05, |
| "loss": 0.3285, |
| "step": 680 |
| }, |
| { |
| "epoch": 1.72, |
| "grad_norm": 0.21142134070396423, |
| "learning_rate": 2.379654859218892e-05, |
| "loss": 0.2071, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.76, |
| "grad_norm": 1.0282666683197021, |
| "learning_rate": 2.2888283378746594e-05, |
| "loss": 0.2701, |
| "step": 720 |
| }, |
| { |
| "epoch": 1.81, |
| "grad_norm": 12.365777969360352, |
| "learning_rate": 2.198001816530427e-05, |
| "loss": 0.1753, |
| "step": 740 |
| }, |
| { |
| "epoch": 1.86, |
| "grad_norm": 6.909509181976318, |
| "learning_rate": 2.1071752951861944e-05, |
| "loss": 0.185, |
| "step": 760 |
| }, |
| { |
| "epoch": 1.91, |
| "grad_norm": 10.059576034545898, |
| "learning_rate": 2.016348773841962e-05, |
| "loss": 0.1403, |
| "step": 780 |
| }, |
| { |
| "epoch": 1.96, |
| "grad_norm": 13.194554328918457, |
| "learning_rate": 1.9255222524977297e-05, |
| "loss": 0.177, |
| "step": 800 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.9522058823529411, |
| "eval_auc": 0.9864329442184113, |
| "eval_f1": 0.967418546365915, |
| "eval_loss": 0.15405645966529846, |
| "eval_precision": 0.9617940199335548, |
| "eval_recall": 0.973109243697479, |
| "eval_runtime": 257.0506, |
| "eval_samples_per_second": 3.174, |
| "eval_steps_per_second": 0.198, |
| "step": 816 |
| }, |
| { |
| "epoch": 2.01, |
| "grad_norm": 0.45505988597869873, |
| "learning_rate": 1.834695731153497e-05, |
| "loss": 0.1334, |
| "step": 820 |
| }, |
| { |
| "epoch": 2.06, |
| "grad_norm": 0.5608593821525574, |
| "learning_rate": 1.7438692098092644e-05, |
| "loss": 0.1801, |
| "step": 840 |
| }, |
| { |
| "epoch": 2.11, |
| "grad_norm": 1.9215396642684937, |
| "learning_rate": 1.653042688465032e-05, |
| "loss": 0.1397, |
| "step": 860 |
| }, |
| { |
| "epoch": 2.16, |
| "grad_norm": 0.03459596261382103, |
| "learning_rate": 1.5622161671207994e-05, |
| "loss": 0.0797, |
| "step": 880 |
| }, |
| { |
| "epoch": 2.21, |
| "grad_norm": 4.931589603424072, |
| "learning_rate": 1.4713896457765669e-05, |
| "loss": 0.1547, |
| "step": 900 |
| }, |
| { |
| "epoch": 2.25, |
| "grad_norm": 12.403867721557617, |
| "learning_rate": 1.3805631244323344e-05, |
| "loss": 0.1008, |
| "step": 920 |
| }, |
| { |
| "epoch": 2.3, |
| "grad_norm": 6.834578514099121, |
| "learning_rate": 1.2897366030881017e-05, |
| "loss": 0.3086, |
| "step": 940 |
| }, |
| { |
| "epoch": 2.35, |
| "grad_norm": 0.12356822937726974, |
| "learning_rate": 1.1989100817438692e-05, |
| "loss": 0.1367, |
| "step": 960 |
| }, |
| { |
| "epoch": 2.4, |
| "grad_norm": 0.23836758732795715, |
| "learning_rate": 1.1080835603996367e-05, |
| "loss": 0.1204, |
| "step": 980 |
| }, |
| { |
| "epoch": 2.45, |
| "grad_norm": 0.645460307598114, |
| "learning_rate": 1.0172570390554042e-05, |
| "loss": 0.2857, |
| "step": 1000 |
| }, |
| { |
| "epoch": 2.5, |
| "grad_norm": 6.155028820037842, |
| "learning_rate": 9.264305177111717e-06, |
| "loss": 0.1514, |
| "step": 1020 |
| }, |
| { |
| "epoch": 2.55, |
| "grad_norm": 6.625197410583496, |
| "learning_rate": 8.356039963669392e-06, |
| "loss": 0.1973, |
| "step": 1040 |
| }, |
| { |
| "epoch": 2.6, |
| "grad_norm": 0.4476400911808014, |
| "learning_rate": 7.447774750227067e-06, |
| "loss": 0.1153, |
| "step": 1060 |
| }, |
| { |
| "epoch": 2.65, |
| "grad_norm": 11.432110786437988, |
| "learning_rate": 6.539509536784741e-06, |
| "loss": 0.1943, |
| "step": 1080 |
| }, |
| { |
| "epoch": 2.7, |
| "grad_norm": 6.038093090057373, |
| "learning_rate": 5.631244323342416e-06, |
| "loss": 0.0998, |
| "step": 1100 |
| }, |
| { |
| "epoch": 2.75, |
| "grad_norm": 0.24591827392578125, |
| "learning_rate": 4.722979109900091e-06, |
| "loss": 0.1767, |
| "step": 1120 |
| }, |
| { |
| "epoch": 2.79, |
| "grad_norm": 3.9476640224456787, |
| "learning_rate": 3.814713896457766e-06, |
| "loss": 0.1798, |
| "step": 1140 |
| }, |
| { |
| "epoch": 2.84, |
| "grad_norm": 9.382974624633789, |
| "learning_rate": 2.9064486830154405e-06, |
| "loss": 0.1707, |
| "step": 1160 |
| }, |
| { |
| "epoch": 2.89, |
| "grad_norm": 0.10719335079193115, |
| "learning_rate": 1.9981834695731155e-06, |
| "loss": 0.2662, |
| "step": 1180 |
| }, |
| { |
| "epoch": 2.94, |
| "grad_norm": 10.07032299041748, |
| "learning_rate": 1.0899182561307902e-06, |
| "loss": 0.218, |
| "step": 1200 |
| }, |
| { |
| "epoch": 2.99, |
| "grad_norm": 17.199472427368164, |
| "learning_rate": 1.8165304268846503e-07, |
| "loss": 0.1692, |
| "step": 1220 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_accuracy": 0.9644607843137255, |
| "eval_auc": 0.9916270580630442, |
| "eval_f1": 0.9755686604886269, |
| "eval_loss": 0.1180819422006607, |
| "eval_precision": 0.9780405405405406, |
| "eval_recall": 0.973109243697479, |
| "eval_runtime": 252.4161, |
| "eval_samples_per_second": 3.233, |
| "eval_steps_per_second": 0.202, |
| "step": 1224 |
| } |
| ], |
| "logging_steps": 20, |
| "max_steps": 1224, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "total_flos": 7.581041343995535e+17, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|