add gnn/deepseek-1.3b-l3-h2048/train_log.json
Browse files
gnn/deepseek-1.3b-l3-h2048/train_log.json
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"epochs": [
|
| 3 |
+
{
|
| 4 |
+
"epoch": 1,
|
| 5 |
+
"train_loss": 0.14642783589029718,
|
| 6 |
+
"dev": {
|
| 7 |
+
"recall@10": 0.7964624634894119,
|
| 8 |
+
"recall@20": 0.9046263551774205,
|
| 9 |
+
"roc_auc": 0.9018882940290538,
|
| 10 |
+
"n_graphs": 1534
|
| 11 |
+
},
|
| 12 |
+
"elapsed_min": 2.0654218554496766
|
| 13 |
+
},
|
| 14 |
+
{
|
| 15 |
+
"epoch": 2,
|
| 16 |
+
"train_loss": 0.10374713705505355,
|
| 17 |
+
"dev": {
|
| 18 |
+
"recall@10": 0.7974501290932939,
|
| 19 |
+
"recall@20": 0.9117327374266522,
|
| 20 |
+
"roc_auc": 0.908548279582934,
|
| 21 |
+
"n_graphs": 1534
|
| 22 |
+
},
|
| 23 |
+
"elapsed_min": 2.0896940271059674
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 3,
|
| 27 |
+
"train_loss": 0.09884775448906219,
|
| 28 |
+
"dev": {
|
| 29 |
+
"recall@10": 0.8264013095194234,
|
| 30 |
+
"recall@20": 0.9310374906216171,
|
| 31 |
+
"roc_auc": 0.9216109309698016,
|
| 32 |
+
"n_graphs": 1534
|
| 33 |
+
},
|
| 34 |
+
"elapsed_min": 2.097282079855601
|
| 35 |
+
},
|
| 36 |
+
{
|
| 37 |
+
"epoch": 4,
|
| 38 |
+
"train_loss": 0.0956651443899688,
|
| 39 |
+
"dev": {
|
| 40 |
+
"recall@10": 0.8185160212228475,
|
| 41 |
+
"recall@20": 0.9309376373576217,
|
| 42 |
+
"roc_auc": 0.9188639129826706,
|
| 43 |
+
"n_graphs": 1534
|
| 44 |
+
},
|
| 45 |
+
"elapsed_min": 2.0883803764979043
|
| 46 |
+
},
|
| 47 |
+
{
|
| 48 |
+
"epoch": 5,
|
| 49 |
+
"train_loss": 0.09481076456732669,
|
| 50 |
+
"dev": {
|
| 51 |
+
"recall@10": 0.8437255573261049,
|
| 52 |
+
"recall@20": 0.9432467262332256,
|
| 53 |
+
"roc_auc": 0.9279701909290776,
|
| 54 |
+
"n_graphs": 1534
|
| 55 |
+
},
|
| 56 |
+
"elapsed_min": 2.1061467568079633
|
| 57 |
+
}
|
| 58 |
+
],
|
| 59 |
+
"args": {
|
| 60 |
+
"bird_train_chunk": "output/embeddings/bird-train-reranker-deepseek-1.3B/bird_train_samples_graph_embeddings.pkl",
|
| 61 |
+
"bird_dev_pkl": "output/embeddings/bird-dev-reranker-deepseek-1.3B/bird_dev_samples_graph_embeddings.pkl",
|
| 62 |
+
"resume_from": "griffith-bigdata/GRAST-SQL-0.6B-BIRD-Reranker/best-bird-dev-roc-auc-layer-3-hidden-2048.pt",
|
| 63 |
+
"output_dir": "output/gnn_ckpts/deepseek_1.3B_l3_h2048",
|
| 64 |
+
"epochs": 5,
|
| 65 |
+
"lr": 1e-05,
|
| 66 |
+
"batch_size": 32,
|
| 67 |
+
"edge_dropout": 0.0,
|
| 68 |
+
"fk_dropout": 0.0,
|
| 69 |
+
"aug": false,
|
| 70 |
+
"aug_dev": false
|
| 71 |
+
},
|
| 72 |
+
"baseline_dev": {
|
| 73 |
+
"recall@10": 0.2270557850579861,
|
| 74 |
+
"recall@20": 0.3773923374984457,
|
| 75 |
+
"roc_auc": 0.49568533948014903,
|
| 76 |
+
"n_graphs": 1534
|
| 77 |
+
},
|
| 78 |
+
"best_rec10_ckpt": "output/gnn_ckpts/deepseek_1.3B_l3_h2048/best_rec10_epoch05.pt",
|
| 79 |
+
"best_rec10_value": 0.8437255573261049
|
| 80 |
+
}
|