dzungpham commited on Apr 17

Commit

b00f41c

verified ·

1 Parent(s): caed328

upload best checkpoints 200 with f1 score 0.68

Browse files

Files changed (24) hide show

graphcodebert-robust/checkpoint-200/config.json +3 -2
graphcodebert-robust/checkpoint-200/model.safetensors +1 -1
graphcodebert-robust/checkpoint-200/optimizer.pt +2 -2
graphcodebert-robust/checkpoint-200/rng_state.pth +2 -2
graphcodebert-robust/checkpoint-200/scaler.pt +1 -1
graphcodebert-robust/checkpoint-200/scheduler.pt +1 -1
graphcodebert-robust/checkpoint-200/tokenizer.json +1 -6
graphcodebert-robust/checkpoint-200/trainer_state.json +85 -85
graphcodebert-robust/checkpoint-200/training_args.bin +1 -1
graphcodebert-robust/checkpoint-400/model.safetensors +1 -1
graphcodebert-robust/checkpoint-400/optimizer.pt +1 -1
graphcodebert-robust/checkpoint-400/trainer_state.json +39 -39
graphcodebert-robust/checkpoint-400/training_args.bin +1 -1
graphcodebert-robust/checkpoint-600/model.safetensors +1 -1
graphcodebert-robust/checkpoint-600/optimizer.pt +1 -1
graphcodebert-robust/checkpoint-600/rng_state.pth +1 -1
graphcodebert-robust/checkpoint-600/trainer_state.json +80 -80
graphcodebert-robust/checkpoint-600/training_args.bin +1 -1
graphcodebert-robust/checkpoint-800/model.safetensors +1 -1
graphcodebert-robust/checkpoint-800/optimizer.pt +1 -1
graphcodebert-robust/checkpoint-800/rng_state.pth +1 -1
graphcodebert-robust/checkpoint-800/trainer_state.json +120 -120
graphcodebert-robust/checkpoint-800/training_args.bin +1 -1
graphcodebert-robust/training.log +45 -21

graphcodebert-robust/checkpoint-200/config.json CHANGED Viewed

@@ -2,14 +2,14 @@
   "architectures": [
     "RobertaForSequenceClassification"
   ],
-  "attention_probs_dropout_prob": 0.2,
   "bos_token_id": 0,
   "classifier_dropout": null,
   "dtype": "float32",
   "eos_token_id": 2,
   "gradient_checkpointing": false,
   "hidden_act": "gelu",
-  "hidden_dropout_prob": 0.2,
   "hidden_size": 768,
   "initializer_range": 0.02,
   "intermediate_size": 3072,
@@ -21,6 +21,7 @@
   "output_past": true,
   "pad_token_id": 1,
   "position_embedding_type": "absolute",
   "transformers_version": "4.56.0",
   "type_vocab_size": 1,
   "use_cache": true,

   "architectures": [
     "RobertaForSequenceClassification"
   ],
+  "attention_probs_dropout_prob": 0.1,
   "bos_token_id": 0,
   "classifier_dropout": null,
   "dtype": "float32",
   "eos_token_id": 2,
   "gradient_checkpointing": false,
   "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
   "hidden_size": 768,
   "initializer_range": 0.02,
   "intermediate_size": 3072,
   "output_past": true,
   "pad_token_id": 1,
   "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
   "transformers_version": "4.56.0",
   "type_vocab_size": 1,
   "use_cache": true,

graphcodebert-robust/checkpoint-200/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:984b47ee0cbc9e8aff5459859ab8785583eda66a482745e97fa137aac9d69a20
 size 498612824

 version https://git-lfs.github.com/spec/v1
+oid sha256:34f62f2e2935abbdd0f8d5567e447c234e77e119d414ca9ce31e3a1ce06552e2
 size 498612824

graphcodebert-robust/checkpoint-200/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8cc66376057788af21d1adb5a92f0a63c44af3eccb38c6f45cfafc48c80f02d4
-size 4741923

 version https://git-lfs.github.com/spec/v1
+oid sha256:94ced15c772e225b8afaaa561ce73077f5f491b910b543982886ee79b2be71c0
+size 4741859

graphcodebert-robust/checkpoint-200/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e231312cfb6dd836b89c3a8dd38d52af114294447c5e2294714ea9206abde6af
-size 14581

 version https://git-lfs.github.com/spec/v1
+oid sha256:a69a2dd012809f4c1402b56a463f5f04ca5d8c3ea0ff42d1da133d0f80b1c5b9
+size 14645

graphcodebert-robust/checkpoint-200/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:124625e167eb28acbfc793cfcb3e8a08b32e7fea06501462bc9e420a5e1beb2a
 size 1383

 version https://git-lfs.github.com/spec/v1
+oid sha256:b21c5349d5e7d02de630ebc1cb53ade1d9c6079eeb8594d223bb786011a0428b
 size 1383

graphcodebert-robust/checkpoint-200/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:003a1651b59b96bfbd66a9a12f6e0705e877f877138a8695267f15672bef92e3
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:1620ef2f1785b97a0cabdbea3b6cfd78a32feee0218de95157fc0dbbc14db4ba
 size 1465

graphcodebert-robust/checkpoint-200/tokenizer.json CHANGED Viewed

@@ -1,11 +1,6 @@
 {
   "version": "1.0",
-  "truncation": {
-    "direction": "Right",
-    "max_length": 512,
-    "strategy": "LongestFirst",
-    "stride": 0
-  },
   "padding": null,
   "added_tokens": [
     {

 {
   "version": "1.0",
+  "truncation": null,
   "padding": null,
   "added_tokens": [
     {

graphcodebert-robust/checkpoint-200/trainer_state.json CHANGED Viewed

@@ -2,7 +2,7 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.0128,
   "eval_steps": 1000,
   "global_step": 200,
   "is_hyper_param_search": false,
@@ -10,150 +10,150 @@
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 0.00064,
-      "grad_norm": 1.6144306659698486,
-      "learning_rate": 1.1520000000000002e-08,
-      "loss": 0.729,
       "step": 10
     },
     {
-      "epoch": 0.00128,
-      "grad_norm": 2.0952296257019043,
-      "learning_rate": 2.4320000000000002e-08,
-      "loss": 0.7295,
       "step": 20
     },
     {
-      "epoch": 0.00192,
-      "grad_norm": 1.3587689399719238,
-      "learning_rate": 3.7120000000000004e-08,
-      "loss": 0.73,
       "step": 30
     },
     {
-      "epoch": 0.00256,
-      "grad_norm": 1.2531732320785522,
-      "learning_rate": 4.9920000000000006e-08,
-      "loss": 0.7221,
       "step": 40
     },
     {
-      "epoch": 0.0032,
-      "grad_norm": 1.437932014465332,
-      "learning_rate": 6.272000000000001e-08,
-      "loss": 0.7209,
       "step": 50
     },
     {
-      "epoch": 0.00384,
-      "grad_norm": 1.418426752090454,
-      "learning_rate": 7.552e-08,
-      "loss": 0.729,
       "step": 60
     },
     {
-      "epoch": 0.00448,
-      "grad_norm": 1.9476298093795776,
-      "learning_rate": 8.832e-08,
-      "loss": 0.7242,
       "step": 70
     },
     {
-      "epoch": 0.00512,
-      "grad_norm": 1.7948051691055298,
-      "learning_rate": 1.0112000000000001e-07,
-      "loss": 0.7227,
       "step": 80
     },
     {
-      "epoch": 0.00576,
-      "grad_norm": 1.6534360647201538,
-      "learning_rate": 1.1392e-07,
-      "loss": 0.7234,
       "step": 90
     },
     {
-      "epoch": 0.0064,
-      "grad_norm": 1.0920158624649048,
-      "learning_rate": 1.2672e-07,
-      "loss": 0.7328,
       "step": 100
     },
     {
-      "epoch": 0.00704,
-      "grad_norm": 1.977837085723877,
-      "learning_rate": 1.3952000000000002e-07,
-      "loss": 0.7263,
       "step": 110
     },
     {
-      "epoch": 0.00768,
-      "grad_norm": 1.388983130455017,
-      "learning_rate": 1.5232000000000003e-07,
-      "loss": 0.7286,
       "step": 120
     },
     {
-      "epoch": 0.00832,
-      "grad_norm": 1.2956682443618774,
-      "learning_rate": 1.6512e-07,
-      "loss": 0.7251,
       "step": 130
     },
     {
-      "epoch": 0.00896,
-      "grad_norm": 1.8125052452087402,
-      "learning_rate": 1.7792e-07,
-      "loss": 0.7251,
       "step": 140
     },
     {
-      "epoch": 0.0096,
-      "grad_norm": 1.626846194267273,
-      "learning_rate": 1.9072e-07,
-      "loss": 0.727,
       "step": 150
     },
     {
-      "epoch": 0.01024,
-      "grad_norm": 2.3243086338043213,
-      "learning_rate": 2.0352e-07,
-      "loss": 0.726,
       "step": 160
     },
     {
-      "epoch": 0.01088,
-      "grad_norm": 1.4734737873077393,
-      "learning_rate": 2.1632e-07,
-      "loss": 0.7252,
       "step": 170
     },
     {
-      "epoch": 0.01152,
-      "grad_norm": 2.090498685836792,
-      "learning_rate": 2.2912e-07,
-      "loss": 0.7273,
       "step": 180
     },
     {
-      "epoch": 0.01216,
-      "grad_norm": 1.7563093900680542,
-      "learning_rate": 2.4192000000000004e-07,
-      "loss": 0.719,
       "step": 190
     },
     {
-      "epoch": 0.0128,
-      "grad_norm": 1.449843168258667,
-      "learning_rate": 2.5472000000000005e-07,
-      "loss": 0.7237,
       "step": 200
     }
   ],
   "logging_steps": 10,
-  "max_steps": 156250,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 10,
   "save_steps": 200,
   "stateful_callbacks": {
     "EarlyStoppingCallback": {
@@ -176,8 +176,8 @@
       "attributes": {}
     }
   },
-  "total_flos": 1683910754304000.0,
-  "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null
 }

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.02559836170485089,
   "eval_steps": 1000,
   "global_step": 200,
   "is_hyper_param_search": false,
   "is_world_process_zero": true,
   "log_history": [
     {
+      "epoch": 0.0012799180852425445,
+      "grad_norm": 89788.1796875,
+      "learning_rate": 2.304147465437788e-08,
+      "loss": 0.7088,
       "step": 10
     },
     {
+      "epoch": 0.002559836170485089,
+      "grad_norm": 39479.36328125,
+      "learning_rate": 4.86431131592422e-08,
+      "loss": 0.7087,
       "step": 20
     },
     {
+      "epoch": 0.0038397542557276334,
+      "grad_norm": 82478.765625,
+      "learning_rate": 7.424475166410652e-08,
+      "loss": 0.7074,
       "step": 30
     },
     {
+      "epoch": 0.005119672340970178,
+      "grad_norm": 58003.75390625,
+      "learning_rate": 9.984639016897082e-08,
+      "loss": 0.703,
       "step": 40
     },
     {
+      "epoch": 0.006399590426212722,
+      "grad_norm": 95491.0859375,
+      "learning_rate": 1.2544802867383514e-07,
+      "loss": 0.7073,
       "step": 50
     },
     {
+      "epoch": 0.007679508511455267,
+      "grad_norm": 44903.296875,
+      "learning_rate": 1.5104966717869944e-07,
+      "loss": 0.7061,
       "step": 60
     },
     {
+      "epoch": 0.008959426596697812,
+      "grad_norm": 142410.484375,
+      "learning_rate": 1.7665130568356375e-07,
+      "loss": 0.7082,
       "step": 70
     },
     {
+      "epoch": 0.010239344681940356,
+      "grad_norm": 148763.109375,
+      "learning_rate": 2.0225294418842808e-07,
+      "loss": 0.707,
       "step": 80
     },
     {
+      "epoch": 0.011519262767182901,
+      "grad_norm": 62031.30859375,
+      "learning_rate": 2.2785458269329238e-07,
+      "loss": 0.7036,
       "step": 90
     },
     {
+      "epoch": 0.012799180852425445,
+      "grad_norm": 135708.875,
+      "learning_rate": 2.5345622119815674e-07,
+      "loss": 0.7078,
       "step": 100
     },
     {
+      "epoch": 0.01407909893766799,
+      "grad_norm": 91129.421875,
+      "learning_rate": 2.79057859703021e-07,
+      "loss": 0.7035,
       "step": 110
     },
     {
+      "epoch": 0.015359017022910534,
+      "grad_norm": 39290.72265625,
+      "learning_rate": 3.0465949820788535e-07,
+      "loss": 0.7083,
       "step": 120
     },
     {
+      "epoch": 0.016638935108153077,
+      "grad_norm": 49473.61328125,
+      "learning_rate": 3.302611367127496e-07,
+      "loss": 0.7023,
       "step": 130
     },
     {
+      "epoch": 0.017918853193395624,
+      "grad_norm": 61292.984375,
+      "learning_rate": 3.5586277521761395e-07,
+      "loss": 0.7014,
       "step": 140
     },
     {
+      "epoch": 0.019198771278638168,
+      "grad_norm": 79102.0390625,
+      "learning_rate": 3.814644137224783e-07,
+      "loss": 0.7041,
       "step": 150
     },
     {
+      "epoch": 0.02047868936388071,
+      "grad_norm": 61779.62890625,
+      "learning_rate": 4.0706605222734256e-07,
+      "loss": 0.7039,
       "step": 160
     },
     {
+      "epoch": 0.021758607449123255,
+      "grad_norm": 63492.18359375,
+      "learning_rate": 4.326676907322069e-07,
+      "loss": 0.7035,
       "step": 170
     },
     {
+      "epoch": 0.023038525534365802,
+      "grad_norm": 44190.3203125,
+      "learning_rate": 4.582693292370712e-07,
+      "loss": 0.7019,
       "step": 180
     },
     {
+      "epoch": 0.024318443619608346,
+      "grad_norm": 67509.15625,
+      "learning_rate": 4.838709677419355e-07,
+      "loss": 0.6991,
       "step": 190
     },
     {
+      "epoch": 0.02559836170485089,
+      "grad_norm": 94820.5078125,
+      "learning_rate": 5.094726062467999e-07,
+      "loss": 0.7011,
       "step": 200
     }
   ],
   "logging_steps": 10,
+  "max_steps": 39065,
   "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
   "save_steps": 200,
   "stateful_callbacks": {
     "EarlyStoppingCallback": {
       "attributes": {}
     }
   },
+  "total_flos": 3367821508608000.0,
+  "train_batch_size": 64,
   "trial_name": null,
   "trial_params": null
 }

graphcodebert-robust/checkpoint-200/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9ff659b85d84ec0bae53596bc271ba773db9c463626db0f13fd8e747f433dad4
 size 5841

 version https://git-lfs.github.com/spec/v1
+oid sha256:7ec2974753acccea9af7a8eb9c2abfaaba85cdcf89c926488b103f5662876bb0
 size 5841

graphcodebert-robust/checkpoint-400/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7a01766ea37053c4e1086db23a592ccd390b6f66d530273ae2dae69fbf9aa39e
 size 498612824

 version https://git-lfs.github.com/spec/v1
+oid sha256:92bce3c4e38ffa8155e9197c360622fa05c939bec62afcbfa3bf8fd778f88527
 size 498612824

graphcodebert-robust/checkpoint-400/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3993e14f8e5395da15ce3350b7a6c24a8b0c21921fd8cce7a29d5175f071b2fc
 size 4741923

 version https://git-lfs.github.com/spec/v1
+oid sha256:a83a52f1a15705e175493b2425539a92f6edb4c30253eadc01cb8a3f3c98b492
 size 4741923

graphcodebert-robust/checkpoint-400/trainer_state.json CHANGED Viewed

@@ -151,142 +151,142 @@
     },
     {
       "epoch": 0.01344,
-      "grad_norm": 141396.296875,
       "learning_rate": 5.350742447516642e-07,
-      "loss": 0.7217,
       "step": 210
     },
     {
       "epoch": 0.01408,
-      "grad_norm": 102339.1640625,
       "learning_rate": 5.606758832565284e-07,
-      "loss": 0.7215,
       "step": 220
     },
     {
       "epoch": 0.01472,
-      "grad_norm": 134052.9375,
       "learning_rate": 5.862775217613928e-07,
-      "loss": 0.7115,
       "step": 230
     },
     {
       "epoch": 0.01536,
-      "grad_norm": 87181.984375,
       "learning_rate": 6.118791602662571e-07,
-      "loss": 0.7241,
       "step": 240
     },
     {
       "epoch": 0.016,
-      "grad_norm": 100231.328125,
       "learning_rate": 6.374807987711214e-07,
-      "loss": 0.71,
       "step": 250
     },
     {
       "epoch": 0.01664,
-      "grad_norm": 136721.484375,
       "learning_rate": 6.630824372759858e-07,
-      "loss": 0.7188,
       "step": 260
     },
     {
       "epoch": 0.01728,
-      "grad_norm": 115868.8125,
       "learning_rate": 6.8868407578085e-07,
-      "loss": 0.7199,
       "step": 270
     },
     {
       "epoch": 0.01792,
-      "grad_norm": 70205.1484375,
       "learning_rate": 7.142857142857143e-07,
-      "loss": 0.7299,
       "step": 280
     },
     {
       "epoch": 0.01856,
-      "grad_norm": 98926.4453125,
       "learning_rate": 7.398873527905787e-07,
-      "loss": 0.7159,
       "step": 290
     },
     {
       "epoch": 0.0192,
-      "grad_norm": 134108.140625,
       "learning_rate": 7.65488991295443e-07,
       "loss": 0.7122,
       "step": 300
     },
     {
       "epoch": 0.01984,
-      "grad_norm": 103719.140625,
       "learning_rate": 7.910906298003073e-07,
-      "loss": 0.7185,
       "step": 310
     },
     {
       "epoch": 0.02048,
-      "grad_norm": 85624.953125,
       "learning_rate": 8.166922683051716e-07,
-      "loss": 0.718,
       "step": 320
     },
     {
       "epoch": 0.02112,
-      "grad_norm": 138824.15625,
       "learning_rate": 8.422939068100359e-07,
-      "loss": 0.713,
       "step": 330
     },
     {
       "epoch": 0.02176,
-      "grad_norm": 73629.0859375,
       "learning_rate": 8.678955453149002e-07,
-      "loss": 0.7186,
       "step": 340
     },
     {
       "epoch": 0.0224,
-      "grad_norm": 132493.0,
       "learning_rate": 8.934971838197646e-07,
-      "loss": 0.7133,
       "step": 350
     },
     {
       "epoch": 0.02304,
-      "grad_norm": 85223.625,
       "learning_rate": 9.190988223246289e-07,
-      "loss": 0.7124,
       "step": 360
     },
     {
       "epoch": 0.02368,
-      "grad_norm": 77868.78125,
       "learning_rate": 9.447004608294931e-07,
-      "loss": 0.7058,
       "step": 370
     },
     {
       "epoch": 0.02432,
-      "grad_norm": 75874.3046875,
       "learning_rate": 9.703020993343575e-07,
-      "loss": 0.7139,
       "step": 380
     },
     {
       "epoch": 0.02496,
-      "grad_norm": 151937.703125,
       "learning_rate": 9.959037378392218e-07,
-      "loss": 0.713,
       "step": 390
     },
     {
       "epoch": 0.0256,
-      "grad_norm": 161711.671875,
       "learning_rate": 1.021505376344086e-06,
-      "loss": 0.7137,
       "step": 400
     }
   ],

     },
     {
       "epoch": 0.01344,
+      "grad_norm": 144219.625,
       "learning_rate": 5.350742447516642e-07,
+      "loss": 0.7218,
       "step": 210
     },
     {
       "epoch": 0.01408,
+      "grad_norm": 105046.0234375,
       "learning_rate": 5.606758832565284e-07,
+      "loss": 0.718,
       "step": 220
     },
     {
       "epoch": 0.01472,
+      "grad_norm": 126142.4296875,
       "learning_rate": 5.862775217613928e-07,
+      "loss": 0.7107,
       "step": 230
     },
     {
       "epoch": 0.01536,
+      "grad_norm": 92423.2265625,
       "learning_rate": 6.118791602662571e-07,
+      "loss": 0.7271,
       "step": 240
     },
     {
       "epoch": 0.016,
+      "grad_norm": 98091.828125,
       "learning_rate": 6.374807987711214e-07,
+      "loss": 0.7123,
       "step": 250
     },
     {
       "epoch": 0.01664,
+      "grad_norm": 131949.578125,
       "learning_rate": 6.630824372759858e-07,
+      "loss": 0.7204,
       "step": 260
     },
     {
       "epoch": 0.01728,
+      "grad_norm": 112228.5625,
       "learning_rate": 6.8868407578085e-07,
+      "loss": 0.722,
       "step": 270
     },
     {
       "epoch": 0.01792,
+      "grad_norm": 64587.734375,
       "learning_rate": 7.142857142857143e-07,
+      "loss": 0.7263,
       "step": 280
     },
     {
       "epoch": 0.01856,
+      "grad_norm": 99893.203125,
       "learning_rate": 7.398873527905787e-07,
+      "loss": 0.7169,
       "step": 290
     },
     {
       "epoch": 0.0192,
+      "grad_norm": 135749.875,
       "learning_rate": 7.65488991295443e-07,
       "loss": 0.7122,
       "step": 300
     },
     {
       "epoch": 0.01984,
+      "grad_norm": 103292.5703125,
       "learning_rate": 7.910906298003073e-07,
+      "loss": 0.7183,
       "step": 310
     },
     {
       "epoch": 0.02048,
+      "grad_norm": 86927.28125,
       "learning_rate": 8.166922683051716e-07,
+      "loss": 0.7192,
       "step": 320
     },
     {
       "epoch": 0.02112,
+      "grad_norm": 153738.390625,
       "learning_rate": 8.422939068100359e-07,
+      "loss": 0.711,
       "step": 330
     },
     {
       "epoch": 0.02176,
+      "grad_norm": 69994.7734375,
       "learning_rate": 8.678955453149002e-07,
+      "loss": 0.7176,
       "step": 340
     },
     {
       "epoch": 0.0224,
+      "grad_norm": 141370.6875,
       "learning_rate": 8.934971838197646e-07,
+      "loss": 0.7105,
       "step": 350
     },
     {
       "epoch": 0.02304,
+      "grad_norm": 71139.453125,
       "learning_rate": 9.190988223246289e-07,
+      "loss": 0.7126,
       "step": 360
     },
     {
       "epoch": 0.02368,
+      "grad_norm": 82039.1953125,
       "learning_rate": 9.447004608294931e-07,
+      "loss": 0.7078,
       "step": 370
     },
     {
       "epoch": 0.02432,
+      "grad_norm": 71275.7890625,
       "learning_rate": 9.703020993343575e-07,
+      "loss": 0.7145,
       "step": 380
     },
     {
       "epoch": 0.02496,
+      "grad_norm": 145801.21875,
       "learning_rate": 9.959037378392218e-07,
+      "loss": 0.7102,
       "step": 390
     },
     {
       "epoch": 0.0256,
+      "grad_norm": 171507.0,
       "learning_rate": 1.021505376344086e-06,
+      "loss": 0.7123,
       "step": 400
     }
   ],

graphcodebert-robust/checkpoint-400/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b5fc4023adc281644437a690ea6a6001846d7762699cd428d4ad38e1888076db
 size 5841

 version https://git-lfs.github.com/spec/v1
+oid sha256:82e524f8f7de87947806acfb17c136195f8d3668b26513da260f1a2f14442156
 size 5841

graphcodebert-robust/checkpoint-600/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:75ec427b92df30abfd117ca61bf8855a95bff5b8e2f300c83f23131aa83f89a3
 size 498612824

 version https://git-lfs.github.com/spec/v1
+oid sha256:320da2fc28dfd7f2b08f5a311e169db9c3172c660ca5f1f28958df59ff94a372
 size 498612824

graphcodebert-robust/checkpoint-600/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e6eaf9c7a3d50e76cca47c4da094a2db7ca99a2b289f3509dc98882e9debad13
 size 4741923

 version https://git-lfs.github.com/spec/v1
+oid sha256:41ff1d1389d831b2bc7715b986dcf40f64372807ce80b3368515da1fcaa1cb7a
 size 4741923

graphcodebert-robust/checkpoint-600/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:820bebfae8bbd9452955c53efeeb042e6227f4bb5c733fac637c835bd717c752
 size 14581

 version https://git-lfs.github.com/spec/v1
+oid sha256:d667b0153bf32427b60333b1fe4a206d72e36eefc1792fdf3d499d50e466bd30
 size 14581

graphcodebert-robust/checkpoint-600/trainer_state.json CHANGED Viewed

@@ -151,282 +151,282 @@
     },
     {
       "epoch": 0.01344,
-      "grad_norm": 141396.296875,
       "learning_rate": 5.350742447516642e-07,
-      "loss": 0.7217,
       "step": 210
     },
     {
       "epoch": 0.01408,
-      "grad_norm": 102339.1640625,
       "learning_rate": 5.606758832565284e-07,
-      "loss": 0.7215,
       "step": 220
     },
     {
       "epoch": 0.01472,
-      "grad_norm": 134052.9375,
       "learning_rate": 5.862775217613928e-07,
-      "loss": 0.7115,
       "step": 230
     },
     {
       "epoch": 0.01536,
-      "grad_norm": 87181.984375,
       "learning_rate": 6.118791602662571e-07,
-      "loss": 0.7241,
       "step": 240
     },
     {
       "epoch": 0.016,
-      "grad_norm": 100231.328125,
       "learning_rate": 6.374807987711214e-07,
-      "loss": 0.71,
       "step": 250
     },
     {
       "epoch": 0.01664,
-      "grad_norm": 136721.484375,
       "learning_rate": 6.630824372759858e-07,
-      "loss": 0.7188,
       "step": 260
     },
     {
       "epoch": 0.01728,
-      "grad_norm": 115868.8125,
       "learning_rate": 6.8868407578085e-07,
-      "loss": 0.7199,
       "step": 270
     },
     {
       "epoch": 0.01792,
-      "grad_norm": 70205.1484375,
       "learning_rate": 7.142857142857143e-07,
-      "loss": 0.7299,
       "step": 280
     },
     {
       "epoch": 0.01856,
-      "grad_norm": 98926.4453125,
       "learning_rate": 7.398873527905787e-07,
-      "loss": 0.7159,
       "step": 290
     },
     {
       "epoch": 0.0192,
-      "grad_norm": 134108.140625,
       "learning_rate": 7.65488991295443e-07,
       "loss": 0.7122,
       "step": 300
     },
     {
       "epoch": 0.01984,
-      "grad_norm": 103719.140625,
       "learning_rate": 7.910906298003073e-07,
-      "loss": 0.7185,
       "step": 310
     },
     {
       "epoch": 0.02048,
-      "grad_norm": 85624.953125,
       "learning_rate": 8.166922683051716e-07,
-      "loss": 0.718,
       "step": 320
     },
     {
       "epoch": 0.02112,
-      "grad_norm": 138824.15625,
       "learning_rate": 8.422939068100359e-07,
-      "loss": 0.713,
       "step": 330
     },
     {
       "epoch": 0.02176,
-      "grad_norm": 73629.0859375,
       "learning_rate": 8.678955453149002e-07,
-      "loss": 0.7186,
       "step": 340
     },
     {
       "epoch": 0.0224,
-      "grad_norm": 132493.0,
       "learning_rate": 8.934971838197646e-07,
-      "loss": 0.7133,
       "step": 350
     },
     {
       "epoch": 0.02304,
-      "grad_norm": 85223.625,
       "learning_rate": 9.190988223246289e-07,
-      "loss": 0.7124,
       "step": 360
     },
     {
       "epoch": 0.02368,
-      "grad_norm": 77868.78125,
       "learning_rate": 9.447004608294931e-07,
-      "loss": 0.7058,
       "step": 370
     },
     {
       "epoch": 0.02432,
-      "grad_norm": 75874.3046875,
       "learning_rate": 9.703020993343575e-07,
-      "loss": 0.7139,
       "step": 380
     },
     {
       "epoch": 0.02496,
-      "grad_norm": 151937.703125,
       "learning_rate": 9.959037378392218e-07,
-      "loss": 0.713,
       "step": 390
     },
     {
       "epoch": 0.0256,
-      "grad_norm": 161711.671875,
       "learning_rate": 1.021505376344086e-06,
-      "loss": 0.7137,
       "step": 400
     },
     {
       "epoch": 0.02624,
-      "grad_norm": 90800.234375,
       "learning_rate": 1.0471070148489503e-06,
-      "loss": 0.7091,
       "step": 410
     },
     {
       "epoch": 0.02688,
-      "grad_norm": 82131.34375,
       "learning_rate": 1.0727086533538148e-06,
-      "loss": 0.7098,
       "step": 420
     },
     {
       "epoch": 0.02752,
-      "grad_norm": 92818.9140625,
       "learning_rate": 1.0983102918586791e-06,
-      "loss": 0.7099,
       "step": 430
     },
     {
       "epoch": 0.02816,
-      "grad_norm": 88555.5078125,
       "learning_rate": 1.1239119303635434e-06,
-      "loss": 0.7086,
       "step": 440
     },
     {
       "epoch": 0.0288,
-      "grad_norm": 73428.6015625,
       "learning_rate": 1.1495135688684077e-06,
-      "loss": 0.7117,
       "step": 450
     },
     {
       "epoch": 0.02944,
-      "grad_norm": 128938.7421875,
       "learning_rate": 1.175115207373272e-06,
-      "loss": 0.7182,
       "step": 460
     },
     {
       "epoch": 0.03008,
-      "grad_norm": 102742.3359375,
       "learning_rate": 1.2007168458781362e-06,
-      "loss": 0.7108,
       "step": 470
     },
     {
       "epoch": 0.03072,
-      "grad_norm": 73825.8125,
       "learning_rate": 1.2263184843830007e-06,
-      "loss": 0.7087,
       "step": 480
     },
     {
       "epoch": 0.03136,
-      "grad_norm": 110930.75,
       "learning_rate": 1.251920122887865e-06,
-      "loss": 0.7232,
       "step": 490
     },
     {
       "epoch": 0.032,
-      "grad_norm": 95068.84375,
       "learning_rate": 1.2775217613927293e-06,
-      "loss": 0.703,
       "step": 500
     },
     {
       "epoch": 0.03264,
-      "grad_norm": 118731.9296875,
       "learning_rate": 1.3031233998975938e-06,
-      "loss": 0.7063,
       "step": 510
     },
     {
       "epoch": 0.03328,
-      "grad_norm": 80511.828125,
       "learning_rate": 1.3287250384024578e-06,
-      "loss": 0.7143,
       "step": 520
     },
     {
       "epoch": 0.03392,
-      "grad_norm": 84864.484375,
       "learning_rate": 1.354326676907322e-06,
-      "loss": 0.7055,
       "step": 530
     },
     {
       "epoch": 0.03456,
-      "grad_norm": 107800.109375,
       "learning_rate": 1.3799283154121864e-06,
-      "loss": 0.7119,
       "step": 540
     },
     {
       "epoch": 0.0352,
-      "grad_norm": 83667.671875,
       "learning_rate": 1.4055299539170509e-06,
-      "loss": 0.7082,
       "step": 550
     },
     {
       "epoch": 0.03584,
-      "grad_norm": 75656.4140625,
       "learning_rate": 1.4311315924219151e-06,
-      "loss": 0.7062,
       "step": 560
     },
     {
       "epoch": 0.03648,
-      "grad_norm": 79985.875,
       "learning_rate": 1.4567332309267796e-06,
-      "loss": 0.7155,
       "step": 570
     },
     {
       "epoch": 0.03712,
-      "grad_norm": 76334.078125,
       "learning_rate": 1.4823348694316437e-06,
-      "loss": 0.7075,
       "step": 580
     },
     {
       "epoch": 0.03776,
-      "grad_norm": 140764.03125,
       "learning_rate": 1.507936507936508e-06,
-      "loss": 0.7065,
       "step": 590
     },
     {
       "epoch": 0.0384,
-      "grad_norm": 100877.296875,
       "learning_rate": 1.5335381464413722e-06,
-      "loss": 0.7096,
       "step": 600
     }
   ],
@@ -456,7 +456,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 5049397152295680.0,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null

     },
     {
       "epoch": 0.01344,
+      "grad_norm": 144219.625,
       "learning_rate": 5.350742447516642e-07,
+      "loss": 0.7218,
       "step": 210
     },
     {
       "epoch": 0.01408,
+      "grad_norm": 105046.0234375,
       "learning_rate": 5.606758832565284e-07,
+      "loss": 0.718,
       "step": 220
     },
     {
       "epoch": 0.01472,
+      "grad_norm": 126142.4296875,
       "learning_rate": 5.862775217613928e-07,
+      "loss": 0.7107,
       "step": 230
     },
     {
       "epoch": 0.01536,
+      "grad_norm": 92423.2265625,
       "learning_rate": 6.118791602662571e-07,
+      "loss": 0.7271,
       "step": 240
     },
     {
       "epoch": 0.016,
+      "grad_norm": 98091.828125,
       "learning_rate": 6.374807987711214e-07,
+      "loss": 0.7123,
       "step": 250
     },
     {
       "epoch": 0.01664,
+      "grad_norm": 131949.578125,
       "learning_rate": 6.630824372759858e-07,
+      "loss": 0.7204,
       "step": 260
     },
     {
       "epoch": 0.01728,
+      "grad_norm": 112228.5625,
       "learning_rate": 6.8868407578085e-07,
+      "loss": 0.722,
       "step": 270
     },
     {
       "epoch": 0.01792,
+      "grad_norm": 64587.734375,
       "learning_rate": 7.142857142857143e-07,
+      "loss": 0.7263,
       "step": 280
     },
     {
       "epoch": 0.01856,
+      "grad_norm": 99893.203125,
       "learning_rate": 7.398873527905787e-07,
+      "loss": 0.7169,
       "step": 290
     },
     {
       "epoch": 0.0192,
+      "grad_norm": 135749.875,
       "learning_rate": 7.65488991295443e-07,
       "loss": 0.7122,
       "step": 300
     },
     {
       "epoch": 0.01984,
+      "grad_norm": 103292.5703125,
       "learning_rate": 7.910906298003073e-07,
+      "loss": 0.7183,
       "step": 310
     },
     {
       "epoch": 0.02048,
+      "grad_norm": 86927.28125,
       "learning_rate": 8.166922683051716e-07,
+      "loss": 0.7192,
       "step": 320
     },
     {
       "epoch": 0.02112,
+      "grad_norm": 153738.390625,
       "learning_rate": 8.422939068100359e-07,
+      "loss": 0.711,
       "step": 330
     },
     {
       "epoch": 0.02176,
+      "grad_norm": 69994.7734375,
       "learning_rate": 8.678955453149002e-07,
+      "loss": 0.7176,
       "step": 340
     },
     {
       "epoch": 0.0224,
+      "grad_norm": 141370.6875,
       "learning_rate": 8.934971838197646e-07,
+      "loss": 0.7105,
       "step": 350
     },
     {
       "epoch": 0.02304,
+      "grad_norm": 71139.453125,
       "learning_rate": 9.190988223246289e-07,
+      "loss": 0.7126,
       "step": 360
     },
     {
       "epoch": 0.02368,
+      "grad_norm": 82039.1953125,
       "learning_rate": 9.447004608294931e-07,
+      "loss": 0.7078,
       "step": 370
     },
     {
       "epoch": 0.02432,
+      "grad_norm": 71275.7890625,
       "learning_rate": 9.703020993343575e-07,
+      "loss": 0.7145,
       "step": 380
     },
     {
       "epoch": 0.02496,
+      "grad_norm": 145801.21875,
       "learning_rate": 9.959037378392218e-07,
+      "loss": 0.7102,
       "step": 390
     },
     {
       "epoch": 0.0256,
+      "grad_norm": 171507.0,
       "learning_rate": 1.021505376344086e-06,
+      "loss": 0.7123,
       "step": 400
     },
     {
       "epoch": 0.02624,
+      "grad_norm": 79134.203125,
       "learning_rate": 1.0471070148489503e-06,
+      "loss": 0.7083,
       "step": 410
     },
     {
       "epoch": 0.02688,
+      "grad_norm": 69231.640625,
       "learning_rate": 1.0727086533538148e-06,
+      "loss": 0.7105,
       "step": 420
     },
     {
       "epoch": 0.02752,
+      "grad_norm": 113099.3984375,
       "learning_rate": 1.0983102918586791e-06,
+      "loss": 0.7141,
       "step": 430
     },
     {
       "epoch": 0.02816,
+      "grad_norm": 121013.734375,
       "learning_rate": 1.1239119303635434e-06,
+      "loss": 0.7146,
       "step": 440
     },
     {
       "epoch": 0.0288,
+      "grad_norm": 89184.609375,
       "learning_rate": 1.1495135688684077e-06,
+      "loss": 0.7133,
       "step": 450
     },
     {
       "epoch": 0.02944,
+      "grad_norm": 176246.890625,
       "learning_rate": 1.175115207373272e-06,
+      "loss": 0.7086,
       "step": 460
     },
     {
       "epoch": 0.03008,
+      "grad_norm": 88161.2265625,
       "learning_rate": 1.2007168458781362e-06,
+      "loss": 0.709,
       "step": 470
     },
     {
       "epoch": 0.03072,
+      "grad_norm": 74441.015625,
       "learning_rate": 1.2263184843830007e-06,
+      "loss": 0.7023,
       "step": 480
     },
     {
       "epoch": 0.03136,
+      "grad_norm": 96409.40625,
       "learning_rate": 1.251920122887865e-06,
+      "loss": 0.715,
       "step": 490
     },
     {
       "epoch": 0.032,
+      "grad_norm": 81090.6484375,
       "learning_rate": 1.2775217613927293e-06,
+      "loss": 0.7109,
       "step": 500
     },
     {
       "epoch": 0.03264,
+      "grad_norm": 98153.8828125,
       "learning_rate": 1.3031233998975938e-06,
+      "loss": 0.7092,
       "step": 510
     },
     {
       "epoch": 0.03328,
+      "grad_norm": 78782.546875,
       "learning_rate": 1.3287250384024578e-06,
+      "loss": 0.7048,
       "step": 520
     },
     {
       "epoch": 0.03392,
+      "grad_norm": 110360.5,
       "learning_rate": 1.354326676907322e-06,
+      "loss": 0.7108,
       "step": 530
     },
     {
       "epoch": 0.03456,
+      "grad_norm": 88462.0703125,
       "learning_rate": 1.3799283154121864e-06,
+      "loss": 0.7041,
       "step": 540
     },
     {
       "epoch": 0.0352,
+      "grad_norm": 97624.7421875,
       "learning_rate": 1.4055299539170509e-06,
+      "loss": 0.7114,
       "step": 550
     },
     {
       "epoch": 0.03584,
+      "grad_norm": 99471.4375,
       "learning_rate": 1.4311315924219151e-06,
+      "loss": 0.7191,
       "step": 560
     },
     {
       "epoch": 0.03648,
+      "grad_norm": 79087.90625,
       "learning_rate": 1.4567332309267796e-06,
+      "loss": 0.7022,
       "step": 570
     },
     {
       "epoch": 0.03712,
+      "grad_norm": 65275.0,
       "learning_rate": 1.4823348694316437e-06,
+      "loss": 0.7088,
       "step": 580
     },
     {
       "epoch": 0.03776,
+      "grad_norm": 153826.28125,
       "learning_rate": 1.507936507936508e-06,
+      "loss": 0.7079,
       "step": 590
     },
     {
       "epoch": 0.0384,
+      "grad_norm": 64280.38671875,
       "learning_rate": 1.5335381464413722e-06,
+      "loss": 0.7018,
       "step": 600
     }
   ],
       "attributes": {}
     }
   },
+  "total_flos": 5049545152264320.0,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null

graphcodebert-robust/checkpoint-600/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b5fc4023adc281644437a690ea6a6001846d7762699cd428d4ad38e1888076db
 size 5841

 version https://git-lfs.github.com/spec/v1
+oid sha256:82e524f8f7de87947806acfb17c136195f8d3668b26513da260f1a2f14442156
 size 5841

graphcodebert-robust/checkpoint-800/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d19fdc7a5fa21c91052f15414ec14e1da4bbc85f75aa66510c1c463b2f14e2f6
 size 498612824

 version https://git-lfs.github.com/spec/v1
+oid sha256:c038fee615aa3289704b6c8446543a8902b07b09cc79c21ef54c5fe8590f914e
 size 498612824

graphcodebert-robust/checkpoint-800/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ddec1d294331a984f4091595913e06b171ba550334d359ca9c07a294409ad9c1
 size 4741923

 version https://git-lfs.github.com/spec/v1
+oid sha256:554c3a189d14a7538050afbd400501c37378790e4b17a4a388758bad08d098a0
 size 4741923

graphcodebert-robust/checkpoint-800/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:36c90ae3575630687b6a7d64bf93dded50adb1dbab4b74db0c9cdd2945f93577
 size 14581

 version https://git-lfs.github.com/spec/v1
+oid sha256:cc41893e18124a5b4346e5ad2eec904a9b13636e7df7f9d4e28520206d9aac00
 size 14581

graphcodebert-robust/checkpoint-800/trainer_state.json CHANGED Viewed

@@ -151,422 +151,422 @@
     },
     {
       "epoch": 0.01344,
-      "grad_norm": 141396.296875,
       "learning_rate": 5.350742447516642e-07,
-      "loss": 0.7217,
       "step": 210
     },
     {
       "epoch": 0.01408,
-      "grad_norm": 102339.1640625,
       "learning_rate": 5.606758832565284e-07,
-      "loss": 0.7215,
       "step": 220
     },
     {
       "epoch": 0.01472,
-      "grad_norm": 134052.9375,
       "learning_rate": 5.862775217613928e-07,
-      "loss": 0.7115,
       "step": 230
     },
     {
       "epoch": 0.01536,
-      "grad_norm": 87181.984375,
       "learning_rate": 6.118791602662571e-07,
-      "loss": 0.7241,
       "step": 240
     },
     {
       "epoch": 0.016,
-      "grad_norm": 100231.328125,
       "learning_rate": 6.374807987711214e-07,
-      "loss": 0.71,
       "step": 250
     },
     {
       "epoch": 0.01664,
-      "grad_norm": 136721.484375,
       "learning_rate": 6.630824372759858e-07,
-      "loss": 0.7188,
       "step": 260
     },
     {
       "epoch": 0.01728,
-      "grad_norm": 115868.8125,
       "learning_rate": 6.8868407578085e-07,
-      "loss": 0.7199,
       "step": 270
     },
     {
       "epoch": 0.01792,
-      "grad_norm": 70205.1484375,
       "learning_rate": 7.142857142857143e-07,
-      "loss": 0.7299,
       "step": 280
     },
     {
       "epoch": 0.01856,
-      "grad_norm": 98926.4453125,
       "learning_rate": 7.398873527905787e-07,
-      "loss": 0.7159,
       "step": 290
     },
     {
       "epoch": 0.0192,
-      "grad_norm": 134108.140625,
       "learning_rate": 7.65488991295443e-07,
       "loss": 0.7122,
       "step": 300
     },
     {
       "epoch": 0.01984,
-      "grad_norm": 103719.140625,
       "learning_rate": 7.910906298003073e-07,
-      "loss": 0.7185,
       "step": 310
     },
     {
       "epoch": 0.02048,
-      "grad_norm": 85624.953125,
       "learning_rate": 8.166922683051716e-07,
-      "loss": 0.718,
       "step": 320
     },
     {
       "epoch": 0.02112,
-      "grad_norm": 138824.15625,
       "learning_rate": 8.422939068100359e-07,
-      "loss": 0.713,
       "step": 330
     },
     {
       "epoch": 0.02176,
-      "grad_norm": 73629.0859375,
       "learning_rate": 8.678955453149002e-07,
-      "loss": 0.7186,
       "step": 340
     },
     {
       "epoch": 0.0224,
-      "grad_norm": 132493.0,
       "learning_rate": 8.934971838197646e-07,
-      "loss": 0.7133,
       "step": 350
     },
     {
       "epoch": 0.02304,
-      "grad_norm": 85223.625,
       "learning_rate": 9.190988223246289e-07,
-      "loss": 0.7124,
       "step": 360
     },
     {
       "epoch": 0.02368,
-      "grad_norm": 77868.78125,
       "learning_rate": 9.447004608294931e-07,
-      "loss": 0.7058,
       "step": 370
     },
     {
       "epoch": 0.02432,
-      "grad_norm": 75874.3046875,
       "learning_rate": 9.703020993343575e-07,
-      "loss": 0.7139,
       "step": 380
     },
     {
       "epoch": 0.02496,
-      "grad_norm": 151937.703125,
       "learning_rate": 9.959037378392218e-07,
-      "loss": 0.713,
       "step": 390
     },
     {
       "epoch": 0.0256,
-      "grad_norm": 161711.671875,
       "learning_rate": 1.021505376344086e-06,
-      "loss": 0.7137,
       "step": 400
     },
     {
       "epoch": 0.02624,
-      "grad_norm": 90800.234375,
       "learning_rate": 1.0471070148489503e-06,
-      "loss": 0.7091,
       "step": 410
     },
     {
       "epoch": 0.02688,
-      "grad_norm": 82131.34375,
       "learning_rate": 1.0727086533538148e-06,
-      "loss": 0.7098,
       "step": 420
     },
     {
       "epoch": 0.02752,
-      "grad_norm": 92818.9140625,
       "learning_rate": 1.0983102918586791e-06,
-      "loss": 0.7099,
       "step": 430
     },
     {
       "epoch": 0.02816,
-      "grad_norm": 88555.5078125,
       "learning_rate": 1.1239119303635434e-06,
-      "loss": 0.7086,
       "step": 440
     },
     {
       "epoch": 0.0288,
-      "grad_norm": 73428.6015625,
       "learning_rate": 1.1495135688684077e-06,
-      "loss": 0.7117,
       "step": 450
     },
     {
       "epoch": 0.02944,
-      "grad_norm": 128938.7421875,
       "learning_rate": 1.175115207373272e-06,
-      "loss": 0.7182,
       "step": 460
     },
     {
       "epoch": 0.03008,
-      "grad_norm": 102742.3359375,
       "learning_rate": 1.2007168458781362e-06,
-      "loss": 0.7108,
       "step": 470
     },
     {
       "epoch": 0.03072,
-      "grad_norm": 73825.8125,
       "learning_rate": 1.2263184843830007e-06,
-      "loss": 0.7087,
       "step": 480
     },
     {
       "epoch": 0.03136,
-      "grad_norm": 110930.75,
       "learning_rate": 1.251920122887865e-06,
-      "loss": 0.7232,
       "step": 490
     },
     {
       "epoch": 0.032,
-      "grad_norm": 95068.84375,
       "learning_rate": 1.2775217613927293e-06,
-      "loss": 0.703,
       "step": 500
     },
     {
       "epoch": 0.03264,
-      "grad_norm": 118731.9296875,
       "learning_rate": 1.3031233998975938e-06,
-      "loss": 0.7063,
       "step": 510
     },
     {
       "epoch": 0.03328,
-      "grad_norm": 80511.828125,
       "learning_rate": 1.3287250384024578e-06,
-      "loss": 0.7143,
       "step": 520
     },
     {
       "epoch": 0.03392,
-      "grad_norm": 84864.484375,
       "learning_rate": 1.354326676907322e-06,
-      "loss": 0.7055,
       "step": 530
     },
     {
       "epoch": 0.03456,
-      "grad_norm": 107800.109375,
       "learning_rate": 1.3799283154121864e-06,
-      "loss": 0.7119,
       "step": 540
     },
     {
       "epoch": 0.0352,
-      "grad_norm": 83667.671875,
       "learning_rate": 1.4055299539170509e-06,
-      "loss": 0.7082,
       "step": 550
     },
     {
       "epoch": 0.03584,
-      "grad_norm": 75656.4140625,
       "learning_rate": 1.4311315924219151e-06,
-      "loss": 0.7062,
       "step": 560
     },
     {
       "epoch": 0.03648,
-      "grad_norm": 79985.875,
       "learning_rate": 1.4567332309267796e-06,
-      "loss": 0.7155,
       "step": 570
     },
     {
       "epoch": 0.03712,
-      "grad_norm": 76334.078125,
       "learning_rate": 1.4823348694316437e-06,
-      "loss": 0.7075,
       "step": 580
     },
     {
       "epoch": 0.03776,
-      "grad_norm": 140764.03125,
       "learning_rate": 1.507936507936508e-06,
-      "loss": 0.7065,
       "step": 590
     },
     {
       "epoch": 0.0384,
-      "grad_norm": 100877.296875,
       "learning_rate": 1.5335381464413722e-06,
-      "loss": 0.7096,
       "step": 600
     },
     {
       "epoch": 0.03904,
-      "grad_norm": 104088.1171875,
       "learning_rate": 1.5591397849462367e-06,
-      "loss": 0.6987,
       "step": 610
     },
     {
       "epoch": 0.03968,
-      "grad_norm": 80806.2265625,
       "learning_rate": 1.584741423451101e-06,
-      "loss": 0.707,
       "step": 620
     },
     {
       "epoch": 0.04032,
-      "grad_norm": 109884.765625,
       "learning_rate": 1.6103430619559655e-06,
-      "loss": 0.6991,
       "step": 630
     },
     {
       "epoch": 0.04096,
-      "grad_norm": 79944.890625,
       "learning_rate": 1.6359447004608298e-06,
-      "loss": 0.7047,
       "step": 640
     },
     {
       "epoch": 0.0416,
-      "grad_norm": 93673.3828125,
       "learning_rate": 1.6615463389656938e-06,
-      "loss": 0.6971,
       "step": 650
     },
     {
       "epoch": 0.04224,
-      "grad_norm": 76641.265625,
       "learning_rate": 1.6871479774705581e-06,
-      "loss": 0.6957,
       "step": 660
     },
     {
       "epoch": 0.04288,
-      "grad_norm": 73583.5546875,
       "learning_rate": 1.7127496159754226e-06,
-      "loss": 0.7028,
       "step": 670
     },
     {
       "epoch": 0.04352,
-      "grad_norm": 75177.9609375,
       "learning_rate": 1.7383512544802869e-06,
-      "loss": 0.7012,
       "step": 680
     },
     {
       "epoch": 0.04416,
-      "grad_norm": 78340.8515625,
       "learning_rate": 1.7639528929851512e-06,
-      "loss": 0.6987,
       "step": 690
     },
     {
       "epoch": 0.0448,
-      "grad_norm": 86004.1171875,
       "learning_rate": 1.7895545314900157e-06,
-      "loss": 0.7061,
       "step": 700
     },
     {
       "epoch": 0.04544,
-      "grad_norm": 94212.0390625,
       "learning_rate": 1.8151561699948797e-06,
-      "loss": 0.6993,
       "step": 710
     },
     {
       "epoch": 0.04608,
-      "grad_norm": 83918.2421875,
       "learning_rate": 1.840757808499744e-06,
-      "loss": 0.7009,
       "step": 720
     },
     {
       "epoch": 0.04672,
-      "grad_norm": 68374.3125,
       "learning_rate": 1.8663594470046085e-06,
-      "loss": 0.6964,
       "step": 730
     },
     {
       "epoch": 0.04736,
-      "grad_norm": 90348.78125,
       "learning_rate": 1.8919610855094728e-06,
-      "loss": 0.7011,
       "step": 740
     },
     {
       "epoch": 0.048,
-      "grad_norm": 146658.0,
       "learning_rate": 1.9175627240143373e-06,
-      "loss": 0.7003,
       "step": 750
     },
     {
       "epoch": 0.04864,
-      "grad_norm": 112037.1640625,
       "learning_rate": 1.9431643625192015e-06,
-      "loss": 0.7051,
       "step": 760
     },
     {
       "epoch": 0.04928,
-      "grad_norm": 70628.625,
       "learning_rate": 1.9687660010240654e-06,
-      "loss": 0.6923,
       "step": 770
     },
     {
       "epoch": 0.04992,
-      "grad_norm": 109922.125,
       "learning_rate": 1.99436763952893e-06,
-      "loss": 0.6893,
       "step": 780
     },
     {
       "epoch": 0.05056,
-      "grad_norm": 135306.375,
       "learning_rate": 2.0199692780337944e-06,
-      "loss": 0.7008,
       "step": 790
     },
     {
       "epoch": 0.0512,
-      "grad_norm": 82354.8046875,
       "learning_rate": 2.0455709165386586e-06,
-      "loss": 0.705,
       "step": 800
     }
   ],
@@ -596,7 +596,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 6733094128867200.0,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null

     },
     {
       "epoch": 0.01344,
+      "grad_norm": 144219.625,
       "learning_rate": 5.350742447516642e-07,
+      "loss": 0.7218,
       "step": 210
     },
     {
       "epoch": 0.01408,
+      "grad_norm": 105046.0234375,
       "learning_rate": 5.606758832565284e-07,
+      "loss": 0.718,
       "step": 220
     },
     {
       "epoch": 0.01472,
+      "grad_norm": 126142.4296875,
       "learning_rate": 5.862775217613928e-07,
+      "loss": 0.7107,
       "step": 230
     },
     {
       "epoch": 0.01536,
+      "grad_norm": 92423.2265625,
       "learning_rate": 6.118791602662571e-07,
+      "loss": 0.7271,
       "step": 240
     },
     {
       "epoch": 0.016,
+      "grad_norm": 98091.828125,
       "learning_rate": 6.374807987711214e-07,
+      "loss": 0.7123,
       "step": 250
     },
     {
       "epoch": 0.01664,
+      "grad_norm": 131949.578125,
       "learning_rate": 6.630824372759858e-07,
+      "loss": 0.7204,
       "step": 260
     },
     {
       "epoch": 0.01728,
+      "grad_norm": 112228.5625,
       "learning_rate": 6.8868407578085e-07,
+      "loss": 0.722,
       "step": 270
     },
     {
       "epoch": 0.01792,
+      "grad_norm": 64587.734375,
       "learning_rate": 7.142857142857143e-07,
+      "loss": 0.7263,
       "step": 280
     },
     {
       "epoch": 0.01856,
+      "grad_norm": 99893.203125,
       "learning_rate": 7.398873527905787e-07,
+      "loss": 0.7169,
       "step": 290
     },
     {
       "epoch": 0.0192,
+      "grad_norm": 135749.875,
       "learning_rate": 7.65488991295443e-07,
       "loss": 0.7122,
       "step": 300
     },
     {
       "epoch": 0.01984,
+      "grad_norm": 103292.5703125,
       "learning_rate": 7.910906298003073e-07,
+      "loss": 0.7183,
       "step": 310
     },
     {
       "epoch": 0.02048,
+      "grad_norm": 86927.28125,
       "learning_rate": 8.166922683051716e-07,
+      "loss": 0.7192,
       "step": 320
     },
     {
       "epoch": 0.02112,
+      "grad_norm": 153738.390625,
       "learning_rate": 8.422939068100359e-07,
+      "loss": 0.711,
       "step": 330
     },
     {
       "epoch": 0.02176,
+      "grad_norm": 69994.7734375,
       "learning_rate": 8.678955453149002e-07,
+      "loss": 0.7176,
       "step": 340
     },
     {
       "epoch": 0.0224,
+      "grad_norm": 141370.6875,
       "learning_rate": 8.934971838197646e-07,
+      "loss": 0.7105,
       "step": 350
     },
     {
       "epoch": 0.02304,
+      "grad_norm": 71139.453125,
       "learning_rate": 9.190988223246289e-07,
+      "loss": 0.7126,
       "step": 360
     },
     {
       "epoch": 0.02368,
+      "grad_norm": 82039.1953125,
       "learning_rate": 9.447004608294931e-07,
+      "loss": 0.7078,
       "step": 370
     },
     {
       "epoch": 0.02432,
+      "grad_norm": 71275.7890625,
       "learning_rate": 9.703020993343575e-07,
+      "loss": 0.7145,
       "step": 380
     },
     {
       "epoch": 0.02496,
+      "grad_norm": 145801.21875,
       "learning_rate": 9.959037378392218e-07,
+      "loss": 0.7102,
       "step": 390
     },
     {
       "epoch": 0.0256,
+      "grad_norm": 171507.0,
       "learning_rate": 1.021505376344086e-06,
+      "loss": 0.7123,
       "step": 400
     },
     {
       "epoch": 0.02624,
+      "grad_norm": 79134.203125,
       "learning_rate": 1.0471070148489503e-06,
+      "loss": 0.7083,
       "step": 410
     },
     {
       "epoch": 0.02688,
+      "grad_norm": 69231.640625,
       "learning_rate": 1.0727086533538148e-06,
+      "loss": 0.7105,
       "step": 420
     },
     {
       "epoch": 0.02752,
+      "grad_norm": 113099.3984375,
       "learning_rate": 1.0983102918586791e-06,
+      "loss": 0.7141,
       "step": 430
     },
     {
       "epoch": 0.02816,
+      "grad_norm": 121013.734375,
       "learning_rate": 1.1239119303635434e-06,
+      "loss": 0.7146,
       "step": 440
     },
     {
       "epoch": 0.0288,
+      "grad_norm": 89184.609375,
       "learning_rate": 1.1495135688684077e-06,
+      "loss": 0.7133,
       "step": 450
     },
     {
       "epoch": 0.02944,
+      "grad_norm": 176246.890625,
       "learning_rate": 1.175115207373272e-06,
+      "loss": 0.7086,
       "step": 460
     },
     {
       "epoch": 0.03008,
+      "grad_norm": 88161.2265625,
       "learning_rate": 1.2007168458781362e-06,
+      "loss": 0.709,
       "step": 470
     },
     {
       "epoch": 0.03072,
+      "grad_norm": 74441.015625,
       "learning_rate": 1.2263184843830007e-06,
+      "loss": 0.7023,
       "step": 480
     },
     {
       "epoch": 0.03136,
+      "grad_norm": 96409.40625,
       "learning_rate": 1.251920122887865e-06,
+      "loss": 0.715,
       "step": 490
     },
     {
       "epoch": 0.032,
+      "grad_norm": 81090.6484375,
       "learning_rate": 1.2775217613927293e-06,
+      "loss": 0.7109,
       "step": 500
     },
     {
       "epoch": 0.03264,
+      "grad_norm": 98153.8828125,
       "learning_rate": 1.3031233998975938e-06,
+      "loss": 0.7092,
       "step": 510
     },
     {
       "epoch": 0.03328,
+      "grad_norm": 78782.546875,
       "learning_rate": 1.3287250384024578e-06,
+      "loss": 0.7048,
       "step": 520
     },
     {
       "epoch": 0.03392,
+      "grad_norm": 110360.5,
       "learning_rate": 1.354326676907322e-06,
+      "loss": 0.7108,
       "step": 530
     },
     {
       "epoch": 0.03456,
+      "grad_norm": 88462.0703125,
       "learning_rate": 1.3799283154121864e-06,
+      "loss": 0.7041,
       "step": 540
     },
     {
       "epoch": 0.0352,
+      "grad_norm": 97624.7421875,
       "learning_rate": 1.4055299539170509e-06,
+      "loss": 0.7114,
       "step": 550
     },
     {
       "epoch": 0.03584,
+      "grad_norm": 99471.4375,
       "learning_rate": 1.4311315924219151e-06,
+      "loss": 0.7191,
       "step": 560
     },
     {
       "epoch": 0.03648,
+      "grad_norm": 79087.90625,
       "learning_rate": 1.4567332309267796e-06,
+      "loss": 0.7022,
       "step": 570
     },
     {
       "epoch": 0.03712,
+      "grad_norm": 65275.0,
       "learning_rate": 1.4823348694316437e-06,
+      "loss": 0.7088,
       "step": 580
     },
     {
       "epoch": 0.03776,
+      "grad_norm": 153826.28125,
       "learning_rate": 1.507936507936508e-06,
+      "loss": 0.7079,
       "step": 590
     },
     {
       "epoch": 0.0384,
+      "grad_norm": 64280.38671875,
       "learning_rate": 1.5335381464413722e-06,
+      "loss": 0.7018,
       "step": 600
     },
     {
       "epoch": 0.03904,
+      "grad_norm": 65060.80078125,
       "learning_rate": 1.5591397849462367e-06,
+      "loss": 0.7027,
       "step": 610
     },
     {
       "epoch": 0.03968,
+      "grad_norm": 77339.2890625,
       "learning_rate": 1.584741423451101e-06,
+      "loss": 0.7038,
       "step": 620
     },
     {
       "epoch": 0.04032,
+      "grad_norm": 123140.5546875,
       "learning_rate": 1.6103430619559655e-06,
+      "loss": 0.7019,
       "step": 630
     },
     {
       "epoch": 0.04096,
+      "grad_norm": 67502.71875,
       "learning_rate": 1.6359447004608298e-06,
+      "loss": 0.7094,
       "step": 640
     },
     {
       "epoch": 0.0416,
+      "grad_norm": 95452.1796875,
       "learning_rate": 1.6615463389656938e-06,
+      "loss": 0.6998,
       "step": 650
     },
     {
       "epoch": 0.04224,
+      "grad_norm": 68556.421875,
       "learning_rate": 1.6871479774705581e-06,
+      "loss": 0.694,
       "step": 660
     },
     {
       "epoch": 0.04288,
+      "grad_norm": 78265.8046875,
       "learning_rate": 1.7127496159754226e-06,
+      "loss": 0.7051,
       "step": 670
     },
     {
       "epoch": 0.04352,
+      "grad_norm": 93559.3359375,
       "learning_rate": 1.7383512544802869e-06,
+      "loss": 0.6997,
       "step": 680
     },
     {
       "epoch": 0.04416,
+      "grad_norm": 88091.9375,
       "learning_rate": 1.7639528929851512e-06,
+      "loss": 0.6963,
       "step": 690
     },
     {
       "epoch": 0.0448,
+      "grad_norm": 73024.359375,
       "learning_rate": 1.7895545314900157e-06,
+      "loss": 0.7021,
       "step": 700
     },
     {
       "epoch": 0.04544,
+      "grad_norm": 100058.2890625,
       "learning_rate": 1.8151561699948797e-06,
+      "loss": 0.7022,
       "step": 710
     },
     {
       "epoch": 0.04608,
+      "grad_norm": 99197.1953125,
       "learning_rate": 1.840757808499744e-06,
+      "loss": 0.7017,
       "step": 720
     },
     {
       "epoch": 0.04672,
+      "grad_norm": 102018.984375,
       "learning_rate": 1.8663594470046085e-06,
+      "loss": 0.6985,
       "step": 730
     },
     {
       "epoch": 0.04736,
+      "grad_norm": 101586.0234375,
       "learning_rate": 1.8919610855094728e-06,
+      "loss": 0.6991,
       "step": 740
     },
     {
       "epoch": 0.048,
+      "grad_norm": 151948.25,
       "learning_rate": 1.9175627240143373e-06,
+      "loss": 0.6977,
       "step": 750
     },
     {
       "epoch": 0.04864,
+      "grad_norm": 88698.7109375,
       "learning_rate": 1.9431643625192015e-06,
+      "loss": 0.6961,
       "step": 760
     },
     {
       "epoch": 0.04928,
+      "grad_norm": 82451.9296875,
       "learning_rate": 1.9687660010240654e-06,
+      "loss": 0.6898,
       "step": 770
     },
     {
       "epoch": 0.04992,
+      "grad_norm": 82236.453125,
       "learning_rate": 1.99436763952893e-06,
+      "loss": 0.6886,
       "step": 780
     },
     {
       "epoch": 0.05056,
+      "grad_norm": 155064.484375,
       "learning_rate": 2.0199692780337944e-06,
+      "loss": 0.6921,
       "step": 790
     },
     {
       "epoch": 0.0512,
+      "grad_norm": 72238.6328125,
       "learning_rate": 2.0455709165386586e-06,
+      "loss": 0.6932,
       "step": 800
     }
   ],
       "attributes": {}
     }
   },
+  "total_flos": 6733455906568320.0,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null

graphcodebert-robust/checkpoint-800/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b5fc4023adc281644437a690ea6a6001846d7762699cd428d4ad38e1888076db
 size 5841

 version https://git-lfs.github.com/spec/v1
+oid sha256:82e524f8f7de87947806acfb17c136195f8d3668b26513da260f1a2f14442156
 size 5841

graphcodebert-robust/training.log CHANGED Viewed

@@ -1,10 +1,34 @@
-2026-04-16 10:18:35,455 - INFO - train_pipeline - Logging to ./output_checkpoints/graphcodebert-robust/training.log
-2026-04-16 10:18:35,457 - INFO - train_pipeline - Training config: TrainConfig(model_name='microsoft/graphcodebert-base', output_dir='./output_checkpoints/graphcodebert-robust', num_epochs=5, batch_size=32, learning_rate=2e-05, max_length=512, num_labels=2, use_wandb=True, freeze_base=True, loss_type='r-drop', focal_alpha=1.0, focal_gamma=2.0, r_drop_alpha=4.0, infonce_temperature=0.07, infonce_weight=0.5, seed=42, resume_from_checkpoint='output_checkpoints/graphcodebert-robust/checkpoint-1000', label_smoothing=0.1, adversarial_epsilon=0.5, use_swa=True, swa_start_epoch=2, swa_lr=1e-05, data_augmentation=True, aug_rename_prob=0.3, aug_format_prob=0.3, device=device(type='cuda'))
-2026-04-16 10:18:35,458 - INFO - train_pipeline - Loading model & tokenizer for 'microsoft/graphcodebert-base'
-2026-04-16 10:18:36,698 - INFO - train_pipeline - Model placed on cuda
-2026-04-16 10:18:36,701 - INFO - train_pipeline - Base model weights frozen – only classifier head will be trained.
-2026-04-16 10:18:36,702 - INFO - train_pipeline - ===== Model Architecture =====
-2026-04-16 10:18:36,705 - INFO - train_pipeline -
 RobertaForSequenceClassification(
   (roberta): RobertaModel(
     (embeddings): RobertaEmbeddings(
@@ -12,7 +36,7 @@ RobertaForSequenceClassification(
       (position_embeddings): Embedding(514, 768, padding_idx=1)
       (token_type_embeddings): Embedding(1, 768)
       (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
-      (dropout): Dropout(p=0.2, inplace=False)
     )
     (encoder): RobertaEncoder(
       (layer): ModuleList(
@@ -22,12 +46,12 @@ RobertaForSequenceClassification(
               (query): Linear(in_features=768, out_features=768, bias=True)
               (key): Linear(in_features=768, out_features=768, bias=True)
               (value): Linear(in_features=768, out_features=768, bias=True)
-              (dropout): Dropout(p=0.2, inplace=False)
             )
             (output): RobertaSelfOutput(
               (dense): Linear(in_features=768, out_features=768, bias=True)
               (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
-              (dropout): Dropout(p=0.2, inplace=False)
             )
           )
           (intermediate): RobertaIntermediate(
@@ -37,7 +61,7 @@ RobertaForSequenceClassification(
           (output): RobertaOutput(
             (dense): Linear(in_features=3072, out_features=768, bias=True)
             (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
-            (dropout): Dropout(p=0.2, inplace=False)
           )
         )
       )
@@ -45,16 +69,16 @@ RobertaForSequenceClassification(
   )
   (classifier): RobertaClassificationHead(
     (dense): Linear(in_features=768, out_features=768, bias=True)
-    (dropout): Dropout(p=0.2, inplace=False)
     (out_proj): Linear(in_features=768, out_features=2, bias=True)
   )
 )
-2026-04-16 10:18:36,707 - INFO - train_pipeline - ===== Parameter Summary =====
-2026-04-16 10:18:36,709 - INFO - train_pipeline - Total Parameters:         124,647,170
-2026-04-16 10:18:36,711 - INFO - train_pipeline - Trainable Parameters:     592,130
-2026-04-16 10:18:36,712 - INFO - train_pipeline - Non-trainable Parameters: 124,055,040
-2026-04-16 10:18:36,713 - INFO - train_pipeline - ===== Tokenizer Summary =====
-2026-04-16 10:18:36,732 - INFO - train_pipeline - Vocab size: 50265 | Special tokens: ['<s>', '</s>', '<unk>', '<pad>', '<mask>']
-2026-04-16 10:18:36,734 - INFO - train_pipeline - ===== End of Architecture Log =====
-2026-04-16 10:18:36,735 - INFO - train_pipeline - Data augmentation enabled (rename=0.3, format=0.3)
-2026-04-16 10:18:38,005 - INFO - train_pipeline - === Starting training with robust regularisation ===

+2026-04-17 08:00:34,522 - INFO - train_pipeline - Logging to ./output_checkpoints/graphcodebert-robust/training.log
+2026-04-17 08:00:34,525 - INFO - train_pipeline - ===== Training Configuration =====
+2026-04-17 08:00:34,526 - INFO - train_pipeline - model_name           : microsoft/graphcodebert-base
+2026-04-17 08:00:34,528 - INFO - train_pipeline - output_dir           : ./output_checkpoints/graphcodebert-robust
+2026-04-17 08:00:34,529 - INFO - train_pipeline - num_epochs           : 5
+2026-04-17 08:00:34,531 - INFO - train_pipeline - batch_size           : 32
+2026-04-17 08:00:34,533 - INFO - train_pipeline - learning_rate        : 2e-05
+2026-04-17 08:00:34,535 - INFO - train_pipeline - max_length           : 512
+2026-04-17 08:00:34,536 - INFO - train_pipeline - num_labels           : 2
+2026-04-17 08:00:34,538 - INFO - train_pipeline - use_wandb            : True
+2026-04-17 08:00:34,540 - INFO - train_pipeline - freeze_base          : True
+2026-04-17 08:00:34,541 - INFO - train_pipeline - loss_type            : r-drop
+2026-04-17 08:00:34,542 - INFO - train_pipeline - focal_alpha          : 1.0
+2026-04-17 08:00:34,544 - INFO - train_pipeline - focal_gamma          : 2.0
+2026-04-17 08:00:34,545 - INFO - train_pipeline - r_drop_alpha         : 4.0
+2026-04-17 08:00:34,546 - INFO - train_pipeline - infonce_temperature  : 0.07
+2026-04-17 08:00:34,548 - INFO - train_pipeline - infonce_weight       : 0.5
+2026-04-17 08:00:34,550 - INFO - train_pipeline - seed                 : 42
+2026-04-17 08:00:34,552 - INFO - train_pipeline - resume_from_checkpoint : None
+2026-04-17 08:00:34,553 - INFO - train_pipeline - label_smoothing      : 0.1
+2026-04-17 08:00:34,554 - INFO - train_pipeline - adversarial_epsilon  : 0.5
+2026-04-17 08:00:34,556 - INFO - train_pipeline - use_swa              : True
+2026-04-17 08:00:34,557 - INFO - train_pipeline - swa_start_epoch      : 2
+2026-04-17 08:00:34,558 - INFO - train_pipeline - swa_lr               : 1e-05
+2026-04-17 08:00:34,559 - INFO - train_pipeline - data_augmentation    : True
+2026-04-17 08:00:34,561 - INFO - train_pipeline - aug_rename_prob      : 0.3
+2026-04-17 08:00:34,562 - INFO - train_pipeline - aug_format_prob      : 0.3
+2026-04-17 08:00:34,564 - INFO - train_pipeline - =================================
+2026-04-17 08:00:35,711 - INFO - train_pipeline - Model placed on cuda
+2026-04-17 08:00:35,716 - INFO - train_pipeline - ===== Model Architecture =====
+2026-04-17 08:00:35,718 - INFO - train_pipeline -
 RobertaForSequenceClassification(
   (roberta): RobertaModel(
     (embeddings): RobertaEmbeddings(
       (position_embeddings): Embedding(514, 768, padding_idx=1)
       (token_type_embeddings): Embedding(1, 768)
       (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+      (dropout): Dropout(p=0.1, inplace=False)
     )
     (encoder): RobertaEncoder(
       (layer): ModuleList(
               (query): Linear(in_features=768, out_features=768, bias=True)
               (key): Linear(in_features=768, out_features=768, bias=True)
               (value): Linear(in_features=768, out_features=768, bias=True)
+              (dropout): Dropout(p=0.1, inplace=False)
             )
             (output): RobertaSelfOutput(
               (dense): Linear(in_features=768, out_features=768, bias=True)
               (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+              (dropout): Dropout(p=0.1, inplace=False)
             )
           )
           (intermediate): RobertaIntermediate(
           (output): RobertaOutput(
             (dense): Linear(in_features=3072, out_features=768, bias=True)
             (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+            (dropout): Dropout(p=0.1, inplace=False)
           )
         )
       )
   )
   (classifier): RobertaClassificationHead(
     (dense): Linear(in_features=768, out_features=768, bias=True)
+    (dropout): Dropout(p=0.1, inplace=False)
     (out_proj): Linear(in_features=768, out_features=2, bias=True)
   )
 )
+2026-04-17 08:00:35,722 - INFO - train_pipeline - ===== Parameter Summary =====
+2026-04-17 08:00:35,723 - INFO - train_pipeline - Total Parameters:         124,647,170
+2026-04-17 08:00:35,724 - INFO - train_pipeline - Trainable Parameters:     592,130
+2026-04-17 08:00:35,725 - INFO - train_pipeline - Non-trainable Parameters: 124,055,040
+2026-04-17 08:00:35,727 - INFO - train_pipeline - ===== Tokenizer Summary =====
+2026-04-17 08:00:35,747 - INFO - train_pipeline - Vocab size: 50265 | Special tokens: ['<s>', '</s>', '<unk>', '<pad>', '<mask>']
+2026-04-17 08:00:35,749 - INFO - train_pipeline - ===== End of Architecture Log =====
+2026-04-17 08:00:35,751 - INFO - train_pipeline - Data augmentation enabled (rename=0.3, format=0.3)
+2026-04-17 08:00:36,645 - INFO - train_pipeline - === Starting training with robust regularisation ===