nvan15 committed on Jan 15

Commit

ade215c

verified ·

1 Parent(s): 27ff7e3

Batch upload part 8

Browse files

Files changed (50) hide show

nl_tasks/exps/run_ex28/ft/adapter_config.json +18 -0
nl_tasks/exps/run_ex28/ft/special_tokens_map.json +24 -0
nl_tasks/exps/run_ex28/ft/tokenizer.json +0 -0
nl_tasks/exps/run_ex28/ft/tokenizer.model +3 -0
nl_tasks/exps/run_ex28/ft/tokenizer_config.json +43 -0
nl_tasks/exps/run_ex28/ft2/adapter_config.json +18 -0
nl_tasks/exps/run_ex28/ft2/adapter_model.bin +3 -0
nl_tasks/exps/run_ex29/ft/adapter_config.json +18 -0
nl_tasks/exps/run_ex29/ft/special_tokens_map.json +24 -0
nl_tasks/exps/run_ex29/ft/tokenizer.json +0 -0
nl_tasks/exps/run_ex29/ft/tokenizer.model +3 -0
nl_tasks/exps/run_ex29/ft/tokenizer_config.json +43 -0
nl_tasks/exps/run_ex29/ft2/adapter_config.json +18 -0
nl_tasks/exps/run_ex29/ft2/adapter_model.bin +3 -0
nl_tasks/exps/run_ex29/trainer_state.json +505 -0
nl_tasks/exps/run_ex30/ft/adapter_config.json +18 -0
nl_tasks/exps/run_ex30/ft/special_tokens_map.json +24 -0
nl_tasks/exps/run_ex30/ft/tokenizer.json +0 -0
nl_tasks/exps/run_ex30/ft/tokenizer.model +3 -0
nl_tasks/exps/run_ex30/ft/tokenizer_config.json +43 -0
nl_tasks/exps/run_ex30/ft2/adapter_config.json +18 -0
nl_tasks/exps/run_ex30/ft2/adapter_model.bin +3 -0
nl_tasks/exps/run_ex30/trainer_state.json +505 -0
nl_tasks/exps/run_ex31/ft/adapter_config.json +18 -0
nl_tasks/exps/run_ex31/ft/special_tokens_map.json +24 -0
nl_tasks/exps/run_ex31/ft/tokenizer.json +0 -0
nl_tasks/exps/run_ex31/ft/tokenizer.model +3 -0
nl_tasks/exps/run_ex31/ft/tokenizer_config.json +43 -0
nl_tasks/exps/run_ex31/ft2/adapter_config.json +18 -0
nl_tasks/exps/run_ex31/ft2/adapter_model.bin +3 -0
nl_tasks/exps/run_ex31/trainer_state.json +743 -0
nl_tasks/exps/run_ex32/ft/adapter_config.json +18 -0
nl_tasks/exps/run_ex32/ft/special_tokens_map.json +24 -0
nl_tasks/exps/run_ex32/ft/tokenizer.json +0 -0
nl_tasks/exps/run_ex32/ft/tokenizer.model +3 -0
nl_tasks/exps/run_ex32/ft/tokenizer_config.json +43 -0
nl_tasks/exps/run_ex32/ft2/adapter_config.json +18 -0
nl_tasks/exps/run_ex32/ft2/adapter_model.bin +3 -0
nl_tasks/exps/run_ex32/trainer_state.json +743 -0
nl_tasks/exps/run_ex33/ft/adapter_config.json +18 -0
nl_tasks/exps/run_ex33/ft/special_tokens_map.json +24 -0
nl_tasks/exps/run_ex33/ft/tokenizer.json +0 -0
nl_tasks/exps/run_ex33/ft/tokenizer.model +3 -0
nl_tasks/exps/run_ex33/ft/tokenizer_config.json +43 -0
nl_tasks/exps/run_ex33/ft2/adapter_config.json +18 -0
nl_tasks/exps/run_ex33/ft2/adapter_model.bin +3 -0
nl_tasks/exps/run_ex33/trainer_state.json +743 -0
nl_tasks/exps/run_ex34/gsm8k.txt +1 -0
nl_tasks/exps/run_ex34/math.txt +1 -0
nl_tasks/exps/run_ex34/trainer_state.json +743 -0

nl_tasks/exps/run_ex28/ft/adapter_config.json ADDED Viewed

	@@ -0,0 +1,18 @@

+{
+  "T": 1.0,
+  "base_model_name_or_path": "meta-llama/Llama-2-7b-hf",
+  "bias": "none",
+  "inference_mode": false,
+  "layers_to_transform": null,
+  "modules_to_save": null,
+  "num_rotations": 1,
+  "peft_type": "ROTATION",
+  "r": 16,
+  "revision": null,
+  "target_modules": [
+    "v_proj",
+    "q_proj"
+  ],
+  "target_modules_to_skip": null,
+  "task_type": "CAUSAL_LM"
+}

nl_tasks/exps/run_ex28/ft/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<unk>",
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

nl_tasks/exps/run_ex28/ft/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

nl_tasks/exps/run_ex28/ft/tokenizer.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
+size 499723

nl_tasks/exps/run_ex28/ft/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,43 @@

+{
+  "add_bos_token": true,
+  "add_eos_token": false,
+  "add_prefix_space": null,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "extra_special_tokens": {},
+  "legacy": false,
+  "model_max_length": 512,
+  "pad_token": "<unk>",
+  "padding_side": "right",
+  "sp_model_kwargs": {},
+  "tokenizer_class": "LlamaTokenizer",
+  "unk_token": "<unk>",
+  "use_default_system_prompt": false
+}

nl_tasks/exps/run_ex28/ft2/adapter_config.json ADDED Viewed

	@@ -0,0 +1,18 @@

+{
+  "T": 1.0,
+  "base_model_name_or_path": "meta-llama/Llama-2-7b-hf",
+  "bias": "none",
+  "inference_mode": true,
+  "layers_to_transform": null,
+  "modules_to_save": null,
+  "num_rotations": 1,
+  "peft_type": "ROTATION",
+  "r": 16,
+  "revision": null,
+  "target_modules": [
+    "v_proj",
+    "q_proj"
+  ],
+  "target_modules_to_skip": null,
+  "task_type": "CAUSAL_LM"
+}

nl_tasks/exps/run_ex28/ft2/adapter_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4b2ff3c37a243e0a7907b8e6da8bde1c03c0404c3c881e0b71b1698879447d68
+size 33602915

nl_tasks/exps/run_ex29/ft/adapter_config.json ADDED Viewed

	@@ -0,0 +1,18 @@

+{
+  "T": 1.0,
+  "base_model_name_or_path": "meta-llama/Llama-2-7b-hf",
+  "bias": "none",
+  "inference_mode": false,
+  "layers_to_transform": null,
+  "modules_to_save": null,
+  "num_rotations": 1,
+  "peft_type": "ROTATION",
+  "r": 16,
+  "revision": null,
+  "target_modules": [
+    "v_proj",
+    "q_proj"
+  ],
+  "target_modules_to_skip": null,
+  "task_type": "CAUSAL_LM"
+}

nl_tasks/exps/run_ex29/ft/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<unk>",
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

nl_tasks/exps/run_ex29/ft/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

nl_tasks/exps/run_ex29/ft/tokenizer.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
+size 499723

nl_tasks/exps/run_ex29/ft/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,43 @@

+{
+  "add_bos_token": true,
+  "add_eos_token": false,
+  "add_prefix_space": null,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "extra_special_tokens": {},
+  "legacy": false,
+  "model_max_length": 512,
+  "pad_token": "<unk>",
+  "padding_side": "right",
+  "sp_model_kwargs": {},
+  "tokenizer_class": "LlamaTokenizer",
+  "unk_token": "<unk>",
+  "use_default_system_prompt": false
+}

nl_tasks/exps/run_ex29/ft2/adapter_config.json ADDED Viewed

	@@ -0,0 +1,18 @@

+{
+  "T": 1.0,
+  "base_model_name_or_path": "meta-llama/Llama-2-7b-hf",
+  "bias": "none",
+  "inference_mode": true,
+  "layers_to_transform": null,
+  "modules_to_save": null,
+  "num_rotations": 1,
+  "peft_type": "ROTATION",
+  "r": 16,
+  "revision": null,
+  "target_modules": [
+    "v_proj",
+    "q_proj"
+  ],
+  "target_modules_to_skip": null,
+  "task_type": "CAUSAL_LM"
+}

nl_tasks/exps/run_ex29/ft2/adapter_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:84e3c739b20c3790118a8b7ea87a0218b5c9c9e771866690dea91b3c76edfd03
+size 33602915

nl_tasks/exps/run_ex29/trainer_state.json ADDED Viewed

	@@ -0,0 +1,505 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 1668,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.02997601918465228,
+      "grad_norm": 0.26481908559799194,
+      "learning_rate": 0.000718562874251497,
+      "loss": 0.5019,
+      "step": 25
+    },
+    {
+      "epoch": 0.05995203836930456,
+      "grad_norm": 0.21658311784267426,
+      "learning_rate": 0.001467065868263473,
+      "loss": 0.3441,
+      "step": 50
+    },
+    {
+      "epoch": 0.08992805755395683,
+      "grad_norm": 0.4752499461174011,
+      "learning_rate": 0.002215568862275449,
+      "loss": 0.3298,
+      "step": 75
+    },
+    {
+      "epoch": 0.11990407673860912,
+      "grad_norm": 56.11571502685547,
+      "learning_rate": 0.002964071856287425,
+      "loss": 0.3863,
+      "step": 100
+    },
+    {
+      "epoch": 0.1498800959232614,
+      "grad_norm": 0.24988949298858643,
+      "learning_rate": 0.003712574850299401,
+      "loss": 0.3536,
+      "step": 125
+    },
+    {
+      "epoch": 0.17985611510791366,
+      "grad_norm": 0.23253102600574493,
+      "learning_rate": 0.004461077844311378,
+      "loss": 0.3441,
+      "step": 150
+    },
+    {
+      "epoch": 0.20983213429256595,
+      "grad_norm": 0.20779232680797577,
+      "learning_rate": 0.0049997316901074056,
+      "loss": 0.3304,
+      "step": 175
+    },
+    {
+      "epoch": 0.23980815347721823,
+      "grad_norm": 0.14326857030391693,
+      "learning_rate": 0.004994394866271345,
+      "loss": 0.3232,
+      "step": 200
+    },
+    {
+      "epoch": 0.2697841726618705,
+      "grad_norm": 0.1106962114572525,
+      "learning_rate": 0.004982230184254933,
+      "loss": 0.3079,
+      "step": 225
+    },
+    {
+      "epoch": 0.2997601918465228,
+      "grad_norm": 0.10388347506523132,
+      "learning_rate": 0.004963270942203842,
+      "loss": 0.2993,
+      "step": 250
+    },
+    {
+      "epoch": 0.32973621103117506,
+      "grad_norm": 0.10831473022699356,
+      "learning_rate": 0.004937569036879761,
+      "loss": 0.289,
+      "step": 275
+    },
+    {
+      "epoch": 0.3597122302158273,
+      "grad_norm": 0.10159999877214432,
+      "learning_rate": 0.004905194821604405,
+      "loss": 0.2792,
+      "step": 300
+    },
+    {
+      "epoch": 0.38968824940047964,
+      "grad_norm": 0.09414353221654892,
+      "learning_rate": 0.004866236913682755,
+      "loss": 0.2742,
+      "step": 325
+    },
+    {
+      "epoch": 0.4196642685851319,
+      "grad_norm": 0.08423851430416107,
+      "learning_rate": 0.004820801951832635,
+      "loss": 0.2746,
+      "step": 350
+    },
+    {
+      "epoch": 0.44964028776978415,
+      "grad_norm": 0.10220842808485031,
+      "learning_rate": 0.004769014304284648,
+      "loss": 0.2689,
+      "step": 375
+    },
+    {
+      "epoch": 0.47961630695443647,
+      "grad_norm": 0.07861992716789246,
+      "learning_rate": 0.0047110157283514545,
+      "loss": 0.2684,
+      "step": 400
+    },
+    {
+      "epoch": 0.5095923261390888,
+      "grad_norm": 0.09534072130918503,
+      "learning_rate": 0.004646964982398253,
+      "loss": 0.2748,
+      "step": 425
+    },
+    {
+      "epoch": 0.539568345323741,
+      "grad_norm": 0.06600063294172287,
+      "learning_rate": 0.0045770373912766265,
+      "loss": 0.2578,
+      "step": 450
+    },
+    {
+      "epoch": 0.5695443645083933,
+      "grad_norm": 0.08592315763235092,
+      "learning_rate": 0.004501424366411254,
+      "loss": 0.2567,
+      "step": 475
+    },
+    {
+      "epoch": 0.5995203836930456,
+      "grad_norm": 0.08367173373699188,
+      "learning_rate": 0.00442033288185318,
+      "loss": 0.2631,
+      "step": 500
+    },
+    {
+      "epoch": 0.6294964028776978,
+      "grad_norm": 0.08196345716714859,
+      "learning_rate": 0.004333984907733788,
+      "loss": 0.2505,
+      "step": 525
+    },
+    {
+      "epoch": 0.6594724220623501,
+      "grad_norm": 0.07102052867412567,
+      "learning_rate": 0.004242616802670323,
+      "loss": 0.2464,
+      "step": 550
+    },
+    {
+      "epoch": 0.6894484412470024,
+      "grad_norm": 0.07556530088186264,
+      "learning_rate": 0.00414647866678607,
+      "loss": 0.2542,
+      "step": 575
+    },
+    {
+      "epoch": 0.7194244604316546,
+      "grad_norm": 0.0706329271197319,
+      "learning_rate": 0.004045833657116195,
+      "loss": 0.2484,
+      "step": 600
+    },
+    {
+      "epoch": 0.749400479616307,
+      "grad_norm": 0.07402704656124115,
+      "learning_rate": 0.003940957267273149,
+      "loss": 0.2453,
+      "step": 625
+    },
+    {
+      "epoch": 0.7793764988009593,
+      "grad_norm": 0.06807030737400055,
+      "learning_rate": 0.0038321365733434,
+      "loss": 0.2431,
+      "step": 650
+    },
+    {
+      "epoch": 0.8093525179856115,
+      "grad_norm": 0.07543069124221802,
+      "learning_rate": 0.0037196694480796876,
+      "loss": 0.2497,
+      "step": 675
+    },
+    {
+      "epoch": 0.8393285371702638,
+      "grad_norm": 0.06862358748912811,
+      "learning_rate": 0.0036038637455397798,
+      "loss": 0.238,
+      "step": 700
+    },
+    {
+      "epoch": 0.8693045563549161,
+      "grad_norm": 0.09762419760227203,
+      "learning_rate": 0.0034850364584035876,
+      "loss": 0.2339,
+      "step": 725
+    },
+    {
+      "epoch": 0.8992805755395683,
+      "grad_norm": 0.0853116512298584,
+      "learning_rate": 0.0033635128502753193,
+      "loss": 0.241,
+      "step": 750
+    },
+    {
+      "epoch": 0.9292565947242206,
+      "grad_norm": 0.05775105208158493,
+      "learning_rate": 0.00323962556534579,
+      "loss": 0.2377,
+      "step": 775
+    },
+    {
+      "epoch": 0.9592326139088729,
+      "grad_norm": 0.06312242150306702,
+      "learning_rate": 0.003113713717851998,
+      "loss": 0.2371,
+      "step": 800
+    },
+    {
+      "epoch": 0.9892086330935251,
+      "grad_norm": 0.06418934464454651,
+      "learning_rate": 0.0029861219638263694,
+      "loss": 0.2313,
+      "step": 825
+    },
+    {
+      "epoch": 1.0191846522781776,
+      "grad_norm": 0.06555480509996414,
+      "learning_rate": 0.002857199557676555,
+      "loss": 0.2148,
+      "step": 850
+    },
+    {
+      "epoch": 1.0491606714628297,
+      "grad_norm": 0.061830855906009674,
+      "learning_rate": 0.00272729939617819,
+      "loss": 0.203,
+      "step": 875
+    },
+    {
+      "epoch": 1.079136690647482,
+      "grad_norm": 0.07122394442558289,
+      "learning_rate": 0.002596777052497456,
+      "loss": 0.2041,
+      "step": 900
+    },
+    {
+      "epoch": 1.1091127098321343,
+      "grad_norm": 0.06675304472446442,
+      "learning_rate": 0.002465989802887632,
+      "loss": 0.21,
+      "step": 925
+    },
+    {
+      "epoch": 1.1390887290167866,
+      "grad_norm": 0.06000453978776932,
+      "learning_rate": 0.0023352956487238063,
+      "loss": 0.2003,
+      "step": 950
+    },
+    {
+      "epoch": 1.169064748201439,
+      "grad_norm": 0.05904003605246544,
+      "learning_rate": 0.002205052336552725,
+      "loss": 0.2035,
+      "step": 975
+    },
+    {
+      "epoch": 1.1990407673860912,
+      "grad_norm": 0.07205251604318619,
+      "learning_rate": 0.0020756163788401825,
+      "loss": 0.205,
+      "step": 1000
+    },
+    {
+      "epoch": 1.2290167865707433,
+      "grad_norm": 0.06704974919557571,
+      "learning_rate": 0.0019473420780964405,
+      "loss": 0.2069,
+      "step": 1025
+    },
+    {
+      "epoch": 1.2589928057553956,
+      "grad_norm": 0.060501646250486374,
+      "learning_rate": 0.0018205805570509052,
+      "loss": 0.198,
+      "step": 1050
+    },
+    {
+      "epoch": 1.288968824940048,
+      "grad_norm": 0.05758596956729889,
+      "learning_rate": 0.0016956787975307614,
+      "loss": 0.1917,
+      "step": 1075
+    },
+    {
+      "epoch": 1.3189448441247003,
+      "grad_norm": 0.05682109296321869,
+      "learning_rate": 0.0015729786906744237,
+      "loss": 0.1914,
+      "step": 1100
+    },
+    {
+      "epoch": 1.3489208633093526,
+      "grad_norm": 0.06109858676791191,
+      "learning_rate": 0.0014528161010796171,
+      "loss": 0.196,
+      "step": 1125
+    },
+    {
+      "epoch": 1.3788968824940047,
+      "grad_norm": 0.06597461551427841,
+      "learning_rate": 0.0013355199474478,
+      "loss": 0.1897,
+      "step": 1150
+    },
+    {
+      "epoch": 1.4088729016786572,
+      "grad_norm": 0.060266848653554916,
+      "learning_rate": 0.0012214113022414447,
+      "loss": 0.1965,
+      "step": 1175
+    },
+    {
+      "epoch": 1.4388489208633093,
+      "grad_norm": 0.05543503537774086,
+      "learning_rate": 0.0011108025128186872,
+      "loss": 0.1816,
+      "step": 1200
+    },
+    {
+      "epoch": 1.4688249400479616,
+      "grad_norm": 0.06788609176874161,
+      "learning_rate": 0.001003996346451016,
+      "loss": 0.1887,
+      "step": 1225
+    },
+    {
+      "epoch": 1.498800959232614,
+      "grad_norm": 0.05910054221749306,
+      "learning_rate": 0.0009012851615643594,
+      "loss": 0.1916,
+      "step": 1250
+    },
+    {
+      "epoch": 1.5287769784172662,
+      "grad_norm": 0.06214448809623718,
+      "learning_rate": 0.0008029501074720933,
+      "loss": 0.1897,
+      "step": 1275
+    },
+    {
+      "epoch": 1.5587529976019185,
+      "grad_norm": 0.05667509138584137,
+      "learning_rate": 0.0007092603547905377,
+      "loss": 0.1823,
+      "step": 1300
+    },
+    {
+      "epoch": 1.5887290167865706,
+      "grad_norm": 0.0649266168475151,
+      "learning_rate": 0.000620472358643503,
+      "loss": 0.1877,
+      "step": 1325
+    },
+    {
+      "epoch": 1.6187050359712232,
+      "grad_norm": 0.054551344364881516,
+      "learning_rate": 0.000536829156672706,
+      "loss": 0.1821,
+      "step": 1350
+    },
+    {
+      "epoch": 1.6486810551558753,
+      "grad_norm": 0.060151200741529465,
+      "learning_rate": 0.00045855970377559676,
+      "loss": 0.188,
+      "step": 1375
+    },
+    {
+      "epoch": 1.6786570743405276,
+      "grad_norm": 0.05992837995290756,
+      "learning_rate": 0.00038587824539160486,
+      "loss": 0.185,
+      "step": 1400
+    },
+    {
+      "epoch": 1.70863309352518,
+      "grad_norm": 0.06002328544855118,
+      "learning_rate": 0.00031898373105229694,
+      "loss": 0.1823,
+      "step": 1425
+    },
+    {
+      "epoch": 1.738609112709832,
+      "grad_norm": 0.06145670637488365,
+      "learning_rate": 0.00025805926980072337,
+      "loss": 0.1877,
+      "step": 1450
+    },
+    {
+      "epoch": 1.7685851318944845,
+      "grad_norm": 0.051237791776657104,
+      "learning_rate": 0.00020327162897062267,
+      "loss": 0.1826,
+      "step": 1475
+    },
+    {
+      "epoch": 1.7985611510791366,
+      "grad_norm": 0.059376440942287445,
+      "learning_rate": 0.00015477077769746855,
+      "loss": 0.1837,
+      "step": 1500
+    },
+    {
+      "epoch": 1.828537170263789,
+      "grad_norm": 0.05060333386063576,
+      "learning_rate": 0.00011268947641089322,
+      "loss": 0.1786,
+      "step": 1525
+    },
+    {
+      "epoch": 1.8585131894484412,
+      "grad_norm": 0.06010892242193222,
+      "learning_rate": 7.714291343216635e-05,
+      "loss": 0.1854,
+      "step": 1550
+    },
+    {
+      "epoch": 1.8884892086330936,
+      "grad_norm": 0.06022082641720772,
+      "learning_rate": 4.822838967146054e-05,
+      "loss": 0.184,
+      "step": 1575
+    },
+    {
+      "epoch": 1.9184652278177459,
+      "grad_norm": 0.05504591017961502,
+      "learning_rate": 2.6025052287976248e-05,
+      "loss": 0.19,
+      "step": 1600
+    },
+    {
+      "epoch": 1.948441247002398,
+      "grad_norm": 0.0550151988863945,
+      "learning_rate": 1.0593678041975475e-05,
+      "loss": 0.1808,
+      "step": 1625
+    },
+    {
+      "epoch": 1.9784172661870505,
+      "grad_norm": 0.05710240826010704,
+      "learning_rate": 1.9765069317453923e-06,
+      "loss": 0.1844,
+      "step": 1650
+    },
+    {
+      "epoch": 2.0,
+      "step": 1668,
+      "total_flos": 1.62588235137024e+18,
+      "train_loss": 0.2374439179468498,
+      "train_runtime": 2227.387,
+      "train_samples_per_second": 35.917,
+      "train_steps_per_second": 0.749
+    }
+  ],
+  "logging_steps": 25,
+  "max_steps": 1668,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 2,
+  "save_steps": 0,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.62588235137024e+18,
+  "train_batch_size": 48,
+  "trial_name": null,
+  "trial_params": null
+}

nl_tasks/exps/run_ex30/ft/adapter_config.json ADDED Viewed

	@@ -0,0 +1,18 @@

+{
+  "T": 1.0,
+  "base_model_name_or_path": "meta-llama/Llama-2-7b-hf",
+  "bias": "none",
+  "inference_mode": false,
+  "layers_to_transform": null,
+  "modules_to_save": null,
+  "num_rotations": 1,
+  "peft_type": "ROTATION",
+  "r": 16,
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "target_modules_to_skip": null,
+  "task_type": "CAUSAL_LM"
+}

nl_tasks/exps/run_ex30/ft/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<unk>",
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

nl_tasks/exps/run_ex30/ft/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

nl_tasks/exps/run_ex30/ft/tokenizer.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
+size 499723

nl_tasks/exps/run_ex30/ft/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,43 @@

+{
+  "add_bos_token": true,
+  "add_eos_token": false,
+  "add_prefix_space": null,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "extra_special_tokens": {},
+  "legacy": false,
+  "model_max_length": 512,
+  "pad_token": "<unk>",
+  "padding_side": "right",
+  "sp_model_kwargs": {},
+  "tokenizer_class": "LlamaTokenizer",
+  "unk_token": "<unk>",
+  "use_default_system_prompt": false
+}

nl_tasks/exps/run_ex30/ft2/adapter_config.json ADDED Viewed

	@@ -0,0 +1,18 @@

+{
+  "T": 1.0,
+  "base_model_name_or_path": "meta-llama/Llama-2-7b-hf",
+  "bias": "none",
+  "inference_mode": true,
+  "layers_to_transform": null,
+  "modules_to_save": null,
+  "num_rotations": 1,
+  "peft_type": "ROTATION",
+  "r": 16,
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "target_modules_to_skip": null,
+  "task_type": "CAUSAL_LM"
+}

nl_tasks/exps/run_ex30/ft2/adapter_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:37abc3b7865aedfe138803a372ca6148e64aa7084b6ae523203860321f217145
+size 33602915

nl_tasks/exps/run_ex30/trainer_state.json ADDED Viewed

	@@ -0,0 +1,505 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 1668,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.02997601918465228,
+      "grad_norm": 0.2027871459722519,
+      "learning_rate": 0.0001437125748502994,
+      "loss": 0.6037,
+      "step": 25
+    },
+    {
+      "epoch": 0.05995203836930456,
+      "grad_norm": 0.2463991791009903,
+      "learning_rate": 0.0002934131736526946,
+      "loss": 0.3853,
+      "step": 50
+    },
+    {
+      "epoch": 0.08992805755395683,
+      "grad_norm": 0.16277779638767242,
+      "learning_rate": 0.0004431137724550898,
+      "loss": 0.3367,
+      "step": 75
+    },
+    {
+      "epoch": 0.11990407673860912,
+      "grad_norm": 0.19866418838500977,
+      "learning_rate": 0.000592814371257485,
+      "loss": 0.3121,
+      "step": 100
+    },
+    {
+      "epoch": 0.1498800959232614,
+      "grad_norm": 0.1782834231853485,
+      "learning_rate": 0.0007425149700598802,
+      "loss": 0.3089,
+      "step": 125
+    },
+    {
+      "epoch": 0.17985611510791366,
+      "grad_norm": 0.19668474793434143,
+      "learning_rate": 0.0008922155688622756,
+      "loss": 0.2998,
+      "step": 150
+    },
+    {
+      "epoch": 0.20983213429256595,
+      "grad_norm": 0.20847776532173157,
+      "learning_rate": 0.000999946338021481,
+      "loss": 0.2978,
+      "step": 175
+    },
+    {
+      "epoch": 0.23980815347721823,
+      "grad_norm": 0.24161750078201294,
+      "learning_rate": 0.000998878973254269,
+      "loss": 0.304,
+      "step": 200
+    },
+    {
+      "epoch": 0.2697841726618705,
+      "grad_norm": 0.20520828664302826,
+      "learning_rate": 0.0009964460368509867,
+      "loss": 0.2982,
+      "step": 225
+    },
+    {
+      "epoch": 0.2997601918465228,
+      "grad_norm": 0.205276221036911,
+      "learning_rate": 0.0009926541884407686,
+      "loss": 0.2948,
+      "step": 250
+    },
+    {
+      "epoch": 0.32973621103117506,
+      "grad_norm": 0.1710120588541031,
+      "learning_rate": 0.000987513807375952,
+      "loss": 0.2866,
+      "step": 275
+    },
+    {
+      "epoch": 0.3597122302158273,
+      "grad_norm": 0.18962617218494415,
+      "learning_rate": 0.000981038964320881,
+      "loss": 0.2766,
+      "step": 300
+    },
+    {
+      "epoch": 0.38968824940047964,
+      "grad_norm": 0.19223880767822266,
+      "learning_rate": 0.0009732473827365509,
+      "loss": 0.2738,
+      "step": 325
+    },
+    {
+      "epoch": 0.4196642685851319,
+      "grad_norm": 0.17323505878448486,
+      "learning_rate": 0.0009641603903665269,
+      "loss": 0.2747,
+      "step": 350
+    },
+    {
+      "epoch": 0.44964028776978415,
+      "grad_norm": 0.2111186534166336,
+      "learning_rate": 0.0009538028608569297,
+      "loss": 0.2687,
+      "step": 375
+    },
+    {
+      "epoch": 0.47961630695443647,
+      "grad_norm": 0.16343681514263153,
+      "learning_rate": 0.0009422031456702909,
+      "loss": 0.2695,
+      "step": 400
+    },
+    {
+      "epoch": 0.5095923261390888,
+      "grad_norm": 0.166376531124115,
+      "learning_rate": 0.0009293929964796506,
+      "loss": 0.2764,
+      "step": 425
+    },
+    {
+      "epoch": 0.539568345323741,
+      "grad_norm": 0.15445727109909058,
+      "learning_rate": 0.0009154074782553252,
+      "loss": 0.2592,
+      "step": 450
+    },
+    {
+      "epoch": 0.5695443645083933,
+      "grad_norm": 0.19298841059207916,
+      "learning_rate": 0.0009002848732822509,
+      "loss": 0.2586,
+      "step": 475
+    },
+    {
+      "epoch": 0.5995203836930456,
+      "grad_norm": 0.15150733292102814,
+      "learning_rate": 0.0008840665763706359,
+      "loss": 0.2642,
+      "step": 500
+    },
+    {
+      "epoch": 0.6294964028776978,
+      "grad_norm": 0.1794758439064026,
+      "learning_rate": 0.0008667969815467577,
+      "loss": 0.2519,
+      "step": 525
+    },
+    {
+      "epoch": 0.6594724220623501,
+      "grad_norm": 0.17440396547317505,
+      "learning_rate": 0.0008485233605340645,
+      "loss": 0.2473,
+      "step": 550
+    },
+    {
+      "epoch": 0.6894484412470024,
+      "grad_norm": 0.1693456918001175,
+      "learning_rate": 0.000829295733357214,
+      "loss": 0.2554,
+      "step": 575
+    },
+    {
+      "epoch": 0.7194244604316546,
+      "grad_norm": 0.21234950423240662,
+      "learning_rate": 0.0008091667314232391,
+      "loss": 0.2509,
+      "step": 600
+    },
+    {
+      "epoch": 0.749400479616307,
+      "grad_norm": 0.16216659545898438,
+      "learning_rate": 0.0007881914534546298,
+      "loss": 0.2539,
+      "step": 625
+    },
+    {
+      "epoch": 0.7793764988009593,
+      "grad_norm": 0.1589777022600174,
+      "learning_rate": 0.00076642731466868,
+      "loss": 0.2478,
+      "step": 650
+    },
+    {
+      "epoch": 0.8093525179856115,
+      "grad_norm": 0.17090196907520294,
+      "learning_rate": 0.0007439338896159376,
+      "loss": 0.2526,
+      "step": 675
+    },
+    {
+      "epoch": 0.8393285371702638,
+      "grad_norm": 0.1454530507326126,
+      "learning_rate": 0.000720772749107956,
+      "loss": 0.2407,
+      "step": 700
+    },
+    {
+      "epoch": 0.8693045563549161,
+      "grad_norm": 0.1544404923915863,
+      "learning_rate": 0.0006970072916807175,
+      "loss": 0.2358,
+      "step": 725
+    },
+    {
+      "epoch": 0.8992805755395683,
+      "grad_norm": 0.15039412677288055,
+      "learning_rate": 0.0006727025700550639,
+      "loss": 0.2416,
+      "step": 750
+    },
+    {
+      "epoch": 0.9292565947242206,
+      "grad_norm": 0.13531458377838135,
+      "learning_rate": 0.000647925113069158,
+      "loss": 0.2396,
+      "step": 775
+    },
+    {
+      "epoch": 0.9592326139088729,
+      "grad_norm": 0.13535469770431519,
+      "learning_rate": 0.0006227427435703996,
+      "loss": 0.2382,
+      "step": 800
+    },
+    {
+      "epoch": 0.9892086330935251,
+      "grad_norm": 0.13635869324207306,
+      "learning_rate": 0.0005972243927652738,
+      "loss": 0.234,
+      "step": 825
+    },
+    {
+      "epoch": 1.0191846522781776,
+      "grad_norm": 0.16282866895198822,
+      "learning_rate": 0.0005714399115353111,
+      "loss": 0.2181,
+      "step": 850
+    },
+    {
+      "epoch": 1.0491606714628297,
+      "grad_norm": 0.15078669786453247,
+      "learning_rate": 0.0005454598792356381,
+      "loss": 0.2082,
+      "step": 875
+    },
+    {
+      "epoch": 1.079136690647482,
+      "grad_norm": 0.14040178060531616,
+      "learning_rate": 0.0005193554104994912,
+      "loss": 0.2083,
+      "step": 900
+    },
+    {
+      "epoch": 1.1091127098321343,
+      "grad_norm": 0.14513766765594482,
+      "learning_rate": 0.0004931979605775264,
+      "loss": 0.2137,
+      "step": 925
+    },
+    {
+      "epoch": 1.1390887290167866,
+      "grad_norm": 0.14192743599414825,
+      "learning_rate": 0.0004670591297447613,
+      "loss": 0.2039,
+      "step": 950
+    },
+    {
+      "epoch": 1.169064748201439,
+      "grad_norm": 0.14158278703689575,
+      "learning_rate": 0.00044101046731054495,
+      "loss": 0.2073,
+      "step": 975
+    },
+    {
+      "epoch": 1.1990407673860912,
+      "grad_norm": 0.15080343186855316,
+      "learning_rate": 0.0004151232757680365,
+      "loss": 0.2089,
+      "step": 1000
+    },
+    {
+      "epoch": 1.2290167865707433,
+      "grad_norm": 0.16032980382442474,
+      "learning_rate": 0.0003894684156192881,
+      "loss": 0.2097,
+      "step": 1025
+    },
+    {
+      "epoch": 1.2589928057553956,
+      "grad_norm": 0.14257696270942688,
+      "learning_rate": 0.00036411611141018104,
+      "loss": 0.2013,
+      "step": 1050
+    },
+    {
+      "epoch": 1.288968824940048,
+      "grad_norm": 0.1491222381591797,
+      "learning_rate": 0.00033913575950615226,
+      "loss": 0.1949,
+      "step": 1075
+    },
+    {
+      "epoch": 1.3189448441247003,
+      "grad_norm": 0.13092097640037537,
+      "learning_rate": 0.00031459573813488474,
+      "loss": 0.1946,
+      "step": 1100
+    },
+    {
+      "epoch": 1.3489208633093526,
+      "grad_norm": 0.13922549784183502,
+      "learning_rate": 0.0002905632202159234,
+      "loss": 0.1991,
+      "step": 1125
+    },
+    {
+      "epoch": 1.3788968824940047,
+      "grad_norm": 0.13861505687236786,
+      "learning_rate": 0.00026710398948956,
+      "loss": 0.1921,
+      "step": 1150
+    },
+    {
+      "epoch": 1.4088729016786572,
+      "grad_norm": 0.14462125301361084,
+      "learning_rate": 0.00024428226044828893,
+      "loss": 0.1992,
+      "step": 1175
+    },
+    {
+      "epoch": 1.4388489208633093,
+      "grad_norm": 0.13747504353523254,
+      "learning_rate": 0.00022216050256373743,
+      "loss": 0.1848,
+      "step": 1200
+    },
+    {
+      "epoch": 1.4688249400479616,
+      "grad_norm": 0.1536317616701126,
+      "learning_rate": 0.00020079926929020321,
+      "loss": 0.1914,
+      "step": 1225
+    },
+    {
+      "epoch": 1.498800959232614,
+      "grad_norm": 0.1415477842092514,
+      "learning_rate": 0.00018025703231287188,
+      "loss": 0.1937,
+      "step": 1250
+    },
+    {
+      "epoch": 1.5287769784172662,
+      "grad_norm": 0.14675097167491913,
+      "learning_rate": 0.00016059002149441864,
+      "loss": 0.1934,
+      "step": 1275
+    },
+    {
+      "epoch": 1.5587529976019185,
+      "grad_norm": 0.13264699280261993,
+      "learning_rate": 0.00014185207095810754,
+      "loss": 0.1848,
+      "step": 1300
+    },
+    {
+      "epoch": 1.5887290167865706,
+      "grad_norm": 0.15923435986042023,
+      "learning_rate": 0.00012409447172870058,
+      "loss": 0.1909,
+      "step": 1325
+    },
+    {
+      "epoch": 1.6187050359712232,
+      "grad_norm": 0.12699192762374878,
+      "learning_rate": 0.00010736583133454119,
+      "loss": 0.1853,
+      "step": 1350
+    },
+    {
+      "epoch": 1.6486810551558753,
+      "grad_norm": 0.14546607434749603,
+      "learning_rate": 9.171194075511934e-05,
+      "loss": 0.1919,
+      "step": 1375
+    },
+    {
+      "epoch": 1.6786570743405276,
+      "grad_norm": 0.14308422803878784,
+      "learning_rate": 7.717564907832098e-05,
+      "loss": 0.1886,
+      "step": 1400
+    },
+    {
+      "epoch": 1.70863309352518,
+      "grad_norm": 0.1330956369638443,
+      "learning_rate": 6.379674621045939e-05,
+      "loss": 0.1856,
+      "step": 1425
+    },
+    {
+      "epoch": 1.738609112709832,
+      "grad_norm": 0.14610905945301056,
+      "learning_rate": 5.1611853960144674e-05,
+      "loss": 0.1923,
+      "step": 1450
+    },
+    {
+      "epoch": 1.7685851318944845,
+      "grad_norm": 0.12671244144439697,
+      "learning_rate": 4.0654325794124535e-05,
+      "loss": 0.1853,
+      "step": 1475
+    },
+    {
+      "epoch": 1.7985611510791366,
+      "grad_norm": 0.14322331547737122,
+      "learning_rate": 3.095415553949371e-05,
+      "loss": 0.1868,
+      "step": 1500
+    },
+    {
+      "epoch": 1.828537170263789,
+      "grad_norm": 0.1249406635761261,
+      "learning_rate": 2.2537895282178645e-05,
+      "loss": 0.1829,
+      "step": 1525
+    },
+    {
+      "epoch": 1.8585131894484412,
+      "grad_norm": 0.1507108360528946,
+      "learning_rate": 1.542858268643327e-05,
+      "loss": 0.1888,
+      "step": 1550
+    },
+    {
+      "epoch": 1.8884892086330936,
+      "grad_norm": 0.13775067031383514,
+      "learning_rate": 9.645677934292108e-06,
+      "loss": 0.1881,
+      "step": 1575
+    },
+    {
+      "epoch": 1.9184652278177459,
+      "grad_norm": 0.13463319838047028,
+      "learning_rate": 5.205010457595249e-06,
+      "loss": 0.1937,
+      "step": 1600
+    },
+    {
+      "epoch": 1.948441247002398,
+      "grad_norm": 0.13453663885593414,
+      "learning_rate": 2.118735608395095e-06,
+      "loss": 0.1846,
+      "step": 1625
+    },
+    {
+      "epoch": 1.9784172661870505,
+      "grad_norm": 0.15357571840286255,
+      "learning_rate": 3.953013863490784e-07,
+      "loss": 0.1877,
+      "step": 1650
+    },
+    {
+      "epoch": 2.0,
+      "step": 1668,
+      "total_flos": 1.62588235137024e+18,
+      "train_loss": 0.23850378386980053,
+      "train_runtime": 2220.9851,
+      "train_samples_per_second": 36.02,
+      "train_steps_per_second": 0.751
+    }
+  ],
+  "logging_steps": 25,
+  "max_steps": 1668,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 2,
+  "save_steps": 0,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.62588235137024e+18,
+  "train_batch_size": 48,
+  "trial_name": null,
+  "trial_params": null
+}

nl_tasks/exps/run_ex31/ft/adapter_config.json ADDED Viewed

	@@ -0,0 +1,18 @@

+{
+  "T": 1.0,
+  "base_model_name_or_path": "meta-llama/Llama-2-7b-hf",
+  "bias": "none",
+  "inference_mode": false,
+  "layers_to_transform": null,
+  "modules_to_save": null,
+  "num_rotations": 1,
+  "peft_type": "ROTATION",
+  "r": 16,
+  "revision": null,
+  "target_modules": [
+    "v_proj",
+    "q_proj"
+  ],
+  "target_modules_to_skip": null,
+  "task_type": "CAUSAL_LM"
+}

nl_tasks/exps/run_ex31/ft/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<unk>",
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

nl_tasks/exps/run_ex31/ft/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

nl_tasks/exps/run_ex31/ft/tokenizer.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
+size 499723

nl_tasks/exps/run_ex31/ft/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,43 @@

+{
+  "add_bos_token": true,
+  "add_eos_token": false,
+  "add_prefix_space": null,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "extra_special_tokens": {},
+  "legacy": false,
+  "model_max_length": 512,
+  "pad_token": "<unk>",
+  "padding_side": "right",
+  "sp_model_kwargs": {},
+  "tokenizer_class": "LlamaTokenizer",
+  "unk_token": "<unk>",
+  "use_default_system_prompt": false
+}

nl_tasks/exps/run_ex31/ft2/adapter_config.json ADDED Viewed

	@@ -0,0 +1,18 @@

+{
+  "T": 1.0,
+  "base_model_name_or_path": "meta-llama/Llama-2-7b-hf",
+  "bias": "none",
+  "inference_mode": true,
+  "layers_to_transform": null,
+  "modules_to_save": null,
+  "num_rotations": 1,
+  "peft_type": "ROTATION",
+  "r": 16,
+  "revision": null,
+  "target_modules": [
+    "v_proj",
+    "q_proj"
+  ],
+  "target_modules_to_skip": null,
+  "task_type": "CAUSAL_LM"
+}

nl_tasks/exps/run_ex31/ft2/adapter_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a02d042f5103673b43fce0e75a90ffc2bbd4c2dd3f028a6db285cf34c732bb6f
+size 33602915

nl_tasks/exps/run_ex31/trainer_state.json ADDED Viewed

	@@ -0,0 +1,743 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 2502,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.02997601918465228,
+      "grad_norm": 0.237616166472435,
+      "learning_rate": 0.00047808764940239046,
+      "loss": 0.523,
+      "step": 25
+    },
+    {
+      "epoch": 0.05995203836930456,
+      "grad_norm": 0.20422297716140747,
+      "learning_rate": 0.0009760956175298805,
+      "loss": 0.3493,
+      "step": 50
+    },
+    {
+      "epoch": 0.08992805755395683,
+      "grad_norm": 0.21873657405376434,
+      "learning_rate": 0.0014741035856573707,
+      "loss": 0.3229,
+      "step": 75
+    },
+    {
+      "epoch": 0.11990407673860912,
+      "grad_norm": 0.24540852010250092,
+      "learning_rate": 0.0019721115537848603,
+      "loss": 0.314,
+      "step": 100
+    },
+    {
+      "epoch": 0.1498800959232614,
+      "grad_norm": 1.2509855031967163,
+      "learning_rate": 0.002470119521912351,
+      "loss": 0.3362,
+      "step": 125
+    },
+    {
+      "epoch": 0.17985611510791366,
+      "grad_norm": 0.3144875168800354,
+      "learning_rate": 0.002968127490039841,
+      "loss": 0.3425,
+      "step": 150
+    },
+    {
+      "epoch": 0.20983213429256595,
+      "grad_norm": 0.3264140486717224,
+      "learning_rate": 0.003466135458167331,
+      "loss": 0.3266,
+      "step": 175
+    },
+    {
+      "epoch": 0.23980815347721823,
+      "grad_norm": 0.18573451042175293,
+      "learning_rate": 0.0039641434262948205,
+      "loss": 0.3298,
+      "step": 200
+    },
+    {
+      "epoch": 0.2697841726618705,
+      "grad_norm": 0.18408645689487457,
+      "learning_rate": 0.004462151394422311,
+      "loss": 0.3179,
+      "step": 225
+    },
+    {
+      "epoch": 0.2997601918465228,
+      "grad_norm": 0.15508218109607697,
+      "learning_rate": 0.0049601593625498005,
+      "loss": 0.3138,
+      "step": 250
+    },
+    {
+      "epoch": 0.32973621103117506,
+      "grad_norm": 0.12099787592887878,
+      "learning_rate": 0.004998712114810764,
+      "loss": 0.3034,
+      "step": 275
+    },
+    {
+      "epoch": 0.3597122302158273,
+      "grad_norm": 0.15490184724330902,
+      "learning_rate": 0.004994392376862353,
+      "loss": 0.2906,
+      "step": 300
+    },
+    {
+      "epoch": 0.38968824940047964,
+      "grad_norm": 0.12329553812742233,
+      "learning_rate": 0.004987036305323271,
+      "loss": 0.283,
+      "step": 325
+    },
+    {
+      "epoch": 0.4196642685851319,
+      "grad_norm": 0.1184345930814743,
+      "learning_rate": 0.0049766528544732515,
+      "loss": 0.2827,
+      "step": 350
+    },
+    {
+      "epoch": 0.44964028776978415,
+      "grad_norm": 0.11834505200386047,
+      "learning_rate": 0.00496325466371133,
+      "loss": 0.2732,
+      "step": 375
+    },
+    {
+      "epoch": 0.47961630695443647,
+      "grad_norm": 0.07786522805690765,
+      "learning_rate": 0.004946858042170361,
+      "loss": 0.2735,
+      "step": 400
+    },
+    {
+      "epoch": 0.5095923261390888,
+      "grad_norm": 0.08665332198143005,
+      "learning_rate": 0.0049274829488645,
+      "loss": 0.2795,
+      "step": 425
+    },
+    {
+      "epoch": 0.539568345323741,
+      "grad_norm": 0.07928116619586945,
+      "learning_rate": 0.004905152968393817,
+      "loss": 0.2609,
+      "step": 450
+    },
+    {
+      "epoch": 0.5695443645083933,
+      "grad_norm": 0.12693190574645996,
+      "learning_rate": 0.004879895282235616,
+      "loss": 0.2617,
+      "step": 475
+    },
+    {
+      "epoch": 0.5995203836930456,
+      "grad_norm": 0.07392635196447372,
+      "learning_rate": 0.0048517406356574115,
+      "loss": 0.2672,
+      "step": 500
+    },
+    {
+      "epoch": 0.6294964028776978,
+      "grad_norm": 0.091416135430336,
+      "learning_rate": 0.0048207233002918164,
+      "loss": 0.256,
+      "step": 525
+    },
+    {
+      "epoch": 0.6594724220623501,
+      "grad_norm": 0.08377746492624283,
+      "learning_rate": 0.004786881032418933,
+      "loss": 0.2511,
+      "step": 550
+    },
+    {
+      "epoch": 0.6894484412470024,
+      "grad_norm": 0.06915393471717834,
+      "learning_rate": 0.004750255027006994,
+      "loss": 0.2589,
+      "step": 575
+    },
+    {
+      "epoch": 0.7194244604316546,
+      "grad_norm": 0.07373099029064178,
+      "learning_rate": 0.004710889867567222,
+      "loss": 0.2518,
+      "step": 600
+    },
+    {
+      "epoch": 0.749400479616307,
+      "grad_norm": 0.06648170202970505,
+      "learning_rate": 0.004668833471883931,
+      "loss": 0.249,
+      "step": 625
+    },
+    {
+      "epoch": 0.7793764988009593,
+      "grad_norm": 0.06580448895692825,
+      "learning_rate": 0.0046241370336859424,
+      "loss": 0.2481,
+      "step": 650
+    },
+    {
+      "epoch": 0.8093525179856115,
+      "grad_norm": 0.07079949229955673,
+      "learning_rate": 0.004576854960330311,
+      "loss": 0.2543,
+      "step": 675
+    },
+    {
+      "epoch": 0.8393285371702638,
+      "grad_norm": 0.06271594017744064,
+      "learning_rate": 0.004527044806574219,
+      "loss": 0.2422,
+      "step": 700
+    },
+    {
+      "epoch": 0.8693045563549161,
+      "grad_norm": 0.0618261955678463,
+      "learning_rate": 0.004474767204515652,
+      "loss": 0.2386,
+      "step": 725
+    },
+    {
+      "epoch": 0.8992805755395683,
+      "grad_norm": 0.06375081837177277,
+      "learning_rate": 0.004420085789788137,
+      "loss": 0.2445,
+      "step": 750
+    },
+    {
+      "epoch": 0.9292565947242206,
+      "grad_norm": 0.05429168790578842,
+      "learning_rate": 0.0043630671240993905,
+      "loss": 0.2422,
+      "step": 775
+    },
+    {
+      "epoch": 0.9592326139088729,
+      "grad_norm": 0.05972912162542343,
+      "learning_rate": 0.0043037806142081645,
+      "loss": 0.2418,
+      "step": 800
+    },
+    {
+      "epoch": 0.9892086330935251,
+      "grad_norm": 0.05979093909263611,
+      "learning_rate": 0.004242298427437903,
+      "loss": 0.2361,
+      "step": 825
+    },
+    {
+      "epoch": 1.0191846522781776,
+      "grad_norm": 0.07171288132667542,
+      "learning_rate": 0.00417869540383007,
+      "loss": 0.2221,
+      "step": 850
+    },
+    {
+      "epoch": 1.0491606714628297,
+      "grad_norm": 0.06082647666335106,
+      "learning_rate": 0.0041130489650440805,
+      "loss": 0.211,
+      "step": 875
+    },
+    {
+      "epoch": 1.079136690647482,
+      "grad_norm": 0.05869077146053314,
+      "learning_rate": 0.004045439020114715,
+      "loss": 0.2123,
+      "step": 900
+    },
+    {
+      "epoch": 1.1091127098321343,
+      "grad_norm": 0.059404339641332626,
+      "learning_rate": 0.003975947868181739,
+      "loss": 0.2193,
+      "step": 925
+    },
+    {
+      "epoch": 1.1390887290167866,
+      "grad_norm": 0.05453066527843475,
+      "learning_rate": 0.0039046600983101355,
+      "loss": 0.2105,
+      "step": 950
+    },
+    {
+      "epoch": 1.169064748201439,
+      "grad_norm": 0.05172204226255417,
+      "learning_rate": 0.0038316624865229088,
+      "loss": 0.2142,
+      "step": 975
+    },
+    {
+      "epoch": 1.1990407673860912,
+      "grad_norm": 0.060828547924757004,
+      "learning_rate": 0.003757043890171755,
+      "loss": 0.2165,
+      "step": 1000
+    },
+    {
+      "epoch": 1.2290167865707433,
+      "grad_norm": 0.0641385167837143,
+      "learning_rate": 0.0036808951397742378,
+      "loss": 0.218,
+      "step": 1025
+    },
+    {
+      "epoch": 1.2589928057553956,
+      "grad_norm": 0.05649897828698158,
+      "learning_rate": 0.0036033089284490745,
+      "loss": 0.2094,
+      "step": 1050
+    },
+    {
+      "epoch": 1.288968824940048,
+      "grad_norm": 0.05249471217393875,
+      "learning_rate": 0.003524379699084162,
+      "loss": 0.2028,
+      "step": 1075
+    },
+    {
+      "epoch": 1.3189448441247003,
+      "grad_norm": 0.05551101639866829,
+      "learning_rate": 0.0034442035293746655,
+      "loss": 0.2037,
+      "step": 1100
+    },
+    {
+      "epoch": 1.3489208633093526,
+      "grad_norm": 0.05503613501787186,
+      "learning_rate": 0.003362878014871117,
+      "loss": 0.208,
+      "step": 1125
+    },
+    {
+      "epoch": 1.3788968824940047,
+      "grad_norm": 0.05301510915160179,
+      "learning_rate": 0.0032805021501798805,
+      "loss": 0.2012,
+      "step": 1150
+    },
+    {
+      "epoch": 1.4088729016786572,
+      "grad_norm": 0.056410036981105804,
+      "learning_rate": 0.0031971762084606003,
+      "loss": 0.2095,
+      "step": 1175
+    },
+    {
+      "epoch": 1.4388489208633093,
+      "grad_norm": 0.05041206628084183,
+      "learning_rate": 0.0031130016193673137,
+      "loss": 0.1943,
+      "step": 1200
+    },
+    {
+      "epoch": 1.4688249400479616,
+      "grad_norm": 0.05369720607995987,
+      "learning_rate": 0.003028080845581801,
+      "loss": 0.2021,
+      "step": 1225
+    },
+    {
+      "epoch": 1.498800959232614,
+      "grad_norm": 0.05225532501935959,
+      "learning_rate": 0.00294251725808947,
+      "loss": 0.2058,
+      "step": 1250
+    },
+    {
+      "epoch": 1.5287769784172662,
+      "grad_norm": 0.052846185863018036,
+      "learning_rate": 0.0028564150103495963,
+      "loss": 0.204,
+      "step": 1275
+    },
+    {
+      "epoch": 1.5587529976019185,
+      "grad_norm": 0.051086682826280594,
+      "learning_rate": 0.002769878911513086,
+      "loss": 0.1961,
+      "step": 1300
+    },
+    {
+      "epoch": 1.5887290167865706,
+      "grad_norm": 0.055970534682273865,
+      "learning_rate": 0.0026830142988420866,
+      "loss": 0.2012,
+      "step": 1325
+    },
+    {
+      "epoch": 1.6187050359712232,
+      "grad_norm": 0.0505131334066391,
+      "learning_rate": 0.0025959269094867525,
+      "loss": 0.1975,
+      "step": 1350
+    },
+    {
+      "epoch": 1.6486810551558753,
+      "grad_norm": 0.050360601395368576,
+      "learning_rate": 0.0025087227517752355,
+      "loss": 0.2029,
+      "step": 1375
+    },
+    {
+      "epoch": 1.6786570743405276,
+      "grad_norm": 0.0552959144115448,
+      "learning_rate": 0.0024215079761735793,
+      "loss": 0.1986,
+      "step": 1400
+    },
+    {
+      "epoch": 1.70863309352518,
+      "grad_norm": 0.04918622598052025,
+      "learning_rate": 0.0023343887460726058,
+      "loss": 0.1966,
+      "step": 1425
+    },
+    {
+      "epoch": 1.738609112709832,
+      "grad_norm": 0.05339549854397774,
+      "learning_rate": 0.0022474711085590524,
+      "loss": 0.2022,
+      "step": 1450
+    },
+    {
+      "epoch": 1.7685851318944845,
+      "grad_norm": 0.044814374297857285,
+      "learning_rate": 0.002160860865328295,
+      "loss": 0.1953,
+      "step": 1475
+    },
+    {
+      "epoch": 1.7985611510791366,
+      "grad_norm": 0.05152401328086853,
+      "learning_rate": 0.002074663443895771,
+      "loss": 0.1974,
+      "step": 1500
+    },
+    {
+      "epoch": 1.828537170263789,
+      "grad_norm": 0.04496421292424202,
+      "learning_rate": 0.001988983769263877,
+      "loss": 0.1926,
+      "step": 1525
+    },
+    {
+      "epoch": 1.8585131894484412,
+      "grad_norm": 0.0547536201775074,
+      "learning_rate": 0.001903926136200566,
+      "loss": 0.1992,
+      "step": 1550
+    },
+    {
+      "epoch": 1.8884892086330936,
+      "grad_norm": 0.04525403305888176,
+      "learning_rate": 0.0018195940822850927,
+      "loss": 0.1976,
+      "step": 1575
+    },
+    {
+      "epoch": 1.9184652278177459,
+      "grad_norm": 0.045630406588315964,
+      "learning_rate": 0.0017360902618754664,
+      "loss": 0.2022,
+      "step": 1600
+    },
+    {
+      "epoch": 1.948441247002398,
+      "grad_norm": 0.046241626143455505,
+      "learning_rate": 0.0016535163211510203,
+      "loss": 0.1926,
+      "step": 1625
+    },
+    {
+      "epoch": 1.9784172661870505,
+      "grad_norm": 0.04715004190802574,
+      "learning_rate": 0.0015719727743821854,
+      "loss": 0.1947,
+      "step": 1650
+    },
+    {
+      "epoch": 2.0083932853717026,
+      "grad_norm": 0.046405646950006485,
+      "learning_rate": 0.0014915588815781152,
+      "loss": 0.1849,
+      "step": 1675
+    },
+    {
+      "epoch": 2.038369304556355,
+      "grad_norm": 0.04901168495416641,
+      "learning_rate": 0.0014123725276610638,
+      "loss": 0.1587,
+      "step": 1700
+    },
+    {
+      "epoch": 2.068345323741007,
+      "grad_norm": 0.05803445354104042,
+      "learning_rate": 0.0013345101033146085,
+      "loss": 0.1605,
+      "step": 1725
+    },
+    {
+      "epoch": 2.0983213429256593,
+      "grad_norm": 0.05447980388998985,
+      "learning_rate": 0.0012580663876507647,
+      "loss": 0.1601,
+      "step": 1750
+    },
+    {
+      "epoch": 2.128297362110312,
+      "grad_norm": 0.05418948829174042,
+      "learning_rate": 0.0011831344328387986,
+      "loss": 0.1577,
+      "step": 1775
+    },
+    {
+      "epoch": 2.158273381294964,
+      "grad_norm": 0.055272314697504044,
+      "learning_rate": 0.0011098054508361854,
+      "loss": 0.1596,
+      "step": 1800
+    },
+    {
+      "epoch": 2.1882494004796165,
+      "grad_norm": 0.04978896677494049,
+      "learning_rate": 0.0010381687023596014,
+      "loss": 0.1634,
+      "step": 1825
+    },
+    {
+      "epoch": 2.2182254196642686,
+      "grad_norm": 0.052053723484277725,
+      "learning_rate": 0.0009683113882310735,
+      "loss": 0.1565,
+      "step": 1850
+    },
+    {
+      "epoch": 2.2482014388489207,
+      "grad_norm": 0.04874909296631813,
+      "learning_rate": 0.0009003185432315822,
+      "loss": 0.1597,
+      "step": 1875
+    },
+    {
+      "epoch": 2.278177458033573,
+      "grad_norm": 0.04999493435025215,
+      "learning_rate": 0.0008342729325912946,
+      "loss": 0.1554,
+      "step": 1900
+    },
+    {
+      "epoch": 2.3081534772182253,
+      "grad_norm": 0.051304448395967484,
+      "learning_rate": 0.0007702549512424437,
+      "loss": 0.1617,
+      "step": 1925
+    },
+    {
+      "epoch": 2.338129496402878,
+      "grad_norm": 0.04773577302694321,
+      "learning_rate": 0.0007083425259574896,
+      "loss": 0.1563,
+      "step": 1950
+    },
+    {
+      "epoch": 2.36810551558753,
+      "grad_norm": 0.04622693732380867,
+      "learning_rate": 0.0006486110204916776,
+      "loss": 0.1582,
+      "step": 1975
+    },
+    {
+      "epoch": 2.3980815347721824,
+      "grad_norm": 0.05061614140868187,
+      "learning_rate": 0.000591133143845462,
+      "loss": 0.1544,
+      "step": 2000
+    },
+    {
+      "epoch": 2.4280575539568345,
+      "grad_norm": 0.05210672691464424,
+      "learning_rate": 0.0005359788617584769,
+      "loss": 0.1575,
+      "step": 2025
+    },
+    {
+      "epoch": 2.4580335731414866,
+      "grad_norm": 0.049017682671546936,
+      "learning_rate": 0.00048321531154276706,
+      "loss": 0.1578,
+      "step": 2050
+    },
+    {
+      "epoch": 2.488009592326139,
+      "grad_norm": 0.061639346182346344,
+      "learning_rate": 0.0004329067203589709,
+      "loss": 0.1544,
+      "step": 2075
+    },
+    {
+      "epoch": 2.5179856115107913,
+      "grad_norm": 0.05240131914615631,
+      "learning_rate": 0.00038511432703492083,
+      "loss": 0.1568,
+      "step": 2100
+    },
+    {
+      "epoch": 2.547961630695444,
+      "grad_norm": 0.05182984471321106,
+      "learning_rate": 0.0003398963075218309,
+      "loss": 0.1567,
+      "step": 2125
+    },
+    {
+      "epoch": 2.577937649880096,
+      "grad_norm": 0.04628787934780121,
+      "learning_rate": 0.0002973077040788205,
+      "loss": 0.1528,
+      "step": 2150
+    },
+    {
+      "epoch": 2.6079136690647484,
+      "grad_norm": 0.056303806602954865,
+      "learning_rate": 0.00025740035827196165,
+      "loss": 0.1515,
+      "step": 2175
+    },
+    {
+      "epoch": 2.6378896882494005,
+      "grad_norm": 0.05215095728635788,
+      "learning_rate": 0.00022022284786941544,
+      "loss": 0.1527,
+      "step": 2200
+    },
+    {
+      "epoch": 2.6678657074340526,
+      "grad_norm": 0.04626120626926422,
+      "learning_rate": 0.00018582042770947467,
+      "loss": 0.1541,
+      "step": 2225
+    },
+    {
+      "epoch": 2.697841726618705,
+      "grad_norm": 0.04964889958500862,
+      "learning_rate": 0.0001542349746134855,
+      "loss": 0.1574,
+      "step": 2250
+    },
+    {
+      "epoch": 2.7278177458033572,
+      "grad_norm": 0.05072702094912529,
+      "learning_rate": 0.00012550493641070665,
+      "loss": 0.1609,
+      "step": 2275
+    },
+    {
+      "epoch": 2.7577937649880093,
+      "grad_norm": 0.048793647438287735,
+      "learning_rate": 9.966528513716072e-05,
+      "loss": 0.151,
+      "step": 2300
+    },
+    {
+      "epoch": 2.787769784172662,
+      "grad_norm": 0.04410620033740997,
+      "learning_rate": 7.674747446543756e-05,
+      "loss": 0.149,
+      "step": 2325
+    },
+    {
+      "epoch": 2.8177458033573144,
+      "grad_norm": 0.04964963719248772,
+      "learning_rate": 5.677940141727761e-05,
+      "loss": 0.1524,
+      "step": 2350
+    },
+    {
+      "epoch": 2.8477218225419665,
+      "grad_norm": 0.052818212658166885,
+      "learning_rate": 3.9785372405537756e-05,
+      "loss": 0.149,
+      "step": 2375
+    },
+    {
+      "epoch": 2.8776978417266186,
+      "grad_norm": 0.07118818908929825,
+      "learning_rate": 2.5786073646871523e-05,
+      "loss": 0.1528,
+      "step": 2400
+    },
+    {
+      "epoch": 2.907673860911271,
+      "grad_norm": 0.04659281671047211,
+      "learning_rate": 1.479854598114977e-05,
+      "loss": 0.1544,
+      "step": 2425
+    },
+    {
+      "epoch": 2.937649880095923,
+      "grad_norm": 0.04493272304534912,
+      "learning_rate": 6.836164128259103e-06,
+      "loss": 0.1504,
+      "step": 2450
+    },
+    {
+      "epoch": 2.9676258992805753,
+      "grad_norm": 0.06299802660942078,
+      "learning_rate": 1.908620407542472e-06,
+      "loss": 0.1501,
+      "step": 2475
+    },
+    {
+      "epoch": 2.997601918465228,
+      "grad_norm": 0.053529493510723114,
+      "learning_rate": 2.191293968722974e-08,
+      "loss": 0.152,
+      "step": 2500
+    },
+    {
+      "epoch": 3.0,
+      "step": 2502,
+      "total_flos": 2.43882352705536e+18,
+      "train_loss": 0.215125925130219,
+      "train_runtime": 3297.3883,
+      "train_samples_per_second": 36.392,
+      "train_steps_per_second": 0.759
+    }
+  ],
+  "logging_steps": 25,
+  "max_steps": 2502,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 0,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 2.43882352705536e+18,
+  "train_batch_size": 48,
+  "trial_name": null,
+  "trial_params": null
+}

nl_tasks/exps/run_ex32/ft/adapter_config.json ADDED Viewed

	@@ -0,0 +1,18 @@

+{
+  "T": 1.0,
+  "base_model_name_or_path": "meta-llama/Llama-2-7b-hf",
+  "bias": "none",
+  "inference_mode": false,
+  "layers_to_transform": null,
+  "modules_to_save": null,
+  "num_rotations": 1,
+  "peft_type": "ROTATION",
+  "r": 16,
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "target_modules_to_skip": null,
+  "task_type": "CAUSAL_LM"
+}

nl_tasks/exps/run_ex32/ft/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<unk>",
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

nl_tasks/exps/run_ex32/ft/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

nl_tasks/exps/run_ex32/ft/tokenizer.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
+size 499723

nl_tasks/exps/run_ex32/ft/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,43 @@

+{
+  "add_bos_token": true,
+  "add_eos_token": false,
+  "add_prefix_space": null,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "extra_special_tokens": {},
+  "legacy": false,
+  "model_max_length": 512,
+  "pad_token": "<unk>",
+  "padding_side": "right",
+  "sp_model_kwargs": {},
+  "tokenizer_class": "LlamaTokenizer",
+  "unk_token": "<unk>",
+  "use_default_system_prompt": false
+}

nl_tasks/exps/run_ex32/ft2/adapter_config.json ADDED Viewed

	@@ -0,0 +1,18 @@

+{
+  "T": 1.0,
+  "base_model_name_or_path": "meta-llama/Llama-2-7b-hf",
+  "bias": "none",
+  "inference_mode": true,
+  "layers_to_transform": null,
+  "modules_to_save": null,
+  "num_rotations": 1,
+  "peft_type": "ROTATION",
+  "r": 16,
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "target_modules_to_skip": null,
+  "task_type": "CAUSAL_LM"
+}

nl_tasks/exps/run_ex32/ft2/adapter_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5e8b473fff55419f14a36da83dfd8d8f05944a51103982531702ea9c3fdd5c0c
+size 33602915

nl_tasks/exps/run_ex32/trainer_state.json ADDED Viewed

	@@ -0,0 +1,743 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 2502,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.02997601918465228,
+      "grad_norm": 0.22439797222614288,
+      "learning_rate": 9.56175298804781e-05,
+      "loss": 0.634,
+      "step": 25
+    },
+    {
+      "epoch": 0.05995203836930456,
+      "grad_norm": 0.21105553209781647,
+      "learning_rate": 0.0001952191235059761,
+      "loss": 0.4028,
+      "step": 50
+    },
+    {
+      "epoch": 0.08992805755395683,
+      "grad_norm": 0.18460212647914886,
+      "learning_rate": 0.0002948207171314741,
+      "loss": 0.346,
+      "step": 75
+    },
+    {
+      "epoch": 0.11990407673860912,
+      "grad_norm": 0.19811777770519257,
+      "learning_rate": 0.0003944223107569721,
+      "loss": 0.3192,
+      "step": 100
+    },
+    {
+      "epoch": 0.1498800959232614,
+      "grad_norm": 0.18307138979434967,
+      "learning_rate": 0.0004940239043824702,
+      "loss": 0.3131,
+      "step": 125
+    },
+    {
+      "epoch": 0.17985611510791366,
+      "grad_norm": 0.19494092464447021,
+      "learning_rate": 0.0005936254980079682,
+      "loss": 0.3019,
+      "step": 150
+    },
+    {
+      "epoch": 0.20983213429256595,
+      "grad_norm": 0.19441217184066772,
+      "learning_rate": 0.0006932270916334662,
+      "loss": 0.2973,
+      "step": 175
+    },
+    {
+      "epoch": 0.23980815347721823,
+      "grad_norm": 0.1927807629108429,
+      "learning_rate": 0.0007928286852589641,
+      "loss": 0.3038,
+      "step": 200
+    },
+    {
+      "epoch": 0.2697841726618705,
+      "grad_norm": 0.17632770538330078,
+      "learning_rate": 0.0008924302788844621,
+      "loss": 0.2981,
+      "step": 225
+    },
+    {
+      "epoch": 0.2997601918465228,
+      "grad_norm": 0.19236312806606293,
+      "learning_rate": 0.00099203187250996,
+      "loss": 0.2966,
+      "step": 250
+    },
+    {
+      "epoch": 0.32973621103117506,
+      "grad_norm": 0.21083885431289673,
+      "learning_rate": 0.0009997424229621528,
+      "loss": 0.2933,
+      "step": 275
+    },
+    {
+      "epoch": 0.3597122302158273,
+      "grad_norm": 0.21011164784431458,
+      "learning_rate": 0.0009988784753724707,
+      "loss": 0.343,
+      "step": 300
+    },
+    {
+      "epoch": 0.38968824940047964,
+      "grad_norm": 0.2327512800693512,
+      "learning_rate": 0.0009974072610646543,
+      "loss": 0.2838,
+      "step": 325
+    },
+    {
+      "epoch": 0.4196642685851319,
+      "grad_norm": 0.25379207730293274,
+      "learning_rate": 0.0009953305708946503,
+      "loss": 0.2835,
+      "step": 350
+    },
+    {
+      "epoch": 0.44964028776978415,
+      "grad_norm": 0.21607662737369537,
+      "learning_rate": 0.000992650932742266,
+      "loss": 0.2739,
+      "step": 375
+    },
+    {
+      "epoch": 0.47961630695443647,
+      "grad_norm": 0.17558318376541138,
+      "learning_rate": 0.0009893716084340722,
+      "loss": 0.2751,
+      "step": 400
+    },
+    {
+      "epoch": 0.5095923261390888,
+      "grad_norm": 0.17286434769630432,
+      "learning_rate": 0.0009854965897729,
+      "loss": 0.2812,
+      "step": 425
+    },
+    {
+      "epoch": 0.539568345323741,
+      "grad_norm": 0.15521785616874695,
+      "learning_rate": 0.0009810305936787634,
+      "loss": 0.2631,
+      "step": 450
+    },
+    {
+      "epoch": 0.5695443645083933,
+      "grad_norm": 0.19885142147541046,
+      "learning_rate": 0.0009759790564471232,
+      "loss": 0.2634,
+      "step": 475
+    },
+    {
+      "epoch": 0.5995203836930456,
+      "grad_norm": 0.16864049434661865,
+      "learning_rate": 0.0009703481271314822,
+      "loss": 0.2686,
+      "step": 500
+    },
+    {
+      "epoch": 0.6294964028776978,
+      "grad_norm": 0.18147310614585876,
+      "learning_rate": 0.0009641446600583632,
+      "loss": 0.2565,
+      "step": 525
+    },
+    {
+      "epoch": 0.6594724220623501,
+      "grad_norm": 0.1791866272687912,
+      "learning_rate": 0.0009573762064837866,
+      "loss": 0.2525,
+      "step": 550
+    },
+    {
+      "epoch": 0.6894484412470024,
+      "grad_norm": 0.16333305835723877,
+      "learning_rate": 0.0009500510054013988,
+      "loss": 0.2599,
+      "step": 575
+    },
+    {
+      "epoch": 0.7194244604316546,
+      "grad_norm": 0.15634645521640778,
+      "learning_rate": 0.0009421779735134444,
+      "loss": 0.2551,
+      "step": 600
+    },
+    {
+      "epoch": 0.749400479616307,
+      "grad_norm": 0.16058474779129028,
+      "learning_rate": 0.0009337666943767861,
+      "loss": 0.2518,
+      "step": 625
+    },
+    {
+      "epoch": 0.7793764988009593,
+      "grad_norm": 0.15423771739006042,
+      "learning_rate": 0.0009248274067371884,
+      "loss": 0.2507,
+      "step": 650
+    },
+    {
+      "epoch": 0.8093525179856115,
+      "grad_norm": 0.16649393737316132,
+      "learning_rate": 0.0009153709920660622,
+      "loss": 0.256,
+      "step": 675
+    },
+    {
+      "epoch": 0.8393285371702638,
+      "grad_norm": 0.14274722337722778,
+      "learning_rate": 0.0009054089613148438,
+      "loss": 0.2444,
+      "step": 700
+    },
+    {
+      "epoch": 0.8693045563549161,
+      "grad_norm": 0.14566783607006073,
+      "learning_rate": 0.0008949534409031304,
+      "loss": 0.2406,
+      "step": 725
+    },
+    {
+      "epoch": 0.8992805755395683,
+      "grad_norm": 0.16103540360927582,
+      "learning_rate": 0.0008840171579576273,
+      "loss": 0.2476,
+      "step": 750
+    },
+    {
+      "epoch": 0.9292565947242206,
+      "grad_norm": 0.13278396427631378,
+      "learning_rate": 0.0008726134248198781,
+      "loss": 0.2444,
+      "step": 775
+    },
+    {
+      "epoch": 0.9592326139088729,
+      "grad_norm": 0.13953395187854767,
+      "learning_rate": 0.000860756122841633,
+      "loss": 0.2429,
+      "step": 800
+    },
+    {
+      "epoch": 0.9892086330935251,
+      "grad_norm": 0.15767574310302734,
+      "learning_rate": 0.0008484596854875805,
+      "loss": 0.2382,
+      "step": 825
+    },
+    {
+      "epoch": 1.0191846522781776,
+      "grad_norm": 0.17023757100105286,
+      "learning_rate": 0.0008357390807660139,
+      "loss": 0.2247,
+      "step": 850
+    },
+    {
+      "epoch": 1.0491606714628297,
+      "grad_norm": 0.1432039439678192,
+      "learning_rate": 0.0008226097930088161,
+      "loss": 0.2148,
+      "step": 875
+    },
+    {
+      "epoch": 1.079136690647482,
+      "grad_norm": 0.14797061681747437,
+      "learning_rate": 0.0008090878040229431,
+      "loss": 0.2162,
+      "step": 900
+    },
+    {
+      "epoch": 1.1091127098321343,
+      "grad_norm": 0.15065963566303253,
+      "learning_rate": 0.0007951895736363477,
+      "loss": 0.2234,
+      "step": 925
+    },
+    {
+      "epoch": 1.1390887290167866,
+      "grad_norm": 0.13475900888442993,
+      "learning_rate": 0.0007809320196620271,
+      "loss": 0.2137,
+      "step": 950
+    },
+    {
+      "epoch": 1.169064748201439,
+      "grad_norm": 0.1358369141817093,
+      "learning_rate": 0.0007663324973045817,
+      "loss": 0.2167,
+      "step": 975
+    },
+    {
+      "epoch": 1.1990407673860912,
+      "grad_norm": 0.15405559539794922,
+      "learning_rate": 0.000751408778034351,
+      "loss": 0.2193,
+      "step": 1000
+    },
+    {
+      "epoch": 1.2290167865707433,
+      "grad_norm": 0.15572060644626617,
+      "learning_rate": 0.0007361790279548476,
+      "loss": 0.2207,
+      "step": 1025
+    },
+    {
+      "epoch": 1.2589928057553956,
+      "grad_norm": 0.14885199069976807,
+      "learning_rate": 0.0007206617856898149,
+      "loss": 0.2122,
+      "step": 1050
+    },
+    {
+      "epoch": 1.288968824940048,
+      "grad_norm": 0.13450340926647186,
+      "learning_rate": 0.0007048759398168324,
+      "loss": 0.2053,
+      "step": 1075
+    },
+    {
+      "epoch": 1.3189448441247003,
+      "grad_norm": 0.12750637531280518,
+      "learning_rate": 0.0006888407058749331,
+      "loss": 0.2059,
+      "step": 1100
+    },
+    {
+      "epoch": 1.3489208633093526,
+      "grad_norm": 0.1344868540763855,
+      "learning_rate": 0.0006725756029742234,
+      "loss": 0.2108,
+      "step": 1125
+    },
+    {
+      "epoch": 1.3788968824940047,
+      "grad_norm": 0.12938323616981506,
+      "learning_rate": 0.0006561004300359761,
+      "loss": 0.2038,
+      "step": 1150
+    },
+    {
+      "epoch": 1.4088729016786572,
+      "grad_norm": 0.13532765209674835,
+      "learning_rate": 0.00063943524169212,
+      "loss": 0.2107,
+      "step": 1175
+    },
+    {
+      "epoch": 1.4388489208633093,
+      "grad_norm": 0.1328439861536026,
+      "learning_rate": 0.0006226003238734627,
+      "loss": 0.1973,
+      "step": 1200
+    },
+    {
+      "epoch": 1.4688249400479616,
+      "grad_norm": 0.14724420011043549,
+      "learning_rate": 0.0006056161691163601,
+      "loss": 0.2038,
+      "step": 1225
+    },
+    {
+      "epoch": 1.498800959232614,
+      "grad_norm": 0.1372281312942505,
+      "learning_rate": 0.000588503451617894,
+      "loss": 0.2063,
+      "step": 1250
+    },
+    {
+      "epoch": 1.5287769784172662,
+      "grad_norm": 0.13836322724819183,
+      "learning_rate": 0.0005712830020699192,
+      "loss": 0.2056,
+      "step": 1275
+    },
+    {
+      "epoch": 1.5587529976019185,
+      "grad_norm": 0.12976917624473572,
+      "learning_rate": 0.0005539757823026172,
+      "loss": 0.1973,
+      "step": 1300
+    },
+    {
+      "epoch": 1.5887290167865706,
+      "grad_norm": 0.1546931117773056,
+      "learning_rate": 0.0005366028597684172,
+      "loss": 0.2028,
+      "step": 1325
+    },
+    {
+      "epoch": 1.6187050359712232,
+      "grad_norm": 0.12072357535362244,
+      "learning_rate": 0.0005191853818973506,
+      "loss": 0.1979,
+      "step": 1350
+    },
+    {
+      "epoch": 1.6486810551558753,
+      "grad_norm": 0.13212819397449493,
+      "learning_rate": 0.0005017445503550471,
+      "loss": 0.2051,
+      "step": 1375
+    },
+    {
+      "epoch": 1.6786570743405276,
+      "grad_norm": 0.13972344994544983,
+      "learning_rate": 0.00048430159523471587,
+      "loss": 0.1999,
+      "step": 1400
+    },
+    {
+      "epoch": 1.70863309352518,
+      "grad_norm": 0.13032999634742737,
+      "learning_rate": 0.00046687774921452113,
+      "loss": 0.1975,
+      "step": 1425
+    },
+    {
+      "epoch": 1.738609112709832,
+      "grad_norm": 0.1394297480583191,
+      "learning_rate": 0.00044949422171181047,
+      "loss": 0.2031,
+      "step": 1450
+    },
+    {
+      "epoch": 1.7685851318944845,
+      "grad_norm": 0.12275266647338867,
+      "learning_rate": 0.0004321721730656589,
+      "loss": 0.1966,
+      "step": 1475
+    },
+    {
+      "epoch": 1.7985611510791366,
+      "grad_norm": 0.12649331986904144,
+      "learning_rate": 0.0004149326887791541,
+      "loss": 0.1983,
+      "step": 1500
+    },
+    {
+      "epoch": 1.828537170263789,
+      "grad_norm": 0.11251500993967056,
+      "learning_rate": 0.0003977967538527754,
+      "loss": 0.1932,
+      "step": 1525
+    },
+    {
+      "epoch": 1.8585131894484412,
+      "grad_norm": 0.13119769096374512,
+      "learning_rate": 0.0003807852272401132,
+      "loss": 0.1995,
+      "step": 1550
+    },
+    {
+      "epoch": 1.8884892086330936,
+      "grad_norm": 0.12021032720804214,
+      "learning_rate": 0.0003639188164570185,
+      "loss": 0.1985,
+      "step": 1575
+    },
+    {
+      "epoch": 1.9184652278177459,
+      "grad_norm": 0.12251219153404236,
+      "learning_rate": 0.0003472180523750933,
+      "loss": 0.2041,
+      "step": 1600
+    },
+    {
+      "epoch": 1.948441247002398,
+      "grad_norm": 0.11931514739990234,
+      "learning_rate": 0.0003307032642302041,
+      "loss": 0.1933,
+      "step": 1625
+    },
+    {
+      "epoch": 1.9784172661870505,
+      "grad_norm": 0.12238750606775284,
+      "learning_rate": 0.0003143945548764371,
+      "loss": 0.195,
+      "step": 1650
+    },
+    {
+      "epoch": 2.0083932853717026,
+      "grad_norm": 0.13976894319057465,
+      "learning_rate": 0.00029831177631562306,
+      "loss": 0.1858,
+      "step": 1675
+    },
+    {
+      "epoch": 2.038369304556355,
+      "grad_norm": 0.12677723169326782,
+      "learning_rate": 0.0002824745055322128,
+      "loss": 0.1608,
+      "step": 1700
+    },
+    {
+      "epoch": 2.068345323741007,
+      "grad_norm": 0.13730570673942566,
+      "learning_rate": 0.0002669020206629217,
+      "loss": 0.1632,
+      "step": 1725
+    },
+    {
+      "epoch": 2.0983213429256593,
+      "grad_norm": 0.1422010064125061,
+      "learning_rate": 0.00025161327753015297,
+      "loss": 0.1619,
+      "step": 1750
+    },
+    {
+      "epoch": 2.128297362110312,
+      "grad_norm": 0.14339914917945862,
+      "learning_rate": 0.00023662688656775972,
+      "loss": 0.1607,
+      "step": 1775
+    },
+    {
+      "epoch": 2.158273381294964,
+      "grad_norm": 0.14340265095233917,
+      "learning_rate": 0.00022196109016723708,
+      "loss": 0.1611,
+      "step": 1800
+    },
+    {
+      "epoch": 2.1882494004796165,
+      "grad_norm": 0.14029954373836517,
+      "learning_rate": 0.0002076337404719203,
+      "loss": 0.1657,
+      "step": 1825
+    },
+    {
+      "epoch": 2.2182254196642686,
+      "grad_norm": 0.13461889326572418,
+      "learning_rate": 0.00019366227764621468,
+      "loss": 0.1584,
+      "step": 1850
+    },
+    {
+      "epoch": 2.2482014388489207,
+      "grad_norm": 0.1285167932510376,
+      "learning_rate": 0.00018006370864631643,
+      "loss": 0.1622,
+      "step": 1875
+    },
+    {
+      "epoch": 2.278177458033573,
+      "grad_norm": 0.13294167816638947,
+      "learning_rate": 0.0001668545865182589,
+      "loss": 0.1577,
+      "step": 1900
+    },
+    {
+      "epoch": 2.3081534772182253,
+      "grad_norm": 0.1409018188714981,
+      "learning_rate": 0.00015405099024848874,
+      "loss": 0.1637,
+      "step": 1925
+    },
+    {
+      "epoch": 2.338129496402878,
+      "grad_norm": 0.13364772498607635,
+      "learning_rate": 0.00014166850519149794,
+      "loss": 0.1579,
+      "step": 1950
+    },
+    {
+      "epoch": 2.36810551558753,
+      "grad_norm": 0.12694032490253448,
+      "learning_rate": 0.0001297222040983355,
+      "loss": 0.1597,
+      "step": 1975
+    },
+    {
+      "epoch": 2.3980815347721824,
+      "grad_norm": 0.12603726983070374,
+      "learning_rate": 0.0001182266287690924,
+      "loss": 0.1569,
+      "step": 2000
+    },
+    {
+      "epoch": 2.4280575539568345,
+      "grad_norm": 0.12928339838981628,
+      "learning_rate": 0.00010719577235169537,
+      "loss": 0.1592,
+      "step": 2025
+    },
+    {
+      "epoch": 2.4580335731414866,
+      "grad_norm": 0.12346290051937103,
+      "learning_rate": 9.664306230855341e-05,
+      "loss": 0.1596,
+      "step": 2050
+    },
+    {
+      "epoch": 2.488009592326139,
+      "grad_norm": 0.1295640915632248,
+      "learning_rate": 8.658134407179418e-05,
+      "loss": 0.1561,
+      "step": 2075
+    },
+    {
+      "epoch": 2.5179856115107913,
+      "grad_norm": 0.13177761435508728,
+      "learning_rate": 7.702286540698416e-05,
+      "loss": 0.1597,
+      "step": 2100
+    },
+    {
+      "epoch": 2.547961630695444,
+      "grad_norm": 0.13077791035175323,
+      "learning_rate": 6.797926150436617e-05,
+      "loss": 0.1586,
+      "step": 2125
+    },
+    {
+      "epoch": 2.577937649880096,
+      "grad_norm": 0.117102712392807,
+      "learning_rate": 5.9461540815764105e-05,
+      "loss": 0.1551,
+      "step": 2150
+    },
+    {
+      "epoch": 2.6079136690647484,
+      "grad_norm": 0.14442621171474457,
+      "learning_rate": 5.1480071654392335e-05,
+      "loss": 0.1543,
+      "step": 2175
+    },
+    {
+      "epoch": 2.6378896882494005,
+      "grad_norm": 0.13530191779136658,
+      "learning_rate": 4.404456957388309e-05,
+      "loss": 0.1547,
+      "step": 2200
+    },
+    {
+      "epoch": 2.6678657074340526,
+      "grad_norm": 0.11663298308849335,
+      "learning_rate": 3.716408554189493e-05,
+      "loss": 0.1567,
+      "step": 2225
+    },
+    {
+      "epoch": 2.697841726618705,
+      "grad_norm": 0.13254733383655548,
+      "learning_rate": 3.08469949226971e-05,
+      "loss": 0.1605,
+      "step": 2250
+    },
+    {
+      "epoch": 2.7278177458033572,
+      "grad_norm": 0.13481509685516357,
+      "learning_rate": 2.510098728214133e-05,
+      "loss": 0.1638,
+      "step": 2275
+    },
+    {
+      "epoch": 2.7577937649880093,
+      "grad_norm": 0.12630033493041992,
+      "learning_rate": 1.9933057027432144e-05,
+      "loss": 0.1544,
+      "step": 2300
+    },
+    {
+      "epoch": 2.787769784172662,
+      "grad_norm": 0.11446399986743927,
+      "learning_rate": 1.5349494893087514e-05,
+      "loss": 0.1519,
+      "step": 2325
+    },
+    {
+      "epoch": 2.8177458033573144,
+      "grad_norm": 0.1315765082836151,
+      "learning_rate": 1.1355880283455521e-05,
+      "loss": 0.1559,
+      "step": 2350
+    },
+    {
+      "epoch": 2.8477218225419665,
+      "grad_norm": 0.13976338505744934,
+      "learning_rate": 7.95707448110755e-06,
+      "loss": 0.1513,
+      "step": 2375
+    },
+    {
+      "epoch": 2.8776978417266186,
+      "grad_norm": 0.15874944627285004,
+      "learning_rate": 5.157214729374305e-06,
+      "loss": 0.1558,
+      "step": 2400
+    },
+    {
+      "epoch": 2.907673860911271,
+      "grad_norm": 0.12649980187416077,
+      "learning_rate": 2.959709196229954e-06,
+      "loss": 0.1576,
+      "step": 2425
+    },
+    {
+      "epoch": 2.937649880095923,
+      "grad_norm": 0.121933713555336,
+      "learning_rate": 1.3672328256518206e-06,
+      "loss": 0.1541,
+      "step": 2450
+    },
+    {
+      "epoch": 2.9676258992805753,
+      "grad_norm": 0.14589641988277435,
+      "learning_rate": 3.8172408150849435e-07,
+      "loss": 0.1534,
+      "step": 2475
+    },
+    {
+      "epoch": 2.997601918465228,
+      "grad_norm": 0.14007411897182465,
+      "learning_rate": 4.382587937445947e-09,
+      "loss": 0.1536,
+      "step": 2500
+    },
+    {
+      "epoch": 3.0,
+      "step": 2502,
+      "total_flos": 2.43882352705536e+18,
+      "train_loss": 0.217716321051359,
+      "train_runtime": 3304.2612,
+      "train_samples_per_second": 36.317,
+      "train_steps_per_second": 0.757
+    }
+  ],
+  "logging_steps": 25,
+  "max_steps": 2502,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 0,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 2.43882352705536e+18,
+  "train_batch_size": 48,
+  "trial_name": null,
+  "trial_params": null
+}

nl_tasks/exps/run_ex33/ft/adapter_config.json ADDED Viewed

	@@ -0,0 +1,18 @@

+{
+  "T": 1.0,
+  "base_model_name_or_path": "meta-llama/Llama-2-7b-hf",
+  "bias": "none",
+  "inference_mode": false,
+  "layers_to_transform": null,
+  "modules_to_save": null,
+  "num_rotations": 1,
+  "peft_type": "ROTATION",
+  "r": 16,
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "target_modules_to_skip": null,
+  "task_type": "CAUSAL_LM"
+}

nl_tasks/exps/run_ex33/ft/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<unk>",
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

nl_tasks/exps/run_ex33/ft/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

nl_tasks/exps/run_ex33/ft/tokenizer.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
+size 499723

nl_tasks/exps/run_ex33/ft/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,43 @@

+{
+  "add_bos_token": true,
+  "add_eos_token": false,
+  "add_prefix_space": null,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "extra_special_tokens": {},
+  "legacy": false,
+  "model_max_length": 512,
+  "pad_token": "<unk>",
+  "padding_side": "right",
+  "sp_model_kwargs": {},
+  "tokenizer_class": "LlamaTokenizer",
+  "unk_token": "<unk>",
+  "use_default_system_prompt": false
+}

nl_tasks/exps/run_ex33/ft2/adapter_config.json ADDED Viewed

	@@ -0,0 +1,18 @@

+{
+  "T": 1.0,
+  "base_model_name_or_path": "meta-llama/Llama-2-7b-hf",
+  "bias": "none",
+  "inference_mode": true,
+  "layers_to_transform": null,
+  "modules_to_save": null,
+  "num_rotations": 1,
+  "peft_type": "ROTATION",
+  "r": 16,
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "target_modules_to_skip": null,
+  "task_type": "CAUSAL_LM"
+}

nl_tasks/exps/run_ex33/ft2/adapter_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:50b23d0f92322496640c91535debe14bd2599b7c55d1720ecbeff4fd370d1495
+size 33602915

nl_tasks/exps/run_ex33/trainer_state.json ADDED Viewed

	@@ -0,0 +1,743 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 2502,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.02997601918465228,
+      "grad_norm": 0.24060821533203125,
+      "learning_rate": 0.0009561752988047809,
+      "loss": 0.4891,
+      "step": 25
+    },
+    {
+      "epoch": 0.05995203836930456,
+      "grad_norm": 0.2504737377166748,
+      "learning_rate": 0.001952191235059761,
+      "loss": 0.3442,
+      "step": 50
+    },
+    {
+      "epoch": 0.08992805755395683,
+      "grad_norm": 0.3224866986274719,
+      "learning_rate": 0.0029482071713147415,
+      "loss": 0.3373,
+      "step": 75
+    },
+    {
+      "epoch": 0.11990407673860912,
+      "grad_norm": 1.9630478620529175,
+      "learning_rate": 0.003944223107569721,
+      "loss": 0.367,
+      "step": 100
+    },
+    {
+      "epoch": 0.1498800959232614,
+      "grad_norm": 2.872677803039551,
+      "learning_rate": 0.004940239043824702,
+      "loss": 0.3491,
+      "step": 125
+    },
+    {
+      "epoch": 0.17985611510791366,
+      "grad_norm": 0.3211333751678467,
+      "learning_rate": 0.005936254980079682,
+      "loss": 0.3578,
+      "step": 150
+    },
+    {
+      "epoch": 0.20983213429256595,
+      "grad_norm": 0.14077529311180115,
+      "learning_rate": 0.006932270916334662,
+      "loss": 0.3252,
+      "step": 175
+    },
+    {
+      "epoch": 0.23980815347721823,
+      "grad_norm": 0.1346043050289154,
+      "learning_rate": 0.007928286852589641,
+      "loss": 0.3208,
+      "step": 200
+    },
+    {
+      "epoch": 0.2697841726618705,
+      "grad_norm": 0.12695597112178802,
+      "learning_rate": 0.008924302788844622,
+      "loss": 0.3113,
+      "step": 225
+    },
+    {
+      "epoch": 0.2997601918465228,
+      "grad_norm": 0.09868916869163513,
+      "learning_rate": 0.009920318725099601,
+      "loss": 0.3067,
+      "step": 250
+    },
+    {
+      "epoch": 0.32973621103117506,
+      "grad_norm": 0.10491207242012024,
+      "learning_rate": 0.009997424229621528,
+      "loss": 0.2987,
+      "step": 275
+    },
+    {
+      "epoch": 0.3597122302158273,
+      "grad_norm": 0.08867417275905609,
+      "learning_rate": 0.009988784753724706,
+      "loss": 0.2875,
+      "step": 300
+    },
+    {
+      "epoch": 0.38968824940047964,
+      "grad_norm": 0.07948382943868637,
+      "learning_rate": 0.009974072610646543,
+      "loss": 0.2814,
+      "step": 325
+    },
+    {
+      "epoch": 0.4196642685851319,
+      "grad_norm": 0.1311594694852829,
+      "learning_rate": 0.009953305708946503,
+      "loss": 0.2815,
+      "step": 350
+    },
+    {
+      "epoch": 0.44964028776978415,
+      "grad_norm": 0.09352317452430725,
+      "learning_rate": 0.00992650932742266,
+      "loss": 0.274,
+      "step": 375
+    },
+    {
+      "epoch": 0.47961630695443647,
+      "grad_norm": 0.06186804547905922,
+      "learning_rate": 0.009893716084340722,
+      "loss": 0.2739,
+      "step": 400
+    },
+    {
+      "epoch": 0.5095923261390888,
+      "grad_norm": 0.057951804250478745,
+      "learning_rate": 0.009854965897729,
+      "loss": 0.2802,
+      "step": 425
+    },
+    {
+      "epoch": 0.539568345323741,
+      "grad_norm": 0.05402874946594238,
+      "learning_rate": 0.009810305936787634,
+      "loss": 0.2619,
+      "step": 450
+    },
+    {
+      "epoch": 0.5695443645083933,
+      "grad_norm": 0.06987974047660828,
+      "learning_rate": 0.009759790564471233,
+      "loss": 0.2596,
+      "step": 475
+    },
+    {
+      "epoch": 0.5995203836930456,
+      "grad_norm": 0.055585265159606934,
+      "learning_rate": 0.009703481271314823,
+      "loss": 0.2663,
+      "step": 500
+    },
+    {
+      "epoch": 0.6294964028776978,
+      "grad_norm": 0.06418672204017639,
+      "learning_rate": 0.009641446600583633,
+      "loss": 0.2537,
+      "step": 525
+    },
+    {
+      "epoch": 0.6594724220623501,
+      "grad_norm": 0.05453021079301834,
+      "learning_rate": 0.009573762064837866,
+      "loss": 0.2506,
+      "step": 550
+    },
+    {
+      "epoch": 0.6894484412470024,
+      "grad_norm": 0.04687987267971039,
+      "learning_rate": 0.009500510054013988,
+      "loss": 0.2581,
+      "step": 575
+    },
+    {
+      "epoch": 0.7194244604316546,
+      "grad_norm": 0.05430283397436142,
+      "learning_rate": 0.009421779735134445,
+      "loss": 0.2524,
+      "step": 600
+    },
+    {
+      "epoch": 0.749400479616307,
+      "grad_norm": 0.04550522193312645,
+      "learning_rate": 0.009337666943767862,
+      "loss": 0.2492,
+      "step": 625
+    },
+    {
+      "epoch": 0.7793764988009593,
+      "grad_norm": 0.04663613811135292,
+      "learning_rate": 0.009248274067371885,
+      "loss": 0.2476,
+      "step": 650
+    },
+    {
+      "epoch": 0.8093525179856115,
+      "grad_norm": 0.05199963226914406,
+      "learning_rate": 0.009153709920660622,
+      "loss": 0.2548,
+      "step": 675
+    },
+    {
+      "epoch": 0.8393285371702638,
+      "grad_norm": 0.043098509311676025,
+      "learning_rate": 0.009054089613148438,
+      "loss": 0.243,
+      "step": 700
+    },
+    {
+      "epoch": 0.8693045563549161,
+      "grad_norm": 0.043702222406864166,
+      "learning_rate": 0.008949534409031304,
+      "loss": 0.2392,
+      "step": 725
+    },
+    {
+      "epoch": 0.8992805755395683,
+      "grad_norm": 0.04471327364444733,
+      "learning_rate": 0.008840171579576273,
+      "loss": 0.2447,
+      "step": 750
+    },
+    {
+      "epoch": 0.9292565947242206,
+      "grad_norm": 0.04158543795347214,
+      "learning_rate": 0.008726134248198781,
+      "loss": 0.2431,
+      "step": 775
+    },
+    {
+      "epoch": 0.9592326139088729,
+      "grad_norm": 0.03955095633864403,
+      "learning_rate": 0.008607561228416329,
+      "loss": 0.243,
+      "step": 800
+    },
+    {
+      "epoch": 0.9892086330935251,
+      "grad_norm": 0.039677463471889496,
+      "learning_rate": 0.008484596854875806,
+      "loss": 0.2379,
+      "step": 825
+    },
+    {
+      "epoch": 1.0191846522781776,
+      "grad_norm": 0.04437502473592758,
+      "learning_rate": 0.00835739080766014,
+      "loss": 0.2221,
+      "step": 850
+    },
+    {
+      "epoch": 1.0491606714628297,
+      "grad_norm": 0.04531238228082657,
+      "learning_rate": 0.008226097930088161,
+      "loss": 0.2113,
+      "step": 875
+    },
+    {
+      "epoch": 1.079136690647482,
+      "grad_norm": 0.04641604423522949,
+      "learning_rate": 0.00809087804022943,
+      "loss": 0.2146,
+      "step": 900
+    },
+    {
+      "epoch": 1.1091127098321343,
+      "grad_norm": 0.0421479269862175,
+      "learning_rate": 0.007951895736363478,
+      "loss": 0.2209,
+      "step": 925
+    },
+    {
+      "epoch": 1.1390887290167866,
+      "grad_norm": 0.036580126732587814,
+      "learning_rate": 0.007809320196620271,
+      "loss": 0.2105,
+      "step": 950
+    },
+    {
+      "epoch": 1.169064748201439,
+      "grad_norm": 0.03694167360663414,
+      "learning_rate": 0.0076633249730458175,
+      "loss": 0.2148,
+      "step": 975
+    },
+    {
+      "epoch": 1.1990407673860912,
+      "grad_norm": 0.03922872990369797,
+      "learning_rate": 0.00751408778034351,
+      "loss": 0.2171,
+      "step": 1000
+    },
+    {
+      "epoch": 1.2290167865707433,
+      "grad_norm": 0.042761627584695816,
+      "learning_rate": 0.0073617902795484755,
+      "loss": 0.2194,
+      "step": 1025
+    },
+    {
+      "epoch": 1.2589928057553956,
+      "grad_norm": 0.03898875042796135,
+      "learning_rate": 0.007206617856898149,
+      "loss": 0.2102,
+      "step": 1050
+    },
+    {
+      "epoch": 1.288968824940048,
+      "grad_norm": 0.039882808923721313,
+      "learning_rate": 0.007048759398168324,
+      "loss": 0.2055,
+      "step": 1075
+    },
+    {
+      "epoch": 1.3189448441247003,
+      "grad_norm": 0.03663821145892143,
+      "learning_rate": 0.006888407058749331,
+      "loss": 0.2045,
+      "step": 1100
+    },
+    {
+      "epoch": 1.3489208633093526,
+      "grad_norm": 0.04057300463318825,
+      "learning_rate": 0.006725756029742234,
+      "loss": 0.2094,
+      "step": 1125
+    },
+    {
+      "epoch": 1.3788968824940047,
+      "grad_norm": 0.03922704979777336,
+      "learning_rate": 0.006561004300359761,
+      "loss": 0.2031,
+      "step": 1150
+    },
+    {
+      "epoch": 1.4088729016786572,
+      "grad_norm": 0.03589184582233429,
+      "learning_rate": 0.0063943524169212005,
+      "loss": 0.2103,
+      "step": 1175
+    },
+    {
+      "epoch": 1.4388489208633093,
+      "grad_norm": 0.03473867475986481,
+      "learning_rate": 0.0062260032387346275,
+      "loss": 0.1965,
+      "step": 1200
+    },
+    {
+      "epoch": 1.4688249400479616,
+      "grad_norm": 0.041303601115942,
+      "learning_rate": 0.006056161691163602,
+      "loss": 0.2037,
+      "step": 1225
+    },
+    {
+      "epoch": 1.498800959232614,
+      "grad_norm": 0.04580175131559372,
+      "learning_rate": 0.00588503451617894,
+      "loss": 0.2067,
+      "step": 1250
+    },
+    {
+      "epoch": 1.5287769784172662,
+      "grad_norm": 0.03737177699804306,
+      "learning_rate": 0.005712830020699193,
+      "loss": 0.2058,
+      "step": 1275
+    },
+    {
+      "epoch": 1.5587529976019185,
+      "grad_norm": 0.03482063487172127,
+      "learning_rate": 0.005539757823026172,
+      "loss": 0.1975,
+      "step": 1300
+    },
+    {
+      "epoch": 1.5887290167865706,
+      "grad_norm": 0.04050496220588684,
+      "learning_rate": 0.005366028597684173,
+      "loss": 0.2029,
+      "step": 1325
+    },
+    {
+      "epoch": 1.6187050359712232,
+      "grad_norm": 0.031009122729301453,
+      "learning_rate": 0.005191853818973505,
+      "loss": 0.1978,
+      "step": 1350
+    },
+    {
+      "epoch": 1.6486810551558753,
+      "grad_norm": 0.03680524602532387,
+      "learning_rate": 0.005017445503550471,
+      "loss": 0.2042,
+      "step": 1375
+    },
+    {
+      "epoch": 1.6786570743405276,
+      "grad_norm": 0.03796203434467316,
+      "learning_rate": 0.004843015952347159,
+      "loss": 0.2002,
+      "step": 1400
+    },
+    {
+      "epoch": 1.70863309352518,
+      "grad_norm": 0.03277565911412239,
+      "learning_rate": 0.0046687774921452116,
+      "loss": 0.1982,
+      "step": 1425
+    },
+    {
+      "epoch": 1.738609112709832,
+      "grad_norm": 0.03661928325891495,
+      "learning_rate": 0.004494942217118105,
+      "loss": 0.2034,
+      "step": 1450
+    },
+    {
+      "epoch": 1.7685851318944845,
+      "grad_norm": 0.02970374934375286,
+      "learning_rate": 0.00432172173065659,
+      "loss": 0.1973,
+      "step": 1475
+    },
+    {
+      "epoch": 1.7985611510791366,
+      "grad_norm": 0.03302035480737686,
+      "learning_rate": 0.004149326887791542,
+      "loss": 0.1989,
+      "step": 1500
+    },
+    {
+      "epoch": 1.828537170263789,
+      "grad_norm": 0.03104039840400219,
+      "learning_rate": 0.003977967538527754,
+      "loss": 0.194,
+      "step": 1525
+    },
+    {
+      "epoch": 1.8585131894484412,
+      "grad_norm": 0.037008076906204224,
+      "learning_rate": 0.003807852272401132,
+      "loss": 0.2002,
+      "step": 1550
+    },
+    {
+      "epoch": 1.8884892086330936,
+      "grad_norm": 0.030725648626685143,
+      "learning_rate": 0.0036391881645701854,
+      "loss": 0.1992,
+      "step": 1575
+    },
+    {
+      "epoch": 1.9184652278177459,
+      "grad_norm": 0.03207903727889061,
+      "learning_rate": 0.003472180523750933,
+      "loss": 0.2046,
+      "step": 1600
+    },
+    {
+      "epoch": 1.948441247002398,
+      "grad_norm": 0.0310438871383667,
+      "learning_rate": 0.0033070326423020407,
+      "loss": 0.1941,
+      "step": 1625
+    },
+    {
+      "epoch": 1.9784172661870505,
+      "grad_norm": 0.032687630504369736,
+      "learning_rate": 0.0031439455487643707,
+      "loss": 0.1962,
+      "step": 1650
+    },
+    {
+      "epoch": 2.0083932853717026,
+      "grad_norm": 0.0336097776889801,
+      "learning_rate": 0.0029831177631562305,
+      "loss": 0.1852,
+      "step": 1675
+    },
+    {
+      "epoch": 2.038369304556355,
+      "grad_norm": 0.034838490188121796,
+      "learning_rate": 0.0028247450553221276,
+      "loss": 0.1593,
+      "step": 1700
+    },
+    {
+      "epoch": 2.068345323741007,
+      "grad_norm": 0.03779308870434761,
+      "learning_rate": 0.002669020206629217,
+      "loss": 0.1617,
+      "step": 1725
+    },
+    {
+      "epoch": 2.0983213429256593,
+      "grad_norm": 0.03595089539885521,
+      "learning_rate": 0.0025161327753015295,
+      "loss": 0.1602,
+      "step": 1750
+    },
+    {
+      "epoch": 2.128297362110312,
+      "grad_norm": 0.03848210349678993,
+      "learning_rate": 0.0023662688656775972,
+      "loss": 0.159,
+      "step": 1775
+    },
+    {
+      "epoch": 2.158273381294964,
+      "grad_norm": 0.04832014814019203,
+      "learning_rate": 0.0022196109016723708,
+      "loss": 0.1593,
+      "step": 1800
+    },
+    {
+      "epoch": 2.1882494004796165,
+      "grad_norm": 0.03901856020092964,
+      "learning_rate": 0.0020763374047192027,
+      "loss": 0.1641,
+      "step": 1825
+    },
+    {
+      "epoch": 2.2182254196642686,
+      "grad_norm": 0.03556321561336517,
+      "learning_rate": 0.001936622776462147,
+      "loss": 0.1573,
+      "step": 1850
+    },
+    {
+      "epoch": 2.2482014388489207,
+      "grad_norm": 0.033188410103321075,
+      "learning_rate": 0.0018006370864631644,
+      "loss": 0.1611,
+      "step": 1875
+    },
+    {
+      "epoch": 2.278177458033573,
+      "grad_norm": 0.0357639417052269,
+      "learning_rate": 0.0016685458651825892,
+      "loss": 0.1565,
+      "step": 1900
+    },
+    {
+      "epoch": 2.3081534772182253,
+      "grad_norm": 0.03594391047954559,
+      "learning_rate": 0.0015405099024848874,
+      "loss": 0.1629,
+      "step": 1925
+    },
+    {
+      "epoch": 2.338129496402878,
+      "grad_norm": 0.03396276384592056,
+      "learning_rate": 0.0014166850519149793,
+      "loss": 0.1566,
+      "step": 1950
+    },
+    {
+      "epoch": 2.36810551558753,
+      "grad_norm": 0.03651060536503792,
+      "learning_rate": 0.0012972220409833552,
+      "loss": 0.1587,
+      "step": 1975
+    },
+    {
+      "epoch": 2.3980815347721824,
+      "grad_norm": 0.03662113845348358,
+      "learning_rate": 0.001182266287690924,
+      "loss": 0.155,
+      "step": 2000
+    },
+    {
+      "epoch": 2.4280575539568345,
+      "grad_norm": 0.03555059805512428,
+      "learning_rate": 0.0010719577235169537,
+      "loss": 0.1587,
+      "step": 2025
+    },
+    {
+      "epoch": 2.4580335731414866,
+      "grad_norm": 0.03360700234770775,
+      "learning_rate": 0.0009664306230855341,
+      "loss": 0.1585,
+      "step": 2050
+    },
+    {
+      "epoch": 2.488009592326139,
+      "grad_norm": 0.033780504018068314,
+      "learning_rate": 0.0008658134407179418,
+      "loss": 0.1553,
+      "step": 2075
+    },
+    {
+      "epoch": 2.5179856115107913,
+      "grad_norm": 0.03432834520936012,
+      "learning_rate": 0.0007702286540698417,
+      "loss": 0.158,
+      "step": 2100
+    },
+    {
+      "epoch": 2.547961630695444,
+      "grad_norm": 0.035472046583890915,
+      "learning_rate": 0.0006797926150436618,
+      "loss": 0.1573,
+      "step": 2125
+    },
+    {
+      "epoch": 2.577937649880096,
+      "grad_norm": 0.03242143243551254,
+      "learning_rate": 0.000594615408157641,
+      "loss": 0.1537,
+      "step": 2150
+    },
+    {
+      "epoch": 2.6079136690647484,
+      "grad_norm": 0.03848033398389816,
+      "learning_rate": 0.0005148007165439233,
+      "loss": 0.1532,
+      "step": 2175
+    },
+    {
+      "epoch": 2.6378896882494005,
+      "grad_norm": 0.03433902934193611,
+      "learning_rate": 0.0004404456957388309,
+      "loss": 0.1529,
+      "step": 2200
+    },
+    {
+      "epoch": 2.6678657074340526,
+      "grad_norm": 0.031391434371471405,
+      "learning_rate": 0.00037164085541894934,
+      "loss": 0.1546,
+      "step": 2225
+    },
+    {
+      "epoch": 2.697841726618705,
+      "grad_norm": 0.04350714385509491,
+      "learning_rate": 0.000308469949226971,
+      "loss": 0.1584,
+      "step": 2250
+    },
+    {
+      "epoch": 2.7278177458033572,
+      "grad_norm": 0.03302815929055214,
+      "learning_rate": 0.0002510098728214133,
+      "loss": 0.1614,
+      "step": 2275
+    },
+    {
+      "epoch": 2.7577937649880093,
+      "grad_norm": 0.03371904045343399,
+      "learning_rate": 0.00019933057027432145,
+      "loss": 0.1517,
+      "step": 2300
+    },
+    {
+      "epoch": 2.787769784172662,
+      "grad_norm": 0.031399570405483246,
+      "learning_rate": 0.00015349494893087513,
+      "loss": 0.1496,
+      "step": 2325
+    },
+    {
+      "epoch": 2.8177458033573144,
+      "grad_norm": 0.03590654581785202,
+      "learning_rate": 0.00011355880283455522,
+      "loss": 0.1535,
+      "step": 2350
+    },
+    {
+      "epoch": 2.8477218225419665,
+      "grad_norm": 0.03726603463292122,
+      "learning_rate": 7.957074481107551e-05,
+      "loss": 0.1491,
+      "step": 2375
+    },
+    {
+      "epoch": 2.8776978417266186,
+      "grad_norm": 0.041672345250844955,
+      "learning_rate": 5.1572147293743046e-05,
+      "loss": 0.1537,
+      "step": 2400
+    },
+    {
+      "epoch": 2.907673860911271,
+      "grad_norm": 0.03395906835794449,
+      "learning_rate": 2.959709196229954e-05,
+      "loss": 0.1544,
+      "step": 2425
+    },
+    {
+      "epoch": 2.937649880095923,
+      "grad_norm": 0.030766665935516357,
+      "learning_rate": 1.3672328256518207e-05,
+      "loss": 0.1513,
+      "step": 2450
+    },
+    {
+      "epoch": 2.9676258992805753,
+      "grad_norm": 0.03894634544849396,
+      "learning_rate": 3.817240815084944e-06,
+      "loss": 0.1504,
+      "step": 2475
+    },
+    {
+      "epoch": 2.997601918465228,
+      "grad_norm": 0.03591805323958397,
+      "learning_rate": 4.382587937445948e-08,
+      "loss": 0.1516,
+      "step": 2500
+    },
+    {
+      "epoch": 3.0,
+      "step": 2502,
+      "total_flos": 2.43882352705536e+18,
+      "train_loss": 0.21604387271556733,
+      "train_runtime": 3302.176,
+      "train_samples_per_second": 36.34,
+      "train_steps_per_second": 0.758
+    }
+  ],
+  "logging_steps": 25,
+  "max_steps": 2502,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 0,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 2.43882352705536e+18,
+  "train_batch_size": 48,
+  "trial_name": null,
+  "trial_params": null
+}

nl_tasks/exps/run_ex34/gsm8k.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ gsm8k length==== 1319, gsm8k acc %====, 52.388172858225936

nl_tasks/exps/run_ex34/math.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ math length==== 5000, math acc %====, 7.84

nl_tasks/exps/run_ex34/trainer_state.json ADDED Viewed

	@@ -0,0 +1,743 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 2502,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.02997601918465228,
+      "grad_norm": 0.2532191574573517,
+      "learning_rate": 0.0019123505976095618,
+      "loss": 0.4625,
+      "step": 25
+    },
+    {
+      "epoch": 0.05995203836930456,
+      "grad_norm": 5.414860725402832,
+      "learning_rate": 0.003904382470119522,
+      "loss": 0.478,
+      "step": 50
+    },
+    {
+      "epoch": 0.08992805755395683,
+      "grad_norm": 0.6449404954910278,
+      "learning_rate": 0.005896414342629483,
+      "loss": 0.6016,
+      "step": 75
+    },
+    {
+      "epoch": 0.11990407673860912,
+      "grad_norm": 1.4888554811477661,
+      "learning_rate": 0.007888446215139441,
+      "loss": 0.4044,
+      "step": 100
+    },
+    {
+      "epoch": 0.1498800959232614,
+      "grad_norm": 0.23530787229537964,
+      "learning_rate": 0.009880478087649403,
+      "loss": 0.3748,
+      "step": 125
+    },
+    {
+      "epoch": 0.17985611510791366,
+      "grad_norm": 0.10670146346092224,
+      "learning_rate": 0.011872509960159363,
+      "loss": 0.3298,
+      "step": 150
+    },
+    {
+      "epoch": 0.20983213429256595,
+      "grad_norm": 0.1089276447892189,
+      "learning_rate": 0.013864541832669323,
+      "loss": 0.3172,
+      "step": 175
+    },
+    {
+      "epoch": 0.23980815347721823,
+      "grad_norm": 0.09628577530384064,
+      "learning_rate": 0.015856573705179282,
+      "loss": 0.3173,
+      "step": 200
+    },
+    {
+      "epoch": 0.2697841726618705,
+      "grad_norm": 0.07994027435779572,
+      "learning_rate": 0.017848605577689244,
+      "loss": 0.3081,
+      "step": 225
+    },
+    {
+      "epoch": 0.2997601918465228,
+      "grad_norm": 0.06300719082355499,
+      "learning_rate": 0.019840637450199202,
+      "loss": 0.3046,
+      "step": 250
+    },
+    {
+      "epoch": 0.32973621103117506,
+      "grad_norm": 0.06899631768465042,
+      "learning_rate": 0.019994848459243056,
+      "loss": 0.2986,
+      "step": 275
+    },
+    {
+      "epoch": 0.3597122302158273,
+      "grad_norm": 0.08354010432958603,
+      "learning_rate": 0.019977569507449413,
+      "loss": 0.2875,
+      "step": 300
+    },
+    {
+      "epoch": 0.38968824940047964,
+      "grad_norm": 0.0717054083943367,
+      "learning_rate": 0.019948145221293085,
+      "loss": 0.2824,
+      "step": 325
+    },
+    {
+      "epoch": 0.4196642685851319,
+      "grad_norm": 0.04974915832281113,
+      "learning_rate": 0.019906611417893006,
+      "loss": 0.282,
+      "step": 350
+    },
+    {
+      "epoch": 0.44964028776978415,
+      "grad_norm": 0.0749383196234703,
+      "learning_rate": 0.01985301865484532,
+      "loss": 0.2738,
+      "step": 375
+    },
+    {
+      "epoch": 0.47961630695443647,
+      "grad_norm": 0.04245521500706673,
+      "learning_rate": 0.019787432168681444,
+      "loss": 0.2727,
+      "step": 400
+    },
+    {
+      "epoch": 0.5095923261390888,
+      "grad_norm": 0.04584546759724617,
+      "learning_rate": 0.019709931795458,
+      "loss": 0.2818,
+      "step": 425
+    },
+    {
+      "epoch": 0.539568345323741,
+      "grad_norm": 0.043414946645498276,
+      "learning_rate": 0.019620611873575267,
+      "loss": 0.2613,
+      "step": 450
+    },
+    {
+      "epoch": 0.5695443645083933,
+      "grad_norm": 0.058648571372032166,
+      "learning_rate": 0.019519581128942465,
+      "loss": 0.2609,
+      "step": 475
+    },
+    {
+      "epoch": 0.5995203836930456,
+      "grad_norm": 0.04752543196082115,
+      "learning_rate": 0.019406962542629646,
+      "loss": 0.2677,
+      "step": 500
+    },
+    {
+      "epoch": 0.6294964028776978,
+      "grad_norm": 0.048361025750637054,
+      "learning_rate": 0.019282893201167266,
+      "loss": 0.2548,
+      "step": 525
+    },
+    {
+      "epoch": 0.6594724220623501,
+      "grad_norm": 0.05573540925979614,
+      "learning_rate": 0.01914752412967573,
+      "loss": 0.2517,
+      "step": 550
+    },
+    {
+      "epoch": 0.6894484412470024,
+      "grad_norm": 0.03862696886062622,
+      "learning_rate": 0.019001020108027976,
+      "loss": 0.2583,
+      "step": 575
+    },
+    {
+      "epoch": 0.7194244604316546,
+      "grad_norm": 0.0389208160340786,
+      "learning_rate": 0.01884355947026889,
+      "loss": 0.253,
+      "step": 600
+    },
+    {
+      "epoch": 0.749400479616307,
+      "grad_norm": 0.03260328620672226,
+      "learning_rate": 0.018675333887535724,
+      "loss": 0.2507,
+      "step": 625
+    },
+    {
+      "epoch": 0.7793764988009593,
+      "grad_norm": 0.038773685693740845,
+      "learning_rate": 0.01849654813474377,
+      "loss": 0.2481,
+      "step": 650
+    },
+    {
+      "epoch": 0.8093525179856115,
+      "grad_norm": 0.03589491918683052,
+      "learning_rate": 0.018307419841321244,
+      "loss": 0.2558,
+      "step": 675
+    },
+    {
+      "epoch": 0.8393285371702638,
+      "grad_norm": 0.040207505226135254,
+      "learning_rate": 0.018108179226296876,
+      "loss": 0.2432,
+      "step": 700
+    },
+    {
+      "epoch": 0.8693045563549161,
+      "grad_norm": 0.03414730727672577,
+      "learning_rate": 0.017899068818062608,
+      "loss": 0.2397,
+      "step": 725
+    },
+    {
+      "epoch": 0.8992805755395683,
+      "grad_norm": 0.0384533517062664,
+      "learning_rate": 0.017680343159152546,
+      "loss": 0.2469,
+      "step": 750
+    },
+    {
+      "epoch": 0.9292565947242206,
+      "grad_norm": 0.029624082148075104,
+      "learning_rate": 0.017452268496397562,
+      "loss": 0.2448,
+      "step": 775
+    },
+    {
+      "epoch": 0.9592326139088729,
+      "grad_norm": 0.028060954064130783,
+      "learning_rate": 0.017215122456832658,
+      "loss": 0.243,
+      "step": 800
+    },
+    {
+      "epoch": 0.9892086330935251,
+      "grad_norm": 0.033197712153196335,
+      "learning_rate": 0.016969193709751612,
+      "loss": 0.2393,
+      "step": 825
+    },
+    {
+      "epoch": 1.0191846522781776,
+      "grad_norm": 0.03793744370341301,
+      "learning_rate": 0.01671478161532028,
+      "loss": 0.2231,
+      "step": 850
+    },
+    {
+      "epoch": 1.0491606714628297,
+      "grad_norm": 0.03373177349567413,
+      "learning_rate": 0.016452195860176322,
+      "loss": 0.2136,
+      "step": 875
+    },
+    {
+      "epoch": 1.079136690647482,
+      "grad_norm": 0.029341645538806915,
+      "learning_rate": 0.01618175608045886,
+      "loss": 0.2158,
+      "step": 900
+    },
+    {
+      "epoch": 1.1091127098321343,
+      "grad_norm": 0.03401786461472511,
+      "learning_rate": 0.015903791472726955,
+      "loss": 0.2223,
+      "step": 925
+    },
+    {
+      "epoch": 1.1390887290167866,
+      "grad_norm": 0.02708265371620655,
+      "learning_rate": 0.015618640393240542,
+      "loss": 0.2121,
+      "step": 950
+    },
+    {
+      "epoch": 1.169064748201439,
+      "grad_norm": 0.029649930074810982,
+      "learning_rate": 0.015326649946091635,
+      "loss": 0.2168,
+      "step": 975
+    },
+    {
+      "epoch": 1.1990407673860912,
+      "grad_norm": 0.033625248819589615,
+      "learning_rate": 0.01502817556068702,
+      "loss": 0.2184,
+      "step": 1000
+    },
+    {
+      "epoch": 1.2290167865707433,
+      "grad_norm": 0.036567322909832,
+      "learning_rate": 0.014723580559096951,
+      "loss": 0.2205,
+      "step": 1025
+    },
+    {
+      "epoch": 1.2589928057553956,
+      "grad_norm": 0.029653819277882576,
+      "learning_rate": 0.014413235713796298,
+      "loss": 0.2122,
+      "step": 1050
+    },
+    {
+      "epoch": 1.288968824940048,
+      "grad_norm": 0.0300185214728117,
+      "learning_rate": 0.014097518796336648,
+      "loss": 0.2058,
+      "step": 1075
+    },
+    {
+      "epoch": 1.3189448441247003,
+      "grad_norm": 0.027810001745820045,
+      "learning_rate": 0.013776814117498662,
+      "loss": 0.207,
+      "step": 1100
+    },
+    {
+      "epoch": 1.3489208633093526,
+      "grad_norm": 0.027427321299910545,
+      "learning_rate": 0.013451512059484468,
+      "loss": 0.2108,
+      "step": 1125
+    },
+    {
+      "epoch": 1.3788968824940047,
+      "grad_norm": 0.028375081717967987,
+      "learning_rate": 0.013122008600719522,
+      "loss": 0.2048,
+      "step": 1150
+    },
+    {
+      "epoch": 1.4088729016786572,
+      "grad_norm": 0.029488051310181618,
+      "learning_rate": 0.012788704833842401,
+      "loss": 0.2121,
+      "step": 1175
+    },
+    {
+      "epoch": 1.4388489208633093,
+      "grad_norm": 0.029031606391072273,
+      "learning_rate": 0.012452006477469255,
+      "loss": 0.1975,
+      "step": 1200
+    },
+    {
+      "epoch": 1.4688249400479616,
+      "grad_norm": 0.029272671788930893,
+      "learning_rate": 0.012112323382327204,
+      "loss": 0.2055,
+      "step": 1225
+    },
+    {
+      "epoch": 1.498800959232614,
+      "grad_norm": 0.027180878445506096,
+      "learning_rate": 0.01177006903235788,
+      "loss": 0.2082,
+      "step": 1250
+    },
+    {
+      "epoch": 1.5287769784172662,
+      "grad_norm": 0.02914930321276188,
+      "learning_rate": 0.011425660041398385,
+      "loss": 0.2067,
+      "step": 1275
+    },
+    {
+      "epoch": 1.5587529976019185,
+      "grad_norm": 0.024756262078881264,
+      "learning_rate": 0.011079515646052343,
+      "loss": 0.1988,
+      "step": 1300
+    },
+    {
+      "epoch": 1.5887290167865706,
+      "grad_norm": 0.0281531922519207,
+      "learning_rate": 0.010732057195368346,
+      "loss": 0.2043,
+      "step": 1325
+    },
+    {
+      "epoch": 1.6187050359712232,
+      "grad_norm": 0.02760651335120201,
+      "learning_rate": 0.01038370763794701,
+      "loss": 0.2008,
+      "step": 1350
+    },
+    {
+      "epoch": 1.6486810551558753,
+      "grad_norm": 0.026089100167155266,
+      "learning_rate": 0.010034891007100942,
+      "loss": 0.2065,
+      "step": 1375
+    },
+    {
+      "epoch": 1.6786570743405276,
+      "grad_norm": 0.030709104612469673,
+      "learning_rate": 0.009686031904694317,
+      "loss": 0.2022,
+      "step": 1400
+    },
+    {
+      "epoch": 1.70863309352518,
+      "grad_norm": 0.028219345957040787,
+      "learning_rate": 0.009337554984290423,
+      "loss": 0.1998,
+      "step": 1425
+    },
+    {
+      "epoch": 1.738609112709832,
+      "grad_norm": 0.02828747034072876,
+      "learning_rate": 0.00898988443423621,
+      "loss": 0.2048,
+      "step": 1450
+    },
+    {
+      "epoch": 1.7685851318944845,
+      "grad_norm": 0.02299325354397297,
+      "learning_rate": 0.00864344346131318,
+      "loss": 0.1983,
+      "step": 1475
+    },
+    {
+      "epoch": 1.7985611510791366,
+      "grad_norm": 0.02619764395058155,
+      "learning_rate": 0.008298653775583083,
+      "loss": 0.2013,
+      "step": 1500
+    },
+    {
+      "epoch": 1.828537170263789,
+      "grad_norm": 0.021840449422597885,
+      "learning_rate": 0.007955935077055509,
+      "loss": 0.1956,
+      "step": 1525
+    },
+    {
+      "epoch": 1.8585131894484412,
+      "grad_norm": 0.02665482647716999,
+      "learning_rate": 0.007615704544802264,
+      "loss": 0.2022,
+      "step": 1550
+    },
+    {
+      "epoch": 1.8884892086330936,
+      "grad_norm": 0.023482663556933403,
+      "learning_rate": 0.007278376329140371,
+      "loss": 0.2017,
+      "step": 1575
+    },
+    {
+      "epoch": 1.9184652278177459,
+      "grad_norm": 0.02401842176914215,
+      "learning_rate": 0.006944361047501866,
+      "loss": 0.2059,
+      "step": 1600
+    },
+    {
+      "epoch": 1.948441247002398,
+      "grad_norm": 0.022366248071193695,
+      "learning_rate": 0.006614065284604081,
+      "loss": 0.1954,
+      "step": 1625
+    },
+    {
+      "epoch": 1.9784172661870505,
+      "grad_norm": 0.023342736065387726,
+      "learning_rate": 0.0062878910975287415,
+      "loss": 0.1973,
+      "step": 1650
+    },
+    {
+      "epoch": 2.0083932853717026,
+      "grad_norm": 0.02544998750090599,
+      "learning_rate": 0.005966235526312461,
+      "loss": 0.1869,
+      "step": 1675
+    },
+    {
+      "epoch": 2.038369304556355,
+      "grad_norm": 0.027767734602093697,
+      "learning_rate": 0.005649490110644255,
+      "loss": 0.1598,
+      "step": 1700
+    },
+    {
+      "epoch": 2.068345323741007,
+      "grad_norm": 0.0325852669775486,
+      "learning_rate": 0.005338040413258434,
+      "loss": 0.1628,
+      "step": 1725
+    },
+    {
+      "epoch": 2.0983213429256593,
+      "grad_norm": 0.027936723083257675,
+      "learning_rate": 0.005032265550603059,
+      "loss": 0.161,
+      "step": 1750
+    },
+    {
+      "epoch": 2.128297362110312,
+      "grad_norm": 0.027292126789689064,
+      "learning_rate": 0.0047325377313551945,
+      "loss": 0.1598,
+      "step": 1775
+    },
+    {
+      "epoch": 2.158273381294964,
+      "grad_norm": 0.030712289735674858,
+      "learning_rate": 0.0044392218033447416,
+      "loss": 0.1614,
+      "step": 1800
+    },
+    {
+      "epoch": 2.1882494004796165,
+      "grad_norm": 0.02516656182706356,
+      "learning_rate": 0.0041526748094384055,
+      "loss": 0.165,
+      "step": 1825
+    },
+    {
+      "epoch": 2.2182254196642686,
+      "grad_norm": 0.028255818411707878,
+      "learning_rate": 0.003873245552924294,
+      "loss": 0.1584,
+      "step": 1850
+    },
+    {
+      "epoch": 2.2482014388489207,
+      "grad_norm": 0.028185885399580002,
+      "learning_rate": 0.003601274172926329,
+      "loss": 0.1619,
+      "step": 1875
+    },
+    {
+      "epoch": 2.278177458033573,
+      "grad_norm": 0.029240386560559273,
+      "learning_rate": 0.0033370917303651784,
+      "loss": 0.1575,
+      "step": 1900
+    },
+    {
+      "epoch": 2.3081534772182253,
+      "grad_norm": 0.026783913373947144,
+      "learning_rate": 0.003081019804969775,
+      "loss": 0.1636,
+      "step": 1925
+    },
+    {
+      "epoch": 2.338129496402878,
+      "grad_norm": 0.026911884546279907,
+      "learning_rate": 0.0028333701038299585,
+      "loss": 0.1583,
+      "step": 1950
+    },
+    {
+      "epoch": 2.36810551558753,
+      "grad_norm": 0.02731228433549404,
+      "learning_rate": 0.0025944440819667103,
+      "loss": 0.1596,
+      "step": 1975
+    },
+    {
+      "epoch": 2.3980815347721824,
+      "grad_norm": 0.026634665206074715,
+      "learning_rate": 0.002364532575381848,
+      "loss": 0.1559,
+      "step": 2000
+    },
+    {
+      "epoch": 2.4280575539568345,
+      "grad_norm": 0.02517741546034813,
+      "learning_rate": 0.0021439154470339074,
+      "loss": 0.1598,
+      "step": 2025
+    },
+    {
+      "epoch": 2.4580335731414866,
+      "grad_norm": 0.028662823140621185,
+      "learning_rate": 0.0019328612461710682,
+      "loss": 0.1592,
+      "step": 2050
+    },
+    {
+      "epoch": 2.488009592326139,
+      "grad_norm": 0.024708494544029236,
+      "learning_rate": 0.0017316268814358837,
+      "loss": 0.1558,
+      "step": 2075
+    },
+    {
+      "epoch": 2.5179856115107913,
+      "grad_norm": 0.023627523332834244,
+      "learning_rate": 0.0015404573081396833,
+      "loss": 0.1581,
+      "step": 2100
+    },
+    {
+      "epoch": 2.547961630695444,
+      "grad_norm": 0.02505665458738804,
+      "learning_rate": 0.0013595852300873235,
+      "loss": 0.1578,
+      "step": 2125
+    },
+    {
+      "epoch": 2.577937649880096,
+      "grad_norm": 0.02366657927632332,
+      "learning_rate": 0.001189230816315282,
+      "loss": 0.1536,
+      "step": 2150
+    },
+    {
+      "epoch": 2.6079136690647484,
+      "grad_norm": 0.028983445838093758,
+      "learning_rate": 0.0010296014330878466,
+      "loss": 0.1528,
+      "step": 2175
+    },
+    {
+      "epoch": 2.6378896882494005,
+      "grad_norm": 0.02606895938515663,
+      "learning_rate": 0.0008808913914776618,
+      "loss": 0.1542,
+      "step": 2200
+    },
+    {
+      "epoch": 2.6678657074340526,
+      "grad_norm": 0.021730564534664154,
+      "learning_rate": 0.0007432817108378987,
+      "loss": 0.1558,
+      "step": 2225
+    },
+    {
+      "epoch": 2.697841726618705,
+      "grad_norm": 0.0258767269551754,
+      "learning_rate": 0.000616939898453942,
+      "loss": 0.1587,
+      "step": 2250
+    },
+    {
+      "epoch": 2.7278177458033572,
+      "grad_norm": 0.024898972362279892,
+      "learning_rate": 0.0005020197456428266,
+      "loss": 0.1613,
+      "step": 2275
+    },
+    {
+      "epoch": 2.7577937649880093,
+      "grad_norm": 0.026595328003168106,
+      "learning_rate": 0.0003986611405486429,
+      "loss": 0.1518,
+      "step": 2300
+    },
+    {
+      "epoch": 2.787769784172662,
+      "grad_norm": 0.021675392985343933,
+      "learning_rate": 0.00030698989786175025,
+      "loss": 0.1495,
+      "step": 2325
+    },
+    {
+      "epoch": 2.8177458033573144,
+      "grad_norm": 0.026996396481990814,
+      "learning_rate": 0.00022711760566911045,
+      "loss": 0.1539,
+      "step": 2350
+    },
+    {
+      "epoch": 2.8477218225419665,
+      "grad_norm": 0.02844955585896969,
+      "learning_rate": 0.00015914148962215102,
+      "loss": 0.1501,
+      "step": 2375
+    },
+    {
+      "epoch": 2.8776978417266186,
+      "grad_norm": 0.035385046154260635,
+      "learning_rate": 0.00010314429458748609,
+      "loss": 0.1533,
+      "step": 2400
+    },
+    {
+      "epoch": 2.907673860911271,
+      "grad_norm": 0.0226058941334486,
+      "learning_rate": 5.919418392459908e-05,
+      "loss": 0.1553,
+      "step": 2425
+    },
+    {
+      "epoch": 2.937649880095923,
+      "grad_norm": 0.021318677812814713,
+      "learning_rate": 2.7344656513036413e-05,
+      "loss": 0.1513,
+      "step": 2450
+    },
+    {
+      "epoch": 2.9676258992805753,
+      "grad_norm": 0.03294537961483002,
+      "learning_rate": 7.634481630169888e-06,
+      "loss": 0.1507,
+      "step": 2475
+    },
+    {
+      "epoch": 2.997601918465228,
+      "grad_norm": 0.027222590520977974,
+      "learning_rate": 8.765175874891896e-08,
+      "loss": 0.1521,
+      "step": 2500
+    },
+    {
+      "epoch": 3.0,
+      "step": 2502,
+      "total_flos": 2.43882352705536e+18,
+      "train_loss": 0.22085073801110403,
+      "train_runtime": 3314.1845,
+      "train_samples_per_second": 36.208,
+      "train_steps_per_second": 0.755
+    }
+  ],
+  "logging_steps": 25,
+  "max_steps": 2502,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 0,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 2.43882352705536e+18,
+  "train_batch_size": 48,
+  "trial_name": null,
+  "trial_params": null
+}