RefalMachine committed on Jul 13, 2024

Commit

4fec6b2

verified ·

1 Parent(s): 0ce93cf

Upload folder using huggingface_hub

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitattributes +13 -0
llmtf_eval_k0_bs8/daru_treewayabstractive.jsonl +0 -0
llmtf_eval_k0_bs8/daru_treewayabstractive_params.jsonl +57 -0
llmtf_eval_k0_bs8/daru_treewayabstractive_total.jsonl +8 -0
llmtf_eval_k0_bs8/daru_treewayextractive.jsonl +3 -0
llmtf_eval_k0_bs8/daru_treewayextractive_params.jsonl +57 -0
llmtf_eval_k0_bs8/daru_treewayextractive_total.jsonl +7 -0
llmtf_eval_k0_bs8/darumeru_MultiQ.jsonl +0 -0
llmtf_eval_k0_bs8/darumeru_MultiQ_params.jsonl +57 -0
llmtf_eval_k0_bs8/darumeru_MultiQ_total.jsonl +8 -0
llmtf_eval_k0_bs8/darumeru_PARus.jsonl +0 -0
llmtf_eval_k0_bs8/darumeru_PARus_params.jsonl +57 -0
llmtf_eval_k0_bs8/darumeru_PARus_total.jsonl +7 -0
llmtf_eval_k0_bs8/darumeru_RCB.jsonl +0 -0
llmtf_eval_k0_bs8/darumeru_RCB_params.jsonl +57 -0
llmtf_eval_k0_bs8/darumeru_RCB_total.jsonl +8 -0
llmtf_eval_k0_bs8/darumeru_RWSD.jsonl +0 -0
llmtf_eval_k0_bs8/darumeru_RWSD_params.jsonl +57 -0
llmtf_eval_k0_bs8/darumeru_RWSD_total.jsonl +7 -0
llmtf_eval_k0_bs8/darumeru_USE.jsonl +0 -0
llmtf_eval_k0_bs8/darumeru_USE_params.jsonl +57 -0
llmtf_eval_k0_bs8/darumeru_USE_total.jsonl +7 -0
llmtf_eval_k0_bs8/darumeru_cp_para_en.jsonl +0 -0
llmtf_eval_k0_bs8/darumeru_cp_para_en_params.jsonl +57 -0
llmtf_eval_k0_bs8/darumeru_cp_para_en_total.jsonl +9 -0
llmtf_eval_k0_bs8/darumeru_cp_para_ru.jsonl +0 -0
llmtf_eval_k0_bs8/darumeru_cp_para_ru_params.jsonl +57 -0
llmtf_eval_k0_bs8/darumeru_cp_para_ru_total.jsonl +9 -0
llmtf_eval_k0_bs8/darumeru_cp_sent_en.jsonl +0 -0
llmtf_eval_k0_bs8/darumeru_cp_sent_en_params.jsonl +57 -0
llmtf_eval_k0_bs8/darumeru_cp_sent_en_total.jsonl +9 -0
llmtf_eval_k0_bs8/darumeru_cp_sent_ru.jsonl +0 -0
llmtf_eval_k0_bs8/darumeru_cp_sent_ru_params.jsonl +57 -0
llmtf_eval_k0_bs8/darumeru_cp_sent_ru_total.jsonl +9 -0
llmtf_eval_k0_bs8/darumeru_ruMMLU.jsonl +3 -0
llmtf_eval_k0_bs8/darumeru_ruMMLU_params.jsonl +57 -0
llmtf_eval_k0_bs8/darumeru_ruMMLU_total.jsonl +7 -0
llmtf_eval_k0_bs8/darumeru_ruOpenBookQA.jsonl +0 -0
llmtf_eval_k0_bs8/darumeru_ruOpenBookQA_params.jsonl +57 -0
llmtf_eval_k0_bs8/darumeru_ruOpenBookQA_total.jsonl +8 -0
llmtf_eval_k0_bs8/darumeru_ruTiE.jsonl +3 -0
llmtf_eval_k0_bs8/darumeru_ruTiE_params.jsonl +57 -0
llmtf_eval_k0_bs8/darumeru_ruTiE_total.jsonl +7 -0
llmtf_eval_k0_bs8/darumeru_ruWorldTree.jsonl +0 -0
llmtf_eval_k0_bs8/darumeru_ruWorldTree_params.jsonl +57 -0
llmtf_eval_k0_bs8/darumeru_ruWorldTree_total.jsonl +8 -0
llmtf_eval_k0_bs8/evaluation_log.txt +273 -0
llmtf_eval_k0_bs8/evaluation_results.txt +2 -0
llmtf_eval_k0_bs8/nlpcoreteam_enMMLU.jsonl +3 -0
llmtf_eval_k0_bs8/nlpcoreteam_enMMLU_params.jsonl +57 -0

.gitattributes CHANGED Viewed

@@ -71,3 +71,16 @@ llmtf_eval_k5_bs4/darumeru_ruTiE.jsonl filter=lfs diff=lfs merge=lfs -text
 llmtf_eval_k5_bs4/nlpcoreteam_enMMLU.jsonl filter=lfs diff=lfs merge=lfs -text
 llmtf_eval_k5_bs4/nlpcoreteam_ruMMLU.jsonl filter=lfs diff=lfs merge=lfs -text
 llmtf_eval_k5_bs4/russiannlp_rucola_custom.jsonl filter=lfs diff=lfs merge=lfs -text

 llmtf_eval_k5_bs4/nlpcoreteam_enMMLU.jsonl filter=lfs diff=lfs merge=lfs -text
 llmtf_eval_k5_bs4/nlpcoreteam_ruMMLU.jsonl filter=lfs diff=lfs merge=lfs -text
 llmtf_eval_k5_bs4/russiannlp_rucola_custom.jsonl filter=lfs diff=lfs merge=lfs -text
+llmtf_eval_k0_bs8/daru_treewayextractive.jsonl filter=lfs diff=lfs merge=lfs -text
+llmtf_eval_k0_bs8/darumeru_ruMMLU.jsonl filter=lfs diff=lfs merge=lfs -text
+llmtf_eval_k0_bs8/darumeru_ruTiE.jsonl filter=lfs diff=lfs merge=lfs -text
+llmtf_eval_k0_bs8/nlpcoreteam_enMMLU.jsonl filter=lfs diff=lfs merge=lfs -text
+llmtf_eval_k0_bs8/nlpcoreteam_ruMMLU.jsonl filter=lfs diff=lfs merge=lfs -text
+llmtf_eval_k5_bs8/daru_treewayabstractive.jsonl filter=lfs diff=lfs merge=lfs -text
+llmtf_eval_k5_bs8/daru_treewayextractive.jsonl filter=lfs diff=lfs merge=lfs -text
+llmtf_eval_k5_bs8/darumeru_MultiQ.jsonl filter=lfs diff=lfs merge=lfs -text
+llmtf_eval_k5_bs8/darumeru_ruMMLU.jsonl filter=lfs diff=lfs merge=lfs -text
+llmtf_eval_k5_bs8/darumeru_ruOpenBookQA.jsonl filter=lfs diff=lfs merge=lfs -text
+llmtf_eval_k5_bs8/darumeru_ruTiE.jsonl filter=lfs diff=lfs merge=lfs -text
+llmtf_eval_k5_bs8/nlpcoreteam_enMMLU.jsonl filter=lfs diff=lfs merge=lfs -text
+llmtf_eval_k5_bs8/nlpcoreteam_ruMMLU.jsonl filter=lfs diff=lfs merge=lfs -text

llmtf_eval_k0_bs8/daru_treewayabstractive.jsonl ADDED Viewed

The diff for this file is too large to render. See raw diff

llmtf_eval_k0_bs8/daru_treewayabstractive_params.jsonl ADDED Viewed

	@@ -0,0 +1,57 @@

+{
+    "custom_generation_config": null,
+    "model_params": {
+        "model_name_or_path": "NousResearch/Meta-Llama-3-8B-Instruct",
+        "generation_config": {
+            "bos_token_id": 128000,
+            "do_sample": true,
+            "eos_token_id": [
+                128001,
+                128009
+            ],
+            "max_length": 8192,
+            "max_new_tokens": 512,
+            "pad_token_id": 128001,
+            "stop_strings": [],
+            "temperature": 0.1,
+            "top_k": 40,
+            "top_p": 0.9,
+            "transformers_version": "4.38.2",
+            "trust_remote_code": [
+                false
+            ]
+        },
+        "conversation_template": {
+            "system_prompt": "",
+            "system_message_template": "",
+            "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
+            "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
+            "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
+            "user_role": "user",
+            "bot_role": "assistant",
+            "system_role": "system",
+            "global_prefix": "<|begin_of_text|>",
+            "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
+            "add_special_tokens": false,
+            "eos_token": "<|eot_id|>"
+        },
+        "load_in_8bit": false,
+        "torch_dtype": "auto",
+        "use_flash_attention_2": true,
+        "device_map": "cuda:0",
+        "use_fast_tokenizer": true,
+        "leading_space": false,
+        "space_token": null,
+        "trust_remote_code": [
+            false
+        ],
+        "max_model_len": 8192
+    },
+    "task_params": {
+        "max_len": 4000,
+        "few_shot_count": 0,
+        "batch_size": 8,
+        "max_sample_per_dataset": 500,
+        "method": "generate"
+    }
+}

llmtf_eval_k0_bs8/daru_treewayabstractive_total.jsonl ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "task_name": "daru/treewayabstractive",
+    "results": {
+        "rouge1": 0.35574041658645894,
+        "rouge2": 0.1282333481459036
+    },
+    "leaderboard_result": 0.24198688236618127
+}

llmtf_eval_k0_bs8/daru_treewayextractive.jsonl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6c7d2c5ec7e7ea394ebccddb9480c55a54d9150353711b34b7cb29e76b2c1236
+size 259990342

llmtf_eval_k0_bs8/daru_treewayextractive_params.jsonl ADDED Viewed

	@@ -0,0 +1,57 @@

+{
+    "custom_generation_config": null,
+    "model_params": {
+        "model_name_or_path": "NousResearch/Meta-Llama-3-8B-Instruct",
+        "generation_config": {
+            "bos_token_id": 128000,
+            "do_sample": true,
+            "eos_token_id": [
+                128001,
+                128009
+            ],
+            "max_length": 8192,
+            "max_new_tokens": 1,
+            "pad_token_id": 128001,
+            "stop_strings": [],
+            "temperature": 0.1,
+            "top_k": 40,
+            "top_p": 0.9,
+            "transformers_version": "4.38.2",
+            "trust_remote_code": [
+                false
+            ]
+        },
+        "conversation_template": {
+            "system_prompt": "",
+            "system_message_template": "",
+            "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
+            "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
+            "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
+            "user_role": "user",
+            "bot_role": "assistant",
+            "system_role": "system",
+            "global_prefix": "<|begin_of_text|>",
+            "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
+            "add_special_tokens": false,
+            "eos_token": "<|eot_id|>"
+        },
+        "load_in_8bit": false,
+        "torch_dtype": "auto",
+        "use_flash_attention_2": true,
+        "device_map": "cuda:0",
+        "use_fast_tokenizer": true,
+        "leading_space": false,
+        "space_token": null,
+        "trust_remote_code": [
+            false
+        ],
+        "max_model_len": 8192
+    },
+    "task_params": {
+        "max_len": 4000,
+        "few_shot_count": 0,
+        "batch_size": 8,
+        "max_sample_per_dataset": 500,
+        "method": "calculate_logsoftmax"
+    }
+}

llmtf_eval_k0_bs8/daru_treewayextractive_total.jsonl ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+    "task_name": "daru/treewayextractive",
+    "results": {
+        "r-prec": 0.39738621933621937
+    },
+    "leaderboard_result": 0.39738621933621937
+}

llmtf_eval_k0_bs8/darumeru_MultiQ.jsonl ADDED Viewed

The diff for this file is too large to render. See raw diff

llmtf_eval_k0_bs8/darumeru_MultiQ_params.jsonl ADDED Viewed

	@@ -0,0 +1,57 @@

+{
+    "custom_generation_config": null,
+    "model_params": {
+        "model_name_or_path": "NousResearch/Meta-Llama-3-8B-Instruct",
+        "generation_config": {
+            "bos_token_id": 128000,
+            "do_sample": true,
+            "eos_token_id": [
+                128001,
+                128009
+            ],
+            "max_length": 8192,
+            "max_new_tokens": 64,
+            "pad_token_id": 128001,
+            "stop_strings": [],
+            "temperature": 0.1,
+            "top_k": 40,
+            "top_p": 0.9,
+            "transformers_version": "4.38.2",
+            "trust_remote_code": [
+                false
+            ]
+        },
+        "conversation_template": {
+            "system_prompt": "",
+            "system_message_template": "",
+            "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
+            "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
+            "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
+            "user_role": "user",
+            "bot_role": "assistant",
+            "system_role": "system",
+            "global_prefix": "<|begin_of_text|>",
+            "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
+            "add_special_tokens": false,
+            "eos_token": "<|eot_id|>"
+        },
+        "load_in_8bit": false,
+        "torch_dtype": "auto",
+        "use_flash_attention_2": true,
+        "device_map": "cuda:0",
+        "use_fast_tokenizer": true,
+        "leading_space": false,
+        "space_token": null,
+        "trust_remote_code": [
+            false
+        ],
+        "max_model_len": 8192
+    },
+    "task_params": {
+        "max_len": 4000,
+        "few_shot_count": 0,
+        "batch_size": 8,
+        "max_sample_per_dataset": 10000000000000,
+        "method": "generate"
+    }
+}

llmtf_eval_k0_bs8/darumeru_MultiQ_total.jsonl ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "task_name": "darumeru/MultiQ",
+    "results": {
+        "f1": 0.3370324579707962,
+        "em": 0.21032504780114722
+    },
+    "leaderboard_result": 0.2736787528859717
+}

llmtf_eval_k0_bs8/darumeru_PARus.jsonl ADDED Viewed

The diff for this file is too large to render. See raw diff

llmtf_eval_k0_bs8/darumeru_PARus_params.jsonl ADDED Viewed

	@@ -0,0 +1,57 @@

+{
+    "custom_generation_config": null,
+    "model_params": {
+        "model_name_or_path": "NousResearch/Meta-Llama-3-8B-Instruct",
+        "generation_config": {
+            "bos_token_id": 128000,
+            "do_sample": true,
+            "eos_token_id": [
+                128001,
+                128009
+            ],
+            "max_length": 8192,
+            "max_new_tokens": 64,
+            "pad_token_id": 128001,
+            "stop_strings": [],
+            "temperature": 0.1,
+            "top_k": 40,
+            "top_p": 0.9,
+            "transformers_version": "4.38.2",
+            "trust_remote_code": [
+                false
+            ]
+        },
+        "conversation_template": {
+            "system_prompt": "",
+            "system_message_template": "",
+            "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
+            "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
+            "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
+            "user_role": "user",
+            "bot_role": "assistant",
+            "system_role": "system",
+            "global_prefix": "<|begin_of_text|>",
+            "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
+            "add_special_tokens": false,
+            "eos_token": "<|eot_id|>"
+        },
+        "load_in_8bit": false,
+        "torch_dtype": "auto",
+        "use_flash_attention_2": true,
+        "device_map": "cuda:0",
+        "use_fast_tokenizer": true,
+        "leading_space": false,
+        "space_token": null,
+        "trust_remote_code": [
+            false
+        ],
+        "max_model_len": 8192
+    },
+    "task_params": {
+        "max_len": 4000,
+        "few_shot_count": 0,
+        "batch_size": 8,
+        "max_sample_per_dataset": 10000000000000,
+        "method": "calculate_tokens_proba"
+    }
+}

llmtf_eval_k0_bs8/darumeru_PARus_total.jsonl ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+    "task_name": "darumeru/PARus",
+    "results": {
+        "acc": 0.64
+    },
+    "leaderboard_result": 0.64
+}

llmtf_eval_k0_bs8/darumeru_RCB.jsonl ADDED Viewed

The diff for this file is too large to render. See raw diff

llmtf_eval_k0_bs8/darumeru_RCB_params.jsonl ADDED Viewed

	@@ -0,0 +1,57 @@

+{
+    "custom_generation_config": null,
+    "model_params": {
+        "model_name_or_path": "NousResearch/Meta-Llama-3-8B-Instruct",
+        "generation_config": {
+            "bos_token_id": 128000,
+            "do_sample": true,
+            "eos_token_id": [
+                128001,
+                128009
+            ],
+            "max_length": 8192,
+            "max_new_tokens": 64,
+            "pad_token_id": 128001,
+            "stop_strings": [],
+            "temperature": 0.1,
+            "top_k": 40,
+            "top_p": 0.9,
+            "transformers_version": "4.38.2",
+            "trust_remote_code": [
+                false
+            ]
+        },
+        "conversation_template": {
+            "system_prompt": "",
+            "system_message_template": "",
+            "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
+            "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
+            "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
+            "user_role": "user",
+            "bot_role": "assistant",
+            "system_role": "system",
+            "global_prefix": "<|begin_of_text|>",
+            "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
+            "add_special_tokens": false,
+            "eos_token": "<|eot_id|>"
+        },
+        "load_in_8bit": false,
+        "torch_dtype": "auto",
+        "use_flash_attention_2": true,
+        "device_map": "cuda:0",
+        "use_fast_tokenizer": true,
+        "leading_space": false,
+        "space_token": null,
+        "trust_remote_code": [
+            false
+        ],
+        "max_model_len": 8192
+    },
+    "task_params": {
+        "max_len": 4000,
+        "few_shot_count": 0,
+        "batch_size": 8,
+        "max_sample_per_dataset": 10000000000000,
+        "method": "calculate_tokens_proba"
+    }
+}

llmtf_eval_k0_bs8/darumeru_RCB_total.jsonl ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "task_name": "darumeru/RCB",
+    "results": {
+        "acc": 0.4863636363636364,
+        "f1_macro": 0.4094575374734713
+    },
+    "leaderboard_result": 0.44791058691855384
+}

llmtf_eval_k0_bs8/darumeru_RWSD.jsonl ADDED Viewed

The diff for this file is too large to render. See raw diff

llmtf_eval_k0_bs8/darumeru_RWSD_params.jsonl ADDED Viewed

	@@ -0,0 +1,57 @@

+{
+    "custom_generation_config": null,
+    "model_params": {
+        "model_name_or_path": "NousResearch/Meta-Llama-3-8B-Instruct",
+        "generation_config": {
+            "bos_token_id": 128000,
+            "do_sample": true,
+            "eos_token_id": [
+                128001,
+                128009
+            ],
+            "max_length": 8192,
+            "max_new_tokens": 64,
+            "pad_token_id": 128001,
+            "stop_strings": [],
+            "temperature": 0.1,
+            "top_k": 40,
+            "top_p": 0.9,
+            "transformers_version": "4.38.2",
+            "trust_remote_code": [
+                false
+            ]
+        },
+        "conversation_template": {
+            "system_prompt": "",
+            "system_message_template": "",
+            "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
+            "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
+            "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
+            "user_role": "user",
+            "bot_role": "assistant",
+            "system_role": "system",
+            "global_prefix": "<|begin_of_text|>",
+            "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
+            "add_special_tokens": false,
+            "eos_token": "<|eot_id|>"
+        },
+        "load_in_8bit": false,
+        "torch_dtype": "auto",
+        "use_flash_attention_2": true,
+        "device_map": "cuda:0",
+        "use_fast_tokenizer": true,
+        "leading_space": false,
+        "space_token": null,
+        "trust_remote_code": [
+            false
+        ],
+        "max_model_len": 8192
+    },
+    "task_params": {
+        "max_len": 4000,
+        "few_shot_count": 0,
+        "batch_size": 8,
+        "max_sample_per_dataset": 10000000000000,
+        "method": "calculate_tokens_proba"
+    }
+}

llmtf_eval_k0_bs8/darumeru_RWSD_total.jsonl ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+    "task_name": "darumeru/RWSD",
+    "results": {
+        "acc": 0.5490196078431373
+    },
+    "leaderboard_result": 0.5490196078431373
+}

llmtf_eval_k0_bs8/darumeru_USE.jsonl ADDED Viewed

The diff for this file is too large to render. See raw diff

llmtf_eval_k0_bs8/darumeru_USE_params.jsonl ADDED Viewed

	@@ -0,0 +1,57 @@

+{
+    "custom_generation_config": null,
+    "model_params": {
+        "model_name_or_path": "NousResearch/Meta-Llama-3-8B-Instruct",
+        "generation_config": {
+            "bos_token_id": 128000,
+            "do_sample": true,
+            "eos_token_id": [
+                128001,
+                128009
+            ],
+            "max_length": 8192,
+            "max_new_tokens": 64,
+            "pad_token_id": 128001,
+            "stop_strings": [],
+            "temperature": 0.1,
+            "top_k": 40,
+            "top_p": 0.9,
+            "transformers_version": "4.38.2",
+            "trust_remote_code": [
+                false
+            ]
+        },
+        "conversation_template": {
+            "system_prompt": "",
+            "system_message_template": "",
+            "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
+            "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
+            "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
+            "user_role": "user",
+            "bot_role": "assistant",
+            "system_role": "system",
+            "global_prefix": "<|begin_of_text|>",
+            "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
+            "add_special_tokens": false,
+            "eos_token": "<|eot_id|>"
+        },
+        "load_in_8bit": false,
+        "torch_dtype": "auto",
+        "use_flash_attention_2": true,
+        "device_map": "cuda:0",
+        "use_fast_tokenizer": true,
+        "leading_space": false,
+        "space_token": null,
+        "trust_remote_code": [
+            false
+        ],
+        "max_model_len": 8192
+    },
+    "task_params": {
+        "max_len": 4000,
+        "few_shot_count": 0,
+        "batch_size": 8,
+        "max_sample_per_dataset": 10000000000000,
+        "method": "generate"
+    }
+}

llmtf_eval_k0_bs8/darumeru_USE_total.jsonl ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+    "task_name": "darumeru/USE",
+    "results": {
+        "grade_norm": 0.07941176470588233
+    },
+    "leaderboard_result": 0.07941176470588233
+}

llmtf_eval_k0_bs8/darumeru_cp_para_en.jsonl ADDED Viewed

The diff for this file is too large to render. See raw diff

llmtf_eval_k0_bs8/darumeru_cp_para_en_params.jsonl ADDED Viewed

	@@ -0,0 +1,57 @@

+{
+    "custom_generation_config": null,
+    "model_params": {
+        "model_name_or_path": "NousResearch/Meta-Llama-3-8B-Instruct",
+        "generation_config": {
+            "bos_token_id": 128000,
+            "do_sample": true,
+            "eos_token_id": [
+                128001,
+                128009
+            ],
+            "max_length": 8192,
+            "max_new_tokens": 1024,
+            "pad_token_id": 128001,
+            "stop_strings": [],
+            "temperature": 0.1,
+            "top_k": 40,
+            "top_p": 0.9,
+            "transformers_version": "4.38.2",
+            "trust_remote_code": [
+                false
+            ]
+        },
+        "conversation_template": {
+            "system_prompt": "",
+            "system_message_template": "",
+            "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
+            "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
+            "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
+            "user_role": "user",
+            "bot_role": "assistant",
+            "system_role": "system",
+            "global_prefix": "<|begin_of_text|>",
+            "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
+            "add_special_tokens": false,
+            "eos_token": "<|eot_id|>"
+        },
+        "load_in_8bit": false,
+        "torch_dtype": "auto",
+        "use_flash_attention_2": true,
+        "device_map": "cuda:0",
+        "use_fast_tokenizer": true,
+        "leading_space": false,
+        "space_token": null,
+        "trust_remote_code": [
+            false
+        ],
+        "max_model_len": 8192
+    },
+    "task_params": {
+        "max_len": 4000,
+        "few_shot_count": 0,
+        "batch_size": 8,
+        "max_sample_per_dataset": 10000000000000,
+        "method": "generate"
+    }
+}

llmtf_eval_k0_bs8/darumeru_cp_para_en_total.jsonl ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+    "task_name": "darumeru/cp_para_en",
+    "results": {
+        "symbol_per_token": 4.463140535341514,
+        "len": 0.9941296296409974,
+        "lcs": 0.955732821155511
+    },
+    "leaderboard_result": 0.955732821155511
+}

llmtf_eval_k0_bs8/darumeru_cp_para_ru.jsonl ADDED Viewed

The diff for this file is too large to render. See raw diff

llmtf_eval_k0_bs8/darumeru_cp_para_ru_params.jsonl ADDED Viewed

	@@ -0,0 +1,57 @@

+{
+    "custom_generation_config": null,
+    "model_params": {
+        "model_name_or_path": "NousResearch/Meta-Llama-3-8B-Instruct",
+        "generation_config": {
+            "bos_token_id": 128000,
+            "do_sample": true,
+            "eos_token_id": [
+                128001,
+                128009
+            ],
+            "max_length": 8192,
+            "max_new_tokens": 1024,
+            "pad_token_id": 128001,
+            "stop_strings": [],
+            "temperature": 0.1,
+            "top_k": 40,
+            "top_p": 0.9,
+            "transformers_version": "4.38.2",
+            "trust_remote_code": [
+                false
+            ]
+        },
+        "conversation_template": {
+            "system_prompt": "",
+            "system_message_template": "",
+            "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
+            "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
+            "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
+            "user_role": "user",
+            "bot_role": "assistant",
+            "system_role": "system",
+            "global_prefix": "<|begin_of_text|>",
+            "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
+            "add_special_tokens": false,
+            "eos_token": "<|eot_id|>"
+        },
+        "load_in_8bit": false,
+        "torch_dtype": "auto",
+        "use_flash_attention_2": true,
+        "device_map": "cuda:0",
+        "use_fast_tokenizer": true,
+        "leading_space": false,
+        "space_token": null,
+        "trust_remote_code": [
+            false
+        ],
+        "max_model_len": 8192
+    },
+    "task_params": {
+        "max_len": 4000,
+        "few_shot_count": 0,
+        "batch_size": 8,
+        "max_sample_per_dataset": 10000000000000,
+        "method": "generate"
+    }
+}

llmtf_eval_k0_bs8/darumeru_cp_para_ru_total.jsonl ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+    "task_name": "darumeru/cp_para_ru",
+    "results": {
+        "symbol_per_token": 2.968660662438201,
+        "len": 0.9950114211220992,
+        "lcs": 0.9146147408713498
+    },
+    "leaderboard_result": 0.9146147408713498
+}

llmtf_eval_k0_bs8/darumeru_cp_sent_en.jsonl ADDED Viewed

The diff for this file is too large to render. See raw diff

llmtf_eval_k0_bs8/darumeru_cp_sent_en_params.jsonl ADDED Viewed

	@@ -0,0 +1,57 @@

+{
+    "custom_generation_config": null,
+    "model_params": {
+        "model_name_or_path": "NousResearch/Meta-Llama-3-8B-Instruct",
+        "generation_config": {
+            "bos_token_id": 128000,
+            "do_sample": true,
+            "eos_token_id": [
+                128001,
+                128009
+            ],
+            "max_length": 8192,
+            "max_new_tokens": 128,
+            "pad_token_id": 128001,
+            "stop_strings": [],
+            "temperature": 0.1,
+            "top_k": 40,
+            "top_p": 0.9,
+            "transformers_version": "4.38.2",
+            "trust_remote_code": [
+                false
+            ]
+        },
+        "conversation_template": {
+            "system_prompt": "",
+            "system_message_template": "",
+            "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
+            "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
+            "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
+            "user_role": "user",
+            "bot_role": "assistant",
+            "system_role": "system",
+            "global_prefix": "<|begin_of_text|>",
+            "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
+            "add_special_tokens": false,
+            "eos_token": "<|eot_id|>"
+        },
+        "load_in_8bit": false,
+        "torch_dtype": "auto",
+        "use_flash_attention_2": true,
+        "device_map": "cuda:0",
+        "use_fast_tokenizer": true,
+        "leading_space": false,
+        "space_token": null,
+        "trust_remote_code": [
+            false
+        ],
+        "max_model_len": 8192
+    },
+    "task_params": {
+        "max_len": 4000,
+        "few_shot_count": 0,
+        "batch_size": 8,
+        "max_sample_per_dataset": 10000000000000,
+        "method": "generate"
+    }
+}

llmtf_eval_k0_bs8/darumeru_cp_sent_en_total.jsonl ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+    "task_name": "darumeru/cp_sent_en",
+    "results": {
+        "symbol_per_token": 4.424907714143083,
+        "len": 0.9996416196590585,
+        "lcs": 0.995460815828734
+    },
+    "leaderboard_result": 0.9996416196590585
+}

llmtf_eval_k0_bs8/darumeru_cp_sent_ru.jsonl ADDED Viewed

The diff for this file is too large to render. See raw diff

llmtf_eval_k0_bs8/darumeru_cp_sent_ru_params.jsonl ADDED Viewed

	@@ -0,0 +1,57 @@

+{
+    "custom_generation_config": null,
+    "model_params": {
+        "model_name_or_path": "NousResearch/Meta-Llama-3-8B-Instruct",
+        "generation_config": {
+            "bos_token_id": 128000,
+            "do_sample": true,
+            "eos_token_id": [
+                128001,
+                128009
+            ],
+            "max_length": 8192,
+            "max_new_tokens": 128,
+            "pad_token_id": 128001,
+            "stop_strings": [],
+            "temperature": 0.1,
+            "top_k": 40,
+            "top_p": 0.9,
+            "transformers_version": "4.38.2",
+            "trust_remote_code": [
+                false
+            ]
+        },
+        "conversation_template": {
+            "system_prompt": "",
+            "system_message_template": "",
+            "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
+            "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
+            "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
+            "user_role": "user",
+            "bot_role": "assistant",
+            "system_role": "system",
+            "global_prefix": "<|begin_of_text|>",
+            "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
+            "add_special_tokens": false,
+            "eos_token": "<|eot_id|>"
+        },
+        "load_in_8bit": false,
+        "torch_dtype": "auto",
+        "use_flash_attention_2": true,
+        "device_map": "cuda:0",
+        "use_fast_tokenizer": true,
+        "leading_space": false,
+        "space_token": null,
+        "trust_remote_code": [
+            false
+        ],
+        "max_model_len": 8192
+    },
+    "task_params": {
+        "max_len": 4000,
+        "few_shot_count": 0,
+        "batch_size": 8,
+        "max_sample_per_dataset": 10000000000000,
+        "method": "generate"
+    }
+}

llmtf_eval_k0_bs8/darumeru_cp_sent_ru_total.jsonl ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+    "task_name": "darumeru/cp_sent_ru",
+    "results": {
+        "symbol_per_token": 2.8294160005417113,
+        "len": 0.993227090420785,
+        "lcs": 0.9520454300336516
+    },
+    "leaderboard_result": 0.993227090420785
+}

llmtf_eval_k0_bs8/darumeru_ruMMLU.jsonl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b5ee846335e356e3e1586a9b868629e4c1bbe1e33e5520c743444bbd31278928
+size 32909204

llmtf_eval_k0_bs8/darumeru_ruMMLU_params.jsonl ADDED Viewed

	@@ -0,0 +1,57 @@

+{
+    "custom_generation_config": null,
+    "model_params": {
+        "model_name_or_path": "NousResearch/Meta-Llama-3-8B-Instruct",
+        "generation_config": {
+            "bos_token_id": 128000,
+            "do_sample": true,
+            "eos_token_id": [
+                128001,
+                128009
+            ],
+            "max_length": 8192,
+            "max_new_tokens": 64,
+            "pad_token_id": 128001,
+            "stop_strings": [],
+            "temperature": 0.1,
+            "top_k": 40,
+            "top_p": 0.9,
+            "transformers_version": "4.38.2",
+            "trust_remote_code": [
+                false
+            ]
+        },
+        "conversation_template": {
+            "system_prompt": "",
+            "system_message_template": "",
+            "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
+            "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
+            "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
+            "user_role": "user",
+            "bot_role": "assistant",
+            "system_role": "system",
+            "global_prefix": "<|begin_of_text|>",
+            "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
+            "add_special_tokens": false,
+            "eos_token": "<|eot_id|>"
+        },
+        "load_in_8bit": false,
+        "torch_dtype": "auto",
+        "use_flash_attention_2": true,
+        "device_map": "cuda:0",
+        "use_fast_tokenizer": true,
+        "leading_space": false,
+        "space_token": null,
+        "trust_remote_code": [
+            false
+        ],
+        "max_model_len": 8192
+    },
+    "task_params": {
+        "max_len": 4000,
+        "few_shot_count": 0,
+        "batch_size": 8,
+        "max_sample_per_dataset": 10000000000000,
+        "method": "calculate_tokens_proba"
+    }
+}

llmtf_eval_k0_bs8/darumeru_ruMMLU_total.jsonl ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+    "task_name": "darumeru/ruMMLU",
+    "results": {
+        "acc": 0.5046393295420533
+    },
+    "leaderboard_result": 0.5046393295420533
+}

llmtf_eval_k0_bs8/darumeru_ruOpenBookQA.jsonl ADDED Viewed

The diff for this file is too large to render. See raw diff

llmtf_eval_k0_bs8/darumeru_ruOpenBookQA_params.jsonl ADDED Viewed

	@@ -0,0 +1,57 @@

+{
+    "custom_generation_config": null,
+    "model_params": {
+        "model_name_or_path": "NousResearch/Meta-Llama-3-8B-Instruct",
+        "generation_config": {
+            "bos_token_id": 128000,
+            "do_sample": true,
+            "eos_token_id": [
+                128001,
+                128009
+            ],
+            "max_length": 8192,
+            "max_new_tokens": 64,
+            "pad_token_id": 128001,
+            "stop_strings": [],
+            "temperature": 0.1,
+            "top_k": 40,
+            "top_p": 0.9,
+            "transformers_version": "4.38.2",
+            "trust_remote_code": [
+                false
+            ]
+        },
+        "conversation_template": {
+            "system_prompt": "",
+            "system_message_template": "",
+            "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
+            "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
+            "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
+            "user_role": "user",
+            "bot_role": "assistant",
+            "system_role": "system",
+            "global_prefix": "<|begin_of_text|>",
+            "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
+            "add_special_tokens": false,
+            "eos_token": "<|eot_id|>"
+        },
+        "load_in_8bit": false,
+        "torch_dtype": "auto",
+        "use_flash_attention_2": true,
+        "device_map": "cuda:0",
+        "use_fast_tokenizer": true,
+        "leading_space": false,
+        "space_token": null,
+        "trust_remote_code": [
+            false
+        ],
+        "max_model_len": 8192
+    },
+    "task_params": {
+        "max_len": 4000,
+        "few_shot_count": 0,
+        "batch_size": 8,
+        "max_sample_per_dataset": 10000000000000,
+        "method": "calculate_tokens_proba"
+    }
+}

llmtf_eval_k0_bs8/darumeru_ruOpenBookQA_total.jsonl ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "task_name": "darumeru/ruOpenBookQA",
+    "results": {
+        "acc": 0.6907216494845361,
+        "f1_macro": 0.6911297261861948
+    },
+    "leaderboard_result": 0.6909256878353655
+}

llmtf_eval_k0_bs8/darumeru_ruTiE.jsonl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:476faf4cedfb6f0fd8a8133db9b7e996269ca1a1430740186acbe267adcba897
+size 12832557

llmtf_eval_k0_bs8/darumeru_ruTiE_params.jsonl ADDED Viewed

	@@ -0,0 +1,57 @@

+{
+    "custom_generation_config": null,
+    "model_params": {
+        "model_name_or_path": "NousResearch/Meta-Llama-3-8B-Instruct",
+        "generation_config": {
+            "bos_token_id": 128000,
+            "do_sample": true,
+            "eos_token_id": [
+                128001,
+                128009
+            ],
+            "max_length": 8192,
+            "max_new_tokens": 64,
+            "pad_token_id": 128001,
+            "stop_strings": [],
+            "temperature": 0.1,
+            "top_k": 40,
+            "top_p": 0.9,
+            "transformers_version": "4.38.2",
+            "trust_remote_code": [
+                false
+            ]
+        },
+        "conversation_template": {
+            "system_prompt": "",
+            "system_message_template": "",
+            "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
+            "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
+            "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
+            "user_role": "user",
+            "bot_role": "assistant",
+            "system_role": "system",
+            "global_prefix": "<|begin_of_text|>",
+            "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
+            "add_special_tokens": false,
+            "eos_token": "<|eot_id|>"
+        },
+        "load_in_8bit": false,
+        "torch_dtype": "auto",
+        "use_flash_attention_2": true,
+        "device_map": "cuda:0",
+        "use_fast_tokenizer": true,
+        "leading_space": false,
+        "space_token": null,
+        "trust_remote_code": [
+            false
+        ],
+        "max_model_len": 8192
+    },
+    "task_params": {
+        "max_len": 4000,
+        "few_shot_count": 0,
+        "batch_size": 8,
+        "max_sample_per_dataset": 10000000000000,
+        "method": "calculate_tokens_proba"
+    }
+}

llmtf_eval_k0_bs8/darumeru_ruTiE_total.jsonl ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+    "task_name": "darumeru/ruTiE",
+    "results": {
+        "acc": 0.3511627906976744
+    },
+    "leaderboard_result": 0.3511627906976744
+}

llmtf_eval_k0_bs8/darumeru_ruWorldTree.jsonl ADDED Viewed

The diff for this file is too large to render. See raw diff

llmtf_eval_k0_bs8/darumeru_ruWorldTree_params.jsonl ADDED Viewed

	@@ -0,0 +1,57 @@

+{
+    "custom_generation_config": null,
+    "model_params": {
+        "model_name_or_path": "NousResearch/Meta-Llama-3-8B-Instruct",
+        "generation_config": {
+            "bos_token_id": 128000,
+            "do_sample": true,
+            "eos_token_id": [
+                128001,
+                128009
+            ],
+            "max_length": 8192,
+            "max_new_tokens": 64,
+            "pad_token_id": 128001,
+            "stop_strings": [],
+            "temperature": 0.1,
+            "top_k": 40,
+            "top_p": 0.9,
+            "transformers_version": "4.38.2",
+            "trust_remote_code": [
+                false
+            ]
+        },
+        "conversation_template": {
+            "system_prompt": "",
+            "system_message_template": "",
+            "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
+            "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
+            "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
+            "user_role": "user",
+            "bot_role": "assistant",
+            "system_role": "system",
+            "global_prefix": "<|begin_of_text|>",
+            "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
+            "add_special_tokens": false,
+            "eos_token": "<|eot_id|>"
+        },
+        "load_in_8bit": false,
+        "torch_dtype": "auto",
+        "use_flash_attention_2": true,
+        "device_map": "cuda:0",
+        "use_fast_tokenizer": true,
+        "leading_space": false,
+        "space_token": null,
+        "trust_remote_code": [
+            false
+        ],
+        "max_model_len": 8192
+    },
+    "task_params": {
+        "max_len": 4000,
+        "few_shot_count": 0,
+        "batch_size": 8,
+        "max_sample_per_dataset": 10000000000000,
+        "method": "calculate_tokens_proba"
+    }
+}

llmtf_eval_k0_bs8/darumeru_ruWorldTree_total.jsonl ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "task_name": "darumeru/ruWorldTree",
+    "results": {
+        "acc": 0.8476190476190476,
+        "f1_macro": 0.8445201637796824
+    },
+    "leaderboard_result": 0.8460696056993651
+}

llmtf_eval_k0_bs8/evaluation_log.txt ADDED Viewed

	@@ -0,0 +1,273 @@

+INFO: 2024-07-13 14:29:01,210: llmtf.base.evaluator: Starting eval on ['darumeru/multiq', 'darumeru/parus', 'darumeru/rcb', 'darumeru/ruopenbookqa', 'darumeru/rutie', 'darumeru/ruworldtree', 'darumeru/rwsd', 'darumeru/use', 'russiannlp/rucola_custom']
+INFO: 2024-07-13 14:29:01,211: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [128001, 128009]
+INFO: 2024-07-13 14:29:01,211: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
+INFO: 2024-07-13 14:29:01,212: llmtf.base.evaluator: Starting eval on ['darumeru/rummlu']
+INFO: 2024-07-13 14:29:01,212: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [128001, 128009]
+INFO: 2024-07-13 14:29:01,212: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
+INFO: 2024-07-13 14:29:01,379: llmtf.base.evaluator: Starting eval on ['nlpcoreteam/rummlu']
+INFO: 2024-07-13 14:29:01,379: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [128001, 128009]
+INFO: 2024-07-13 14:29:01,380: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
+INFO: 2024-07-13 14:29:01,969: llmtf.base.evaluator: Starting eval on ['nlpcoreteam/enmmlu']
+INFO: 2024-07-13 14:29:01,970: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [128001, 128009]
+INFO: 2024-07-13 14:29:01,970: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
+INFO: 2024-07-13 14:29:04,129: llmtf.base.evaluator: Starting eval on ['daru/treewayabstractive']
+INFO: 2024-07-13 14:29:04,130: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [128001, 128009]
+INFO: 2024-07-13 14:29:04,130: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
+INFO: 2024-07-13 14:29:05,366: llmtf.base.darumeru/MultiQ: Loading Dataset: 4.15s
+INFO: 2024-07-13 14:29:05,855: llmtf.base.evaluator: Starting eval on ['daru/treewayextractive']
+INFO: 2024-07-13 14:29:05,855: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [128001, 128009]
+INFO: 2024-07-13 14:29:05,855: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
+INFO: 2024-07-13 14:29:07,422: llmtf.base.evaluator: Starting eval on ['darumeru/cp_sent_ru', 'darumeru/cp_sent_en', 'darumeru/cp_para_ru', 'darumeru/cp_para_en']
+INFO: 2024-07-13 14:29:07,422: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [128001, 128009]
+INFO: 2024-07-13 14:29:07,422: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
+INFO: 2024-07-13 14:29:08,720: llmtf.base.daru/treewayabstractive: Loading Dataset: 4.59s
+INFO: 2024-07-13 14:29:09,722: llmtf.base.darumeru/ruMMLU: Loading Dataset: 8.51s
+INFO: 2024-07-13 14:29:09,808: llmtf.base.darumeru/cp_sent_ru: Loading Dataset: 2.39s
+INFO: 2024-07-13 14:29:18,031: llmtf.base.daru/treewayextractive: Loading Dataset: 12.17s
+INFO: 2024-07-13 14:31:16,783: llmtf.base.nlpcoreteam/enMMLU: Loading Dataset: 134.81s
+INFO: 2024-07-13 14:31:18,578: llmtf.base.nlpcoreteam/ruMMLU: Loading Dataset: 137.20s
+INFO: 2024-07-13 14:32:42,801: llmtf.base.darumeru/cp_sent_ru: Processing Dataset: 212.99s
+INFO: 2024-07-13 14:32:42,818: llmtf.base.darumeru/cp_sent_ru: Results for darumeru/cp_sent_ru:
+INFO: 2024-07-13 14:32:42,822: llmtf.base.darumeru/cp_sent_ru: {'symbol_per_token': 2.8294160005417113, 'len': 0.993227090420785, 'lcs': 0.9520454300336516}
+INFO: 2024-07-13 14:32:42,824: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [128001, 128009]
+INFO: 2024-07-13 14:32:42,824: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
+INFO: 2024-07-13 14:32:45,506: llmtf.base.darumeru/cp_sent_en: Loading Dataset: 2.68s
+INFO: 2024-07-13 14:35:04,924: llmtf.base.darumeru/ruMMLU: Processing Dataset: 355.20s
+INFO: 2024-07-13 14:35:04,929: llmtf.base.darumeru/ruMMLU: Results for darumeru/ruMMLU:
+INFO: 2024-07-13 14:35:04,937: llmtf.base.darumeru/ruMMLU: {'acc': 0.5046393295420533}
+INFO: 2024-07-13 14:35:04,978: llmtf.base.evaluator: Ended eval
+INFO: 2024-07-13 14:35:04,984: llmtf.base.evaluator:
+mean	darumeru/cp_sent_ru	darumeru/ruMMLU
+0.749	0.993	0.505
+INFO: 2024-07-13 14:35:16,448: llmtf.base.darumeru/MultiQ: Processing Dataset: 371.08s
+INFO: 2024-07-13 14:35:16,452: llmtf.base.darumeru/MultiQ: Results for darumeru/MultiQ:
+INFO: 2024-07-13 14:35:16,456: llmtf.base.darumeru/MultiQ: {'f1': 0.3370324579707962, 'em': 0.21032504780114722}
+INFO: 2024-07-13 14:35:16,460: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [128001, 128009]
+INFO: 2024-07-13 14:35:16,461: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
+INFO: 2024-07-13 14:35:19,048: llmtf.base.darumeru/cp_sent_en: Processing Dataset: 153.54s
+INFO: 2024-07-13 14:35:19,050: llmtf.base.darumeru/cp_sent_en: Results for darumeru/cp_sent_en:
+INFO: 2024-07-13 14:35:19,083: llmtf.base.darumeru/cp_sent_en: {'symbol_per_token': 4.424907714143083, 'len': 0.9996416196590585, 'lcs': 0.995460815828734}
+INFO: 2024-07-13 14:35:19,084: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [128001, 128009]
+INFO: 2024-07-13 14:35:19,085: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
+INFO: 2024-07-13 14:35:19,188: llmtf.base.darumeru/PARus: Loading Dataset: 2.73s
+INFO: 2024-07-13 14:35:20,825: llmtf.base.darumeru/cp_para_ru: Loading Dataset: 1.74s
+INFO: 2024-07-13 14:35:22,119: llmtf.base.darumeru/PARus: Processing Dataset: 2.93s
+INFO: 2024-07-13 14:35:22,121: llmtf.base.darumeru/PARus: Results for darumeru/PARus:
+INFO: 2024-07-13 14:35:22,164: llmtf.base.darumeru/PARus: {'acc': 0.64}
+INFO: 2024-07-13 14:35:22,165: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [128001, 128009]
+INFO: 2024-07-13 14:35:22,165: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
+INFO: 2024-07-13 14:35:24,196: llmtf.base.darumeru/RCB: Loading Dataset: 2.03s
+INFO: 2024-07-13 14:35:29,614: llmtf.base.darumeru/RCB: Processing Dataset: 5.41s
+INFO: 2024-07-13 14:35:29,616: llmtf.base.darumeru/RCB: Results for darumeru/RCB:
+INFO: 2024-07-13 14:35:29,622: llmtf.base.darumeru/RCB: {'acc': 0.4863636363636364, 'f1_macro': 0.4094575374734713}
+INFO: 2024-07-13 14:35:29,624: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [128001, 128009]
+INFO: 2024-07-13 14:35:29,624: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
+INFO: 2024-07-13 14:35:32,722: llmtf.base.darumeru/ruOpenBookQA: Loading Dataset: 3.10s
+INFO: 2024-07-13 14:35:40,173: llmtf.base.nlpcoreteam/enMMLU: Processing Dataset: 263.39s
+INFO: 2024-07-13 14:35:40,174: llmtf.base.nlpcoreteam/enMMLU: Results for nlpcoreteam/enMMLU:
+INFO: 2024-07-13 14:35:40,219: llmtf.base.nlpcoreteam/enMMLU:                                        metric
+subject
+abstract_algebra                     0.340000
+anatomy                              0.718519
+astronomy                            0.730263
+business_ethics                      0.720000
+clinical_knowledge                   0.735849
+college_biology                      0.791667
+college_chemistry                    0.460000
+college_computer_science             0.600000
+college_mathematics                  0.310000
+college_medicine                     0.647399
+college_physics                      0.480392
+computer_security                    0.760000
+conceptual_physics                   0.570213
+econometrics                         0.517544
+electrical_engineering               0.606897
+elementary_mathematics               0.468254
+formal_logic                         0.523810
+global_facts                         0.410000
+high_school_biology                  0.809677
+high_school_chemistry                0.541872
+high_school_computer_science         0.730000
+high_school_european_history         0.733333
+high_school_geography                0.823232
+high_school_government_and_politics  0.865285
+high_school_macroeconomics           0.630769
+high_school_mathematics              0.370370
+high_school_microeconomics           0.752101
+high_school_physics                  0.410596
+high_school_psychology               0.855046
+high_school_statistics               0.532407
+high_school_us_history               0.828431
+high_school_world_history            0.839662
+human_aging                          0.721973
+human_sexuality                      0.778626
+international_law                    0.760331
+jurisprudence                        0.796296
+logical_fallacies                    0.779141
+machine_learning                     0.446429
+management                           0.796117
+marketing                            0.893162
+medical_genetics                     0.780000
+miscellaneous                        0.840358
+moral_disputes                       0.696532
+moral_scenarios                      0.293855
+nutrition                            0.764706
+philosophy                           0.720257
+prehistory                           0.706790
+professional_accounting              0.542553
+professional_law                     0.481747
+professional_medicine                0.731618
+professional_psychology              0.674837
+public_relations                     0.663636
+security_studies                     0.714286
+sociology                            0.825871
+us_foreign_policy                    0.890000
+virology                             0.487952
+world_religions                      0.824561
+INFO: 2024-07-13 14:35:40,227: llmtf.base.nlpcoreteam/enMMLU:                                    metric
+subject
+STEM                             0.553280
+humanities                       0.691134
+other (business, health, misc.)  0.699300
+social sciences                  0.749269
+INFO: 2024-07-13 14:35:40,234: llmtf.base.nlpcoreteam/enMMLU: {'acc': 0.6732459770237078}
+INFO: 2024-07-13 14:35:40,267: llmtf.base.evaluator: Ended eval
+INFO: 2024-07-13 14:35:40,273: llmtf.base.evaluator:
+mean	darumeru/MultiQ	darumeru/PARus	darumeru/RCB	darumeru/cp_sent_en	darumeru/cp_sent_ru	darumeru/ruMMLU	nlpcoreteam/enMMLU
+0.647	0.274	0.640	0.448	1.000	0.993	0.505	0.673
+INFO: 2024-07-13 14:35:54,003: llmtf.base.daru/treewayextractive: Processing Dataset: 395.96s
+INFO: 2024-07-13 14:35:54,004: llmtf.base.daru/treewayextractive: Results for daru/treewayextractive:
+INFO: 2024-07-13 14:35:54,481: llmtf.base.daru/treewayextractive: {'r-prec': 0.39738621933621937}
+INFO: 2024-07-13 14:35:54,526: llmtf.base.evaluator: Ended eval
+INFO: 2024-07-13 14:35:54,533: llmtf.base.evaluator:
+mean	daru/treewayextractive	darumeru/MultiQ	darumeru/PARus	darumeru/RCB	darumeru/cp_sent_en	darumeru/cp_sent_ru	darumeru/ruMMLU	nlpcoreteam/enMMLU
+0.616	0.397	0.274	0.640	0.448	1.000	0.993	0.505	0.673
+INFO: 2024-07-13 14:36:08,587: llmtf.base.darumeru/ruOpenBookQA: Processing Dataset: 35.86s
+INFO: 2024-07-13 14:36:08,588: llmtf.base.darumeru/ruOpenBookQA: Results for darumeru/ruOpenBookQA:
+INFO: 2024-07-13 14:36:08,601: llmtf.base.darumeru/ruOpenBookQA: {'acc': 0.6907216494845361, 'f1_macro': 0.6911297261861948}
+INFO: 2024-07-13 14:36:08,608: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [128001, 128009]
+INFO: 2024-07-13 14:36:08,608: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
+INFO: 2024-07-13 14:36:16,304: llmtf.base.darumeru/ruTiE: Loading Dataset: 7.69s
+INFO: 2024-07-13 14:37:20,843: llmtf.base.nlpcoreteam/ruMMLU: Processing Dataset: 362.26s
+INFO: 2024-07-13 14:37:20,846: llmtf.base.nlpcoreteam/ruMMLU: Results for nlpcoreteam/ruMMLU:
+INFO: 2024-07-13 14:37:20,893: llmtf.base.nlpcoreteam/ruMMLU:                                        metric
+subject
+abstract_algebra                     0.300000
+anatomy                              0.459259
+astronomy                            0.651316
+business_ethics                      0.600000
+clinical_knowledge                   0.566038
+college_biology                      0.541667
+college_chemistry                    0.400000
+college_computer_science             0.460000
+college_mathematics                  0.320000
+college_medicine                     0.502890
+college_physics                      0.352941
+computer_security                    0.570000
+conceptual_physics                   0.485106
+econometrics                         0.350877
+electrical_engineering               0.551724
+elementary_mathematics               0.410053
+formal_logic                         0.380952
+global_facts                         0.350000
+high_school_biology                  0.638710
+high_school_chemistry                0.423645
+high_school_computer_science         0.610000
+high_school_european_history         0.715152
+high_school_geography                0.661616
+high_school_government_and_politics  0.595855
+high_school_macroeconomics           0.510256
+high_school_mathematics              0.337037
+high_school_microeconomics           0.495798
+high_school_physics                  0.344371
+high_school_psychology               0.669725
+high_school_statistics               0.467593
+high_school_us_history               0.651961
+high_school_world_history            0.713080
+human_aging                          0.551570
+human_sexuality                      0.656489
+international_law                    0.710744
+jurisprudence                        0.592593
+logical_fallacies                    0.527607
+machine_learning                     0.357143
+management                           0.669903
+marketing                            0.705128
+medical_genetics                     0.560000
+miscellaneous                        0.646232
+moral_disputes                       0.560694
+moral_scenarios                      0.249162
+nutrition                            0.598039
+philosophy                           0.565916
+prehistory                           0.558642
+professional_accounting              0.386525
+professional_law                     0.359192
+professional_medicine                0.518382
+professional_psychology              0.485294
+public_relations                     0.572727
+security_studies                     0.620408
+sociology                            0.701493
+us_foreign_policy                    0.750000
+virology                             0.415663
+world_religions                      0.695906
+INFO: 2024-07-13 14:37:20,902: llmtf.base.nlpcoreteam/ruMMLU:                                    metric
+subject
+STEM                             0.456739
+humanities                       0.560123
+other (business, health, misc.)  0.537831
+social sciences                  0.589212
+INFO: 2024-07-13 14:37:20,909: llmtf.base.nlpcoreteam/ruMMLU: {'acc': 0.5359761297506582}
+INFO: 2024-07-13 14:37:20,942: llmtf.base.evaluator: Ended eval
+INFO: 2024-07-13 14:37:21,003: llmtf.base.evaluator:
+mean	daru/treewayextractive	darumeru/MultiQ	darumeru/PARus	darumeru/RCB	darumeru/cp_sent_en	darumeru/cp_sent_ru	darumeru/ruMMLU	darumeru/ruOpenBookQA	nlpcoreteam/enMMLU	nlpcoreteam/ruMMLU
+0.616	0.397	0.274	0.640	0.448	1.000	0.993	0.505	0.691	0.673	0.536
+INFO: 2024-07-13 14:38:13,255: llmtf.base.daru/treewayabstractive: Processing Dataset: 544.53s
+INFO: 2024-07-13 14:38:13,256: llmtf.base.daru/treewayabstractive: Results for daru/treewayabstractive:
+INFO: 2024-07-13 14:38:13,260: llmtf.base.daru/treewayabstractive: {'rouge1': 0.35574041658645894, 'rouge2': 0.1282333481459036}
+INFO: 2024-07-13 14:38:13,262: llmtf.base.evaluator: Ended eval
+INFO: 2024-07-13 14:38:13,270: llmtf.base.evaluator:
+mean	daru/treewayabstractive	daru/treewayextractive	darumeru/MultiQ	darumeru/PARus	darumeru/RCB	darumeru/cp_sent_en	darumeru/cp_sent_ru	darumeru/ruMMLU	darumeru/ruOpenBookQA	nlpcoreteam/enMMLU	nlpcoreteam/ruMMLU
+0.582	0.242	0.397	0.274	0.640	0.448	1.000	0.993	0.505	0.691	0.673	0.536
+INFO: 2024-07-13 14:40:26,872: llmtf.base.darumeru/cp_para_ru: Processing Dataset: 306.04s
+INFO: 2024-07-13 14:40:26,875: llmtf.base.darumeru/cp_para_ru: Results for darumeru/cp_para_ru:
+INFO: 2024-07-13 14:40:26,895: llmtf.base.darumeru/cp_para_ru: {'symbol_per_token': 2.968660662438201, 'len': 0.9950114211220992, 'lcs': 0.9146147408713498}
+INFO: 2024-07-13 14:40:26,896: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [128001, 128009]
+INFO: 2024-07-13 14:40:26,896: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
+INFO: 2024-07-13 14:40:28,747: llmtf.base.darumeru/cp_para_en: Loading Dataset: 1.85s
+INFO: 2024-07-13 14:40:42,169: llmtf.base.darumeru/ruTiE: Processing Dataset: 265.86s
+INFO: 2024-07-13 14:40:42,170: llmtf.base.darumeru/ruTiE: Results for darumeru/ruTiE:
+INFO: 2024-07-13 14:40:42,198: llmtf.base.darumeru/ruTiE: {'acc': 0.3511627906976744}
+INFO: 2024-07-13 14:40:42,201: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [128001, 128009]
+INFO: 2024-07-13 14:40:42,202: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
+INFO: 2024-07-13 14:40:44,145: llmtf.base.darumeru/ruWorldTree: Loading Dataset: 1.94s
+INFO: 2024-07-13 14:40:46,061: llmtf.base.darumeru/ruWorldTree: Processing Dataset: 1.92s
+INFO: 2024-07-13 14:40:46,063: llmtf.base.darumeru/ruWorldTree: Results for darumeru/ruWorldTree:
+INFO: 2024-07-13 14:40:46,081: llmtf.base.darumeru/ruWorldTree: {'acc': 0.8476190476190476, 'f1_macro': 0.8445201637796824}
+INFO: 2024-07-13 14:40:46,082: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [128001, 128009]
+INFO: 2024-07-13 14:40:46,082: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
+INFO: 2024-07-13 14:40:48,101: llmtf.base.darumeru/RWSD: Loading Dataset: 2.02s
+INFO: 2024-07-13 14:40:53,690: llmtf.base.darumeru/RWSD: Processing Dataset: 5.59s
+INFO: 2024-07-13 14:40:53,692: llmtf.base.darumeru/RWSD: Results for darumeru/RWSD:
+INFO: 2024-07-13 14:40:53,696: llmtf.base.darumeru/RWSD: {'acc': 0.5490196078431373}
+INFO: 2024-07-13 14:40:53,697: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [128001, 128009]
+INFO: 2024-07-13 14:40:53,697: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
+INFO: 2024-07-13 14:40:56,414: llmtf.base.darumeru/USE: Loading Dataset: 2.72s
+INFO: 2024-07-13 14:44:03,848: llmtf.base.darumeru/cp_para_en: Processing Dataset: 215.10s
+INFO: 2024-07-13 14:44:03,851: llmtf.base.darumeru/cp_para_en: Results for darumeru/cp_para_en:
+INFO: 2024-07-13 14:44:03,854: llmtf.base.darumeru/cp_para_en: {'symbol_per_token': 4.463140535341514, 'len': 0.9941296296409974, 'lcs': 0.955732821155511}
+INFO: 2024-07-13 14:44:03,855: llmtf.base.evaluator: Ended eval
+INFO: 2024-07-13 14:44:03,884: llmtf.base.evaluator:
+mean	daru/treewayabstractive	daru/treewayextractive	darumeru/MultiQ	darumeru/PARus	darumeru/RCB	darumeru/RWSD	darumeru/cp_para_en	darumeru/cp_para_ru	darumeru/cp_sent_en	darumeru/cp_sent_ru	darumeru/ruMMLU	darumeru/ruOpenBookQA	darumeru/ruTiE	darumeru/ruWorldTree	nlpcoreteam/enMMLU	nlpcoreteam/ruMMLU
+0.626	0.242	0.397	0.274	0.640	0.448	0.549	0.956	0.915	1.000	0.993	0.505	0.691	0.351	0.846	0.673	0.536
+INFO: 2024-07-13 14:45:47,572: llmtf.base.darumeru/USE: Processing Dataset: 291.16s
+INFO: 2024-07-13 14:45:47,575: llmtf.base.darumeru/USE: Results for darumeru/USE:
+INFO: 2024-07-13 14:45:47,607: llmtf.base.darumeru/USE: {'grade_norm': 0.07941176470588233}
+INFO: 2024-07-13 14:45:47,610: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [128001, 128009]
+INFO: 2024-07-13 14:45:47,611: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
+INFO: 2024-07-13 14:45:52,951: llmtf.base.russiannlp/rucola_custom: Loading Dataset: 5.34s
+INFO: 2024-07-13 14:46:34,251: llmtf.base.russiannlp/rucola_custom: Processing Dataset: 41.30s
+INFO: 2024-07-13 14:46:34,255: llmtf.base.russiannlp/rucola_custom: Results for russiannlp/rucola_custom:
+INFO: 2024-07-13 14:46:34,267: llmtf.base.russiannlp/rucola_custom: {'acc': 0.7061356297093649, 'mcc': 0.2603067425656207}
+INFO: 2024-07-13 14:46:34,271: llmtf.base.evaluator: Ended eval
+INFO: 2024-07-13 14:46:34,283: llmtf.base.evaluator:
+mean	daru/treewayabstractive	daru/treewayextractive	darumeru/MultiQ	darumeru/PARus	darumeru/RCB	darumeru/RWSD	darumeru/USE	darumeru/cp_para_en	darumeru/cp_para_ru	darumeru/cp_sent_en	darumeru/cp_sent_ru	darumeru/ruMMLU	darumeru/ruOpenBookQA	darumeru/ruTiE	darumeru/ruWorldTree	nlpcoreteam/enMMLU	nlpcoreteam/ruMMLU	russiannlp/rucola_custom
+0.588	0.242	0.397	0.274	0.640	0.448	0.549	0.079	0.956	0.915	1.000	0.993	0.505	0.691	0.351	0.846	0.673	0.536	0.483

llmtf_eval_k0_bs8/evaluation_results.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ mean daru/treewayabstractive daru/treewayextractive darumeru/MultiQ darumeru/PARus darumeru/RCB darumeru/RWSD darumeru/USE darumeru/cp_para_en darumeru/cp_para_ru darumeru/cp_sent_en darumeru/cp_sent_ru darumeru/ruMMLU darumeru/ruOpenBookQA darumeru/ruTiE darumeru/ruWorldTree nlpcoreteam/enMMLU nlpcoreteam/ruMMLU russiannlp/rucola_custom
2	+ 0.588 0.242 0.397 0.274 0.640 0.448 0.549 0.079 0.956 0.915 1.000 0.993 0.505 0.691 0.351 0.846 0.673 0.536 0.483

llmtf_eval_k0_bs8/nlpcoreteam_enMMLU.jsonl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c874bfff402dc1e0898d2710e2741d51c679a7c9587622d1f591a21ee5f12abb
+size 38085341

llmtf_eval_k0_bs8/nlpcoreteam_enMMLU_params.jsonl ADDED Viewed

	@@ -0,0 +1,57 @@

+{
+    "custom_generation_config": null,
+    "model_params": {
+        "model_name_or_path": "NousResearch/Meta-Llama-3-8B-Instruct",
+        "generation_config": {
+            "bos_token_id": 128000,
+            "do_sample": true,
+            "eos_token_id": [
+                128001,
+                128009
+            ],
+            "max_length": 8192,
+            "max_new_tokens": 64,
+            "pad_token_id": 128001,
+            "stop_strings": [],
+            "temperature": 0.1,
+            "top_k": 40,
+            "top_p": 0.9,
+            "transformers_version": "4.38.2",
+            "trust_remote_code": [
+                false
+            ]
+        },
+        "conversation_template": {
+            "system_prompt": "",
+            "system_message_template": "",
+            "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
+            "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
+            "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
+            "user_role": "user",
+            "bot_role": "assistant",
+            "system_role": "system",
+            "global_prefix": "<|begin_of_text|>",
+            "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
+            "add_special_tokens": false,
+            "eos_token": "<|eot_id|>"
+        },
+        "load_in_8bit": false,
+        "torch_dtype": "auto",
+        "use_flash_attention_2": true,
+        "device_map": "cuda:0",
+        "use_fast_tokenizer": true,
+        "leading_space": false,
+        "space_token": null,
+        "trust_remote_code": [
+            false
+        ],
+        "max_model_len": 8192
+    },
+    "task_params": {
+        "max_len": 4000,
+        "few_shot_count": 0,
+        "batch_size": 8,
+        "max_sample_per_dataset": 10000000000000,
+        "method": "calculate_tokens_proba"
+    }
+}