RefalMachine committed on
Commit
4fec6b2
·
verified ·
1 Parent(s): 0ce93cf

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. .gitattributes +13 -0
  2. llmtf_eval_k0_bs8/daru_treewayabstractive.jsonl +0 -0
  3. llmtf_eval_k0_bs8/daru_treewayabstractive_params.jsonl +57 -0
  4. llmtf_eval_k0_bs8/daru_treewayabstractive_total.jsonl +8 -0
  5. llmtf_eval_k0_bs8/daru_treewayextractive.jsonl +3 -0
  6. llmtf_eval_k0_bs8/daru_treewayextractive_params.jsonl +57 -0
  7. llmtf_eval_k0_bs8/daru_treewayextractive_total.jsonl +7 -0
  8. llmtf_eval_k0_bs8/darumeru_MultiQ.jsonl +0 -0
  9. llmtf_eval_k0_bs8/darumeru_MultiQ_params.jsonl +57 -0
  10. llmtf_eval_k0_bs8/darumeru_MultiQ_total.jsonl +8 -0
  11. llmtf_eval_k0_bs8/darumeru_PARus.jsonl +0 -0
  12. llmtf_eval_k0_bs8/darumeru_PARus_params.jsonl +57 -0
  13. llmtf_eval_k0_bs8/darumeru_PARus_total.jsonl +7 -0
  14. llmtf_eval_k0_bs8/darumeru_RCB.jsonl +0 -0
  15. llmtf_eval_k0_bs8/darumeru_RCB_params.jsonl +57 -0
  16. llmtf_eval_k0_bs8/darumeru_RCB_total.jsonl +8 -0
  17. llmtf_eval_k0_bs8/darumeru_RWSD.jsonl +0 -0
  18. llmtf_eval_k0_bs8/darumeru_RWSD_params.jsonl +57 -0
  19. llmtf_eval_k0_bs8/darumeru_RWSD_total.jsonl +7 -0
  20. llmtf_eval_k0_bs8/darumeru_USE.jsonl +0 -0
  21. llmtf_eval_k0_bs8/darumeru_USE_params.jsonl +57 -0
  22. llmtf_eval_k0_bs8/darumeru_USE_total.jsonl +7 -0
  23. llmtf_eval_k0_bs8/darumeru_cp_para_en.jsonl +0 -0
  24. llmtf_eval_k0_bs8/darumeru_cp_para_en_params.jsonl +57 -0
  25. llmtf_eval_k0_bs8/darumeru_cp_para_en_total.jsonl +9 -0
  26. llmtf_eval_k0_bs8/darumeru_cp_para_ru.jsonl +0 -0
  27. llmtf_eval_k0_bs8/darumeru_cp_para_ru_params.jsonl +57 -0
  28. llmtf_eval_k0_bs8/darumeru_cp_para_ru_total.jsonl +9 -0
  29. llmtf_eval_k0_bs8/darumeru_cp_sent_en.jsonl +0 -0
  30. llmtf_eval_k0_bs8/darumeru_cp_sent_en_params.jsonl +57 -0
  31. llmtf_eval_k0_bs8/darumeru_cp_sent_en_total.jsonl +9 -0
  32. llmtf_eval_k0_bs8/darumeru_cp_sent_ru.jsonl +0 -0
  33. llmtf_eval_k0_bs8/darumeru_cp_sent_ru_params.jsonl +57 -0
  34. llmtf_eval_k0_bs8/darumeru_cp_sent_ru_total.jsonl +9 -0
  35. llmtf_eval_k0_bs8/darumeru_ruMMLU.jsonl +3 -0
  36. llmtf_eval_k0_bs8/darumeru_ruMMLU_params.jsonl +57 -0
  37. llmtf_eval_k0_bs8/darumeru_ruMMLU_total.jsonl +7 -0
  38. llmtf_eval_k0_bs8/darumeru_ruOpenBookQA.jsonl +0 -0
  39. llmtf_eval_k0_bs8/darumeru_ruOpenBookQA_params.jsonl +57 -0
  40. llmtf_eval_k0_bs8/darumeru_ruOpenBookQA_total.jsonl +8 -0
  41. llmtf_eval_k0_bs8/darumeru_ruTiE.jsonl +3 -0
  42. llmtf_eval_k0_bs8/darumeru_ruTiE_params.jsonl +57 -0
  43. llmtf_eval_k0_bs8/darumeru_ruTiE_total.jsonl +7 -0
  44. llmtf_eval_k0_bs8/darumeru_ruWorldTree.jsonl +0 -0
  45. llmtf_eval_k0_bs8/darumeru_ruWorldTree_params.jsonl +57 -0
  46. llmtf_eval_k0_bs8/darumeru_ruWorldTree_total.jsonl +8 -0
  47. llmtf_eval_k0_bs8/evaluation_log.txt +273 -0
  48. llmtf_eval_k0_bs8/evaluation_results.txt +2 -0
  49. llmtf_eval_k0_bs8/nlpcoreteam_enMMLU.jsonl +3 -0
  50. llmtf_eval_k0_bs8/nlpcoreteam_enMMLU_params.jsonl +57 -0
.gitattributes CHANGED
@@ -71,3 +71,16 @@ llmtf_eval_k5_bs4/darumeru_ruTiE.jsonl filter=lfs diff=lfs merge=lfs -text
71
  llmtf_eval_k5_bs4/nlpcoreteam_enMMLU.jsonl filter=lfs diff=lfs merge=lfs -text
72
  llmtf_eval_k5_bs4/nlpcoreteam_ruMMLU.jsonl filter=lfs diff=lfs merge=lfs -text
73
  llmtf_eval_k5_bs4/russiannlp_rucola_custom.jsonl filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  llmtf_eval_k5_bs4/nlpcoreteam_enMMLU.jsonl filter=lfs diff=lfs merge=lfs -text
72
  llmtf_eval_k5_bs4/nlpcoreteam_ruMMLU.jsonl filter=lfs diff=lfs merge=lfs -text
73
  llmtf_eval_k5_bs4/russiannlp_rucola_custom.jsonl filter=lfs diff=lfs merge=lfs -text
74
+ llmtf_eval_k0_bs8/daru_treewayextractive.jsonl filter=lfs diff=lfs merge=lfs -text
75
+ llmtf_eval_k0_bs8/darumeru_ruMMLU.jsonl filter=lfs diff=lfs merge=lfs -text
76
+ llmtf_eval_k0_bs8/darumeru_ruTiE.jsonl filter=lfs diff=lfs merge=lfs -text
77
+ llmtf_eval_k0_bs8/nlpcoreteam_enMMLU.jsonl filter=lfs diff=lfs merge=lfs -text
78
+ llmtf_eval_k0_bs8/nlpcoreteam_ruMMLU.jsonl filter=lfs diff=lfs merge=lfs -text
79
+ llmtf_eval_k5_bs8/daru_treewayabstractive.jsonl filter=lfs diff=lfs merge=lfs -text
80
+ llmtf_eval_k5_bs8/daru_treewayextractive.jsonl filter=lfs diff=lfs merge=lfs -text
81
+ llmtf_eval_k5_bs8/darumeru_MultiQ.jsonl filter=lfs diff=lfs merge=lfs -text
82
+ llmtf_eval_k5_bs8/darumeru_ruMMLU.jsonl filter=lfs diff=lfs merge=lfs -text
83
+ llmtf_eval_k5_bs8/darumeru_ruOpenBookQA.jsonl filter=lfs diff=lfs merge=lfs -text
84
+ llmtf_eval_k5_bs8/darumeru_ruTiE.jsonl filter=lfs diff=lfs merge=lfs -text
85
+ llmtf_eval_k5_bs8/nlpcoreteam_enMMLU.jsonl filter=lfs diff=lfs merge=lfs -text
86
+ llmtf_eval_k5_bs8/nlpcoreteam_ruMMLU.jsonl filter=lfs diff=lfs merge=lfs -text
llmtf_eval_k0_bs8/daru_treewayabstractive.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
llmtf_eval_k0_bs8/daru_treewayabstractive_params.jsonl ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "NousResearch/Meta-Llama-3-8B-Instruct",
5
+ "generation_config": {
6
+ "bos_token_id": 128000,
7
+ "do_sample": true,
8
+ "eos_token_id": [
9
+ 128001,
10
+ 128009
11
+ ],
12
+ "max_length": 8192,
13
+ "max_new_tokens": 512,
14
+ "pad_token_id": 128001,
15
+ "stop_strings": [],
16
+ "temperature": 0.1,
17
+ "top_k": 40,
18
+ "top_p": 0.9,
19
+ "transformers_version": "4.38.2",
20
+ "trust_remote_code": [
21
+ false
22
+ ]
23
+ },
24
+ "conversation_template": {
25
+ "system_prompt": "",
26
+ "system_message_template": "",
27
+ "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
28
+ "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
29
+ "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
30
+ "user_role": "user",
31
+ "bot_role": "assistant",
32
+ "system_role": "system",
33
+ "global_prefix": "<|begin_of_text|>",
34
+ "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
35
+ "add_special_tokens": false,
36
+ "eos_token": "<|eot_id|>"
37
+ },
38
+ "load_in_8bit": false,
39
+ "torch_dtype": "auto",
40
+ "use_flash_attention_2": true,
41
+ "device_map": "cuda:0",
42
+ "use_fast_tokenizer": true,
43
+ "leading_space": false,
44
+ "space_token": null,
45
+ "trust_remote_code": [
46
+ false
47
+ ],
48
+ "max_model_len": 8192
49
+ },
50
+ "task_params": {
51
+ "max_len": 4000,
52
+ "few_shot_count": 0,
53
+ "batch_size": 8,
54
+ "max_sample_per_dataset": 500,
55
+ "method": "generate"
56
+ }
57
+ }
llmtf_eval_k0_bs8/daru_treewayabstractive_total.jsonl ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "daru/treewayabstractive",
3
+ "results": {
4
+ "rouge1": 0.35574041658645894,
5
+ "rouge2": 0.1282333481459036
6
+ },
7
+ "leaderboard_result": 0.24198688236618127
8
+ }
llmtf_eval_k0_bs8/daru_treewayextractive.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c7d2c5ec7e7ea394ebccddb9480c55a54d9150353711b34b7cb29e76b2c1236
3
+ size 259990342
llmtf_eval_k0_bs8/daru_treewayextractive_params.jsonl ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "NousResearch/Meta-Llama-3-8B-Instruct",
5
+ "generation_config": {
6
+ "bos_token_id": 128000,
7
+ "do_sample": true,
8
+ "eos_token_id": [
9
+ 128001,
10
+ 128009
11
+ ],
12
+ "max_length": 8192,
13
+ "max_new_tokens": 1,
14
+ "pad_token_id": 128001,
15
+ "stop_strings": [],
16
+ "temperature": 0.1,
17
+ "top_k": 40,
18
+ "top_p": 0.9,
19
+ "transformers_version": "4.38.2",
20
+ "trust_remote_code": [
21
+ false
22
+ ]
23
+ },
24
+ "conversation_template": {
25
+ "system_prompt": "",
26
+ "system_message_template": "",
27
+ "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
28
+ "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
29
+ "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
30
+ "user_role": "user",
31
+ "bot_role": "assistant",
32
+ "system_role": "system",
33
+ "global_prefix": "<|begin_of_text|>",
34
+ "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
35
+ "add_special_tokens": false,
36
+ "eos_token": "<|eot_id|>"
37
+ },
38
+ "load_in_8bit": false,
39
+ "torch_dtype": "auto",
40
+ "use_flash_attention_2": true,
41
+ "device_map": "cuda:0",
42
+ "use_fast_tokenizer": true,
43
+ "leading_space": false,
44
+ "space_token": null,
45
+ "trust_remote_code": [
46
+ false
47
+ ],
48
+ "max_model_len": 8192
49
+ },
50
+ "task_params": {
51
+ "max_len": 4000,
52
+ "few_shot_count": 0,
53
+ "batch_size": 8,
54
+ "max_sample_per_dataset": 500,
55
+ "method": "calculate_logsoftmax"
56
+ }
57
+ }
llmtf_eval_k0_bs8/daru_treewayextractive_total.jsonl ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "daru/treewayextractive",
3
+ "results": {
4
+ "r-prec": 0.39738621933621937
5
+ },
6
+ "leaderboard_result": 0.39738621933621937
7
+ }
llmtf_eval_k0_bs8/darumeru_MultiQ.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
llmtf_eval_k0_bs8/darumeru_MultiQ_params.jsonl ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "NousResearch/Meta-Llama-3-8B-Instruct",
5
+ "generation_config": {
6
+ "bos_token_id": 128000,
7
+ "do_sample": true,
8
+ "eos_token_id": [
9
+ 128001,
10
+ 128009
11
+ ],
12
+ "max_length": 8192,
13
+ "max_new_tokens": 64,
14
+ "pad_token_id": 128001,
15
+ "stop_strings": [],
16
+ "temperature": 0.1,
17
+ "top_k": 40,
18
+ "top_p": 0.9,
19
+ "transformers_version": "4.38.2",
20
+ "trust_remote_code": [
21
+ false
22
+ ]
23
+ },
24
+ "conversation_template": {
25
+ "system_prompt": "",
26
+ "system_message_template": "",
27
+ "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
28
+ "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
29
+ "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
30
+ "user_role": "user",
31
+ "bot_role": "assistant",
32
+ "system_role": "system",
33
+ "global_prefix": "<|begin_of_text|>",
34
+ "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
35
+ "add_special_tokens": false,
36
+ "eos_token": "<|eot_id|>"
37
+ },
38
+ "load_in_8bit": false,
39
+ "torch_dtype": "auto",
40
+ "use_flash_attention_2": true,
41
+ "device_map": "cuda:0",
42
+ "use_fast_tokenizer": true,
43
+ "leading_space": false,
44
+ "space_token": null,
45
+ "trust_remote_code": [
46
+ false
47
+ ],
48
+ "max_model_len": 8192
49
+ },
50
+ "task_params": {
51
+ "max_len": 4000,
52
+ "few_shot_count": 0,
53
+ "batch_size": 8,
54
+ "max_sample_per_dataset": 10000000000000,
55
+ "method": "generate"
56
+ }
57
+ }
llmtf_eval_k0_bs8/darumeru_MultiQ_total.jsonl ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "darumeru/MultiQ",
3
+ "results": {
4
+ "f1": 0.3370324579707962,
5
+ "em": 0.21032504780114722
6
+ },
7
+ "leaderboard_result": 0.2736787528859717
8
+ }
llmtf_eval_k0_bs8/darumeru_PARus.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
llmtf_eval_k0_bs8/darumeru_PARus_params.jsonl ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "NousResearch/Meta-Llama-3-8B-Instruct",
5
+ "generation_config": {
6
+ "bos_token_id": 128000,
7
+ "do_sample": true,
8
+ "eos_token_id": [
9
+ 128001,
10
+ 128009
11
+ ],
12
+ "max_length": 8192,
13
+ "max_new_tokens": 64,
14
+ "pad_token_id": 128001,
15
+ "stop_strings": [],
16
+ "temperature": 0.1,
17
+ "top_k": 40,
18
+ "top_p": 0.9,
19
+ "transformers_version": "4.38.2",
20
+ "trust_remote_code": [
21
+ false
22
+ ]
23
+ },
24
+ "conversation_template": {
25
+ "system_prompt": "",
26
+ "system_message_template": "",
27
+ "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
28
+ "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
29
+ "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
30
+ "user_role": "user",
31
+ "bot_role": "assistant",
32
+ "system_role": "system",
33
+ "global_prefix": "<|begin_of_text|>",
34
+ "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
35
+ "add_special_tokens": false,
36
+ "eos_token": "<|eot_id|>"
37
+ },
38
+ "load_in_8bit": false,
39
+ "torch_dtype": "auto",
40
+ "use_flash_attention_2": true,
41
+ "device_map": "cuda:0",
42
+ "use_fast_tokenizer": true,
43
+ "leading_space": false,
44
+ "space_token": null,
45
+ "trust_remote_code": [
46
+ false
47
+ ],
48
+ "max_model_len": 8192
49
+ },
50
+ "task_params": {
51
+ "max_len": 4000,
52
+ "few_shot_count": 0,
53
+ "batch_size": 8,
54
+ "max_sample_per_dataset": 10000000000000,
55
+ "method": "calculate_tokens_proba"
56
+ }
57
+ }
llmtf_eval_k0_bs8/darumeru_PARus_total.jsonl ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "darumeru/PARus",
3
+ "results": {
4
+ "acc": 0.64
5
+ },
6
+ "leaderboard_result": 0.64
7
+ }
llmtf_eval_k0_bs8/darumeru_RCB.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
llmtf_eval_k0_bs8/darumeru_RCB_params.jsonl ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "NousResearch/Meta-Llama-3-8B-Instruct",
5
+ "generation_config": {
6
+ "bos_token_id": 128000,
7
+ "do_sample": true,
8
+ "eos_token_id": [
9
+ 128001,
10
+ 128009
11
+ ],
12
+ "max_length": 8192,
13
+ "max_new_tokens": 64,
14
+ "pad_token_id": 128001,
15
+ "stop_strings": [],
16
+ "temperature": 0.1,
17
+ "top_k": 40,
18
+ "top_p": 0.9,
19
+ "transformers_version": "4.38.2",
20
+ "trust_remote_code": [
21
+ false
22
+ ]
23
+ },
24
+ "conversation_template": {
25
+ "system_prompt": "",
26
+ "system_message_template": "",
27
+ "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
28
+ "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
29
+ "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
30
+ "user_role": "user",
31
+ "bot_role": "assistant",
32
+ "system_role": "system",
33
+ "global_prefix": "<|begin_of_text|>",
34
+ "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
35
+ "add_special_tokens": false,
36
+ "eos_token": "<|eot_id|>"
37
+ },
38
+ "load_in_8bit": false,
39
+ "torch_dtype": "auto",
40
+ "use_flash_attention_2": true,
41
+ "device_map": "cuda:0",
42
+ "use_fast_tokenizer": true,
43
+ "leading_space": false,
44
+ "space_token": null,
45
+ "trust_remote_code": [
46
+ false
47
+ ],
48
+ "max_model_len": 8192
49
+ },
50
+ "task_params": {
51
+ "max_len": 4000,
52
+ "few_shot_count": 0,
53
+ "batch_size": 8,
54
+ "max_sample_per_dataset": 10000000000000,
55
+ "method": "calculate_tokens_proba"
56
+ }
57
+ }
llmtf_eval_k0_bs8/darumeru_RCB_total.jsonl ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "darumeru/RCB",
3
+ "results": {
4
+ "acc": 0.4863636363636364,
5
+ "f1_macro": 0.4094575374734713
6
+ },
7
+ "leaderboard_result": 0.44791058691855384
8
+ }
llmtf_eval_k0_bs8/darumeru_RWSD.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
llmtf_eval_k0_bs8/darumeru_RWSD_params.jsonl ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "NousResearch/Meta-Llama-3-8B-Instruct",
5
+ "generation_config": {
6
+ "bos_token_id": 128000,
7
+ "do_sample": true,
8
+ "eos_token_id": [
9
+ 128001,
10
+ 128009
11
+ ],
12
+ "max_length": 8192,
13
+ "max_new_tokens": 64,
14
+ "pad_token_id": 128001,
15
+ "stop_strings": [],
16
+ "temperature": 0.1,
17
+ "top_k": 40,
18
+ "top_p": 0.9,
19
+ "transformers_version": "4.38.2",
20
+ "trust_remote_code": [
21
+ false
22
+ ]
23
+ },
24
+ "conversation_template": {
25
+ "system_prompt": "",
26
+ "system_message_template": "",
27
+ "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
28
+ "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
29
+ "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
30
+ "user_role": "user",
31
+ "bot_role": "assistant",
32
+ "system_role": "system",
33
+ "global_prefix": "<|begin_of_text|>",
34
+ "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
35
+ "add_special_tokens": false,
36
+ "eos_token": "<|eot_id|>"
37
+ },
38
+ "load_in_8bit": false,
39
+ "torch_dtype": "auto",
40
+ "use_flash_attention_2": true,
41
+ "device_map": "cuda:0",
42
+ "use_fast_tokenizer": true,
43
+ "leading_space": false,
44
+ "space_token": null,
45
+ "trust_remote_code": [
46
+ false
47
+ ],
48
+ "max_model_len": 8192
49
+ },
50
+ "task_params": {
51
+ "max_len": 4000,
52
+ "few_shot_count": 0,
53
+ "batch_size": 8,
54
+ "max_sample_per_dataset": 10000000000000,
55
+ "method": "calculate_tokens_proba"
56
+ }
57
+ }
llmtf_eval_k0_bs8/darumeru_RWSD_total.jsonl ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "darumeru/RWSD",
3
+ "results": {
4
+ "acc": 0.5490196078431373
5
+ },
6
+ "leaderboard_result": 0.5490196078431373
7
+ }
llmtf_eval_k0_bs8/darumeru_USE.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
llmtf_eval_k0_bs8/darumeru_USE_params.jsonl ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "NousResearch/Meta-Llama-3-8B-Instruct",
5
+ "generation_config": {
6
+ "bos_token_id": 128000,
7
+ "do_sample": true,
8
+ "eos_token_id": [
9
+ 128001,
10
+ 128009
11
+ ],
12
+ "max_length": 8192,
13
+ "max_new_tokens": 64,
14
+ "pad_token_id": 128001,
15
+ "stop_strings": [],
16
+ "temperature": 0.1,
17
+ "top_k": 40,
18
+ "top_p": 0.9,
19
+ "transformers_version": "4.38.2",
20
+ "trust_remote_code": [
21
+ false
22
+ ]
23
+ },
24
+ "conversation_template": {
25
+ "system_prompt": "",
26
+ "system_message_template": "",
27
+ "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
28
+ "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
29
+ "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
30
+ "user_role": "user",
31
+ "bot_role": "assistant",
32
+ "system_role": "system",
33
+ "global_prefix": "<|begin_of_text|>",
34
+ "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
35
+ "add_special_tokens": false,
36
+ "eos_token": "<|eot_id|>"
37
+ },
38
+ "load_in_8bit": false,
39
+ "torch_dtype": "auto",
40
+ "use_flash_attention_2": true,
41
+ "device_map": "cuda:0",
42
+ "use_fast_tokenizer": true,
43
+ "leading_space": false,
44
+ "space_token": null,
45
+ "trust_remote_code": [
46
+ false
47
+ ],
48
+ "max_model_len": 8192
49
+ },
50
+ "task_params": {
51
+ "max_len": 4000,
52
+ "few_shot_count": 0,
53
+ "batch_size": 8,
54
+ "max_sample_per_dataset": 10000000000000,
55
+ "method": "generate"
56
+ }
57
+ }
llmtf_eval_k0_bs8/darumeru_USE_total.jsonl ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "darumeru/USE",
3
+ "results": {
4
+ "grade_norm": 0.07941176470588233
5
+ },
6
+ "leaderboard_result": 0.07941176470588233
7
+ }
llmtf_eval_k0_bs8/darumeru_cp_para_en.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
llmtf_eval_k0_bs8/darumeru_cp_para_en_params.jsonl ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "NousResearch/Meta-Llama-3-8B-Instruct",
5
+ "generation_config": {
6
+ "bos_token_id": 128000,
7
+ "do_sample": true,
8
+ "eos_token_id": [
9
+ 128001,
10
+ 128009
11
+ ],
12
+ "max_length": 8192,
13
+ "max_new_tokens": 1024,
14
+ "pad_token_id": 128001,
15
+ "stop_strings": [],
16
+ "temperature": 0.1,
17
+ "top_k": 40,
18
+ "top_p": 0.9,
19
+ "transformers_version": "4.38.2",
20
+ "trust_remote_code": [
21
+ false
22
+ ]
23
+ },
24
+ "conversation_template": {
25
+ "system_prompt": "",
26
+ "system_message_template": "",
27
+ "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
28
+ "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
29
+ "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
30
+ "user_role": "user",
31
+ "bot_role": "assistant",
32
+ "system_role": "system",
33
+ "global_prefix": "<|begin_of_text|>",
34
+ "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
35
+ "add_special_tokens": false,
36
+ "eos_token": "<|eot_id|>"
37
+ },
38
+ "load_in_8bit": false,
39
+ "torch_dtype": "auto",
40
+ "use_flash_attention_2": true,
41
+ "device_map": "cuda:0",
42
+ "use_fast_tokenizer": true,
43
+ "leading_space": false,
44
+ "space_token": null,
45
+ "trust_remote_code": [
46
+ false
47
+ ],
48
+ "max_model_len": 8192
49
+ },
50
+ "task_params": {
51
+ "max_len": 4000,
52
+ "few_shot_count": 0,
53
+ "batch_size": 8,
54
+ "max_sample_per_dataset": 10000000000000,
55
+ "method": "generate"
56
+ }
57
+ }
llmtf_eval_k0_bs8/darumeru_cp_para_en_total.jsonl ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "darumeru/cp_para_en",
3
+ "results": {
4
+ "symbol_per_token": 4.463140535341514,
5
+ "len": 0.9941296296409974,
6
+ "lcs": 0.955732821155511
7
+ },
8
+ "leaderboard_result": 0.955732821155511
9
+ }
llmtf_eval_k0_bs8/darumeru_cp_para_ru.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
llmtf_eval_k0_bs8/darumeru_cp_para_ru_params.jsonl ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "NousResearch/Meta-Llama-3-8B-Instruct",
5
+ "generation_config": {
6
+ "bos_token_id": 128000,
7
+ "do_sample": true,
8
+ "eos_token_id": [
9
+ 128001,
10
+ 128009
11
+ ],
12
+ "max_length": 8192,
13
+ "max_new_tokens": 1024,
14
+ "pad_token_id": 128001,
15
+ "stop_strings": [],
16
+ "temperature": 0.1,
17
+ "top_k": 40,
18
+ "top_p": 0.9,
19
+ "transformers_version": "4.38.2",
20
+ "trust_remote_code": [
21
+ false
22
+ ]
23
+ },
24
+ "conversation_template": {
25
+ "system_prompt": "",
26
+ "system_message_template": "",
27
+ "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
28
+ "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
29
+ "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
30
+ "user_role": "user",
31
+ "bot_role": "assistant",
32
+ "system_role": "system",
33
+ "global_prefix": "<|begin_of_text|>",
34
+ "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
35
+ "add_special_tokens": false,
36
+ "eos_token": "<|eot_id|>"
37
+ },
38
+ "load_in_8bit": false,
39
+ "torch_dtype": "auto",
40
+ "use_flash_attention_2": true,
41
+ "device_map": "cuda:0",
42
+ "use_fast_tokenizer": true,
43
+ "leading_space": false,
44
+ "space_token": null,
45
+ "trust_remote_code": [
46
+ false
47
+ ],
48
+ "max_model_len": 8192
49
+ },
50
+ "task_params": {
51
+ "max_len": 4000,
52
+ "few_shot_count": 0,
53
+ "batch_size": 8,
54
+ "max_sample_per_dataset": 10000000000000,
55
+ "method": "generate"
56
+ }
57
+ }
llmtf_eval_k0_bs8/darumeru_cp_para_ru_total.jsonl ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "darumeru/cp_para_ru",
3
+ "results": {
4
+ "symbol_per_token": 2.968660662438201,
5
+ "len": 0.9950114211220992,
6
+ "lcs": 0.9146147408713498
7
+ },
8
+ "leaderboard_result": 0.9146147408713498
9
+ }
llmtf_eval_k0_bs8/darumeru_cp_sent_en.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
llmtf_eval_k0_bs8/darumeru_cp_sent_en_params.jsonl ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "NousResearch/Meta-Llama-3-8B-Instruct",
5
+ "generation_config": {
6
+ "bos_token_id": 128000,
7
+ "do_sample": true,
8
+ "eos_token_id": [
9
+ 128001,
10
+ 128009
11
+ ],
12
+ "max_length": 8192,
13
+ "max_new_tokens": 128,
14
+ "pad_token_id": 128001,
15
+ "stop_strings": [],
16
+ "temperature": 0.1,
17
+ "top_k": 40,
18
+ "top_p": 0.9,
19
+ "transformers_version": "4.38.2",
20
+ "trust_remote_code": [
21
+ false
22
+ ]
23
+ },
24
+ "conversation_template": {
25
+ "system_prompt": "",
26
+ "system_message_template": "",
27
+ "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
28
+ "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
29
+ "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
30
+ "user_role": "user",
31
+ "bot_role": "assistant",
32
+ "system_role": "system",
33
+ "global_prefix": "<|begin_of_text|>",
34
+ "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
35
+ "add_special_tokens": false,
36
+ "eos_token": "<|eot_id|>"
37
+ },
38
+ "load_in_8bit": false,
39
+ "torch_dtype": "auto",
40
+ "use_flash_attention_2": true,
41
+ "device_map": "cuda:0",
42
+ "use_fast_tokenizer": true,
43
+ "leading_space": false,
44
+ "space_token": null,
45
+ "trust_remote_code": [
46
+ false
47
+ ],
48
+ "max_model_len": 8192
49
+ },
50
+ "task_params": {
51
+ "max_len": 4000,
52
+ "few_shot_count": 0,
53
+ "batch_size": 8,
54
+ "max_sample_per_dataset": 10000000000000,
55
+ "method": "generate"
56
+ }
57
+ }
llmtf_eval_k0_bs8/darumeru_cp_sent_en_total.jsonl ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "darumeru/cp_sent_en",
3
+ "results": {
4
+ "symbol_per_token": 4.424907714143083,
5
+ "len": 0.9996416196590585,
6
+ "lcs": 0.995460815828734
7
+ },
8
+ "leaderboard_result": 0.9996416196590585
9
+ }
llmtf_eval_k0_bs8/darumeru_cp_sent_ru.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
llmtf_eval_k0_bs8/darumeru_cp_sent_ru_params.jsonl ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "NousResearch/Meta-Llama-3-8B-Instruct",
5
+ "generation_config": {
6
+ "bos_token_id": 128000,
7
+ "do_sample": true,
8
+ "eos_token_id": [
9
+ 128001,
10
+ 128009
11
+ ],
12
+ "max_length": 8192,
13
+ "max_new_tokens": 128,
14
+ "pad_token_id": 128001,
15
+ "stop_strings": [],
16
+ "temperature": 0.1,
17
+ "top_k": 40,
18
+ "top_p": 0.9,
19
+ "transformers_version": "4.38.2",
20
+ "trust_remote_code": [
21
+ false
22
+ ]
23
+ },
24
+ "conversation_template": {
25
+ "system_prompt": "",
26
+ "system_message_template": "",
27
+ "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
28
+ "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
29
+ "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
30
+ "user_role": "user",
31
+ "bot_role": "assistant",
32
+ "system_role": "system",
33
+ "global_prefix": "<|begin_of_text|>",
34
+ "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
35
+ "add_special_tokens": false,
36
+ "eos_token": "<|eot_id|>"
37
+ },
38
+ "load_in_8bit": false,
39
+ "torch_dtype": "auto",
40
+ "use_flash_attention_2": true,
41
+ "device_map": "cuda:0",
42
+ "use_fast_tokenizer": true,
43
+ "leading_space": false,
44
+ "space_token": null,
45
+ "trust_remote_code": [
46
+ false
47
+ ],
48
+ "max_model_len": 8192
49
+ },
50
+ "task_params": {
51
+ "max_len": 4000,
52
+ "few_shot_count": 0,
53
+ "batch_size": 8,
54
+ "max_sample_per_dataset": 10000000000000,
55
+ "method": "generate"
56
+ }
57
+ }
llmtf_eval_k0_bs8/darumeru_cp_sent_ru_total.jsonl ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "darumeru/cp_sent_ru",
3
+ "results": {
4
+ "symbol_per_token": 2.8294160005417113,
5
+ "len": 0.993227090420785,
6
+ "lcs": 0.9520454300336516
7
+ },
8
+ "leaderboard_result": 0.993227090420785
9
+ }
llmtf_eval_k0_bs8/darumeru_ruMMLU.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5ee846335e356e3e1586a9b868629e4c1bbe1e33e5520c743444bbd31278928
3
+ size 32909204
llmtf_eval_k0_bs8/darumeru_ruMMLU_params.jsonl ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "NousResearch/Meta-Llama-3-8B-Instruct",
5
+ "generation_config": {
6
+ "bos_token_id": 128000,
7
+ "do_sample": true,
8
+ "eos_token_id": [
9
+ 128001,
10
+ 128009
11
+ ],
12
+ "max_length": 8192,
13
+ "max_new_tokens": 64,
14
+ "pad_token_id": 128001,
15
+ "stop_strings": [],
16
+ "temperature": 0.1,
17
+ "top_k": 40,
18
+ "top_p": 0.9,
19
+ "transformers_version": "4.38.2",
20
+ "trust_remote_code": [
21
+ false
22
+ ]
23
+ },
24
+ "conversation_template": {
25
+ "system_prompt": "",
26
+ "system_message_template": "",
27
+ "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
28
+ "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
29
+ "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
30
+ "user_role": "user",
31
+ "bot_role": "assistant",
32
+ "system_role": "system",
33
+ "global_prefix": "<|begin_of_text|>",
34
+ "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
35
+ "add_special_tokens": false,
36
+ "eos_token": "<|eot_id|>"
37
+ },
38
+ "load_in_8bit": false,
39
+ "torch_dtype": "auto",
40
+ "use_flash_attention_2": true,
41
+ "device_map": "cuda:0",
42
+ "use_fast_tokenizer": true,
43
+ "leading_space": false,
44
+ "space_token": null,
45
+ "trust_remote_code": [
46
+ false
47
+ ],
48
+ "max_model_len": 8192
49
+ },
50
+ "task_params": {
51
+ "max_len": 4000,
52
+ "few_shot_count": 0,
53
+ "batch_size": 8,
54
+ "max_sample_per_dataset": 10000000000000,
55
+ "method": "calculate_tokens_proba"
56
+ }
57
+ }
llmtf_eval_k0_bs8/darumeru_ruMMLU_total.jsonl ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "darumeru/ruMMLU",
3
+ "results": {
4
+ "acc": 0.5046393295420533
5
+ },
6
+ "leaderboard_result": 0.5046393295420533
7
+ }
llmtf_eval_k0_bs8/darumeru_ruOpenBookQA.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
llmtf_eval_k0_bs8/darumeru_ruOpenBookQA_params.jsonl ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "NousResearch/Meta-Llama-3-8B-Instruct",
5
+ "generation_config": {
6
+ "bos_token_id": 128000,
7
+ "do_sample": true,
8
+ "eos_token_id": [
9
+ 128001,
10
+ 128009
11
+ ],
12
+ "max_length": 8192,
13
+ "max_new_tokens": 64,
14
+ "pad_token_id": 128001,
15
+ "stop_strings": [],
16
+ "temperature": 0.1,
17
+ "top_k": 40,
18
+ "top_p": 0.9,
19
+ "transformers_version": "4.38.2",
20
+ "trust_remote_code": [
21
+ false
22
+ ]
23
+ },
24
+ "conversation_template": {
25
+ "system_prompt": "",
26
+ "system_message_template": "",
27
+ "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
28
+ "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
29
+ "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
30
+ "user_role": "user",
31
+ "bot_role": "assistant",
32
+ "system_role": "system",
33
+ "global_prefix": "<|begin_of_text|>",
34
+ "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
35
+ "add_special_tokens": false,
36
+ "eos_token": "<|eot_id|>"
37
+ },
38
+ "load_in_8bit": false,
39
+ "torch_dtype": "auto",
40
+ "use_flash_attention_2": true,
41
+ "device_map": "cuda:0",
42
+ "use_fast_tokenizer": true,
43
+ "leading_space": false,
44
+ "space_token": null,
45
+ "trust_remote_code": [
46
+ false
47
+ ],
48
+ "max_model_len": 8192
49
+ },
50
+ "task_params": {
51
+ "max_len": 4000,
52
+ "few_shot_count": 0,
53
+ "batch_size": 8,
54
+ "max_sample_per_dataset": 10000000000000,
55
+ "method": "calculate_tokens_proba"
56
+ }
57
+ }
llmtf_eval_k0_bs8/darumeru_ruOpenBookQA_total.jsonl ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "darumeru/ruOpenBookQA",
3
+ "results": {
4
+ "acc": 0.6907216494845361,
5
+ "f1_macro": 0.6911297261861948
6
+ },
7
+ "leaderboard_result": 0.6909256878353655
8
+ }
llmtf_eval_k0_bs8/darumeru_ruTiE.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:476faf4cedfb6f0fd8a8133db9b7e996269ca1a1430740186acbe267adcba897
3
+ size 12832557
llmtf_eval_k0_bs8/darumeru_ruTiE_params.jsonl ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "NousResearch/Meta-Llama-3-8B-Instruct",
5
+ "generation_config": {
6
+ "bos_token_id": 128000,
7
+ "do_sample": true,
8
+ "eos_token_id": [
9
+ 128001,
10
+ 128009
11
+ ],
12
+ "max_length": 8192,
13
+ "max_new_tokens": 64,
14
+ "pad_token_id": 128001,
15
+ "stop_strings": [],
16
+ "temperature": 0.1,
17
+ "top_k": 40,
18
+ "top_p": 0.9,
19
+ "transformers_version": "4.38.2",
20
+ "trust_remote_code": [
21
+ false
22
+ ]
23
+ },
24
+ "conversation_template": {
25
+ "system_prompt": "",
26
+ "system_message_template": "",
27
+ "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
28
+ "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
29
+ "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
30
+ "user_role": "user",
31
+ "bot_role": "assistant",
32
+ "system_role": "system",
33
+ "global_prefix": "<|begin_of_text|>",
34
+ "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
35
+ "add_special_tokens": false,
36
+ "eos_token": "<|eot_id|>"
37
+ },
38
+ "load_in_8bit": false,
39
+ "torch_dtype": "auto",
40
+ "use_flash_attention_2": true,
41
+ "device_map": "cuda:0",
42
+ "use_fast_tokenizer": true,
43
+ "leading_space": false,
44
+ "space_token": null,
45
+ "trust_remote_code": [
46
+ false
47
+ ],
48
+ "max_model_len": 8192
49
+ },
50
+ "task_params": {
51
+ "max_len": 4000,
52
+ "few_shot_count": 0,
53
+ "batch_size": 8,
54
+ "max_sample_per_dataset": 10000000000000,
55
+ "method": "calculate_tokens_proba"
56
+ }
57
+ }
llmtf_eval_k0_bs8/darumeru_ruTiE_total.jsonl ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "darumeru/ruTiE",
3
+ "results": {
4
+ "acc": 0.3511627906976744
5
+ },
6
+ "leaderboard_result": 0.3511627906976744
7
+ }
llmtf_eval_k0_bs8/darumeru_ruWorldTree.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
llmtf_eval_k0_bs8/darumeru_ruWorldTree_params.jsonl ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "NousResearch/Meta-Llama-3-8B-Instruct",
5
+ "generation_config": {
6
+ "bos_token_id": 128000,
7
+ "do_sample": true,
8
+ "eos_token_id": [
9
+ 128001,
10
+ 128009
11
+ ],
12
+ "max_length": 8192,
13
+ "max_new_tokens": 64,
14
+ "pad_token_id": 128001,
15
+ "stop_strings": [],
16
+ "temperature": 0.1,
17
+ "top_k": 40,
18
+ "top_p": 0.9,
19
+ "transformers_version": "4.38.2",
20
+ "trust_remote_code": [
21
+ false
22
+ ]
23
+ },
24
+ "conversation_template": {
25
+ "system_prompt": "",
26
+ "system_message_template": "",
27
+ "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
28
+ "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
29
+ "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
30
+ "user_role": "user",
31
+ "bot_role": "assistant",
32
+ "system_role": "system",
33
+ "global_prefix": "<|begin_of_text|>",
34
+ "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
35
+ "add_special_tokens": false,
36
+ "eos_token": "<|eot_id|>"
37
+ },
38
+ "load_in_8bit": false,
39
+ "torch_dtype": "auto",
40
+ "use_flash_attention_2": true,
41
+ "device_map": "cuda:0",
42
+ "use_fast_tokenizer": true,
43
+ "leading_space": false,
44
+ "space_token": null,
45
+ "trust_remote_code": [
46
+ false
47
+ ],
48
+ "max_model_len": 8192
49
+ },
50
+ "task_params": {
51
+ "max_len": 4000,
52
+ "few_shot_count": 0,
53
+ "batch_size": 8,
54
+ "max_sample_per_dataset": 10000000000000,
55
+ "method": "calculate_tokens_proba"
56
+ }
57
+ }
llmtf_eval_k0_bs8/darumeru_ruWorldTree_total.jsonl ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "darumeru/ruWorldTree",
3
+ "results": {
4
+ "acc": 0.8476190476190476,
5
+ "f1_macro": 0.8445201637796824
6
+ },
7
+ "leaderboard_result": 0.8460696056993651
8
+ }
llmtf_eval_k0_bs8/evaluation_log.txt ADDED
@@ -0,0 +1,273 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ INFO: 2024-07-13 14:29:01,210: llmtf.base.evaluator: Starting eval on ['darumeru/multiq', 'darumeru/parus', 'darumeru/rcb', 'darumeru/ruopenbookqa', 'darumeru/rutie', 'darumeru/ruworldtree', 'darumeru/rwsd', 'darumeru/use', 'russiannlp/rucola_custom']
2
+ INFO: 2024-07-13 14:29:01,211: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [128001, 128009]
3
+ INFO: 2024-07-13 14:29:01,211: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
4
+ INFO: 2024-07-13 14:29:01,212: llmtf.base.evaluator: Starting eval on ['darumeru/rummlu']
5
+ INFO: 2024-07-13 14:29:01,212: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [128001, 128009]
6
+ INFO: 2024-07-13 14:29:01,212: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
7
+ INFO: 2024-07-13 14:29:01,379: llmtf.base.evaluator: Starting eval on ['nlpcoreteam/rummlu']
8
+ INFO: 2024-07-13 14:29:01,379: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [128001, 128009]
9
+ INFO: 2024-07-13 14:29:01,380: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
10
+ INFO: 2024-07-13 14:29:01,969: llmtf.base.evaluator: Starting eval on ['nlpcoreteam/enmmlu']
11
+ INFO: 2024-07-13 14:29:01,970: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [128001, 128009]
12
+ INFO: 2024-07-13 14:29:01,970: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
13
+ INFO: 2024-07-13 14:29:04,129: llmtf.base.evaluator: Starting eval on ['daru/treewayabstractive']
14
+ INFO: 2024-07-13 14:29:04,130: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [128001, 128009]
15
+ INFO: 2024-07-13 14:29:04,130: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
16
+ INFO: 2024-07-13 14:29:05,366: llmtf.base.darumeru/MultiQ: Loading Dataset: 4.15s
17
+ INFO: 2024-07-13 14:29:05,855: llmtf.base.evaluator: Starting eval on ['daru/treewayextractive']
18
+ INFO: 2024-07-13 14:29:05,855: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [128001, 128009]
19
+ INFO: 2024-07-13 14:29:05,855: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
20
+ INFO: 2024-07-13 14:29:07,422: llmtf.base.evaluator: Starting eval on ['darumeru/cp_sent_ru', 'darumeru/cp_sent_en', 'darumeru/cp_para_ru', 'darumeru/cp_para_en']
21
+ INFO: 2024-07-13 14:29:07,422: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [128001, 128009]
22
+ INFO: 2024-07-13 14:29:07,422: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
23
+ INFO: 2024-07-13 14:29:08,720: llmtf.base.daru/treewayabstractive: Loading Dataset: 4.59s
24
+ INFO: 2024-07-13 14:29:09,722: llmtf.base.darumeru/ruMMLU: Loading Dataset: 8.51s
25
+ INFO: 2024-07-13 14:29:09,808: llmtf.base.darumeru/cp_sent_ru: Loading Dataset: 2.39s
26
+ INFO: 2024-07-13 14:29:18,031: llmtf.base.daru/treewayextractive: Loading Dataset: 12.17s
27
+ INFO: 2024-07-13 14:31:16,783: llmtf.base.nlpcoreteam/enMMLU: Loading Dataset: 134.81s
28
+ INFO: 2024-07-13 14:31:18,578: llmtf.base.nlpcoreteam/ruMMLU: Loading Dataset: 137.20s
29
+ INFO: 2024-07-13 14:32:42,801: llmtf.base.darumeru/cp_sent_ru: Processing Dataset: 212.99s
30
+ INFO: 2024-07-13 14:32:42,818: llmtf.base.darumeru/cp_sent_ru: Results for darumeru/cp_sent_ru:
31
+ INFO: 2024-07-13 14:32:42,822: llmtf.base.darumeru/cp_sent_ru: {'symbol_per_token': 2.8294160005417113, 'len': 0.993227090420785, 'lcs': 0.9520454300336516}
32
+ INFO: 2024-07-13 14:32:42,824: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [128001, 128009]
33
+ INFO: 2024-07-13 14:32:42,824: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
34
+ INFO: 2024-07-13 14:32:45,506: llmtf.base.darumeru/cp_sent_en: Loading Dataset: 2.68s
35
+ INFO: 2024-07-13 14:35:04,924: llmtf.base.darumeru/ruMMLU: Processing Dataset: 355.20s
36
+ INFO: 2024-07-13 14:35:04,929: llmtf.base.darumeru/ruMMLU: Results for darumeru/ruMMLU:
37
+ INFO: 2024-07-13 14:35:04,937: llmtf.base.darumeru/ruMMLU: {'acc': 0.5046393295420533}
38
+ INFO: 2024-07-13 14:35:04,978: llmtf.base.evaluator: Ended eval
39
+ INFO: 2024-07-13 14:35:04,984: llmtf.base.evaluator:
40
+ mean darumeru/cp_sent_ru darumeru/ruMMLU
41
+ 0.749 0.993 0.505
42
+ INFO: 2024-07-13 14:35:16,448: llmtf.base.darumeru/MultiQ: Processing Dataset: 371.08s
43
+ INFO: 2024-07-13 14:35:16,452: llmtf.base.darumeru/MultiQ: Results for darumeru/MultiQ:
44
+ INFO: 2024-07-13 14:35:16,456: llmtf.base.darumeru/MultiQ: {'f1': 0.3370324579707962, 'em': 0.21032504780114722}
45
+ INFO: 2024-07-13 14:35:16,460: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [128001, 128009]
46
+ INFO: 2024-07-13 14:35:16,461: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
47
+ INFO: 2024-07-13 14:35:19,048: llmtf.base.darumeru/cp_sent_en: Processing Dataset: 153.54s
48
+ INFO: 2024-07-13 14:35:19,050: llmtf.base.darumeru/cp_sent_en: Results for darumeru/cp_sent_en:
49
+ INFO: 2024-07-13 14:35:19,083: llmtf.base.darumeru/cp_sent_en: {'symbol_per_token': 4.424907714143083, 'len': 0.9996416196590585, 'lcs': 0.995460815828734}
50
+ INFO: 2024-07-13 14:35:19,084: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [128001, 128009]
51
+ INFO: 2024-07-13 14:35:19,085: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
52
+ INFO: 2024-07-13 14:35:19,188: llmtf.base.darumeru/PARus: Loading Dataset: 2.73s
53
+ INFO: 2024-07-13 14:35:20,825: llmtf.base.darumeru/cp_para_ru: Loading Dataset: 1.74s
54
+ INFO: 2024-07-13 14:35:22,119: llmtf.base.darumeru/PARus: Processing Dataset: 2.93s
55
+ INFO: 2024-07-13 14:35:22,121: llmtf.base.darumeru/PARus: Results for darumeru/PARus:
56
+ INFO: 2024-07-13 14:35:22,164: llmtf.base.darumeru/PARus: {'acc': 0.64}
57
+ INFO: 2024-07-13 14:35:22,165: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [128001, 128009]
58
+ INFO: 2024-07-13 14:35:22,165: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
59
+ INFO: 2024-07-13 14:35:24,196: llmtf.base.darumeru/RCB: Loading Dataset: 2.03s
60
+ INFO: 2024-07-13 14:35:29,614: llmtf.base.darumeru/RCB: Processing Dataset: 5.41s
61
+ INFO: 2024-07-13 14:35:29,616: llmtf.base.darumeru/RCB: Results for darumeru/RCB:
62
+ INFO: 2024-07-13 14:35:29,622: llmtf.base.darumeru/RCB: {'acc': 0.4863636363636364, 'f1_macro': 0.4094575374734713}
63
+ INFO: 2024-07-13 14:35:29,624: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [128001, 128009]
64
+ INFO: 2024-07-13 14:35:29,624: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
65
+ INFO: 2024-07-13 14:35:32,722: llmtf.base.darumeru/ruOpenBookQA: Loading Dataset: 3.10s
66
+ INFO: 2024-07-13 14:35:40,173: llmtf.base.nlpcoreteam/enMMLU: Processing Dataset: 263.39s
67
+ INFO: 2024-07-13 14:35:40,174: llmtf.base.nlpcoreteam/enMMLU: Results for nlpcoreteam/enMMLU:
68
+ INFO: 2024-07-13 14:35:40,219: llmtf.base.nlpcoreteam/enMMLU: metric
69
+ subject
70
+ abstract_algebra 0.340000
71
+ anatomy 0.718519
72
+ astronomy 0.730263
73
+ business_ethics 0.720000
74
+ clinical_knowledge 0.735849
75
+ college_biology 0.791667
76
+ college_chemistry 0.460000
77
+ college_computer_science 0.600000
78
+ college_mathematics 0.310000
79
+ college_medicine 0.647399
80
+ college_physics 0.480392
81
+ computer_security 0.760000
82
+ conceptual_physics 0.570213
83
+ econometrics 0.517544
84
+ electrical_engineering 0.606897
85
+ elementary_mathematics 0.468254
86
+ formal_logic 0.523810
87
+ global_facts 0.410000
88
+ high_school_biology 0.809677
89
+ high_school_chemistry 0.541872
90
+ high_school_computer_science 0.730000
91
+ high_school_european_history 0.733333
92
+ high_school_geography 0.823232
93
+ high_school_government_and_politics 0.865285
94
+ high_school_macroeconomics 0.630769
95
+ high_school_mathematics 0.370370
96
+ high_school_microeconomics 0.752101
97
+ high_school_physics 0.410596
98
+ high_school_psychology 0.855046
99
+ high_school_statistics 0.532407
100
+ high_school_us_history 0.828431
101
+ high_school_world_history 0.839662
102
+ human_aging 0.721973
103
+ human_sexuality 0.778626
104
+ international_law 0.760331
105
+ jurisprudence 0.796296
106
+ logical_fallacies 0.779141
107
+ machine_learning 0.446429
108
+ management 0.796117
109
+ marketing 0.893162
110
+ medical_genetics 0.780000
111
+ miscellaneous 0.840358
112
+ moral_disputes 0.696532
113
+ moral_scenarios 0.293855
114
+ nutrition 0.764706
115
+ philosophy 0.720257
116
+ prehistory 0.706790
117
+ professional_accounting 0.542553
118
+ professional_law 0.481747
119
+ professional_medicine 0.731618
120
+ professional_psychology 0.674837
121
+ public_relations 0.663636
122
+ security_studies 0.714286
123
+ sociology 0.825871
124
+ us_foreign_policy 0.890000
125
+ virology 0.487952
126
+ world_religions 0.824561
127
+ INFO: 2024-07-13 14:35:40,227: llmtf.base.nlpcoreteam/enMMLU: metric
128
+ subject
129
+ STEM 0.553280
130
+ humanities 0.691134
131
+ other (business, health, misc.) 0.699300
132
+ social sciences 0.749269
133
+ INFO: 2024-07-13 14:35:40,234: llmtf.base.nlpcoreteam/enMMLU: {'acc': 0.6732459770237078}
134
+ INFO: 2024-07-13 14:35:40,267: llmtf.base.evaluator: Ended eval
135
+ INFO: 2024-07-13 14:35:40,273: llmtf.base.evaluator:
136
+ mean darumeru/MultiQ darumeru/PARus darumeru/RCB darumeru/cp_sent_en darumeru/cp_sent_ru darumeru/ruMMLU nlpcoreteam/enMMLU
137
+ 0.647 0.274 0.640 0.448 1.000 0.993 0.505 0.673
138
+ INFO: 2024-07-13 14:35:54,003: llmtf.base.daru/treewayextractive: Processing Dataset: 395.96s
139
+ INFO: 2024-07-13 14:35:54,004: llmtf.base.daru/treewayextractive: Results for daru/treewayextractive:
140
+ INFO: 2024-07-13 14:35:54,481: llmtf.base.daru/treewayextractive: {'r-prec': 0.39738621933621937}
141
+ INFO: 2024-07-13 14:35:54,526: llmtf.base.evaluator: Ended eval
142
+ INFO: 2024-07-13 14:35:54,533: llmtf.base.evaluator:
143
+ mean daru/treewayextractive darumeru/MultiQ darumeru/PARus darumeru/RCB darumeru/cp_sent_en darumeru/cp_sent_ru darumeru/ruMMLU nlpcoreteam/enMMLU
144
+ 0.616 0.397 0.274 0.640 0.448 1.000 0.993 0.505 0.673
145
+ INFO: 2024-07-13 14:36:08,587: llmtf.base.darumeru/ruOpenBookQA: Processing Dataset: 35.86s
146
+ INFO: 2024-07-13 14:36:08,588: llmtf.base.darumeru/ruOpenBookQA: Results for darumeru/ruOpenBookQA:
147
+ INFO: 2024-07-13 14:36:08,601: llmtf.base.darumeru/ruOpenBookQA: {'acc': 0.6907216494845361, 'f1_macro': 0.6911297261861948}
148
+ INFO: 2024-07-13 14:36:08,608: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [128001, 128009]
149
+ INFO: 2024-07-13 14:36:08,608: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
150
+ INFO: 2024-07-13 14:36:16,304: llmtf.base.darumeru/ruTiE: Loading Dataset: 7.69s
151
+ INFO: 2024-07-13 14:37:20,843: llmtf.base.nlpcoreteam/ruMMLU: Processing Dataset: 362.26s
152
+ INFO: 2024-07-13 14:37:20,846: llmtf.base.nlpcoreteam/ruMMLU: Results for nlpcoreteam/ruMMLU:
153
+ INFO: 2024-07-13 14:37:20,893: llmtf.base.nlpcoreteam/ruMMLU: metric
154
+ subject
155
+ abstract_algebra 0.300000
156
+ anatomy 0.459259
157
+ astronomy 0.651316
158
+ business_ethics 0.600000
159
+ clinical_knowledge 0.566038
160
+ college_biology 0.541667
161
+ college_chemistry 0.400000
162
+ college_computer_science 0.460000
163
+ college_mathematics 0.320000
164
+ college_medicine 0.502890
165
+ college_physics 0.352941
166
+ computer_security 0.570000
167
+ conceptual_physics 0.485106
168
+ econometrics 0.350877
169
+ electrical_engineering 0.551724
170
+ elementary_mathematics 0.410053
171
+ formal_logic 0.380952
172
+ global_facts 0.350000
173
+ high_school_biology 0.638710
174
+ high_school_chemistry 0.423645
175
+ high_school_computer_science 0.610000
176
+ high_school_european_history 0.715152
177
+ high_school_geography 0.661616
178
+ high_school_government_and_politics 0.595855
179
+ high_school_macroeconomics 0.510256
180
+ high_school_mathematics 0.337037
181
+ high_school_microeconomics 0.495798
182
+ high_school_physics 0.344371
183
+ high_school_psychology 0.669725
184
+ high_school_statistics 0.467593
185
+ high_school_us_history 0.651961
186
+ high_school_world_history 0.713080
187
+ human_aging 0.551570
188
+ human_sexuality 0.656489
189
+ international_law 0.710744
190
+ jurisprudence 0.592593
191
+ logical_fallacies 0.527607
192
+ machine_learning 0.357143
193
+ management 0.669903
194
+ marketing 0.705128
195
+ medical_genetics 0.560000
196
+ miscellaneous 0.646232
197
+ moral_disputes 0.560694
198
+ moral_scenarios 0.249162
199
+ nutrition 0.598039
200
+ philosophy 0.565916
201
+ prehistory 0.558642
202
+ professional_accounting 0.386525
203
+ professional_law 0.359192
204
+ professional_medicine 0.518382
205
+ professional_psychology 0.485294
206
+ public_relations 0.572727
207
+ security_studies 0.620408
208
+ sociology 0.701493
209
+ us_foreign_policy 0.750000
210
+ virology 0.415663
211
+ world_religions 0.695906
212
+ INFO: 2024-07-13 14:37:20,902: llmtf.base.nlpcoreteam/ruMMLU: metric
213
+ subject
214
+ STEM 0.456739
215
+ humanities 0.560123
216
+ other (business, health, misc.) 0.537831
217
+ social sciences 0.589212
218
+ INFO: 2024-07-13 14:37:20,909: llmtf.base.nlpcoreteam/ruMMLU: {'acc': 0.5359761297506582}
219
+ INFO: 2024-07-13 14:37:20,942: llmtf.base.evaluator: Ended eval
220
+ INFO: 2024-07-13 14:37:21,003: llmtf.base.evaluator:
221
+ mean daru/treewayextractive darumeru/MultiQ darumeru/PARus darumeru/RCB darumeru/cp_sent_en darumeru/cp_sent_ru darumeru/ruMMLU darumeru/ruOpenBookQA nlpcoreteam/enMMLU nlpcoreteam/ruMMLU
222
+ 0.616 0.397 0.274 0.640 0.448 1.000 0.993 0.505 0.691 0.673 0.536
223
+ INFO: 2024-07-13 14:38:13,255: llmtf.base.daru/treewayabstractive: Processing Dataset: 544.53s
224
+ INFO: 2024-07-13 14:38:13,256: llmtf.base.daru/treewayabstractive: Results for daru/treewayabstractive:
225
+ INFO: 2024-07-13 14:38:13,260: llmtf.base.daru/treewayabstractive: {'rouge1': 0.35574041658645894, 'rouge2': 0.1282333481459036}
226
+ INFO: 2024-07-13 14:38:13,262: llmtf.base.evaluator: Ended eval
227
+ INFO: 2024-07-13 14:38:13,270: llmtf.base.evaluator:
228
+ mean daru/treewayabstractive daru/treewayextractive darumeru/MultiQ darumeru/PARus darumeru/RCB darumeru/cp_sent_en darumeru/cp_sent_ru darumeru/ruMMLU darumeru/ruOpenBookQA nlpcoreteam/enMMLU nlpcoreteam/ruMMLU
229
+ 0.582 0.242 0.397 0.274 0.640 0.448 1.000 0.993 0.505 0.691 0.673 0.536
230
+ INFO: 2024-07-13 14:40:26,872: llmtf.base.darumeru/cp_para_ru: Processing Dataset: 306.04s
231
+ INFO: 2024-07-13 14:40:26,875: llmtf.base.darumeru/cp_para_ru: Results for darumeru/cp_para_ru:
232
+ INFO: 2024-07-13 14:40:26,895: llmtf.base.darumeru/cp_para_ru: {'symbol_per_token': 2.968660662438201, 'len': 0.9950114211220992, 'lcs': 0.9146147408713498}
233
+ INFO: 2024-07-13 14:40:26,896: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [128001, 128009]
234
+ INFO: 2024-07-13 14:40:26,896: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
235
+ INFO: 2024-07-13 14:40:28,747: llmtf.base.darumeru/cp_para_en: Loading Dataset: 1.85s
236
+ INFO: 2024-07-13 14:40:42,169: llmtf.base.darumeru/ruTiE: Processing Dataset: 265.86s
237
+ INFO: 2024-07-13 14:40:42,170: llmtf.base.darumeru/ruTiE: Results for darumeru/ruTiE:
238
+ INFO: 2024-07-13 14:40:42,198: llmtf.base.darumeru/ruTiE: {'acc': 0.3511627906976744}
239
+ INFO: 2024-07-13 14:40:42,201: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [128001, 128009]
240
+ INFO: 2024-07-13 14:40:42,202: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
241
+ INFO: 2024-07-13 14:40:44,145: llmtf.base.darumeru/ruWorldTree: Loading Dataset: 1.94s
242
+ INFO: 2024-07-13 14:40:46,061: llmtf.base.darumeru/ruWorldTree: Processing Dataset: 1.92s
243
+ INFO: 2024-07-13 14:40:46,063: llmtf.base.darumeru/ruWorldTree: Results for darumeru/ruWorldTree:
244
+ INFO: 2024-07-13 14:40:46,081: llmtf.base.darumeru/ruWorldTree: {'acc': 0.8476190476190476, 'f1_macro': 0.8445201637796824}
245
+ INFO: 2024-07-13 14:40:46,082: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [128001, 128009]
246
+ INFO: 2024-07-13 14:40:46,082: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
247
+ INFO: 2024-07-13 14:40:48,101: llmtf.base.darumeru/RWSD: Loading Dataset: 2.02s
248
+ INFO: 2024-07-13 14:40:53,690: llmtf.base.darumeru/RWSD: Processing Dataset: 5.59s
249
+ INFO: 2024-07-13 14:40:53,692: llmtf.base.darumeru/RWSD: Results for darumeru/RWSD:
250
+ INFO: 2024-07-13 14:40:53,696: llmtf.base.darumeru/RWSD: {'acc': 0.5490196078431373}
251
+ INFO: 2024-07-13 14:40:53,697: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [128001, 128009]
252
+ INFO: 2024-07-13 14:40:53,697: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
253
+ INFO: 2024-07-13 14:40:56,414: llmtf.base.darumeru/USE: Loading Dataset: 2.72s
254
+ INFO: 2024-07-13 14:44:03,848: llmtf.base.darumeru/cp_para_en: Processing Dataset: 215.10s
255
+ INFO: 2024-07-13 14:44:03,851: llmtf.base.darumeru/cp_para_en: Results for darumeru/cp_para_en:
256
+ INFO: 2024-07-13 14:44:03,854: llmtf.base.darumeru/cp_para_en: {'symbol_per_token': 4.463140535341514, 'len': 0.9941296296409974, 'lcs': 0.955732821155511}
257
+ INFO: 2024-07-13 14:44:03,855: llmtf.base.evaluator: Ended eval
258
+ INFO: 2024-07-13 14:44:03,884: llmtf.base.evaluator:
259
+ mean daru/treewayabstractive daru/treewayextractive darumeru/MultiQ darumeru/PARus darumeru/RCB darumeru/RWSD darumeru/cp_para_en darumeru/cp_para_ru darumeru/cp_sent_en darumeru/cp_sent_ru darumeru/ruMMLU darumeru/ruOpenBookQA darumeru/ruTiE darumeru/ruWorldTree nlpcoreteam/enMMLU nlpcoreteam/ruMMLU
260
+ 0.626 0.242 0.397 0.274 0.640 0.448 0.549 0.956 0.915 1.000 0.993 0.505 0.691 0.351 0.846 0.673 0.536
261
+ INFO: 2024-07-13 14:45:47,572: llmtf.base.darumeru/USE: Processing Dataset: 291.16s
262
+ INFO: 2024-07-13 14:45:47,575: llmtf.base.darumeru/USE: Results for darumeru/USE:
263
+ INFO: 2024-07-13 14:45:47,607: llmtf.base.darumeru/USE: {'grade_norm': 0.07941176470588233}
264
+ INFO: 2024-07-13 14:45:47,610: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [128001, 128009]
265
+ INFO: 2024-07-13 14:45:47,611: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
266
+ INFO: 2024-07-13 14:45:52,951: llmtf.base.russiannlp/rucola_custom: Loading Dataset: 5.34s
267
+ INFO: 2024-07-13 14:46:34,251: llmtf.base.russiannlp/rucola_custom: Processing Dataset: 41.30s
268
+ INFO: 2024-07-13 14:46:34,255: llmtf.base.russiannlp/rucola_custom: Results for russiannlp/rucola_custom:
269
+ INFO: 2024-07-13 14:46:34,267: llmtf.base.russiannlp/rucola_custom: {'acc': 0.7061356297093649, 'mcc': 0.2603067425656207}
270
+ INFO: 2024-07-13 14:46:34,271: llmtf.base.evaluator: Ended eval
271
+ INFO: 2024-07-13 14:46:34,283: llmtf.base.evaluator:
272
+ mean daru/treewayabstractive daru/treewayextractive darumeru/MultiQ darumeru/PARus darumeru/RCB darumeru/RWSD darumeru/USE darumeru/cp_para_en darumeru/cp_para_ru darumeru/cp_sent_en darumeru/cp_sent_ru darumeru/ruMMLU darumeru/ruOpenBookQA darumeru/ruTiE darumeru/ruWorldTree nlpcoreteam/enMMLU nlpcoreteam/ruMMLU russiannlp/rucola_custom
273
+ 0.588 0.242 0.397 0.274 0.640 0.448 0.549 0.079 0.956 0.915 1.000 0.993 0.505 0.691 0.351 0.846 0.673 0.536 0.483
llmtf_eval_k0_bs8/evaluation_results.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ mean daru/treewayabstractive daru/treewayextractive darumeru/MultiQ darumeru/PARus darumeru/RCB darumeru/RWSD darumeru/USE darumeru/cp_para_en darumeru/cp_para_ru darumeru/cp_sent_en darumeru/cp_sent_ru darumeru/ruMMLU darumeru/ruOpenBookQA darumeru/ruTiE darumeru/ruWorldTree nlpcoreteam/enMMLU nlpcoreteam/ruMMLU russiannlp/rucola_custom
2
+ 0.588 0.242 0.397 0.274 0.640 0.448 0.549 0.079 0.956 0.915 1.000 0.993 0.505 0.691 0.351 0.846 0.673 0.536 0.483
llmtf_eval_k0_bs8/nlpcoreteam_enMMLU.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c874bfff402dc1e0898d2710e2741d51c679a7c9587622d1f591a21ee5f12abb
3
+ size 38085341
llmtf_eval_k0_bs8/nlpcoreteam_enMMLU_params.jsonl ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "NousResearch/Meta-Llama-3-8B-Instruct",
5
+ "generation_config": {
6
+ "bos_token_id": 128000,
7
+ "do_sample": true,
8
+ "eos_token_id": [
9
+ 128001,
10
+ 128009
11
+ ],
12
+ "max_length": 8192,
13
+ "max_new_tokens": 64,
14
+ "pad_token_id": 128001,
15
+ "stop_strings": [],
16
+ "temperature": 0.1,
17
+ "top_k": 40,
18
+ "top_p": 0.9,
19
+ "transformers_version": "4.38.2",
20
+ "trust_remote_code": [
21
+ false
22
+ ]
23
+ },
24
+ "conversation_template": {
25
+ "system_prompt": "",
26
+ "system_message_template": "",
27
+ "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
28
+ "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
29
+ "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
30
+ "user_role": "user",
31
+ "bot_role": "assistant",
32
+ "system_role": "system",
33
+ "global_prefix": "<|begin_of_text|>",
34
+ "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
35
+ "add_special_tokens": false,
36
+ "eos_token": "<|eot_id|>"
37
+ },
38
+ "load_in_8bit": false,
39
+ "torch_dtype": "auto",
40
+ "use_flash_attention_2": true,
41
+ "device_map": "cuda:0",
42
+ "use_fast_tokenizer": true,
43
+ "leading_space": false,
44
+ "space_token": null,
45
+ "trust_remote_code": [
46
+ false
47
+ ],
48
+ "max_model_len": 8192
49
+ },
50
+ "task_params": {
51
+ "max_len": 4000,
52
+ "few_shot_count": 0,
53
+ "batch_size": 8,
54
+ "max_sample_per_dataset": 10000000000000,
55
+ "method": "calculate_tokens_proba"
56
+ }
57
+ }