RefalMachine commited on
Commit
0ce93cf
·
verified ·
1 Parent(s): 8a8e023

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +38 -0
  2. llmtf_eval_k0_bs1/daru_treewayabstractive.jsonl +0 -0
  3. llmtf_eval_k0_bs1/daru_treewayabstractive_params.jsonl +57 -0
  4. llmtf_eval_k0_bs1/daru_treewayabstractive_total.jsonl +8 -0
  5. llmtf_eval_k0_bs1/daru_treewayextractive.jsonl +3 -0
  6. llmtf_eval_k0_bs1/daru_treewayextractive_params.jsonl +57 -0
  7. llmtf_eval_k0_bs1/daru_treewayextractive_total.jsonl +7 -0
  8. llmtf_eval_k0_bs1/darumeru_MultiQ.jsonl +0 -0
  9. llmtf_eval_k0_bs1/darumeru_MultiQ_params.jsonl +57 -0
  10. llmtf_eval_k0_bs1/darumeru_MultiQ_total.jsonl +8 -0
  11. llmtf_eval_k0_bs1/darumeru_PARus.jsonl +0 -0
  12. llmtf_eval_k0_bs1/darumeru_PARus_params.jsonl +57 -0
  13. llmtf_eval_k0_bs1/darumeru_PARus_total.jsonl +7 -0
  14. llmtf_eval_k0_bs1/darumeru_RCB.jsonl +0 -0
  15. llmtf_eval_k0_bs1/darumeru_RCB_params.jsonl +57 -0
  16. llmtf_eval_k0_bs1/darumeru_RCB_total.jsonl +8 -0
  17. llmtf_eval_k0_bs1/darumeru_RWSD.jsonl +0 -0
  18. llmtf_eval_k0_bs1/darumeru_RWSD_params.jsonl +57 -0
  19. llmtf_eval_k0_bs1/darumeru_RWSD_total.jsonl +7 -0
  20. llmtf_eval_k0_bs1/darumeru_USE.jsonl +0 -0
  21. llmtf_eval_k0_bs1/darumeru_USE_params.jsonl +57 -0
  22. llmtf_eval_k0_bs1/darumeru_USE_total.jsonl +7 -0
  23. llmtf_eval_k0_bs1/darumeru_cp_para_en.jsonl +0 -0
  24. llmtf_eval_k0_bs1/darumeru_cp_para_en_params.jsonl +57 -0
  25. llmtf_eval_k0_bs1/darumeru_cp_para_en_total.jsonl +9 -0
  26. llmtf_eval_k0_bs1/darumeru_cp_para_ru.jsonl +0 -0
  27. llmtf_eval_k0_bs1/darumeru_cp_para_ru_params.jsonl +57 -0
  28. llmtf_eval_k0_bs1/darumeru_cp_para_ru_total.jsonl +9 -0
  29. llmtf_eval_k0_bs1/darumeru_cp_sent_en.jsonl +0 -0
  30. llmtf_eval_k0_bs1/darumeru_cp_sent_en_params.jsonl +57 -0
  31. llmtf_eval_k0_bs1/darumeru_cp_sent_en_total.jsonl +9 -0
  32. llmtf_eval_k0_bs1/darumeru_cp_sent_ru.jsonl +0 -0
  33. llmtf_eval_k0_bs1/darumeru_cp_sent_ru_params.jsonl +57 -0
  34. llmtf_eval_k0_bs1/darumeru_cp_sent_ru_total.jsonl +9 -0
  35. llmtf_eval_k0_bs1/darumeru_ruMMLU.jsonl +3 -0
  36. llmtf_eval_k0_bs1/darumeru_ruMMLU_params.jsonl +57 -0
  37. llmtf_eval_k0_bs1/darumeru_ruMMLU_total.jsonl +7 -0
  38. llmtf_eval_k0_bs1/darumeru_ruOpenBookQA.jsonl +0 -0
  39. llmtf_eval_k0_bs1/darumeru_ruOpenBookQA_params.jsonl +57 -0
  40. llmtf_eval_k0_bs1/darumeru_ruOpenBookQA_total.jsonl +8 -0
  41. llmtf_eval_k0_bs1/darumeru_ruTiE.jsonl +3 -0
  42. llmtf_eval_k0_bs1/darumeru_ruTiE_params.jsonl +57 -0
  43. llmtf_eval_k0_bs1/darumeru_ruTiE_total.jsonl +7 -0
  44. llmtf_eval_k0_bs1/darumeru_ruWorldTree.jsonl +0 -0
  45. llmtf_eval_k0_bs1/darumeru_ruWorldTree_params.jsonl +57 -0
  46. llmtf_eval_k0_bs1/darumeru_ruWorldTree_total.jsonl +8 -0
  47. llmtf_eval_k0_bs1/evaluation_log.txt +273 -0
  48. llmtf_eval_k0_bs1/evaluation_results.txt +2 -0
  49. llmtf_eval_k0_bs1/nlpcoreteam_enMMLU.jsonl +3 -0
  50. llmtf_eval_k0_bs1/nlpcoreteam_enMMLU_params.jsonl +57 -0
.gitattributes CHANGED
@@ -33,3 +33,41 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ llmtf_eval_k0_bs1/daru_treewayextractive.jsonl filter=lfs diff=lfs merge=lfs -text
37
+ llmtf_eval_k0_bs1/darumeru_ruMMLU.jsonl filter=lfs diff=lfs merge=lfs -text
38
+ llmtf_eval_k0_bs1/darumeru_ruTiE.jsonl filter=lfs diff=lfs merge=lfs -text
39
+ llmtf_eval_k0_bs1/nlpcoreteam_enMMLU.jsonl filter=lfs diff=lfs merge=lfs -text
40
+ llmtf_eval_k0_bs1/nlpcoreteam_ruMMLU.jsonl filter=lfs diff=lfs merge=lfs -text
41
+ llmtf_eval_k0_bs16/daru_treewayextractive.jsonl filter=lfs diff=lfs merge=lfs -text
42
+ llmtf_eval_k0_bs16/darumeru_ruMMLU.jsonl filter=lfs diff=lfs merge=lfs -text
43
+ llmtf_eval_k0_bs16/nlpcoreteam_enMMLU.jsonl filter=lfs diff=lfs merge=lfs -text
44
+ llmtf_eval_k0_bs16/nlpcoreteam_ruMMLU.jsonl filter=lfs diff=lfs merge=lfs -text
45
+ llmtf_eval_k0_bs4/daru_treewayextractive.jsonl filter=lfs diff=lfs merge=lfs -text
46
+ llmtf_eval_k0_bs4/darumeru_ruMMLU.jsonl filter=lfs diff=lfs merge=lfs -text
47
+ llmtf_eval_k0_bs4/darumeru_ruTiE.jsonl filter=lfs diff=lfs merge=lfs -text
48
+ llmtf_eval_k0_bs4/nlpcoreteam_enMMLU.jsonl filter=lfs diff=lfs merge=lfs -text
49
+ llmtf_eval_k0_bs4/nlpcoreteam_ruMMLU.jsonl filter=lfs diff=lfs merge=lfs -text
50
+ llmtf_eval_k5_bs1/daru_treewayabstractive.jsonl filter=lfs diff=lfs merge=lfs -text
51
+ llmtf_eval_k5_bs1/daru_treewayextractive.jsonl filter=lfs diff=lfs merge=lfs -text
52
+ llmtf_eval_k5_bs1/darumeru_MultiQ.jsonl filter=lfs diff=lfs merge=lfs -text
53
+ llmtf_eval_k5_bs1/darumeru_USE.jsonl filter=lfs diff=lfs merge=lfs -text
54
+ llmtf_eval_k5_bs1/darumeru_ruMMLU.jsonl filter=lfs diff=lfs merge=lfs -text
55
+ llmtf_eval_k5_bs1/darumeru_ruOpenBookQA.jsonl filter=lfs diff=lfs merge=lfs -text
56
+ llmtf_eval_k5_bs1/darumeru_ruTiE.jsonl filter=lfs diff=lfs merge=lfs -text
57
+ llmtf_eval_k5_bs1/nlpcoreteam_enMMLU.jsonl filter=lfs diff=lfs merge=lfs -text
58
+ llmtf_eval_k5_bs1/nlpcoreteam_ruMMLU.jsonl filter=lfs diff=lfs merge=lfs -text
59
+ llmtf_eval_k5_bs1/russiannlp_rucola_custom.jsonl filter=lfs diff=lfs merge=lfs -text
60
+ llmtf_eval_k5_bs16/daru_treewayextractive.jsonl filter=lfs diff=lfs merge=lfs -text
61
+ llmtf_eval_k5_bs16/darumeru_ruMMLU.jsonl filter=lfs diff=lfs merge=lfs -text
62
+ llmtf_eval_k5_bs16/nlpcoreteam_enMMLU.jsonl filter=lfs diff=lfs merge=lfs -text
63
+ llmtf_eval_k5_bs16/nlpcoreteam_ruMMLU.jsonl filter=lfs diff=lfs merge=lfs -text
64
+ llmtf_eval_k5_bs4/daru_treewayabstractive.jsonl filter=lfs diff=lfs merge=lfs -text
65
+ llmtf_eval_k5_bs4/daru_treewayextractive.jsonl filter=lfs diff=lfs merge=lfs -text
66
+ llmtf_eval_k5_bs4/darumeru_MultiQ.jsonl filter=lfs diff=lfs merge=lfs -text
67
+ llmtf_eval_k5_bs4/darumeru_USE.jsonl filter=lfs diff=lfs merge=lfs -text
68
+ llmtf_eval_k5_bs4/darumeru_ruMMLU.jsonl filter=lfs diff=lfs merge=lfs -text
69
+ llmtf_eval_k5_bs4/darumeru_ruOpenBookQA.jsonl filter=lfs diff=lfs merge=lfs -text
70
+ llmtf_eval_k5_bs4/darumeru_ruTiE.jsonl filter=lfs diff=lfs merge=lfs -text
71
+ llmtf_eval_k5_bs4/nlpcoreteam_enMMLU.jsonl filter=lfs diff=lfs merge=lfs -text
72
+ llmtf_eval_k5_bs4/nlpcoreteam_ruMMLU.jsonl filter=lfs diff=lfs merge=lfs -text
73
+ llmtf_eval_k5_bs4/russiannlp_rucola_custom.jsonl filter=lfs diff=lfs merge=lfs -text
llmtf_eval_k0_bs1/daru_treewayabstractive.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
llmtf_eval_k0_bs1/daru_treewayabstractive_params.jsonl ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "NousResearch/Meta-Llama-3-8B-Instruct",
5
+ "generation_config": {
6
+ "bos_token_id": 128000,
7
+ "do_sample": true,
8
+ "eos_token_id": [
9
+ 128001,
10
+ 128009
11
+ ],
12
+ "max_length": 8192,
13
+ "max_new_tokens": 512,
14
+ "pad_token_id": 128001,
15
+ "stop_strings": [],
16
+ "temperature": 0.1,
17
+ "top_k": 40,
18
+ "top_p": 0.9,
19
+ "transformers_version": "4.38.2",
20
+ "trust_remote_code": [
21
+ false
22
+ ]
23
+ },
24
+ "conversation_template": {
25
+ "system_prompt": "",
26
+ "system_message_template": "",
27
+ "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
28
+ "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
29
+ "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
30
+ "user_role": "user",
31
+ "bot_role": "assistant",
32
+ "system_role": "system",
33
+ "global_prefix": "<|begin_of_text|>",
34
+ "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
35
+ "add_special_tokens": false,
36
+ "eos_token": "<|eot_id|>"
37
+ },
38
+ "load_in_8bit": false,
39
+ "torch_dtype": "auto",
40
+ "use_flash_attention_2": true,
41
+ "device_map": "cuda:0",
42
+ "use_fast_tokenizer": true,
43
+ "leading_space": false,
44
+ "space_token": null,
45
+ "trust_remote_code": [
46
+ false
47
+ ],
48
+ "max_model_len": 8192
49
+ },
50
+ "task_params": {
51
+ "max_len": 4000,
52
+ "few_shot_count": 0,
53
+ "batch_size": 1,
54
+ "max_sample_per_dataset": 500,
55
+ "method": "generate"
56
+ }
57
+ }
llmtf_eval_k0_bs1/daru_treewayabstractive_total.jsonl ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "daru/treewayabstractive",
3
+ "results": {
4
+ "rouge1": 0.35429775115253526,
5
+ "rouge2": 0.12889173695955833
6
+ },
7
+ "leaderboard_result": 0.2415947440560468
8
+ }
llmtf_eval_k0_bs1/daru_treewayextractive.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ffa3ec35722ce316fdaa3af761f8797f6db0f94a32596f1e05741e81068d3d9f
3
+ size 259989896
llmtf_eval_k0_bs1/daru_treewayextractive_params.jsonl ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "NousResearch/Meta-Llama-3-8B-Instruct",
5
+ "generation_config": {
6
+ "bos_token_id": 128000,
7
+ "do_sample": true,
8
+ "eos_token_id": [
9
+ 128001,
10
+ 128009
11
+ ],
12
+ "max_length": 8192,
13
+ "max_new_tokens": 1,
14
+ "pad_token_id": 128001,
15
+ "stop_strings": [],
16
+ "temperature": 0.1,
17
+ "top_k": 40,
18
+ "top_p": 0.9,
19
+ "transformers_version": "4.38.2",
20
+ "trust_remote_code": [
21
+ false
22
+ ]
23
+ },
24
+ "conversation_template": {
25
+ "system_prompt": "",
26
+ "system_message_template": "",
27
+ "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
28
+ "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
29
+ "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
30
+ "user_role": "user",
31
+ "bot_role": "assistant",
32
+ "system_role": "system",
33
+ "global_prefix": "<|begin_of_text|>",
34
+ "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
35
+ "add_special_tokens": false,
36
+ "eos_token": "<|eot_id|>"
37
+ },
38
+ "load_in_8bit": false,
39
+ "torch_dtype": "auto",
40
+ "use_flash_attention_2": true,
41
+ "device_map": "cuda:0",
42
+ "use_fast_tokenizer": true,
43
+ "leading_space": false,
44
+ "space_token": null,
45
+ "trust_remote_code": [
46
+ false
47
+ ],
48
+ "max_model_len": 8192
49
+ },
50
+ "task_params": {
51
+ "max_len": 4000,
52
+ "few_shot_count": 0,
53
+ "batch_size": 1,
54
+ "max_sample_per_dataset": 500,
55
+ "method": "calculate_logsoftmax"
56
+ }
57
+ }
llmtf_eval_k0_bs1/daru_treewayextractive_total.jsonl ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "daru/treewayextractive",
3
+ "results": {
4
+ "r-prec": 0.3960751082251082
5
+ },
6
+ "leaderboard_result": 0.3960751082251082
7
+ }
llmtf_eval_k0_bs1/darumeru_MultiQ.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
llmtf_eval_k0_bs1/darumeru_MultiQ_params.jsonl ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "NousResearch/Meta-Llama-3-8B-Instruct",
5
+ "generation_config": {
6
+ "bos_token_id": 128000,
7
+ "do_sample": true,
8
+ "eos_token_id": [
9
+ 128001,
10
+ 128009
11
+ ],
12
+ "max_length": 8192,
13
+ "max_new_tokens": 64,
14
+ "pad_token_id": 128001,
15
+ "stop_strings": [],
16
+ "temperature": 0.1,
17
+ "top_k": 40,
18
+ "top_p": 0.9,
19
+ "transformers_version": "4.38.2",
20
+ "trust_remote_code": [
21
+ false
22
+ ]
23
+ },
24
+ "conversation_template": {
25
+ "system_prompt": "",
26
+ "system_message_template": "",
27
+ "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
28
+ "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
29
+ "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
30
+ "user_role": "user",
31
+ "bot_role": "assistant",
32
+ "system_role": "system",
33
+ "global_prefix": "<|begin_of_text|>",
34
+ "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
35
+ "add_special_tokens": false,
36
+ "eos_token": "<|eot_id|>"
37
+ },
38
+ "load_in_8bit": false,
39
+ "torch_dtype": "auto",
40
+ "use_flash_attention_2": true,
41
+ "device_map": "cuda:0",
42
+ "use_fast_tokenizer": true,
43
+ "leading_space": false,
44
+ "space_token": null,
45
+ "trust_remote_code": [
46
+ false
47
+ ],
48
+ "max_model_len": 8192
49
+ },
50
+ "task_params": {
51
+ "max_len": 4000,
52
+ "few_shot_count": 0,
53
+ "batch_size": 1,
54
+ "max_sample_per_dataset": 10000000000000,
55
+ "method": "generate"
56
+ }
57
+ }
llmtf_eval_k0_bs1/darumeru_MultiQ_total.jsonl ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "darumeru/MultiQ",
3
+ "results": {
4
+ "f1": 0.33947318357368605,
5
+ "em": 0.21414913957934992
6
+ },
7
+ "leaderboard_result": 0.276811161576518
8
+ }
llmtf_eval_k0_bs1/darumeru_PARus.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
llmtf_eval_k0_bs1/darumeru_PARus_params.jsonl ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "NousResearch/Meta-Llama-3-8B-Instruct",
5
+ "generation_config": {
6
+ "bos_token_id": 128000,
7
+ "do_sample": true,
8
+ "eos_token_id": [
9
+ 128001,
10
+ 128009
11
+ ],
12
+ "max_length": 8192,
13
+ "max_new_tokens": 64,
14
+ "pad_token_id": 128001,
15
+ "stop_strings": [],
16
+ "temperature": 0.1,
17
+ "top_k": 40,
18
+ "top_p": 0.9,
19
+ "transformers_version": "4.38.2",
20
+ "trust_remote_code": [
21
+ false
22
+ ]
23
+ },
24
+ "conversation_template": {
25
+ "system_prompt": "",
26
+ "system_message_template": "",
27
+ "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
28
+ "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
29
+ "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
30
+ "user_role": "user",
31
+ "bot_role": "assistant",
32
+ "system_role": "system",
33
+ "global_prefix": "<|begin_of_text|>",
34
+ "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
35
+ "add_special_tokens": false,
36
+ "eos_token": "<|eot_id|>"
37
+ },
38
+ "load_in_8bit": false,
39
+ "torch_dtype": "auto",
40
+ "use_flash_attention_2": true,
41
+ "device_map": "cuda:0",
42
+ "use_fast_tokenizer": true,
43
+ "leading_space": false,
44
+ "space_token": null,
45
+ "trust_remote_code": [
46
+ false
47
+ ],
48
+ "max_model_len": 8192
49
+ },
50
+ "task_params": {
51
+ "max_len": 4000,
52
+ "few_shot_count": 0,
53
+ "batch_size": 1,
54
+ "max_sample_per_dataset": 10000000000000,
55
+ "method": "calculate_tokens_proba"
56
+ }
57
+ }
llmtf_eval_k0_bs1/darumeru_PARus_total.jsonl ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "darumeru/PARus",
3
+ "results": {
4
+ "acc": 0.66
5
+ },
6
+ "leaderboard_result": 0.66
7
+ }
llmtf_eval_k0_bs1/darumeru_RCB.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
llmtf_eval_k0_bs1/darumeru_RCB_params.jsonl ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "NousResearch/Meta-Llama-3-8B-Instruct",
5
+ "generation_config": {
6
+ "bos_token_id": 128000,
7
+ "do_sample": true,
8
+ "eos_token_id": [
9
+ 128001,
10
+ 128009
11
+ ],
12
+ "max_length": 8192,
13
+ "max_new_tokens": 64,
14
+ "pad_token_id": 128001,
15
+ "stop_strings": [],
16
+ "temperature": 0.1,
17
+ "top_k": 40,
18
+ "top_p": 0.9,
19
+ "transformers_version": "4.38.2",
20
+ "trust_remote_code": [
21
+ false
22
+ ]
23
+ },
24
+ "conversation_template": {
25
+ "system_prompt": "",
26
+ "system_message_template": "",
27
+ "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
28
+ "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
29
+ "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
30
+ "user_role": "user",
31
+ "bot_role": "assistant",
32
+ "system_role": "system",
33
+ "global_prefix": "<|begin_of_text|>",
34
+ "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
35
+ "add_special_tokens": false,
36
+ "eos_token": "<|eot_id|>"
37
+ },
38
+ "load_in_8bit": false,
39
+ "torch_dtype": "auto",
40
+ "use_flash_attention_2": true,
41
+ "device_map": "cuda:0",
42
+ "use_fast_tokenizer": true,
43
+ "leading_space": false,
44
+ "space_token": null,
45
+ "trust_remote_code": [
46
+ false
47
+ ],
48
+ "max_model_len": 8192
49
+ },
50
+ "task_params": {
51
+ "max_len": 4000,
52
+ "few_shot_count": 0,
53
+ "batch_size": 1,
54
+ "max_sample_per_dataset": 10000000000000,
55
+ "method": "calculate_tokens_proba"
56
+ }
57
+ }
llmtf_eval_k0_bs1/darumeru_RCB_total.jsonl ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "darumeru/RCB",
3
+ "results": {
4
+ "acc": 0.5,
5
+ "f1_macro": 0.43018975381906
6
+ },
7
+ "leaderboard_result": 0.46509487690953
8
+ }
llmtf_eval_k0_bs1/darumeru_RWSD.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
llmtf_eval_k0_bs1/darumeru_RWSD_params.jsonl ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "NousResearch/Meta-Llama-3-8B-Instruct",
5
+ "generation_config": {
6
+ "bos_token_id": 128000,
7
+ "do_sample": true,
8
+ "eos_token_id": [
9
+ 128001,
10
+ 128009
11
+ ],
12
+ "max_length": 8192,
13
+ "max_new_tokens": 64,
14
+ "pad_token_id": 128001,
15
+ "stop_strings": [],
16
+ "temperature": 0.1,
17
+ "top_k": 40,
18
+ "top_p": 0.9,
19
+ "transformers_version": "4.38.2",
20
+ "trust_remote_code": [
21
+ false
22
+ ]
23
+ },
24
+ "conversation_template": {
25
+ "system_prompt": "",
26
+ "system_message_template": "",
27
+ "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
28
+ "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
29
+ "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
30
+ "user_role": "user",
31
+ "bot_role": "assistant",
32
+ "system_role": "system",
33
+ "global_prefix": "<|begin_of_text|>",
34
+ "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
35
+ "add_special_tokens": false,
36
+ "eos_token": "<|eot_id|>"
37
+ },
38
+ "load_in_8bit": false,
39
+ "torch_dtype": "auto",
40
+ "use_flash_attention_2": true,
41
+ "device_map": "cuda:0",
42
+ "use_fast_tokenizer": true,
43
+ "leading_space": false,
44
+ "space_token": null,
45
+ "trust_remote_code": [
46
+ false
47
+ ],
48
+ "max_model_len": 8192
49
+ },
50
+ "task_params": {
51
+ "max_len": 4000,
52
+ "few_shot_count": 0,
53
+ "batch_size": 1,
54
+ "max_sample_per_dataset": 10000000000000,
55
+ "method": "calculate_tokens_proba"
56
+ }
57
+ }
llmtf_eval_k0_bs1/darumeru_RWSD_total.jsonl ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "darumeru/RWSD",
3
+ "results": {
4
+ "acc": 0.5441176470588235
5
+ },
6
+ "leaderboard_result": 0.5441176470588235
7
+ }
llmtf_eval_k0_bs1/darumeru_USE.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
llmtf_eval_k0_bs1/darumeru_USE_params.jsonl ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "NousResearch/Meta-Llama-3-8B-Instruct",
5
+ "generation_config": {
6
+ "bos_token_id": 128000,
7
+ "do_sample": true,
8
+ "eos_token_id": [
9
+ 128001,
10
+ 128009
11
+ ],
12
+ "max_length": 8192,
13
+ "max_new_tokens": 64,
14
+ "pad_token_id": 128001,
15
+ "stop_strings": [],
16
+ "temperature": 0.1,
17
+ "top_k": 40,
18
+ "top_p": 0.9,
19
+ "transformers_version": "4.38.2",
20
+ "trust_remote_code": [
21
+ false
22
+ ]
23
+ },
24
+ "conversation_template": {
25
+ "system_prompt": "",
26
+ "system_message_template": "",
27
+ "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
28
+ "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
29
+ "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
30
+ "user_role": "user",
31
+ "bot_role": "assistant",
32
+ "system_role": "system",
33
+ "global_prefix": "<|begin_of_text|>",
34
+ "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
35
+ "add_special_tokens": false,
36
+ "eos_token": "<|eot_id|>"
37
+ },
38
+ "load_in_8bit": false,
39
+ "torch_dtype": "auto",
40
+ "use_flash_attention_2": true,
41
+ "device_map": "cuda:0",
42
+ "use_fast_tokenizer": true,
43
+ "leading_space": false,
44
+ "space_token": null,
45
+ "trust_remote_code": [
46
+ false
47
+ ],
48
+ "max_model_len": 8192
49
+ },
50
+ "task_params": {
51
+ "max_len": 4000,
52
+ "few_shot_count": 0,
53
+ "batch_size": 1,
54
+ "max_sample_per_dataset": 10000000000000,
55
+ "method": "generate"
56
+ }
57
+ }
llmtf_eval_k0_bs1/darumeru_USE_total.jsonl ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "darumeru/USE",
3
+ "results": {
4
+ "grade_norm": 0.07941176470588233
5
+ },
6
+ "leaderboard_result": 0.07941176470588233
7
+ }
llmtf_eval_k0_bs1/darumeru_cp_para_en.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
llmtf_eval_k0_bs1/darumeru_cp_para_en_params.jsonl ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "NousResearch/Meta-Llama-3-8B-Instruct",
5
+ "generation_config": {
6
+ "bos_token_id": 128000,
7
+ "do_sample": true,
8
+ "eos_token_id": [
9
+ 128001,
10
+ 128009
11
+ ],
12
+ "max_length": 8192,
13
+ "max_new_tokens": 1024,
14
+ "pad_token_id": 128001,
15
+ "stop_strings": [],
16
+ "temperature": 0.1,
17
+ "top_k": 40,
18
+ "top_p": 0.9,
19
+ "transformers_version": "4.38.2",
20
+ "trust_remote_code": [
21
+ false
22
+ ]
23
+ },
24
+ "conversation_template": {
25
+ "system_prompt": "",
26
+ "system_message_template": "",
27
+ "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
28
+ "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
29
+ "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
30
+ "user_role": "user",
31
+ "bot_role": "assistant",
32
+ "system_role": "system",
33
+ "global_prefix": "<|begin_of_text|>",
34
+ "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
35
+ "add_special_tokens": false,
36
+ "eos_token": "<|eot_id|>"
37
+ },
38
+ "load_in_8bit": false,
39
+ "torch_dtype": "auto",
40
+ "use_flash_attention_2": true,
41
+ "device_map": "cuda:0",
42
+ "use_fast_tokenizer": true,
43
+ "leading_space": false,
44
+ "space_token": null,
45
+ "trust_remote_code": [
46
+ false
47
+ ],
48
+ "max_model_len": 8192
49
+ },
50
+ "task_params": {
51
+ "max_len": 4000,
52
+ "few_shot_count": 0,
53
+ "batch_size": 1,
54
+ "max_sample_per_dataset": 10000000000000,
55
+ "method": "generate"
56
+ }
57
+ }
llmtf_eval_k0_bs1/darumeru_cp_para_en_total.jsonl ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "darumeru/cp_para_en",
3
+ "results": {
4
+ "symbol_per_token": 4.463061170262149,
5
+ "len": 0.9941296296409974,
6
+ "lcs": 0.9527227031116661
7
+ },
8
+ "leaderboard_result": 0.9527227031116661
9
+ }
llmtf_eval_k0_bs1/darumeru_cp_para_ru.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
llmtf_eval_k0_bs1/darumeru_cp_para_ru_params.jsonl ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "NousResearch/Meta-Llama-3-8B-Instruct",
5
+ "generation_config": {
6
+ "bos_token_id": 128000,
7
+ "do_sample": true,
8
+ "eos_token_id": [
9
+ 128001,
10
+ 128009
11
+ ],
12
+ "max_length": 8192,
13
+ "max_new_tokens": 1024,
14
+ "pad_token_id": 128001,
15
+ "stop_strings": [],
16
+ "temperature": 0.1,
17
+ "top_k": 40,
18
+ "top_p": 0.9,
19
+ "transformers_version": "4.38.2",
20
+ "trust_remote_code": [
21
+ false
22
+ ]
23
+ },
24
+ "conversation_template": {
25
+ "system_prompt": "",
26
+ "system_message_template": "",
27
+ "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
28
+ "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
29
+ "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
30
+ "user_role": "user",
31
+ "bot_role": "assistant",
32
+ "system_role": "system",
33
+ "global_prefix": "<|begin_of_text|>",
34
+ "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
35
+ "add_special_tokens": false,
36
+ "eos_token": "<|eot_id|>"
37
+ },
38
+ "load_in_8bit": false,
39
+ "torch_dtype": "auto",
40
+ "use_flash_attention_2": true,
41
+ "device_map": "cuda:0",
42
+ "use_fast_tokenizer": true,
43
+ "leading_space": false,
44
+ "space_token": null,
45
+ "trust_remote_code": [
46
+ false
47
+ ],
48
+ "max_model_len": 8192
49
+ },
50
+ "task_params": {
51
+ "max_len": 4000,
52
+ "few_shot_count": 0,
53
+ "batch_size": 1,
54
+ "max_sample_per_dataset": 10000000000000,
55
+ "method": "generate"
56
+ }
57
+ }
llmtf_eval_k0_bs1/darumeru_cp_para_ru_total.jsonl ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "darumeru/cp_para_ru",
3
+ "results": {
4
+ "symbol_per_token": 2.965959526577639,
5
+ "len": 0.9943389906706583,
6
+ "lcs": 0.9089360152526831
7
+ },
8
+ "leaderboard_result": 0.9089360152526831
9
+ }
llmtf_eval_k0_bs1/darumeru_cp_sent_en.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
llmtf_eval_k0_bs1/darumeru_cp_sent_en_params.jsonl ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "NousResearch/Meta-Llama-3-8B-Instruct",
5
+ "generation_config": {
6
+ "bos_token_id": 128000,
7
+ "do_sample": true,
8
+ "eos_token_id": [
9
+ 128001,
10
+ 128009
11
+ ],
12
+ "max_length": 8192,
13
+ "max_new_tokens": 128,
14
+ "pad_token_id": 128001,
15
+ "stop_strings": [],
16
+ "temperature": 0.1,
17
+ "top_k": 40,
18
+ "top_p": 0.9,
19
+ "transformers_version": "4.38.2",
20
+ "trust_remote_code": [
21
+ false
22
+ ]
23
+ },
24
+ "conversation_template": {
25
+ "system_prompt": "",
26
+ "system_message_template": "",
27
+ "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
28
+ "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
29
+ "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
30
+ "user_role": "user",
31
+ "bot_role": "assistant",
32
+ "system_role": "system",
33
+ "global_prefix": "<|begin_of_text|>",
34
+ "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
35
+ "add_special_tokens": false,
36
+ "eos_token": "<|eot_id|>"
37
+ },
38
+ "load_in_8bit": false,
39
+ "torch_dtype": "auto",
40
+ "use_flash_attention_2": true,
41
+ "device_map": "cuda:0",
42
+ "use_fast_tokenizer": true,
43
+ "leading_space": false,
44
+ "space_token": null,
45
+ "trust_remote_code": [
46
+ false
47
+ ],
48
+ "max_model_len": 8192
49
+ },
50
+ "task_params": {
51
+ "max_len": 4000,
52
+ "few_shot_count": 0,
53
+ "batch_size": 1,
54
+ "max_sample_per_dataset": 10000000000000,
55
+ "method": "generate"
56
+ }
57
+ }
llmtf_eval_k0_bs1/darumeru_cp_sent_en_total.jsonl ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "darumeru/cp_sent_en",
3
+ "results": {
4
+ "symbol_per_token": 4.424738337449315,
5
+ "len": 0.9996416196590585,
6
+ "lcs": 0.9958136839407484
7
+ },
8
+ "leaderboard_result": 0.9996416196590585
9
+ }
llmtf_eval_k0_bs1/darumeru_cp_sent_ru.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
llmtf_eval_k0_bs1/darumeru_cp_sent_ru_params.jsonl ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "NousResearch/Meta-Llama-3-8B-Instruct",
5
+ "generation_config": {
6
+ "bos_token_id": 128000,
7
+ "do_sample": true,
8
+ "eos_token_id": [
9
+ 128001,
10
+ 128009
11
+ ],
12
+ "max_length": 8192,
13
+ "max_new_tokens": 128,
14
+ "pad_token_id": 128001,
15
+ "stop_strings": [],
16
+ "temperature": 0.1,
17
+ "top_k": 40,
18
+ "top_p": 0.9,
19
+ "transformers_version": "4.38.2",
20
+ "trust_remote_code": [
21
+ false
22
+ ]
23
+ },
24
+ "conversation_template": {
25
+ "system_prompt": "",
26
+ "system_message_template": "",
27
+ "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
28
+ "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
29
+ "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
30
+ "user_role": "user",
31
+ "bot_role": "assistant",
32
+ "system_role": "system",
33
+ "global_prefix": "<|begin_of_text|>",
34
+ "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
35
+ "add_special_tokens": false,
36
+ "eos_token": "<|eot_id|>"
37
+ },
38
+ "load_in_8bit": false,
39
+ "torch_dtype": "auto",
40
+ "use_flash_attention_2": true,
41
+ "device_map": "cuda:0",
42
+ "use_fast_tokenizer": true,
43
+ "leading_space": false,
44
+ "space_token": null,
45
+ "trust_remote_code": [
46
+ false
47
+ ],
48
+ "max_model_len": 8192
49
+ },
50
+ "task_params": {
51
+ "max_len": 4000,
52
+ "few_shot_count": 0,
53
+ "batch_size": 1,
54
+ "max_sample_per_dataset": 10000000000000,
55
+ "method": "generate"
56
+ }
57
+ }
llmtf_eval_k0_bs1/darumeru_cp_sent_ru_total.jsonl ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "darumeru/cp_sent_ru",
3
+ "results": {
4
+ "symbol_per_token": 2.8288486051645507,
5
+ "len": 0.9929878804531707,
6
+ "lcs": 0.9502007070514518
7
+ },
8
+ "leaderboard_result": 0.9929878804531707
9
+ }
llmtf_eval_k0_bs1/darumeru_ruMMLU.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:454e30175faf86134597baf9cad35c4c5a62bdc849eee8d85a27c472c0d729cb
3
+ size 32909111
llmtf_eval_k0_bs1/darumeru_ruMMLU_params.jsonl ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "NousResearch/Meta-Llama-3-8B-Instruct",
5
+ "generation_config": {
6
+ "bos_token_id": 128000,
7
+ "do_sample": true,
8
+ "eos_token_id": [
9
+ 128001,
10
+ 128009
11
+ ],
12
+ "max_length": 8192,
13
+ "max_new_tokens": 64,
14
+ "pad_token_id": 128001,
15
+ "stop_strings": [],
16
+ "temperature": 0.1,
17
+ "top_k": 40,
18
+ "top_p": 0.9,
19
+ "transformers_version": "4.38.2",
20
+ "trust_remote_code": [
21
+ false
22
+ ]
23
+ },
24
+ "conversation_template": {
25
+ "system_prompt": "",
26
+ "system_message_template": "",
27
+ "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
28
+ "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
29
+ "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
30
+ "user_role": "user",
31
+ "bot_role": "assistant",
32
+ "system_role": "system",
33
+ "global_prefix": "<|begin_of_text|>",
34
+ "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
35
+ "add_special_tokens": false,
36
+ "eos_token": "<|eot_id|>"
37
+ },
38
+ "load_in_8bit": false,
39
+ "torch_dtype": "auto",
40
+ "use_flash_attention_2": true,
41
+ "device_map": "cuda:0",
42
+ "use_fast_tokenizer": true,
43
+ "leading_space": false,
44
+ "space_token": null,
45
+ "trust_remote_code": [
46
+ false
47
+ ],
48
+ "max_model_len": 8192
49
+ },
50
+ "task_params": {
51
+ "max_len": 4000,
52
+ "few_shot_count": 0,
53
+ "batch_size": 1,
54
+ "max_sample_per_dataset": 10000000000000,
55
+ "method": "calculate_tokens_proba"
56
+ }
57
+ }
llmtf_eval_k0_bs1/darumeru_ruMMLU_total.jsonl ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "darumeru/ruMMLU",
3
+ "results": {
4
+ "acc": 0.5046393295420533
5
+ },
6
+ "leaderboard_result": 0.5046393295420533
7
+ }
llmtf_eval_k0_bs1/darumeru_ruOpenBookQA.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
llmtf_eval_k0_bs1/darumeru_ruOpenBookQA_params.jsonl ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "NousResearch/Meta-Llama-3-8B-Instruct",
5
+ "generation_config": {
6
+ "bos_token_id": 128000,
7
+ "do_sample": true,
8
+ "eos_token_id": [
9
+ 128001,
10
+ 128009
11
+ ],
12
+ "max_length": 8192,
13
+ "max_new_tokens": 64,
14
+ "pad_token_id": 128001,
15
+ "stop_strings": [],
16
+ "temperature": 0.1,
17
+ "top_k": 40,
18
+ "top_p": 0.9,
19
+ "transformers_version": "4.38.2",
20
+ "trust_remote_code": [
21
+ false
22
+ ]
23
+ },
24
+ "conversation_template": {
25
+ "system_prompt": "",
26
+ "system_message_template": "",
27
+ "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
28
+ "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
29
+ "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
30
+ "user_role": "user",
31
+ "bot_role": "assistant",
32
+ "system_role": "system",
33
+ "global_prefix": "<|begin_of_text|>",
34
+ "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
35
+ "add_special_tokens": false,
36
+ "eos_token": "<|eot_id|>"
37
+ },
38
+ "load_in_8bit": false,
39
+ "torch_dtype": "auto",
40
+ "use_flash_attention_2": true,
41
+ "device_map": "cuda:0",
42
+ "use_fast_tokenizer": true,
43
+ "leading_space": false,
44
+ "space_token": null,
45
+ "trust_remote_code": [
46
+ false
47
+ ],
48
+ "max_model_len": 8192
49
+ },
50
+ "task_params": {
51
+ "max_len": 4000,
52
+ "few_shot_count": 0,
53
+ "batch_size": 1,
54
+ "max_sample_per_dataset": 10000000000000,
55
+ "method": "calculate_tokens_proba"
56
+ }
57
+ }
llmtf_eval_k0_bs1/darumeru_ruOpenBookQA_total.jsonl ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "darumeru/ruOpenBookQA",
3
+ "results": {
4
+ "acc": 0.6924398625429553,
5
+ "f1_macro": 0.6928205333186971
6
+ },
7
+ "leaderboard_result": 0.6926301979308263
8
+ }
llmtf_eval_k0_bs1/darumeru_ruTiE.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52057ed1300597156320b34738e0b48e92b4112bbd55d3c9b283972df4eb6e15
3
+ size 12832579
llmtf_eval_k0_bs1/darumeru_ruTiE_params.jsonl ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "NousResearch/Meta-Llama-3-8B-Instruct",
5
+ "generation_config": {
6
+ "bos_token_id": 128000,
7
+ "do_sample": true,
8
+ "eos_token_id": [
9
+ 128001,
10
+ 128009
11
+ ],
12
+ "max_length": 8192,
13
+ "max_new_tokens": 64,
14
+ "pad_token_id": 128001,
15
+ "stop_strings": [],
16
+ "temperature": 0.1,
17
+ "top_k": 40,
18
+ "top_p": 0.9,
19
+ "transformers_version": "4.38.2",
20
+ "trust_remote_code": [
21
+ false
22
+ ]
23
+ },
24
+ "conversation_template": {
25
+ "system_prompt": "",
26
+ "system_message_template": "",
27
+ "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
28
+ "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
29
+ "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
30
+ "user_role": "user",
31
+ "bot_role": "assistant",
32
+ "system_role": "system",
33
+ "global_prefix": "<|begin_of_text|>",
34
+ "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
35
+ "add_special_tokens": false,
36
+ "eos_token": "<|eot_id|>"
37
+ },
38
+ "load_in_8bit": false,
39
+ "torch_dtype": "auto",
40
+ "use_flash_attention_2": true,
41
+ "device_map": "cuda:0",
42
+ "use_fast_tokenizer": true,
43
+ "leading_space": false,
44
+ "space_token": null,
45
+ "trust_remote_code": [
46
+ false
47
+ ],
48
+ "max_model_len": 8192
49
+ },
50
+ "task_params": {
51
+ "max_len": 4000,
52
+ "few_shot_count": 0,
53
+ "batch_size": 1,
54
+ "max_sample_per_dataset": 10000000000000,
55
+ "method": "calculate_tokens_proba"
56
+ }
57
+ }
llmtf_eval_k0_bs1/darumeru_ruTiE_total.jsonl ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "darumeru/ruTiE",
3
+ "results": {
4
+ "acc": 0.3511627906976744
5
+ },
6
+ "leaderboard_result": 0.3511627906976744
7
+ }
llmtf_eval_k0_bs1/darumeru_ruWorldTree.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
llmtf_eval_k0_bs1/darumeru_ruWorldTree_params.jsonl ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "NousResearch/Meta-Llama-3-8B-Instruct",
5
+ "generation_config": {
6
+ "bos_token_id": 128000,
7
+ "do_sample": true,
8
+ "eos_token_id": [
9
+ 128001,
10
+ 128009
11
+ ],
12
+ "max_length": 8192,
13
+ "max_new_tokens": 64,
14
+ "pad_token_id": 128001,
15
+ "stop_strings": [],
16
+ "temperature": 0.1,
17
+ "top_k": 40,
18
+ "top_p": 0.9,
19
+ "transformers_version": "4.38.2",
20
+ "trust_remote_code": [
21
+ false
22
+ ]
23
+ },
24
+ "conversation_template": {
25
+ "system_prompt": "",
26
+ "system_message_template": "",
27
+ "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
28
+ "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
29
+ "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
30
+ "user_role": "user",
31
+ "bot_role": "assistant",
32
+ "system_role": "system",
33
+ "global_prefix": "<|begin_of_text|>",
34
+ "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
35
+ "add_special_tokens": false,
36
+ "eos_token": "<|eot_id|>"
37
+ },
38
+ "load_in_8bit": false,
39
+ "torch_dtype": "auto",
40
+ "use_flash_attention_2": true,
41
+ "device_map": "cuda:0",
42
+ "use_fast_tokenizer": true,
43
+ "leading_space": false,
44
+ "space_token": null,
45
+ "trust_remote_code": [
46
+ false
47
+ ],
48
+ "max_model_len": 8192
49
+ },
50
+ "task_params": {
51
+ "max_len": 4000,
52
+ "few_shot_count": 0,
53
+ "batch_size": 1,
54
+ "max_sample_per_dataset": 10000000000000,
55
+ "method": "calculate_tokens_proba"
56
+ }
57
+ }
llmtf_eval_k0_bs1/darumeru_ruWorldTree_total.jsonl ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "darumeru/ruWorldTree",
3
+ "results": {
4
+ "acc": 0.8476190476190476,
5
+ "f1_macro": 0.8445201637796824
6
+ },
7
+ "leaderboard_result": 0.8460696056993651
8
+ }
llmtf_eval_k0_bs1/evaluation_log.txt ADDED
@@ -0,0 +1,273 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ INFO: 2024-07-13 12:26:10,006: llmtf.base.evaluator: Starting eval on ['darumeru/multiq', 'darumeru/parus', 'darumeru/rcb', 'darumeru/ruopenbookqa', 'darumeru/rutie', 'darumeru/ruworldtree', 'darumeru/rwsd', 'darumeru/use', 'russiannlp/rucola_custom']
2
+ INFO: 2024-07-13 12:26:10,007: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [128001, 128009]
3
+ INFO: 2024-07-13 12:26:10,007: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
4
+ INFO: 2024-07-13 12:26:10,882: llmtf.base.evaluator: Starting eval on ['nlpcoreteam/rummlu']
5
+ INFO: 2024-07-13 12:26:10,882: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [128001, 128009]
6
+ INFO: 2024-07-13 12:26:10,882: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
7
+ INFO: 2024-07-13 12:26:10,986: llmtf.base.evaluator: Starting eval on ['darumeru/rummlu']
8
+ INFO: 2024-07-13 12:26:10,986: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [128001, 128009]
9
+ INFO: 2024-07-13 12:26:10,986: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
10
+ INFO: 2024-07-13 12:26:12,826: llmtf.base.evaluator: Starting eval on ['nlpcoreteam/enmmlu']
11
+ INFO: 2024-07-13 12:26:12,826: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [128001, 128009]
12
+ INFO: 2024-07-13 12:26:12,826: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
13
+ INFO: 2024-07-13 12:26:13,863: llmtf.base.evaluator: Starting eval on ['daru/treewayabstractive']
14
+ INFO: 2024-07-13 12:26:13,864: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [128001, 128009]
15
+ INFO: 2024-07-13 12:26:13,864: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
16
+ INFO: 2024-07-13 12:26:13,934: llmtf.base.darumeru/MultiQ: Loading Dataset: 3.93s
17
+ INFO: 2024-07-13 12:26:15,913: llmtf.base.evaluator: Starting eval on ['daru/treewayextractive']
18
+ INFO: 2024-07-13 12:26:15,914: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [128001, 128009]
19
+ INFO: 2024-07-13 12:26:15,914: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
20
+ INFO: 2024-07-13 12:26:17,900: llmtf.base.evaluator: Starting eval on ['darumeru/cp_sent_ru', 'darumeru/cp_sent_en', 'darumeru/cp_para_ru', 'darumeru/cp_para_en']
21
+ INFO: 2024-07-13 12:26:17,901: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [128001, 128009]
22
+ INFO: 2024-07-13 12:26:17,901: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
23
+ INFO: 2024-07-13 12:26:18,457: llmtf.base.daru/treewayabstractive: Loading Dataset: 4.59s
24
+ INFO: 2024-07-13 12:26:19,201: llmtf.base.darumeru/ruMMLU: Loading Dataset: 8.21s
25
+ INFO: 2024-07-13 12:26:20,501: llmtf.base.darumeru/cp_sent_ru: Loading Dataset: 2.60s
26
+ INFO: 2024-07-13 12:26:27,467: llmtf.base.daru/treewayextractive: Loading Dataset: 11.55s
27
+ INFO: 2024-07-13 12:28:34,411: llmtf.base.nlpcoreteam/enMMLU: Loading Dataset: 141.58s
28
+ INFO: 2024-07-13 12:28:36,431: llmtf.base.nlpcoreteam/ruMMLU: Loading Dataset: 145.55s
29
+ INFO: 2024-07-13 12:32:28,436: llmtf.base.darumeru/ruMMLU: Processing Dataset: 369.22s
30
+ INFO: 2024-07-13 12:32:28,453: llmtf.base.darumeru/ruMMLU: Results for darumeru/ruMMLU:
31
+ INFO: 2024-07-13 12:32:28,477: llmtf.base.darumeru/ruMMLU: {'acc': 0.5046393295420533}
32
+ INFO: 2024-07-13 12:32:28,511: llmtf.base.evaluator: Ended eval
33
+ INFO: 2024-07-13 12:32:28,516: llmtf.base.evaluator:
34
+ mean darumeru/ruMMLU
35
+ 0.505 0.505
36
+ INFO: 2024-07-13 12:33:16,551: llmtf.base.daru/treewayextractive: Processing Dataset: 409.08s
37
+ INFO: 2024-07-13 12:33:16,553: llmtf.base.daru/treewayextractive: Results for daru/treewayextractive:
38
+ INFO: 2024-07-13 12:33:16,780: llmtf.base.daru/treewayextractive: {'r-prec': 0.3960751082251082}
39
+ INFO: 2024-07-13 12:33:16,829: llmtf.base.evaluator: Ended eval
40
+ INFO: 2024-07-13 12:33:16,834: llmtf.base.evaluator:
41
+ mean daru/treewayextractive darumeru/ruMMLU
42
+ 0.450 0.396 0.505
43
+ INFO: 2024-07-13 12:35:38,012: llmtf.base.darumeru/cp_sent_ru: Processing Dataset: 557.51s
44
+ INFO: 2024-07-13 12:35:38,014: llmtf.base.darumeru/cp_sent_ru: Results for darumeru/cp_sent_ru:
45
+ INFO: 2024-07-13 12:35:38,031: llmtf.base.darumeru/cp_sent_ru: {'symbol_per_token': 2.8288486051645507, 'len': 0.9929878804531707, 'lcs': 0.9502007070514518}
46
+ INFO: 2024-07-13 12:35:38,032: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [128001, 128009]
47
+ INFO: 2024-07-13 12:35:38,032: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
48
+ INFO: 2024-07-13 12:35:40,708: llmtf.base.darumeru/cp_sent_en: Loading Dataset: 2.67s
49
+ INFO: 2024-07-13 12:36:22,266: llmtf.base.nlpcoreteam/enMMLU: Processing Dataset: 467.84s
50
+ INFO: 2024-07-13 12:36:22,281: llmtf.base.nlpcoreteam/enMMLU: Results for nlpcoreteam/enMMLU:
51
+ INFO: 2024-07-13 12:36:22,320: llmtf.base.nlpcoreteam/enMMLU: metric
52
+ subject
53
+ abstract_algebra 0.340000
54
+ anatomy 0.718519
55
+ astronomy 0.736842
56
+ business_ethics 0.720000
57
+ clinical_knowledge 0.735849
58
+ college_biology 0.791667
59
+ college_chemistry 0.470000
60
+ college_computer_science 0.590000
61
+ college_mathematics 0.300000
62
+ college_medicine 0.653179
63
+ college_physics 0.480392
64
+ computer_security 0.760000
65
+ conceptual_physics 0.565957
66
+ econometrics 0.517544
67
+ electrical_engineering 0.600000
68
+ elementary_mathematics 0.481481
69
+ formal_logic 0.523810
70
+ global_facts 0.410000
71
+ high_school_biology 0.800000
72
+ high_school_chemistry 0.551724
73
+ high_school_computer_science 0.730000
74
+ high_school_european_history 0.751515
75
+ high_school_geography 0.828283
76
+ high_school_government_and_politics 0.865285
77
+ high_school_macroeconomics 0.633333
78
+ high_school_mathematics 0.366667
79
+ high_school_microeconomics 0.747899
80
+ high_school_physics 0.423841
81
+ high_school_psychology 0.853211
82
+ high_school_statistics 0.532407
83
+ high_school_us_history 0.828431
84
+ high_school_world_history 0.835443
85
+ human_aging 0.721973
86
+ human_sexuality 0.778626
87
+ international_law 0.760331
88
+ jurisprudence 0.796296
89
+ logical_fallacies 0.779141
90
+ machine_learning 0.455357
91
+ management 0.805825
92
+ marketing 0.893162
93
+ medical_genetics 0.780000
94
+ miscellaneous 0.837803
95
+ moral_disputes 0.690751
96
+ moral_scenarios 0.289385
97
+ nutrition 0.764706
98
+ philosophy 0.720257
99
+ prehistory 0.709877
100
+ professional_accounting 0.531915
101
+ professional_law 0.479140
102
+ professional_medicine 0.731618
103
+ professional_psychology 0.674837
104
+ public_relations 0.654545
105
+ security_studies 0.714286
106
+ sociology 0.825871
107
+ us_foreign_policy 0.890000
108
+ virology 0.487952
109
+ world_religions 0.824561
110
+ INFO: 2024-07-13 12:36:22,327: llmtf.base.nlpcoreteam/enMMLU: metric
111
+ subject
112
+ STEM 0.554241
113
+ humanities 0.691457
114
+ other (business, health, misc.) 0.699464
115
+ social sciences 0.748643
116
+ INFO: 2024-07-13 12:36:22,379: llmtf.base.nlpcoreteam/enMMLU: {'acc': 0.6734513449759852}
117
+ INFO: 2024-07-13 12:36:22,409: llmtf.base.evaluator: Ended eval
118
+ INFO: 2024-07-13 12:36:22,416: llmtf.base.evaluator:
119
+ mean daru/treewayextractive darumeru/cp_sent_ru darumeru/ruMMLU nlpcoreteam/enMMLU
120
+ 0.642 0.396 0.993 0.505 0.673
121
+ INFO: 2024-07-13 12:37:10,439: llmtf.base.nlpcoreteam/ruMMLU: Processing Dataset: 514.01s
122
+ INFO: 2024-07-13 12:37:10,457: llmtf.base.nlpcoreteam/ruMMLU: Results for nlpcoreteam/ruMMLU:
123
+ INFO: 2024-07-13 12:37:10,498: llmtf.base.nlpcoreteam/ruMMLU: metric
124
+ subject
125
+ abstract_algebra 0.290000
126
+ anatomy 0.459259
127
+ astronomy 0.657895
128
+ business_ethics 0.600000
129
+ clinical_knowledge 0.562264
130
+ college_biology 0.548611
131
+ college_chemistry 0.400000
132
+ college_computer_science 0.470000
133
+ college_mathematics 0.330000
134
+ college_medicine 0.497110
135
+ college_physics 0.333333
136
+ computer_security 0.570000
137
+ conceptual_physics 0.493617
138
+ econometrics 0.342105
139
+ electrical_engineering 0.531034
140
+ elementary_mathematics 0.412698
141
+ formal_logic 0.380952
142
+ global_facts 0.350000
143
+ high_school_biology 0.635484
144
+ high_school_chemistry 0.428571
145
+ high_school_computer_science 0.620000
146
+ high_school_european_history 0.715152
147
+ high_school_geography 0.656566
148
+ high_school_government_and_politics 0.595855
149
+ high_school_macroeconomics 0.517949
150
+ high_school_mathematics 0.348148
151
+ high_school_microeconomics 0.495798
152
+ high_school_physics 0.350993
153
+ high_school_psychology 0.667890
154
+ high_school_statistics 0.458333
155
+ high_school_us_history 0.661765
156
+ high_school_world_history 0.708861
157
+ human_aging 0.556054
158
+ human_sexuality 0.664122
159
+ international_law 0.702479
160
+ jurisprudence 0.592593
161
+ logical_fallacies 0.527607
162
+ machine_learning 0.339286
163
+ management 0.669903
164
+ marketing 0.700855
165
+ medical_genetics 0.570000
166
+ miscellaneous 0.646232
167
+ moral_disputes 0.554913
168
+ moral_scenarios 0.248045
169
+ nutrition 0.594771
170
+ philosophy 0.565916
171
+ prehistory 0.558642
172
+ professional_accounting 0.386525
173
+ professional_law 0.362451
174
+ professional_medicine 0.522059
175
+ professional_psychology 0.480392
176
+ public_relations 0.563636
177
+ security_studies 0.620408
178
+ sociology 0.696517
179
+ us_foreign_policy 0.770000
180
+ virology 0.415663
181
+ world_religions 0.690058
182
+ INFO: 2024-07-13 12:37:10,507: llmtf.base.nlpcoreteam/ruMMLU: metric
183
+ subject
184
+ STEM 0.456556
185
+ humanities 0.559187
186
+ other (business, health, misc.) 0.537907
187
+ social sciences 0.589270
188
+ INFO: 2024-07-13 12:37:10,515: llmtf.base.nlpcoreteam/ruMMLU: {'acc': 0.5357299468552112}
189
+ INFO: 2024-07-13 12:37:10,547: llmtf.base.evaluator: Ended eval
190
+ INFO: 2024-07-13 12:37:10,554: llmtf.base.evaluator:
191
+ mean daru/treewayextractive darumeru/cp_sent_ru darumeru/ruMMLU nlpcoreteam/enMMLU nlpcoreteam/ruMMLU
192
+ 0.621 0.396 0.993 0.505 0.673 0.536
193
+ INFO: 2024-07-13 12:37:40,183: llmtf.base.darumeru/MultiQ: Processing Dataset: 686.25s
194
+ INFO: 2024-07-13 12:37:40,186: llmtf.base.darumeru/MultiQ: Results for darumeru/MultiQ:
195
+ INFO: 2024-07-13 12:37:40,191: llmtf.base.darumeru/MultiQ: {'f1': 0.33947318357368605, 'em': 0.21414913957934992}
196
+ INFO: 2024-07-13 12:37:40,196: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [128001, 128009]
197
+ INFO: 2024-07-13 12:37:40,196: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
198
+ INFO: 2024-07-13 12:37:42,624: llmtf.base.darumeru/PARus: Loading Dataset: 2.43s
199
+ INFO: 2024-07-13 12:37:48,888: llmtf.base.darumeru/PARus: Processing Dataset: 6.26s
200
+ INFO: 2024-07-13 12:37:48,890: llmtf.base.darumeru/PARus: Results for darumeru/PARus:
201
+ INFO: 2024-07-13 12:37:48,902: llmtf.base.darumeru/PARus: {'acc': 0.66}
202
+ INFO: 2024-07-13 12:37:48,904: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [128001, 128009]
203
+ INFO: 2024-07-13 12:37:48,904: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
204
+ INFO: 2024-07-13 12:37:51,352: llmtf.base.darumeru/RCB: Loading Dataset: 2.45s
205
+ INFO: 2024-07-13 12:37:59,057: llmtf.base.darumeru/RCB: Processing Dataset: 7.69s
206
+ INFO: 2024-07-13 12:37:59,059: llmtf.base.darumeru/RCB: Results for darumeru/RCB:
207
+ INFO: 2024-07-13 12:37:59,078: llmtf.base.darumeru/RCB: {'acc': 0.5, 'f1_macro': 0.43018975381906}
208
+ INFO: 2024-07-13 12:37:59,080: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [128001, 128009]
209
+ INFO: 2024-07-13 12:37:59,080: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
210
+ INFO: 2024-07-13 12:38:01,863: llmtf.base.darumeru/ruOpenBookQA: Loading Dataset: 2.78s
211
+ INFO: 2024-07-13 12:39:16,087: llmtf.base.darumeru/ruOpenBookQA: Processing Dataset: 74.22s
212
+ INFO: 2024-07-13 12:39:16,089: llmtf.base.darumeru/ruOpenBookQA: Results for darumeru/ruOpenBookQA:
213
+ INFO: 2024-07-13 12:39:16,102: llmtf.base.darumeru/ruOpenBookQA: {'acc': 0.6924398625429553, 'f1_macro': 0.6928205333186971}
214
+ INFO: 2024-07-13 12:39:16,109: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [128001, 128009]
215
+ INFO: 2024-07-13 12:39:16,109: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
216
+ INFO: 2024-07-13 12:39:23,241: llmtf.base.darumeru/ruTiE: Loading Dataset: 7.13s
217
+ INFO: 2024-07-13 12:43:16,950: llmtf.base.darumeru/cp_sent_en: Processing Dataset: 456.24s
218
+ INFO: 2024-07-13 12:43:16,970: llmtf.base.darumeru/cp_sent_en: Results for darumeru/cp_sent_en:
219
+ INFO: 2024-07-13 12:43:16,991: llmtf.base.darumeru/cp_sent_en: {'symbol_per_token': 4.424738337449315, 'len': 0.9996416196590585, 'lcs': 0.9958136839407484}
220
+ INFO: 2024-07-13 12:43:16,992: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [128001, 128009]
221
+ INFO: 2024-07-13 12:43:16,992: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
222
+ INFO: 2024-07-13 12:43:19,141: llmtf.base.darumeru/cp_para_ru: Loading Dataset: 2.15s
223
+ INFO: 2024-07-13 12:43:54,391: llmtf.base.darumeru/ruTiE: Processing Dataset: 271.14s
224
+ INFO: 2024-07-13 12:43:54,392: llmtf.base.darumeru/ruTiE: Results for darumeru/ruTiE:
225
+ INFO: 2024-07-13 12:43:54,420: llmtf.base.darumeru/ruTiE: {'acc': 0.3511627906976744}
226
+ INFO: 2024-07-13 12:43:54,423: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [128001, 128009]
227
+ INFO: 2024-07-13 12:43:54,423: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
228
+ INFO: 2024-07-13 12:43:57,142: llmtf.base.darumeru/ruWorldTree: Loading Dataset: 2.72s
229
+ INFO: 2024-07-13 12:44:00,566: llmtf.base.darumeru/ruWorldTree: Processing Dataset: 3.42s
230
+ INFO: 2024-07-13 12:44:00,583: llmtf.base.darumeru/ruWorldTree: Results for darumeru/ruWorldTree:
231
+ INFO: 2024-07-13 12:44:00,589: llmtf.base.darumeru/ruWorldTree: {'acc': 0.8476190476190476, 'f1_macro': 0.8445201637796824}
232
+ INFO: 2024-07-13 12:44:00,590: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [128001, 128009]
233
+ INFO: 2024-07-13 12:44:00,590: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
234
+ INFO: 2024-07-13 12:44:02,920: llmtf.base.darumeru/RWSD: Loading Dataset: 2.33s
235
+ INFO: 2024-07-13 12:44:10,009: llmtf.base.darumeru/RWSD: Processing Dataset: 7.09s
236
+ INFO: 2024-07-13 12:44:10,025: llmtf.base.darumeru/RWSD: Results for darumeru/RWSD:
237
+ INFO: 2024-07-13 12:44:10,029: llmtf.base.darumeru/RWSD: {'acc': 0.5441176470588235}
238
+ INFO: 2024-07-13 12:44:10,030: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [128001, 128009]
239
+ INFO: 2024-07-13 12:44:10,030: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
240
+ INFO: 2024-07-13 12:44:12,714: llmtf.base.darumeru/USE: Loading Dataset: 2.68s
241
+ INFO: 2024-07-13 12:55:27,344: llmtf.base.darumeru/USE: Processing Dataset: 674.63s
242
+ INFO: 2024-07-13 12:55:27,350: llmtf.base.darumeru/USE: Results for darumeru/USE:
243
+ INFO: 2024-07-13 12:55:27,461: llmtf.base.darumeru/USE: {'grade_norm': 0.07941176470588233}
244
+ INFO: 2024-07-13 12:55:27,464: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [128001, 128009]
245
+ INFO: 2024-07-13 12:55:27,464: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
246
+ INFO: 2024-07-13 12:55:30,023: llmtf.base.darumeru/cp_para_ru: Processing Dataset: 730.88s
247
+ INFO: 2024-07-13 12:55:30,025: llmtf.base.darumeru/cp_para_ru: Results for darumeru/cp_para_ru:
248
+ INFO: 2024-07-13 12:55:30,028: llmtf.base.darumeru/cp_para_ru: {'symbol_per_token': 2.965959526577639, 'len': 0.9943389906706583, 'lcs': 0.9089360152526831}
249
+ INFO: 2024-07-13 12:55:30,029: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [128001, 128009]
250
+ INFO: 2024-07-13 12:55:30,029: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
251
+ INFO: 2024-07-13 12:55:32,480: llmtf.base.russiannlp/rucola_custom: Loading Dataset: 5.02s
252
+ INFO: 2024-07-13 12:55:59,692: llmtf.base.darumeru/cp_para_en: Loading Dataset: 29.66s
253
+ INFO: 2024-07-13 12:56:23,955: llmtf.base.daru/treewayabstractive: Processing Dataset: 1805.50s
254
+ INFO: 2024-07-13 12:56:23,957: llmtf.base.daru/treewayabstractive: Results for daru/treewayabstractive:
255
+ INFO: 2024-07-13 12:56:23,966: llmtf.base.daru/treewayabstractive: {'rouge1': 0.35429775115253526, 'rouge2': 0.12889173695955833}
256
+ INFO: 2024-07-13 12:56:23,968: llmtf.base.evaluator: Ended eval
257
+ INFO: 2024-07-13 12:56:24,039: llmtf.base.evaluator:
258
+ mean daru/treewayabstractive daru/treewayextractive darumeru/MultiQ darumeru/PARus darumeru/RCB darumeru/RWSD darumeru/USE darumeru/cp_para_ru darumeru/cp_sent_en darumeru/cp_sent_ru darumeru/ruMMLU darumeru/ruOpenBookQA darumeru/ruTiE darumeru/ruWorldTree nlpcoreteam/enMMLU nlpcoreteam/ruMMLU
259
+ 0.573 0.242 0.396 0.277 0.660 0.465 0.544 0.079 0.909 1.000 0.993 0.505 0.693 0.351 0.846 0.673 0.536
260
+ INFO: 2024-07-13 12:56:59,954: llmtf.base.russiannlp/rucola_custom: Processing Dataset: 87.47s
261
+ INFO: 2024-07-13 12:56:59,973: llmtf.base.russiannlp/rucola_custom: Results for russiannlp/rucola_custom:
262
+ INFO: 2024-07-13 12:57:00,001: llmtf.base.russiannlp/rucola_custom: {'acc': 0.7068532472192322, 'mcc': 0.2623100586905413}
263
+ INFO: 2024-07-13 12:57:00,005: llmtf.base.evaluator: Ended eval
264
+ INFO: 2024-07-13 12:57:00,016: llmtf.base.evaluator:
265
+ mean daru/treewayabstractive daru/treewayextractive darumeru/MultiQ darumeru/PARus darumeru/RCB darumeru/RWSD darumeru/USE darumeru/cp_para_ru darumeru/cp_sent_en darumeru/cp_sent_ru darumeru/ruMMLU darumeru/ruOpenBookQA darumeru/ruTiE darumeru/ruWorldTree nlpcoreteam/enMMLU nlpcoreteam/ruMMLU russiannlp/rucola_custom
266
+ 0.568 0.242 0.396 0.277 0.660 0.465 0.544 0.079 0.909 1.000 0.993 0.505 0.693 0.351 0.846 0.673 0.536 0.485
267
+ INFO: 2024-07-13 13:05:43,824: llmtf.base.darumeru/cp_para_en: Processing Dataset: 584.13s
268
+ INFO: 2024-07-13 13:05:43,847: llmtf.base.darumeru/cp_para_en: Results for darumeru/cp_para_en:
269
+ INFO: 2024-07-13 13:05:43,850: llmtf.base.darumeru/cp_para_en: {'symbol_per_token': 4.463061170262149, 'len': 0.9941296296409974, 'lcs': 0.9527227031116661}
270
+ INFO: 2024-07-13 13:05:43,851: llmtf.base.evaluator: Ended eval
271
+ INFO: 2024-07-13 13:05:43,862: llmtf.base.evaluator:
272
+ mean daru/treewayabstractive daru/treewayextractive darumeru/MultiQ darumeru/PARus darumeru/RCB darumeru/RWSD darumeru/USE darumeru/cp_para_en darumeru/cp_para_ru darumeru/cp_sent_en darumeru/cp_sent_ru darumeru/ruMMLU darumeru/ruOpenBookQA darumeru/ruTiE darumeru/ruWorldTree nlpcoreteam/enMMLU nlpcoreteam/ruMMLU russiannlp/rucola_custom
273
+ 0.589 0.242 0.396 0.277 0.660 0.465 0.544 0.079 0.953 0.909 1.000 0.993 0.505 0.693 0.351 0.846 0.673 0.536 0.485
llmtf_eval_k0_bs1/evaluation_results.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ mean daru/treewayabstractive daru/treewayextractive darumeru/MultiQ darumeru/PARus darumeru/RCB darumeru/RWSD darumeru/USE darumeru/cp_para_en darumeru/cp_para_ru darumeru/cp_sent_en darumeru/cp_sent_ru darumeru/ruMMLU darumeru/ruOpenBookQA darumeru/ruTiE darumeru/ruWorldTree nlpcoreteam/enMMLU nlpcoreteam/ruMMLU russiannlp/rucola_custom
2
+ 0.589 0.242 0.396 0.277 0.660 0.465 0.544 0.079 0.953 0.909 1.000 0.993 0.505 0.693 0.351 0.846 0.673 0.536 0.485
llmtf_eval_k0_bs1/nlpcoreteam_enMMLU.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a3488fe16dd4c6cc5496a219929273e2746e659cceba1baa45750cf002c05f5
3
+ size 38085342
llmtf_eval_k0_bs1/nlpcoreteam_enMMLU_params.jsonl ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "NousResearch/Meta-Llama-3-8B-Instruct",
5
+ "generation_config": {
6
+ "bos_token_id": 128000,
7
+ "do_sample": true,
8
+ "eos_token_id": [
9
+ 128001,
10
+ 128009
11
+ ],
12
+ "max_length": 8192,
13
+ "max_new_tokens": 64,
14
+ "pad_token_id": 128001,
15
+ "stop_strings": [],
16
+ "temperature": 0.1,
17
+ "top_k": 40,
18
+ "top_p": 0.9,
19
+ "transformers_version": "4.38.2",
20
+ "trust_remote_code": [
21
+ false
22
+ ]
23
+ },
24
+ "conversation_template": {
25
+ "system_prompt": "",
26
+ "system_message_template": "",
27
+ "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
28
+ "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
29
+ "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
30
+ "user_role": "user",
31
+ "bot_role": "assistant",
32
+ "system_role": "system",
33
+ "global_prefix": "<|begin_of_text|>",
34
+ "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
35
+ "add_special_tokens": false,
36
+ "eos_token": "<|eot_id|>"
37
+ },
38
+ "load_in_8bit": false,
39
+ "torch_dtype": "auto",
40
+ "use_flash_attention_2": true,
41
+ "device_map": "cuda:0",
42
+ "use_fast_tokenizer": true,
43
+ "leading_space": false,
44
+ "space_token": null,
45
+ "trust_remote_code": [
46
+ false
47
+ ],
48
+ "max_model_len": 8192
49
+ },
50
+ "task_params": {
51
+ "max_len": 4000,
52
+ "few_shot_count": 0,
53
+ "batch_size": 1,
54
+ "max_sample_per_dataset": 10000000000000,
55
+ "method": "calculate_tokens_proba"
56
+ }
57
+ }