RefalMachine committed on
Commit
ae34f4f
·
verified ·
1 Parent(s): 4fec6b2

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -84,3 +84,5 @@ llmtf_eval_k5_bs8/darumeru_ruOpenBookQA.jsonl filter=lfs diff=lfs merge=lfs -tex
84
  llmtf_eval_k5_bs8/darumeru_ruTiE.jsonl filter=lfs diff=lfs merge=lfs -text
85
  llmtf_eval_k5_bs8/nlpcoreteam_enMMLU.jsonl filter=lfs diff=lfs merge=lfs -text
86
  llmtf_eval_k5_bs8/nlpcoreteam_ruMMLU.jsonl filter=lfs diff=lfs merge=lfs -text
 
 
 
84
  llmtf_eval_k5_bs8/darumeru_ruTiE.jsonl filter=lfs diff=lfs merge=lfs -text
85
  llmtf_eval_k5_bs8/nlpcoreteam_enMMLU.jsonl filter=lfs diff=lfs merge=lfs -text
86
  llmtf_eval_k5_bs8/nlpcoreteam_ruMMLU.jsonl filter=lfs diff=lfs merge=lfs -text
87
+ llmtf_eval_k5_bs8/darumeru_USE.jsonl filter=lfs diff=lfs merge=lfs -text
88
+ llmtf_eval_k5_bs8/russiannlp_rucola_custom.jsonl filter=lfs diff=lfs merge=lfs -text
llmtf_eval_k5_bs8/darumeru_USE.jsonl CHANGED
The diff for this file is too large to render. See raw diff
 
llmtf_eval_k5_bs8/darumeru_USE_params.jsonl ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "NousResearch/Meta-Llama-3-8B-Instruct",
5
+ "generation_config": {
6
+ "bos_token_id": 128000,
7
+ "do_sample": true,
8
+ "eos_token_id": [
9
+ 128001,
10
+ 128009
11
+ ],
12
+ "max_length": 8192,
13
+ "max_new_tokens": 64,
14
+ "pad_token_id": 128001,
15
+ "stop_strings": [],
16
+ "temperature": 0.1,
17
+ "top_k": 40,
18
+ "top_p": 0.9,
19
+ "transformers_version": "4.38.2",
20
+ "trust_remote_code": [
21
+ false
22
+ ]
23
+ },
24
+ "conversation_template": {
25
+ "system_prompt": "",
26
+ "system_message_template": "",
27
+ "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
28
+ "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
29
+ "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
30
+ "user_role": "user",
31
+ "bot_role": "assistant",
32
+ "system_role": "system",
33
+ "global_prefix": "<|begin_of_text|>",
34
+ "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
35
+ "add_special_tokens": false,
36
+ "eos_token": "<|eot_id|>"
37
+ },
38
+ "load_in_8bit": false,
39
+ "torch_dtype": "auto",
40
+ "use_flash_attention_2": true,
41
+ "device_map": "cuda:0",
42
+ "use_fast_tokenizer": true,
43
+ "leading_space": false,
44
+ "space_token": null,
45
+ "trust_remote_code": [
46
+ false
47
+ ],
48
+ "max_model_len": 8192
49
+ },
50
+ "task_params": {
51
+ "max_len": 4000,
52
+ "few_shot_count": 5,
53
+ "batch_size": 8,
54
+ "max_sample_per_dataset": 10000000000000,
55
+ "method": "generate"
56
+ }
57
+ }
llmtf_eval_k5_bs8/darumeru_USE_total.jsonl ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "darumeru/USE",
3
+ "results": {
4
+ "grade_norm": 0.18725490196078434
5
+ },
6
+ "leaderboard_result": 0.18725490196078434
7
+ }
llmtf_eval_k5_bs8/darumeru_ruMMLU.jsonl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:21fd0a778fd73c6ca1a894afced27d7e372e1ca1463aaabad6bd7eb6120223f2
3
- size 71218197
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:93da8b535385d1a614578802c57d0c2f7e3bb75760324e06095cacbbbe551288
3
+ size 95757423
llmtf_eval_k5_bs8/darumeru_ruMMLU_params.jsonl ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "NousResearch/Meta-Llama-3-8B-Instruct",
5
+ "generation_config": {
6
+ "bos_token_id": 128000,
7
+ "do_sample": true,
8
+ "eos_token_id": [
9
+ 128001,
10
+ 128009
11
+ ],
12
+ "max_length": 8192,
13
+ "max_new_tokens": 64,
14
+ "pad_token_id": 128001,
15
+ "stop_strings": [],
16
+ "temperature": 0.1,
17
+ "top_k": 40,
18
+ "top_p": 0.9,
19
+ "transformers_version": "4.38.2",
20
+ "trust_remote_code": [
21
+ false
22
+ ]
23
+ },
24
+ "conversation_template": {
25
+ "system_prompt": "",
26
+ "system_message_template": "",
27
+ "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
28
+ "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
29
+ "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
30
+ "user_role": "user",
31
+ "bot_role": "assistant",
32
+ "system_role": "system",
33
+ "global_prefix": "<|begin_of_text|>",
34
+ "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
35
+ "add_special_tokens": false,
36
+ "eos_token": "<|eot_id|>"
37
+ },
38
+ "load_in_8bit": false,
39
+ "torch_dtype": "auto",
40
+ "use_flash_attention_2": true,
41
+ "device_map": "cuda:0",
42
+ "use_fast_tokenizer": true,
43
+ "leading_space": false,
44
+ "space_token": null,
45
+ "trust_remote_code": [
46
+ false
47
+ ],
48
+ "max_model_len": 8192
49
+ },
50
+ "task_params": {
51
+ "max_len": 4000,
52
+ "few_shot_count": 5,
53
+ "batch_size": 8,
54
+ "max_sample_per_dataset": 10000000000000,
55
+ "method": "calculate_tokens_proba"
56
+ }
57
+ }
llmtf_eval_k5_bs8/darumeru_ruMMLU_total.jsonl ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "darumeru/ruMMLU",
3
+ "results": {
4
+ "acc": 0.5138182180983737
5
+ },
6
+ "leaderboard_result": 0.5138182180983737
7
+ }
llmtf_eval_k5_bs8/evaluation_log.txt CHANGED
@@ -107,3 +107,167 @@ INFO: 2024-07-13 14:46:14,658: llmtf.base.evaluator: Ended eval
107
  INFO: 2024-07-13 14:46:14,684: llmtf.base.evaluator:
108
  mean daru/treewayabstractive daru/treewayextractive darumeru/MultiQ darumeru/PARus darumeru/RCB darumeru/RWSD darumeru/cp_para_en darumeru/cp_para_ru darumeru/cp_sent_en darumeru/cp_sent_ru darumeru/ruOpenBookQA darumeru/ruTiE darumeru/ruWorldTree
109
  0.684 0.247 0.397 0.432 0.850 0.481 0.544 0.973 0.977 1.000 0.998 0.768 0.351 0.875
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
  INFO: 2024-07-13 14:46:14,684: llmtf.base.evaluator:
108
  mean daru/treewayabstractive daru/treewayextractive darumeru/MultiQ darumeru/PARus darumeru/RCB darumeru/RWSD darumeru/cp_para_en darumeru/cp_para_ru darumeru/cp_sent_en darumeru/cp_sent_ru darumeru/ruOpenBookQA darumeru/ruTiE darumeru/ruWorldTree
109
  0.684 0.247 0.397 0.432 0.850 0.481 0.544 0.973 0.977 1.000 0.998 0.768 0.351 0.875
110
+ INFO: 2024-07-13 14:48:58,982: llmtf.base.darumeru/USE: Processing Dataset: 204.37s
111
+ INFO: 2024-07-13 14:48:58,999: llmtf.base.darumeru/USE: Results for darumeru/USE:
112
+ INFO: 2024-07-13 14:48:59,004: llmtf.base.darumeru/USE: {'grade_norm': 0.18725490196078434}
113
+ INFO: 2024-07-13 14:48:59,010: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [128001, 128009]
114
+ INFO: 2024-07-13 14:48:59,010: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
115
+ INFO: 2024-07-13 14:49:19,451: llmtf.base.russiannlp/rucola_custom: Loading Dataset: 20.44s
116
+ INFO: 2024-07-13 14:50:14,250: llmtf.base.nlpcoreteam/enMMLU: Processing Dataset: 1036.87s
117
+ INFO: 2024-07-13 14:50:14,255: llmtf.base.nlpcoreteam/enMMLU: Results for nlpcoreteam/enMMLU:
118
+ INFO: 2024-07-13 14:50:14,302: llmtf.base.nlpcoreteam/enMMLU: metric
119
+ subject
120
+ abstract_algebra 0.350000
121
+ anatomy 0.696296
122
+ astronomy 0.730263
123
+ business_ethics 0.700000
124
+ clinical_knowledge 0.754717
125
+ college_biology 0.812500
126
+ college_chemistry 0.500000
127
+ college_computer_science 0.590000
128
+ college_mathematics 0.330000
129
+ college_medicine 0.670520
130
+ college_physics 0.470588
131
+ computer_security 0.780000
132
+ conceptual_physics 0.570213
133
+ econometrics 0.561404
134
+ electrical_engineering 0.634483
135
+ elementary_mathematics 0.439153
136
+ formal_logic 0.507937
137
+ global_facts 0.430000
138
+ high_school_biology 0.800000
139
+ high_school_chemistry 0.517241
140
+ high_school_computer_science 0.760000
141
+ high_school_european_history 0.787879
142
+ high_school_geography 0.843434
143
+ high_school_government_and_politics 0.922280
144
+ high_school_macroeconomics 0.671795
145
+ high_school_mathematics 0.381481
146
+ high_school_microeconomics 0.764706
147
+ high_school_physics 0.417219
148
+ high_school_psychology 0.847706
149
+ high_school_statistics 0.537037
150
+ high_school_us_history 0.833333
151
+ high_school_world_history 0.835443
152
+ human_aging 0.730942
153
+ human_sexuality 0.801527
154
+ international_law 0.818182
155
+ jurisprudence 0.759259
156
+ logical_fallacies 0.766871
157
+ machine_learning 0.544643
158
+ management 0.825243
159
+ marketing 0.901709
160
+ medical_genetics 0.830000
161
+ miscellaneous 0.842912
162
+ moral_disputes 0.751445
163
+ moral_scenarios 0.497207
164
+ nutrition 0.754902
165
+ philosophy 0.720257
166
+ prehistory 0.753086
167
+ professional_accounting 0.556738
168
+ professional_law 0.483051
169
+ professional_medicine 0.742647
170
+ professional_psychology 0.717320
171
+ public_relations 0.690909
172
+ security_studies 0.722449
173
+ sociology 0.840796
174
+ us_foreign_policy 0.840000
175
+ virology 0.512048
176
+ world_religions 0.818713
177
+ INFO: 2024-07-13 14:50:14,310: llmtf.base.nlpcoreteam/enMMLU: metric
178
+ subject
179
+ STEM 0.564712
180
+ humanities 0.717897
181
+ other (business, health, misc.) 0.710620
182
+ social sciences 0.768694
183
+ INFO: 2024-07-13 14:50:14,318: llmtf.base.nlpcoreteam/enMMLU: {'acc': 0.6904807286717012}
184
+ INFO: 2024-07-13 14:50:14,385: llmtf.base.evaluator: Ended eval
185
+ INFO: 2024-07-13 14:50:14,399: llmtf.base.evaluator:
186
+ mean daru/treewayabstractive daru/treewayextractive darumeru/MultiQ darumeru/PARus darumeru/RCB darumeru/RWSD darumeru/USE darumeru/cp_para_en darumeru/cp_para_ru darumeru/cp_sent_en darumeru/cp_sent_ru darumeru/ruOpenBookQA darumeru/ruTiE darumeru/ruWorldTree nlpcoreteam/enMMLU
187
+ 0.651 0.247 0.397 0.432 0.850 0.481 0.544 0.187 0.973 0.977 1.000 0.998 0.768 0.351 0.875 0.690
188
+ INFO: 2024-07-13 14:51:55,784: llmtf.base.darumeru/ruMMLU: Processing Dataset: 1262.30s
189
+ INFO: 2024-07-13 14:51:55,788: llmtf.base.darumeru/ruMMLU: Results for darumeru/ruMMLU:
190
+ INFO: 2024-07-13 14:51:55,799: llmtf.base.darumeru/ruMMLU: {'acc': 0.5138182180983737}
191
+ INFO: 2024-07-13 14:51:55,888: llmtf.base.evaluator: Ended eval
192
+ INFO: 2024-07-13 14:51:55,906: llmtf.base.evaluator:
193
+ mean daru/treewayabstractive daru/treewayextractive darumeru/MultiQ darumeru/PARus darumeru/RCB darumeru/RWSD darumeru/USE darumeru/cp_para_en darumeru/cp_para_ru darumeru/cp_sent_en darumeru/cp_sent_ru darumeru/ruMMLU darumeru/ruOpenBookQA darumeru/ruTiE darumeru/ruWorldTree nlpcoreteam/enMMLU
194
+ 0.643 0.247 0.397 0.432 0.850 0.481 0.544 0.187 0.973 0.977 1.000 0.998 0.514 0.768 0.351 0.875 0.690
195
+ INFO: 2024-07-13 14:52:18,001: llmtf.base.russiannlp/rucola_custom: Processing Dataset: 178.55s
196
+ INFO: 2024-07-13 14:52:18,002: llmtf.base.russiannlp/rucola_custom: Results for russiannlp/rucola_custom:
197
+ INFO: 2024-07-13 14:52:18,035: llmtf.base.russiannlp/rucola_custom: {'acc': 0.7115177610333692, 'mcc': 0.3362227509262135}
198
+ INFO: 2024-07-13 14:52:18,046: llmtf.base.evaluator: Ended eval
199
+ INFO: 2024-07-13 14:52:18,077: llmtf.base.evaluator:
200
+ mean daru/treewayabstractive daru/treewayextractive darumeru/MultiQ darumeru/PARus darumeru/RCB darumeru/RWSD darumeru/USE darumeru/cp_para_en darumeru/cp_para_ru darumeru/cp_sent_en darumeru/cp_sent_ru darumeru/ruMMLU darumeru/ruOpenBookQA darumeru/ruTiE darumeru/ruWorldTree nlpcoreteam/enMMLU russiannlp/rucola_custom
201
+ 0.636 0.247 0.397 0.432 0.850 0.481 0.544 0.187 0.973 0.977 1.000 0.998 0.514 0.768 0.351 0.875 0.690 0.524
202
+ INFO: 2024-07-13 14:59:07,852: llmtf.base.nlpcoreteam/ruMMLU: Processing Dataset: 1522.57s
203
+ INFO: 2024-07-13 14:59:07,871: llmtf.base.nlpcoreteam/ruMMLU: Results for nlpcoreteam/ruMMLU:
204
+ INFO: 2024-07-13 14:59:07,917: llmtf.base.nlpcoreteam/ruMMLU: metric
205
+ subject
206
+ abstract_algebra 0.330000
207
+ anatomy 0.511111
208
+ astronomy 0.651316
209
+ business_ethics 0.680000
210
+ clinical_knowledge 0.588679
211
+ college_biology 0.534722
212
+ college_chemistry 0.480000
213
+ college_computer_science 0.520000
214
+ college_mathematics 0.350000
215
+ college_medicine 0.549133
216
+ college_physics 0.352941
217
+ computer_security 0.720000
218
+ conceptual_physics 0.540426
219
+ econometrics 0.438596
220
+ electrical_engineering 0.572414
221
+ elementary_mathematics 0.417989
222
+ formal_logic 0.396825
223
+ global_facts 0.370000
224
+ high_school_biology 0.664516
225
+ high_school_chemistry 0.394089
226
+ high_school_computer_science 0.690000
227
+ high_school_european_history 0.763636
228
+ high_school_geography 0.666667
229
+ high_school_government_and_politics 0.647668
230
+ high_school_macroeconomics 0.553846
231
+ high_school_mathematics 0.348148
232
+ high_school_microeconomics 0.546218
233
+ high_school_physics 0.410596
234
+ high_school_psychology 0.682569
235
+ high_school_statistics 0.449074
236
+ high_school_us_history 0.691176
237
+ high_school_world_history 0.734177
238
+ human_aging 0.538117
239
+ human_sexuality 0.641221
240
+ international_law 0.743802
241
+ jurisprudence 0.657407
242
+ logical_fallacies 0.558282
243
+ machine_learning 0.401786
244
+ management 0.689320
245
+ marketing 0.730769
246
+ medical_genetics 0.670000
247
+ miscellaneous 0.650064
248
+ moral_disputes 0.630058
249
+ moral_scenarios 0.382123
250
+ nutrition 0.604575
251
+ philosophy 0.614148
252
+ prehistory 0.574074
253
+ professional_accounting 0.397163
254
+ professional_law 0.397001
255
+ professional_medicine 0.514706
256
+ professional_psychology 0.514706
257
+ public_relations 0.609091
258
+ security_studies 0.657143
259
+ sociology 0.676617
260
+ us_foreign_policy 0.740000
261
+ virology 0.457831
262
+ world_religions 0.695906
263
+ INFO: 2024-07-13 14:59:07,924: llmtf.base.nlpcoreteam/ruMMLU: metric
264
+ subject
265
+ STEM 0.490445
266
+ humanities 0.602971
267
+ other (business, health, misc.) 0.567962
268
+ social sciences 0.614529
269
+ INFO: 2024-07-13 14:59:07,947: llmtf.base.nlpcoreteam/ruMMLU: {'acc': 0.5689766403256171}
270
+ INFO: 2024-07-13 14:59:08,029: llmtf.base.evaluator: Ended eval
271
+ INFO: 2024-07-13 14:59:08,049: llmtf.base.evaluator:
272
+ mean daru/treewayabstractive daru/treewayextractive darumeru/MultiQ darumeru/PARus darumeru/RCB darumeru/RWSD darumeru/USE darumeru/cp_para_en darumeru/cp_para_ru darumeru/cp_sent_en darumeru/cp_sent_ru darumeru/ruMMLU darumeru/ruOpenBookQA darumeru/ruTiE darumeru/ruWorldTree nlpcoreteam/enMMLU nlpcoreteam/ruMMLU russiannlp/rucola_custom
273
+ 0.632 0.247 0.397 0.432 0.850 0.481 0.544 0.187 0.973 0.977 1.000 0.998 0.514 0.768 0.351 0.875 0.690 0.569 0.524
llmtf_eval_k5_bs8/evaluation_results.txt CHANGED
@@ -1,2 +1,2 @@
1
- mean daru/treewayabstractive daru/treewayextractive darumeru/MultiQ darumeru/PARus darumeru/RCB darumeru/RWSD darumeru/cp_para_en darumeru/cp_para_ru darumeru/cp_sent_en darumeru/cp_sent_ru darumeru/ruOpenBookQA darumeru/ruTiE darumeru/ruWorldTree
2
- 0.684 0.247 0.397 0.432 0.850 0.481 0.544 0.973 0.977 1.000 0.998 0.768 0.351 0.875
 
1
+ mean daru/treewayabstractive daru/treewayextractive darumeru/MultiQ darumeru/PARus darumeru/RCB darumeru/RWSD darumeru/USE darumeru/cp_para_en darumeru/cp_para_ru darumeru/cp_sent_en darumeru/cp_sent_ru darumeru/ruMMLU darumeru/ruOpenBookQA darumeru/ruTiE darumeru/ruWorldTree nlpcoreteam/enMMLU nlpcoreteam/ruMMLU russiannlp/rucola_custom
2
+ 0.632 0.247 0.397 0.432 0.850 0.481 0.544 0.187 0.973 0.977 1.000 0.998 0.514 0.768 0.351 0.875 0.690 0.569 0.524
llmtf_eval_k5_bs8/nlpcoreteam_enMMLU.jsonl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d6d6e7452aa5fb69fc90ce1043ec813596a88e68d975e02617b4a9ef072a353d
3
- size 67044933
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8095dc31486a7450b06dfe16a52353d758b19897f0f9e135d258aefa5f4544a2
3
+ size 82136115
llmtf_eval_k5_bs8/nlpcoreteam_enMMLU_params.jsonl ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "NousResearch/Meta-Llama-3-8B-Instruct",
5
+ "generation_config": {
6
+ "bos_token_id": 128000,
7
+ "do_sample": true,
8
+ "eos_token_id": [
9
+ 128001,
10
+ 128009
11
+ ],
12
+ "max_length": 8192,
13
+ "max_new_tokens": 64,
14
+ "pad_token_id": 128001,
15
+ "stop_strings": [],
16
+ "temperature": 0.1,
17
+ "top_k": 40,
18
+ "top_p": 0.9,
19
+ "transformers_version": "4.38.2",
20
+ "trust_remote_code": [
21
+ false
22
+ ]
23
+ },
24
+ "conversation_template": {
25
+ "system_prompt": "",
26
+ "system_message_template": "",
27
+ "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
28
+ "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
29
+ "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
30
+ "user_role": "user",
31
+ "bot_role": "assistant",
32
+ "system_role": "system",
33
+ "global_prefix": "<|begin_of_text|>",
34
+ "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
35
+ "add_special_tokens": false,
36
+ "eos_token": "<|eot_id|>"
37
+ },
38
+ "load_in_8bit": false,
39
+ "torch_dtype": "auto",
40
+ "use_flash_attention_2": true,
41
+ "device_map": "cuda:0",
42
+ "use_fast_tokenizer": true,
43
+ "leading_space": false,
44
+ "space_token": null,
45
+ "trust_remote_code": [
46
+ false
47
+ ],
48
+ "max_model_len": 8192
49
+ },
50
+ "task_params": {
51
+ "max_len": 4000,
52
+ "few_shot_count": 5,
53
+ "batch_size": 8,
54
+ "max_sample_per_dataset": 10000000000000,
55
+ "method": "calculate_tokens_proba"
56
+ }
57
+ }
llmtf_eval_k5_bs8/nlpcoreteam_enMMLU_total.jsonl ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "nlpcoreteam/enMMLU",
3
+ "results": {
4
+ "acc": 0.6904807286717012
5
+ },
6
+ "leaderboard_result": 0.6904807286717012
7
+ }
llmtf_eval_k5_bs8/nlpcoreteam_ruMMLU.jsonl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7d9f3227af64e84961c6d5415223c58db1e568796143a7248fb781ba986b53f2
3
- size 62853990
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0c6923c896671e3781de03ca9d473dd1d46bfb78adb8ae77032d01d297855ea
3
+ size 119771974
llmtf_eval_k5_bs8/nlpcoreteam_ruMMLU_params.jsonl ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "NousResearch/Meta-Llama-3-8B-Instruct",
5
+ "generation_config": {
6
+ "bos_token_id": 128000,
7
+ "do_sample": true,
8
+ "eos_token_id": [
9
+ 128001,
10
+ 128009
11
+ ],
12
+ "max_length": 8192,
13
+ "max_new_tokens": 64,
14
+ "pad_token_id": 128001,
15
+ "stop_strings": [],
16
+ "temperature": 0.1,
17
+ "top_k": 40,
18
+ "top_p": 0.9,
19
+ "transformers_version": "4.38.2",
20
+ "trust_remote_code": [
21
+ false
22
+ ]
23
+ },
24
+ "conversation_template": {
25
+ "system_prompt": "",
26
+ "system_message_template": "",
27
+ "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
28
+ "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
29
+ "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
30
+ "user_role": "user",
31
+ "bot_role": "assistant",
32
+ "system_role": "system",
33
+ "global_prefix": "<|begin_of_text|>",
34
+ "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
35
+ "add_special_tokens": false,
36
+ "eos_token": "<|eot_id|>"
37
+ },
38
+ "load_in_8bit": false,
39
+ "torch_dtype": "auto",
40
+ "use_flash_attention_2": true,
41
+ "device_map": "cuda:0",
42
+ "use_fast_tokenizer": true,
43
+ "leading_space": false,
44
+ "space_token": null,
45
+ "trust_remote_code": [
46
+ false
47
+ ],
48
+ "max_model_len": 8192
49
+ },
50
+ "task_params": {
51
+ "max_len": 4000,
52
+ "few_shot_count": 5,
53
+ "batch_size": 8,
54
+ "max_sample_per_dataset": 10000000000000,
55
+ "method": "calculate_tokens_proba"
56
+ }
57
+ }
llmtf_eval_k5_bs8/nlpcoreteam_ruMMLU_total.jsonl ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "nlpcoreteam/ruMMLU",
3
+ "results": {
4
+ "acc": 0.5689766403256171
5
+ },
6
+ "leaderboard_result": 0.5689766403256171
7
+ }
llmtf_eval_k5_bs8/russiannlp_rucola_custom.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4be29752e06af3c15c296cce6829f0d26c34edc0764325aa8a4338974c6862bb
3
+ size 13288973
llmtf_eval_k5_bs8/russiannlp_rucola_custom_params.jsonl ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "NousResearch/Meta-Llama-3-8B-Instruct",
5
+ "generation_config": {
6
+ "bos_token_id": 128000,
7
+ "do_sample": true,
8
+ "eos_token_id": [
9
+ 128001,
10
+ 128009
11
+ ],
12
+ "max_length": 8192,
13
+ "max_new_tokens": 1,
14
+ "pad_token_id": 128001,
15
+ "stop_strings": [],
16
+ "temperature": 0.1,
17
+ "top_k": 40,
18
+ "top_p": 0.9,
19
+ "transformers_version": "4.38.2",
20
+ "trust_remote_code": [
21
+ false
22
+ ]
23
+ },
24
+ "conversation_template": {
25
+ "system_prompt": "",
26
+ "system_message_template": "",
27
+ "user_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
28
+ "bot_message_template": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>",
29
+ "bot_message_template_incomplete": "<|start_header_id|>{role}<|end_header_id|>\n\n{content}",
30
+ "user_role": "user",
31
+ "bot_role": "assistant",
32
+ "system_role": "system",
33
+ "global_prefix": "<|begin_of_text|>",
34
+ "suffix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
35
+ "add_special_tokens": false,
36
+ "eos_token": "<|eot_id|>"
37
+ },
38
+ "load_in_8bit": false,
39
+ "torch_dtype": "auto",
40
+ "use_flash_attention_2": true,
41
+ "device_map": "cuda:0",
42
+ "use_fast_tokenizer": true,
43
+ "leading_space": false,
44
+ "space_token": null,
45
+ "trust_remote_code": [
46
+ false
47
+ ],
48
+ "max_model_len": 8192
49
+ },
50
+ "task_params": {
51
+ "max_len": 4000,
52
+ "few_shot_count": 5,
53
+ "batch_size": 8,
54
+ "max_sample_per_dataset": 10000000000000,
55
+ "method": "calculate_tokens_proba"
56
+ }
57
+ }
llmtf_eval_k5_bs8/russiannlp_rucola_custom_total.jsonl ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "russiannlp/rucola_custom",
3
+ "results": {
4
+ "acc": 0.7115177610333692,
5
+ "mcc": 0.3362227509262135
6
+ },
7
+ "leaderboard_result": 0.5238702559797913
8
+ }