quantumly commited on
Commit
eedc32e
·
verified ·
1 Parent(s): 2a3a104

v0.6 tuned + quantile + tail-correction: 2026-04-27

Browse files
Files changed (1) hide show
  1. v0_6_metadata.json +657 -0
v0_6_metadata.json ADDED
@@ -0,0 +1,657 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trained_at": "2026-04-27T07:37:26.101815+00:00",
3
+ "data_run_date": "2026-04-25",
4
+ "llm_run_date": "2026-04-26",
5
+ "version": "v0.6-tuning-quantile-stacking",
6
+ "description": "v0.5 features + Optuna-tuned hyperparams + quantile regression at 0.05/0.5/0.95 + tail-correction stacking",
7
+ "parent_version": "v0.5",
8
+ "changes_from_parent": [
9
+ "Optuna search (60 trials)",
10
+ "Quantile regression trio (q=0.05, 0.5, 0.95)",
11
+ "Tail-correction stacking layer on q=0.5 residuals"
12
+ ],
13
+ "embedders": [
14
+ "mpnet-finetuned (from v0.3)",
15
+ "BAAI/bge-base-en-v1.5"
16
+ ],
17
+ "splits": {
18
+ "train": {
19
+ "rows": 265240,
20
+ "start": "2022-01-28",
21
+ "end": "2023-09-30"
22
+ },
23
+ "val": {
24
+ "rows": 3545,
25
+ "start": "2023-10-01",
26
+ "end": "2023-12-31"
27
+ },
28
+ "test": {
29
+ "rows": 2744,
30
+ "start": "2024-01-01",
31
+ "end": "2024-05-04"
32
+ }
33
+ },
34
+ "feature_count": 212,
35
+ "feature_cols": [
36
+ "pca_000",
37
+ "pca_001",
38
+ "pca_002",
39
+ "pca_003",
40
+ "pca_004",
41
+ "pca_005",
42
+ "pca_006",
43
+ "pca_007",
44
+ "pca_008",
45
+ "pca_009",
46
+ "pca_010",
47
+ "pca_011",
48
+ "pca_012",
49
+ "pca_013",
50
+ "pca_014",
51
+ "pca_015",
52
+ "pca_016",
53
+ "pca_017",
54
+ "pca_018",
55
+ "pca_019",
56
+ "pca_020",
57
+ "pca_021",
58
+ "pca_022",
59
+ "pca_023",
60
+ "pca_024",
61
+ "pca_025",
62
+ "pca_026",
63
+ "pca_027",
64
+ "pca_028",
65
+ "pca_029",
66
+ "pca_030",
67
+ "pca_031",
68
+ "pca_032",
69
+ "pca_033",
70
+ "pca_034",
71
+ "pca_035",
72
+ "pca_036",
73
+ "pca_037",
74
+ "pca_038",
75
+ "pca_039",
76
+ "pca_040",
77
+ "pca_041",
78
+ "pca_042",
79
+ "pca_043",
80
+ "pca_044",
81
+ "pca_045",
82
+ "pca_046",
83
+ "pca_047",
84
+ "pca_048",
85
+ "pca_049",
86
+ "pca_050",
87
+ "pca_051",
88
+ "pca_052",
89
+ "pca_053",
90
+ "pca_054",
91
+ "pca_055",
92
+ "pca_056",
93
+ "pca_057",
94
+ "pca_058",
95
+ "pca_059",
96
+ "pca_060",
97
+ "pca_061",
98
+ "pca_062",
99
+ "pca_063",
100
+ "len",
101
+ "n_digits",
102
+ "n_letters",
103
+ "n_special",
104
+ "n_lower",
105
+ "n_upper",
106
+ "is_palindrome",
107
+ "is_all_digits",
108
+ "is_all_letters",
109
+ "is_ascii",
110
+ "has_unicode",
111
+ "starts_digit",
112
+ "ends_digit",
113
+ "max_char_run",
114
+ "n_unique_chars",
115
+ "in_wikipedia",
116
+ "in_geonames",
117
+ "in_us_firstname",
118
+ "in_iso3166",
119
+ "in_ticker",
120
+ "in_sec_edgar",
121
+ "in_wiktionary_en",
122
+ "wordlist_hits",
123
+ "club__logistics",
124
+ "club__prepunk_full_rankings",
125
+ "club__gamertags",
126
+ "club__natural_wonders",
127
+ "club__social_handles",
128
+ "club__mythical_creatures",
129
+ "club__top_nouns",
130
+ "club__common_english",
131
+ "club__firstnames_usa",
132
+ "club__luxury",
133
+ "club__us_government",
134
+ "club__paranormal",
135
+ "club__pokemon_gen3",
136
+ "club__top500_cities_usa",
137
+ "club__gamertags_double",
138
+ "club__historic_figures",
139
+ "club__catholicism",
140
+ "club__performing_arts",
141
+ "club__sports",
142
+ "club__crayola_classic",
143
+ "club__personas",
144
+ "club__gen_alpha",
145
+ "club__familynames_usa",
146
+ "club__crypto_terms",
147
+ "club__wikidata_top_fantasy_char",
148
+ "club__country_codes",
149
+ "club__finance_terms",
150
+ "club__fine_art",
151
+ "club__home",
152
+ "club__common_animals",
153
+ "club__conspiracy_theories",
154
+ "club__holidays",
155
+ "club__top_crypto_tickers",
156
+ "club__pokemon_gen1",
157
+ "club__pokemon_gen2",
158
+ "club__top_crypto_names",
159
+ "club__top500_cities_global",
160
+ "club__memes",
161
+ "club__currency_symbols",
162
+ "club__pokemon_gen4",
163
+ "club__us_states",
164
+ "club__currency_names",
165
+ "n_clubs",
166
+ "trademark_conflict",
167
+ "name_age_days",
168
+ "prior_transfer_count",
169
+ "fg_value",
170
+ "eth_tvl_usd",
171
+ "eth_stable_mcap",
172
+ "eth_dex_volume",
173
+ "nft_total_fee_usd",
174
+ "fame_score",
175
+ "crypto_relevance_ord",
176
+ "brand_collision_risk_ord",
177
+ "kind__concept",
178
+ "kind__random",
179
+ "kind__brand",
180
+ "kind__surname",
181
+ "kind__first_name",
182
+ "kind__abbreviation",
183
+ "kind__place",
184
+ "kind__other",
185
+ "kind__unknown",
186
+ "origin__english",
187
+ "origin__none",
188
+ "origin__mixed",
189
+ "origin__spanish",
190
+ "origin__german",
191
+ "origin__french",
192
+ "origin__japanese",
193
+ "origin__chinese",
194
+ "origin__italian",
195
+ "origin__slavic",
196
+ "origin__korean",
197
+ "origin__arabic",
198
+ "origin__other",
199
+ "origin__unknown",
200
+ "desc_pca_000",
201
+ "desc_pca_001",
202
+ "desc_pca_002",
203
+ "desc_pca_003",
204
+ "desc_pca_004",
205
+ "desc_pca_005",
206
+ "desc_pca_006",
207
+ "desc_pca_007",
208
+ "desc_pca_008",
209
+ "desc_pca_009",
210
+ "desc_pca_010",
211
+ "desc_pca_011",
212
+ "desc_pca_012",
213
+ "desc_pca_013",
214
+ "desc_pca_014",
215
+ "desc_pca_015",
216
+ "desc_pca_016",
217
+ "desc_pca_017",
218
+ "desc_pca_018",
219
+ "desc_pca_019",
220
+ "desc_pca_020",
221
+ "desc_pca_021",
222
+ "desc_pca_022",
223
+ "desc_pca_023",
224
+ "desc_pca_024",
225
+ "desc_pca_025",
226
+ "desc_pca_026",
227
+ "desc_pca_027",
228
+ "desc_pca_028",
229
+ "desc_pca_029",
230
+ "desc_pca_030",
231
+ "desc_pca_031",
232
+ "knnmp_count",
233
+ "knnmp_mean_log",
234
+ "knnmp_median_log",
235
+ "knnmp_p90_log",
236
+ "knnmp_max_sim",
237
+ "knnmp_min_sim",
238
+ "knnmp_log_max",
239
+ "knnmp_log_min",
240
+ "knnbg_count",
241
+ "knnbg_mean_log",
242
+ "knnbg_median_log",
243
+ "knnbg_p90_log",
244
+ "knnbg_max_sim",
245
+ "knnbg_min_sim",
246
+ "knnbg_log_max",
247
+ "knnbg_log_min"
248
+ ],
249
+ "pca_dim_concat": 64,
250
+ "pca_dim_description": 32,
251
+ "name_kind_values": [
252
+ "concept",
253
+ "random",
254
+ "brand",
255
+ "surname",
256
+ "first_name",
257
+ "abbreviation",
258
+ "place",
259
+ "other",
260
+ "unknown"
261
+ ],
262
+ "cultural_origin_values": [
263
+ "english",
264
+ "none",
265
+ "mixed",
266
+ "spanish",
267
+ "german",
268
+ "french",
269
+ "japanese",
270
+ "chinese",
271
+ "italian",
272
+ "slavic",
273
+ "korean",
274
+ "arabic",
275
+ "other",
276
+ "unknown"
277
+ ],
278
+ "best_xgb_params": {
279
+ "tree_method": "hist",
280
+ "device": "cuda",
281
+ "seed": 42,
282
+ "max_depth": 12,
283
+ "learning_rate": 0.007564423378912868,
284
+ "subsample": 0.7048816055233034,
285
+ "colsample_bytree": 0.8377589264565862,
286
+ "colsample_bylevel": 0.4011055562046537,
287
+ "min_child_weight": 7,
288
+ "reg_alpha": 2.609660530404674,
289
+ "reg_lambda": 1.946648297232411,
290
+ "gamma": 2.3669086866916134
291
+ },
292
+ "optuna": {
293
+ "n_trials": 60,
294
+ "best_val_rmse": 1.0080678462982178,
295
+ "best_params": {
296
+ "max_depth": 12,
297
+ "learning_rate": 0.007564423378912868,
298
+ "subsample": 0.7048816055233034,
299
+ "colsample_bytree": 0.8377589264565862,
300
+ "colsample_bylevel": 0.4011055562046537,
301
+ "min_child_weight": 7,
302
+ "reg_alpha": 2.609660530404674,
303
+ "reg_lambda": 1.946648297232411,
304
+ "gamma": 2.3669086866916134
305
+ }
306
+ },
307
+ "quantile_models": {
308
+ "q05": {
309
+ "best_iteration": 798,
310
+ "best_val_rmse": 1.9434242115149494
311
+ },
312
+ "q50": {
313
+ "best_iteration": 2262,
314
+ "best_val_rmse": 1.0197268625497296
315
+ },
316
+ "q95": {
317
+ "best_iteration": 1773,
318
+ "best_val_rmse": 2.2088471640402845
319
+ }
320
+ },
321
+ "tail_correction": {
322
+ "best_iteration": 795,
323
+ "best_val_rmse": 1.0014360745042261,
324
+ "cv_folds": 3,
325
+ "fold_metrics": [
326
+ {
327
+ "fold": 0,
328
+ "best_iter": 2256
329
+ },
330
+ {
331
+ "fold": 1,
332
+ "best_iter": 2261
333
+ },
334
+ {
335
+ "fold": 2,
336
+ "best_iter": 2261
337
+ }
338
+ ]
339
+ },
340
+ "metrics": {
341
+ "final": {
342
+ "train": {
343
+ "r2_log": 0.8784301280975342,
344
+ "rmse_log": 0.5630198121070862,
345
+ "mae_log": 0.34918516874313354,
346
+ "median_ape": 0.1952281892299652,
347
+ "bias_log": 0.0008572799852117896
348
+ },
349
+ "val": {
350
+ "r2_log": 0.7011631727218628,
351
+ "rmse_log": 1.0014359951019287,
352
+ "mae_log": 0.687095046043396,
353
+ "median_ape": 0.4087112247943878,
354
+ "bias_log": 0.02248295769095421
355
+ },
356
+ "test": {
357
+ "r2_log": 0.45456844568252563,
358
+ "rmse_log": 1.3734359741210938,
359
+ "mae_log": 1.1084247827529907,
360
+ "median_ape": 0.9863521456718445,
361
+ "bias_log": 0.5754767060279846
362
+ }
363
+ },
364
+ "base": {
365
+ "train": {
366
+ "r2_log": 0.8039833307266235,
367
+ "rmse_log": 0.7149196863174438,
368
+ "mae_log": 0.4110703468322754,
369
+ "median_ape": 0.19649738073349,
370
+ "bias_log": -0.07636234164237976
371
+ },
372
+ "val": {
373
+ "r2_log": 0.6901469230651855,
374
+ "rmse_log": 1.019727349281311,
375
+ "mae_log": 0.6950583457946777,
376
+ "median_ape": 0.40584105253219604,
377
+ "bias_log": -0.040105946362018585
378
+ },
379
+ "test": {
380
+ "r2_log": 0.48208510875701904,
381
+ "rmse_log": 1.3383431434631348,
382
+ "mae_log": 1.0609599351882935,
383
+ "median_ape": 0.9018900394439697,
384
+ "bias_log": 0.4112372100353241
385
+ }
386
+ },
387
+ "coverage": {
388
+ "train": {
389
+ "coverage_90pct": 0.9052216860202081,
390
+ "median_interval_log": 2.52069091796875,
391
+ "median_interval_ratio": 12.437186771527003
392
+ },
393
+ "val": {
394
+ "coverage_90pct": 0.7870239774330042,
395
+ "median_interval_log": 3.475064992904663,
396
+ "median_interval_ratio": 32.299927734517325
397
+ },
398
+ "test": {
399
+ "coverage_90pct": 0.642128279883382,
400
+ "median_interval_log": 3.9815444946289062,
401
+ "median_interval_ratio": 53.5997548637558
402
+ }
403
+ }
404
+ },
405
+ "top_features_base": [
406
+ {
407
+ "name": "knnmp_mean_log",
408
+ "gain": 79.17141723632812
409
+ },
410
+ {
411
+ "name": "knnmp_median_log",
412
+ "gain": 69.66679382324219
413
+ },
414
+ {
415
+ "name": "len",
416
+ "gain": 59.86244201660156
417
+ },
418
+ {
419
+ "name": "knnmp_p90_log",
420
+ "gain": 42.315155029296875
421
+ },
422
+ {
423
+ "name": "is_all_digits",
424
+ "gain": 37.01343536376953
425
+ },
426
+ {
427
+ "name": "knnmp_log_min",
428
+ "gain": 35.15000534057617
429
+ },
430
+ {
431
+ "name": "knnmp_count",
432
+ "gain": 33.0050048828125
433
+ },
434
+ {
435
+ "name": "pca_004",
436
+ "gain": 29.43238639831543
437
+ },
438
+ {
439
+ "name": "ends_digit",
440
+ "gain": 23.06843376159668
441
+ },
442
+ {
443
+ "name": "knnmp_log_max",
444
+ "gain": 22.34497833251953
445
+ },
446
+ {
447
+ "name": "pca_002",
448
+ "gain": 21.7791690826416
449
+ },
450
+ {
451
+ "name": "origin__chinese",
452
+ "gain": 21.658403396606445
453
+ },
454
+ {
455
+ "name": "in_wikipedia",
456
+ "gain": 20.88755989074707
457
+ },
458
+ {
459
+ "name": "n_digits",
460
+ "gain": 20.82081413269043
461
+ },
462
+ {
463
+ "name": "eth_stable_mcap",
464
+ "gain": 20.470684051513672
465
+ },
466
+ {
467
+ "name": "brand_collision_risk_ord",
468
+ "gain": 19.85502052307129
469
+ },
470
+ {
471
+ "name": "kind__random",
472
+ "gain": 19.53866958618164
473
+ },
474
+ {
475
+ "name": "n_unique_chars",
476
+ "gain": 18.933242797851562
477
+ },
478
+ {
479
+ "name": "eth_tvl_usd",
480
+ "gain": 18.808229446411133
481
+ },
482
+ {
483
+ "name": "knnbg_count",
484
+ "gain": 18.738136291503906
485
+ },
486
+ {
487
+ "name": "name_age_days",
488
+ "gain": 18.64664077758789
489
+ },
490
+ {
491
+ "name": "starts_digit",
492
+ "gain": 17.717424392700195
493
+ },
494
+ {
495
+ "name": "origin__none",
496
+ "gain": 17.592206954956055
497
+ },
498
+ {
499
+ "name": "n_letters",
500
+ "gain": 17.253936767578125
501
+ },
502
+ {
503
+ "name": "is_palindrome",
504
+ "gain": 16.289718627929688
505
+ },
506
+ {
507
+ "name": "origin__arabic",
508
+ "gain": 15.088671684265137
509
+ },
510
+ {
511
+ "name": "kind__abbreviation",
512
+ "gain": 15.046517372131348
513
+ },
514
+ {
515
+ "name": "n_special",
516
+ "gain": 14.652754783630371
517
+ },
518
+ {
519
+ "name": "n_lower",
520
+ "gain": 14.477858543395996
521
+ },
522
+ {
523
+ "name": "desc_pca_000",
524
+ "gain": 13.754626274108887
525
+ }
526
+ ],
527
+ "top_features_tail": [
528
+ {
529
+ "name": "in_iso3166",
530
+ "gain": 20.713716506958008
531
+ },
532
+ {
533
+ "name": "prior_transfer_count",
534
+ "gain": 19.949108123779297
535
+ },
536
+ {
537
+ "name": "n_lower",
538
+ "gain": 19.065231323242188
539
+ },
540
+ {
541
+ "name": "brand_collision_risk_ord",
542
+ "gain": 19.003070831298828
543
+ },
544
+ {
545
+ "name": "n_letters",
546
+ "gain": 16.631563186645508
547
+ },
548
+ {
549
+ "name": "in_wiktionary_en",
550
+ "gain": 15.533679962158203
551
+ },
552
+ {
553
+ "name": "club__crypto_terms",
554
+ "gain": 15.453104972839355
555
+ },
556
+ {
557
+ "name": "fame_score",
558
+ "gain": 15.42578125
559
+ },
560
+ {
561
+ "name": "has_unicode",
562
+ "gain": 14.748347282409668
563
+ },
564
+ {
565
+ "name": "n_clubs",
566
+ "gain": 14.500151634216309
567
+ },
568
+ {
569
+ "name": "club__gamertags_double",
570
+ "gain": 14.302302360534668
571
+ },
572
+ {
573
+ "name": "is_all_letters",
574
+ "gain": 14.27059555053711
575
+ },
576
+ {
577
+ "name": "origin__english",
578
+ "gain": 14.242754936218262
579
+ },
580
+ {
581
+ "name": "crypto_relevance_ord",
582
+ "gain": 14.161867141723633
583
+ },
584
+ {
585
+ "name": "trademark_conflict",
586
+ "gain": 13.951108932495117
587
+ },
588
+ {
589
+ "name": "club__currency_symbols",
590
+ "gain": 13.67905330657959
591
+ },
592
+ {
593
+ "name": "n_upper",
594
+ "gain": 13.592854499816895
595
+ },
596
+ {
597
+ "name": "name_age_days",
598
+ "gain": 13.328607559204102
599
+ },
600
+ {
601
+ "name": "ends_digit",
602
+ "gain": 13.104514122009277
603
+ },
604
+ {
605
+ "name": "wordlist_hits",
606
+ "gain": 12.854232788085938
607
+ },
608
+ {
609
+ "name": "starts_digit",
610
+ "gain": 12.26870059967041
611
+ },
612
+ {
613
+ "name": "is_ascii",
614
+ "gain": 12.160033226013184
615
+ },
616
+ {
617
+ "name": "kind__random",
618
+ "gain": 11.966256141662598
619
+ },
620
+ {
621
+ "name": "club__prepunk_full_rankings",
622
+ "gain": 11.913127899169922
623
+ },
624
+ {
625
+ "name": "max_char_run",
626
+ "gain": 11.784753799438477
627
+ },
628
+ {
629
+ "name": "n_unique_chars",
630
+ "gain": 11.777941703796387
631
+ },
632
+ {
633
+ "name": "eth_stable_mcap",
634
+ "gain": 11.768474578857422
635
+ },
636
+ {
637
+ "name": "origin__italian",
638
+ "gain": 11.581886291503906
639
+ },
640
+ {
641
+ "name": "in_geonames",
642
+ "gain": 11.519492149353027
643
+ },
644
+ {
645
+ "name": "n_special",
646
+ "gain": 11.42654800415039
647
+ }
648
+ ],
649
+ "inference_recipe": {
650
+ "description": "Inference combines 4 models",
651
+ "point_estimate": "final_log = q50_model(features) + tail_model(features)",
652
+ "lower_bound_90pct": "low_log = q05_model(features)",
653
+ "upper_bound_90pct": "high_log = q95_model(features)",
654
+ "output_usd": "np.exp(final_log)"
655
+ },
656
+ "wandb_run": "https://wandb.ai/quantumly-aletheia-research/ens-appraiser/runs/i35wokpu"
657
+ }