emsesc committed on
Commit
927a4de
·
1 Parent(s): 0b575f7

clean up app.py by splitting it into modular components

Browse files
Files changed (6) hide show
  1. app.py +60 -1011
  2. config.py +56 -0
  3. data_utils.py +237 -0
  4. graphs/leaderboard.py +156 -293
  5. helpers.py +90 -0
  6. layout_components.py +603 -0
app.py CHANGED
@@ -1,94 +1,26 @@
1
  from dash import Dash, html, dcc, Input, Output, State
2
  import pandas as pd
3
  import dash_mantine_components as dmc
4
- import duckdb
5
  import time
 
 
 
6
  from graphs.leaderboard import (
7
- button_style,
8
- get_top_n_leaderboard,
9
- render_table_content,
10
- build_leaderboard_query,
 
 
 
11
  )
12
- from dash_iconify import DashIconify
13
 
14
  # Initialize the app
15
  app = Dash(suppress_callback_exceptions=True) # suppress callback exceptions for multi-page layout
16
  server = app.server
17
 
18
-
19
- # Add dataset URLs (used by the helper to create views)
20
# Hugging Face dataset that hosts the leaderboard parquet files.
HF_DATASET_ID = "mmpr/open_model_evolution_data"

# Both parquet URLs are derived from HF_DATASET_ID so the id and the URLs
# always point at the same dataset repository.
_HF_RESOLVE_BASE = f"https://huggingface.co/datasets/{HF_DATASET_ID}/resolve/main"
hf_parquet_url_1 = f"{_HF_RESOLVE_BASE}/all_downloads_with_annotations.parquet"
hf_parquet_url_2 = f"{_HF_RESOLVE_BASE}/one_year_rolling.parquet"
23
-
24
- # Helper: create a fresh in-memory DuckDB connection and (re)create parquet-backed views.
25
def create_fresh_duckdb_with_views():
    """
    Return a fresh in-memory DuckDB connection with the parquet-backed views.

    The function tries to install/load ``httpfs``, turn off the HTTP metadata
    and object caches, and create the ``all_downloads`` and
    ``one_year_rolling`` views over the remote parquet URLs.

    Setup is best-effort: every step is attempted independently, failures are
    logged, and the connection is returned regardless so the caller decides
    how to handle a missing view. Caller must close the returned connection.
    """
    import logging

    log = logging.getLogger(__name__)
    local_con = duckdb.connect(database=":memory:", read_only=False)

    # Each statement gets its own try so that one failure (for example an
    # INSTALL that errors although the extension is already available) does
    # not prevent the statements after it from running.
    optional_statements = (
        "INSTALL httpfs;",
        "LOAD httpfs;",
        # keep HF Spaces behavior consistent
        "SET enable_http_metadata_cache = false;",
        "SET enable_object_cache = false;",
    )
    for statement in optional_statements:
        try:
            local_con.execute(statement)
        except Exception:
            log.debug("Optional DuckDB setup step failed: %s", statement, exc_info=True)

    # Views referencing the remote parquet files, created independently so a
    # problem with one file does not leave the other view undefined.
    views = (
        ("all_downloads", hf_parquet_url_1),
        ("one_year_rolling", hf_parquet_url_2),
    )
    for view_name, parquet_url in views:
        try:
            local_con.execute(
                f"CREATE OR REPLACE VIEW {view_name} AS "
                f"SELECT * FROM read_parquet('{parquet_url}')"
            )
        except Exception:
            # Still return the connection; queries against this view will
            # fail in the caller, which already handles that case.
            log.warning(
                "Could not create DuckDB view %s from %s",
                view_name,
                parquet_url,
                exc_info=True,
            )

    return local_con
60
-
61
- # Query for most recent date in all_downloads
62
def get_last_updated():
    """
    Return the most recent ``time`` in ``all_downloads`` as e.g. ``"Oct 08, 2025"``.

    Any failure (connection, query, parsing) or a null maximum yields ``"N/A"``.
    """
    try:
        connection = create_fresh_duckdb_with_views()
        try:
            frame = connection.execute(
                "SELECT MAX(time) as max_time FROM all_downloads"
            ).fetchdf()
        finally:
            # The connection is single-use; always release it.
            connection.close()

        latest = frame["max_time"].iloc[0]
        if not pd.isnull(latest):
            return pd.to_datetime(latest).strftime("%b %d, %Y")
        return "N/A"
    except Exception:
        return "N/A"
77
-
78
-
79
- # DuckDB connection (global)
80
# Module-level in-memory DuckDB connection, opened once at import time.
con = duckdb.connect(database=":memory:", read_only=False)

# disable all caching so HF Spaces always read latest parquet
con.execute("SET enable_http_metadata_cache = false;")
con.execute("SET enable_object_cache = false;")

# Load parquet files from Hugging Face using DuckDB
# NOTE: module-level reassignment. These override any earlier definitions of
# the same names, so these are the values functions see when they are called.
HF_DATASET_ID = "mmpr/open_model_evolution_data"
hf_parquet_url_1 = "https://huggingface.co/datasets/mmpr/open_model_evolution_data/resolve/main/all_downloads_with_annotations.parquet"
hf_parquet_url_2 = "https://huggingface.co/datasets/mmpr/open_model_evolution_data/resolve/main/one_year_rolling.parquet"

# Startup trace of which dataset the app is about to read.
print(f"Attempting to connect to dataset from Hugging Face Hub: {HF_DATASET_ID}")
92
  try:
93
  overall_start_time = time.time()
94
 
@@ -111,153 +43,27 @@ except Exception as e:
111
  # Create a dcc slider for time range selection by year (readable marks)
112
  start_ts = int(start_dt.timestamp())
113
  end_ts = int(end_dt.timestamp())
114
-
115
-
116
def ordinal(n):
    """Return *n* with its English ordinal suffix, e.g. ``1`` -> ``"1st"``."""
    last_two = n % 100
    # Everything from 10 through 20 takes "th" (covers 11th, 12th, 13th).
    if last_two >= 10 and last_two <= 20:
        return f"{n}th"

    last_digit = n % 10
    if last_digit == 1:
        ending = "st"
    elif last_digit == 2:
        ending = "nd"
    elif last_digit == 3:
        ending = "rd"
    else:
        ending = "th"
    return f"{n}{ending}"
123
-
124
-
125
def format_date(dt):
    """Format *dt* as abbreviated month, ordinal day and year, e.g. ``"Oct 8th, 2025"``."""
    month = dt.strftime("%b")
    day = ordinal(dt.day)
    return f"{month} {day}, {dt.year}"
128
-
129
-
130
# Slider marks: only the two endpoints are labelled, each as "Mon YYYY"
# (e.g. "Jan 2020" for the start and "Dec 2024" for the end).
marks = [
    {"value": mark_ts, "label": mark_dt.strftime("%b %Y")}
    for mark_ts, mark_dt in ((start_ts, start_dt), (end_ts, end_dt))
]
141
-
142
-
143
def get_thumb_labels(values):
    """
    Build the two date labels attached to the range slider's thumbs.

    Args:
        values: Two thumb positions as Unix timestamps in seconds; only the
            first two entries are used.

    Returns:
        A list of two ``html.Div`` labels, first thumb first. Both labels sit
        below the track (``top: 14px``) unless the thumbs are less than four
        30-day months apart, in which case the first label is moved above the
        track (``top: -38px``), presumably so the two labels do not overlap.
    """
    # Thumbs closer together than this many seconds count as "close".
    close_threshold_seconds = 4 * 30 * 86400  # 4 months
    close = abs(values[1] - values[0]) < close_threshold_seconds

    label_style = {
        "background": "#fff",
        "color": "#082030",
        "fontWeight": "bold",
        "fontSize": "13px",
        "borderRadius": "8px",
        "padding": "2px 8px",
        "boxShadow": "0 1px 4px rgba(8,32,48,0.10)",
        "position": "absolute",
        "left": "50%",
        "transform": "translateX(-50%)",
        "whiteSpace": "nowrap",
        "zIndex": 100,
    }

    # Only the first label's vertical offset depends on how close the thumbs
    # are; the second label always stays below the track.
    tops = ("-38px" if close else "14px", "14px")
    return [
        html.Div(
            format_date(pd.to_datetime(value, unit="s")),
            style={**label_style, "top": top},
        )
        for value, top in zip(values, tops)
    ]
195
-
196
-
197
- # Create a dcc slider for time range selection by year
198
- time_slider = dmc.RangeSlider(
199
- id="time-slider",
200
- min=start_ts,
201
- max=end_ts,
202
- value=[
203
- start_ts,
204
- end_ts,
205
- ],
206
- step=24 * 60 * 60,
207
- color="#AC482A",
208
- size="md",
209
- radius="xl",
210
- marks=marks,
211
- style={"width": "95%", "paddingLeft": "60px"}, # updated paddingLeft
212
- label=None,
213
- showLabelOnHover=False,
214
- labelTransitionProps={"transition": "fade", "duration": 150},
215
- thumbChildren=get_thumb_labels([start_ts, end_ts]),
216
  )
217
 
218
- # Create a dcc slider for single date selection (all-time mode)
219
def get_thumb_label_single(value):
    """Return a one-element list with the date label for the single-thumb slider.

    ``value`` is interpreted as a Unix timestamp in seconds.
    """
    caption = format_date(pd.to_datetime(value, unit="s"))
    floating_label = dict(
        background="#fff",
        color="#082030",
        fontWeight="bold",
        fontSize="13px",
        borderRadius="8px",
        padding="2px 8px",
        boxShadow="0 1px 4px rgba(8,32,48,0.10)",
        position="absolute",
        left="50%",
        transform="translateX(-50%)",
        whiteSpace="nowrap",
        zIndex=100,
        top="14px",
    )
    return [html.Div(caption, style=floating_label)]
242
-
243
- time_slider_alltime = dmc.Slider(
244
- id="time-slider-alltime",
245
- min=start_ts,
246
- max=end_ts,
247
- value=end_ts,
248
- step=24 * 60 * 60,
249
- color="#AC482A",
250
- size="md",
251
- radius="xl",
252
- marks=marks,
253
- style={"width": "95%", "paddingLeft": "60px"},
254
- label=None,
255
- showLabelOnHover=False,
256
- labelTransitionProps={"transition": "fade", "duration": 150},
257
- thumbChildren=get_thumb_label_single(end_ts),
258
  )
259
 
260
- # Add a dcc.Store to hold the selected view (all_downloads or one_year_rolling)
 
 
261
  app.layout = dmc.MantineProvider(
262
  theme={
263
  "colorScheme": "light",
@@ -269,748 +75,15 @@ app.layout = dmc.MantineProvider(
269
  dcc.Store(id="model-attribution-type", data="uploader"),
270
  dcc.Store(id="time-slider-value", data=[start_ts, end_ts]),
271
  dcc.Store(id="time-slider-alltime-value", data=end_ts),
272
- html.Div(
273
- [
274
- # Header
275
- html.Div(
276
- [
277
- html.Div(
278
- [
279
- html.Span(
280
- [
281
- html.Span(
282
- className="live-dot",
283
- ),
284
- html.Span(
285
- "LIVE",
286
- className="live-label",
287
- ),
288
- ],
289
- className="live-row",
290
- ),
291
- html.Span(
292
- f"Last updated: {get_last_updated()}",
293
- className="last-updated",
294
- ),
295
- ],
296
- className="header-status-row",
297
- ),
298
- html.Div(
299
- [
300
- html.A(
301
- children=[
302
- html.Img(
303
- src="assets/images/dpi.svg",
304
- className="header-logo-img",
305
- ),
306
- "Data Provenance Initiative",
307
- ],
308
- href="https://www.dataprovenance.org/",
309
- target="_blank",
310
- className="no-bg-link header-link",
311
- ),
312
- html.A(
313
- children=[
314
- html.Img(
315
- src="assets/images/hf.svg",
316
- className="header-logo-img",
317
- ),
318
- html.Span(
319
- "Hugging Face",
320
- className="hf-brand-text",
321
- ),
322
- ],
323
- href="https://huggingface.co/",
324
- target="_blank",
325
- className="no-bg-link header-link",
326
- ),
327
- html.A(
328
- children=[
329
- html.Span(
330
- "Read the paper",
331
- className="paper-text",
332
- ),
333
- ],
334
- href="https://arxiv.org/abs/2512.03073",
335
- target="_blank",
336
- className="no-bg-link header-link paper-link",
337
- ),
338
- ],
339
- className="header-links-row",
340
- ),
341
- ],
342
- style={
343
- "display": "flex",
344
- "justifyContent": "space-between",
345
- "alignItems": "center",
346
- "padding": "18px 24px",
347
- "gap": "24px",
348
- "backgroundColor": "#082030", # restored dark background
349
- },
350
- className="responsive-header", # <-- add class
351
- ),
352
- html.Div(
353
- children=[
354
- dmc.Alert(
355
- # add an icon to the alert
356
- icon=DashIconify(
357
- icon="mdi:information-outline",
358
- width=18,
359
- height=18,
360
- style={"color": "#1A5F8D"},
361
- ),
362
- children=[
363
- "Note: This dashboard uses ",
364
- html.A(
365
- "public Hugging Face",
366
- href="https://huggingface.co/datasets/hfmlsoc/hub_weekly_snapshots",
367
- target="_blank",
368
- style={
369
- "color": "#1A5F8D",
370
- "fontWeight": "bold",
371
- "textDecoration": "underline",
372
- },
373
- ),
374
- " download data, which is less precise than data analyzed in the paper.",
375
- ],
376
- color="blue",
377
- radius="md",
378
- variant="light",
379
- withCloseButton=True,
380
- style={
381
- "marginTop": "16px",
382
- "marginBottom": "8px",
383
- "fontSize": "15px",
384
- "fontWeight": "500",
385
- "marginLeft": "auto",
386
- "marginRight": "auto",
387
- },
388
- ),
389
- html.Span(
390
- "The Open Model Leaderboard",
391
- style={
392
- "fontSize": 40,
393
- "fontWeight": "700",
394
- "textAlign": "center",
395
- "marginTop": "20px",
396
- "marginBottom": "20px",
397
- },
398
- ),
399
- ],
400
- style={
401
- "display": "flex",
402
- "flexDirection": "column",
403
- "alignItems": "center",
404
- "justifyContent": "center",
405
- "gap": "12px",
406
- "marginTop": "20px",
407
- "marginBottom": "20px",
408
- },
409
- className="responsive-title-row", # <-- add class
410
- ),
411
- html.Div(
412
- children=[
413
- "This leaderboard assesses concentrations of power in the open model ecosystem through ranking user downloads across three groups: countries, developers, and models. Explore how user downloads of models are distributed among these groups and identify key players shaping the open model ecosystem on Hugging Face. This dashboard accompanies the paper titled ",
414
- html.A(
415
- "Economies of Open Intelligence: Tracing Power & Participation in the Model Ecosystem.",
416
- href="https://arxiv.org/abs/2512.03073",
417
- target="_blank",
418
- style={
419
- "color": "#AC482A",
420
- "fontWeight": "700",
421
- "textDecoration": "underline",
422
- },
423
- ),
424
- ],
425
- style={
426
- "fontSize": 14,
427
- "marginTop": 18,
428
- "marginBottom": 12,
429
- "marginLeft": 100,
430
- "marginRight": 100,
431
- "textAlign": "center",
432
- },
433
- className="responsive-intro", # <-- add class
434
- ),
435
- html.Div(
436
- children=[
437
- html.Div(
438
- [
439
- html.Div(
440
- html.Span(
441
- [
442
- "Download View",
443
- dmc.HoverCard(
444
- width=260,
445
- shadow="md",
446
- position="top",
447
- children=[
448
- dmc.HoverCardTarget(
449
- html.Span(
450
- DashIconify(
451
- icon="mdi:information-outline",
452
- width=16,
453
- height=16,
454
- style={
455
- "marginLeft": "6px",
456
- "color": "#AC482A",
457
- "verticalAlign": "middle",
458
- },
459
- ),
460
- style={"cursor": "pointer"},
461
- )
462
- ),
463
- dmc.HoverCardDropdown(
464
- dmc.Text(
465
- "We believe this filter isolates more authentic usage, mitigating the impact of automatic software downloads for older models.",
466
- size="sm",
467
- style={"maxWidth": "240px"},
468
- )
469
- ),
470
- ],
471
- ),
472
- ],
473
- className="filter-label-row",
474
- ),
475
- className="filter-label-container",
476
- ),
477
- html.Div(
478
- [
479
- dmc.SegmentedControl(
480
- id="segmented",
481
- value="all-downloads",
482
- color="#AC482A",
483
- transitionDuration=200,
484
- data=[
485
- {
486
- "value": "all-downloads",
487
- "label": "All Downloads",
488
- },
489
- {
490
- "value": "filtered-downloads",
491
- "label": html.Span(
492
- ["Filtered Downloads"]
493
- ),
494
- },
495
- ],
496
- mb=10,
497
- ),
498
- ],
499
- className="filter-segmented-row",
500
- ),
501
- html.Div(
502
- "Choose whether to count all downloads, or only downloads up to one year from model creation.",
503
- className="filter-description",
504
- ),
505
- html.Div(
506
- [
507
- html.Div(
508
- "Model Attribution",
509
- className="filter-label",
510
- ),
511
- dmc.SegmentedControl(
512
- id="model-attribution-segmented",
513
- value="uploader",
514
- color="#AC482A",
515
- transitionDuration=200,
516
- data=[
517
- {
518
- "value": "uploader",
519
- "label": "Model Uploader",
520
- },
521
- {
522
- "value": "original_creator",
523
- "label": "Original Model Creator",
524
- },
525
- ],
526
- mb=10,
527
- ),
528
- html.Div(
529
- "Toggle between having downloads attributed to the account that uploaded the model, or the account that uploaded the model that this was originally derived from.",
530
- className="filter-description",
531
- ),
532
- ],
533
- style={"marginTop": "10px"},
534
- ),
535
- html.Span(
536
- id="global-toggle-status",
537
- className="global-toggle-status",
538
- ),
539
- ],
540
- className="main-content-left",
541
- ),
542
- html.Div(
543
- [
544
- html.Div(
545
- [
546
- html.Span(
547
- "Download Date Range",
548
- className="filter-label",
549
- ),
550
- dmc.HoverCard(
551
- width=260,
552
- shadow="md",
553
- position="top",
554
- children=[
555
- dmc.HoverCardTarget(
556
- html.Span(
557
- DashIconify(
558
- icon="mdi:information-outline",
559
- width=16,
560
- height=16,
561
- style={
562
- "marginLeft": "6px",
563
- "color": "#AC482A",
564
- "verticalAlign": "middle",
565
- },
566
- ),
567
- style={"cursor": "pointer"},
568
- )
569
- ),
570
- dmc.HoverCardDropdown(
571
- dmc.Text(
572
- "Toggle between viewing downloads between a date range or all-time downloads at a single date.",
573
- size="sm",
574
- style={"maxWidth": "240px"},
575
- )
576
- ),
577
- ],
578
- ),
579
- ],
580
- className="filter-label-row",
581
- ),
582
- dmc.Switch(
583
- id="time-range-toggle",
584
- label="All-time",
585
- checked=False,
586
- color="#AC482A",
587
- style={"marginBottom": "12px"},
588
- ),
589
- dcc.Loading(
590
- id="loading-slider",
591
- type="circle",
592
- color="#AC482A",
593
- children=html.Div(
594
- id="slider-container",
595
- children=[time_slider],
596
- ),
597
- ),
598
- html.Div(
599
- id="slider-description",
600
- children="Adjust the time range to filter leaderboard results by when models were downloaded by users.",
601
- className="filter-description filter-description-margin",
602
- ),
603
- html.Div(
604
- [
605
- html.Div(
606
- [
607
- DashIconify(
608
- icon="mdi:lightbulb-on-outline",
609
- width=20,
610
- height=20,
611
- style={
612
- "marginRight": "8px",
613
- "color": "#082030",
614
- },
615
- ),
616
- html.Span("Tip"),
617
- ],
618
- className="tip-title",
619
- ),
620
- html.Div(
621
- [
622
- "Try switching between ",
623
- html.Span(
624
- "All Downloads",
625
- className="tip-highlight",
626
- ),
627
- " and ",
628
- html.Span(
629
- "Filtered Downloads",
630
- className="tip-highlight",
631
- ),
632
- " to compare net popularity (but many duplicate, unused downloads) versus more immediate interest as models are released. ",
633
- "You can also toggle between ",
634
- html.Span(
635
- "Model Uploader",
636
- className="tip-highlight",
637
- ),
638
- " and ",
639
- html.Span(
640
- "Original Model Creator",
641
- className="tip-highlight",
642
- ),
643
- " to see how attribution affects perceived popularity.",
644
- ],
645
- className="tip-description",
646
- ),
647
- ],
648
- className="tip-section",
649
- ),
650
- ],
651
- className="main-content-right",
652
- ),
653
- ],
654
- style={
655
- "display": "flex",
656
- "gap": "24px",
657
- "padding": "32px",
658
- "alignItems": "flex-start",
659
- "marginLeft": "100px",
660
- "marginRight": "100px",
661
- "backgroundColor": "#FFFBF9",
662
- "borderRadius": "18px",
663
- },
664
- className="responsive-main-content", # <-- add class
665
- ),
666
- html.Div(
667
- [
668
- dcc.Tabs(
669
- id="leaderboard-tabs",
670
- value="Countries",
671
- children=[
672
- dcc.Tab(
673
- label="Countries",
674
- value="Countries",
675
- style={
676
- "backgroundColor": "transparent",
677
- "border": "none",
678
- "padding": "10px 18px",
679
- "color": "#6B7280",
680
- "fontWeight": "500",
681
- },
682
- selected_style={
683
- "backgroundColor": "transparent",
684
- "border": "none",
685
- "padding": "10px 18px",
686
- "fontWeight": "700",
687
- "borderBottom": "3px solid #082030",
688
- },
689
- children=[
690
- html.Div(
691
- children=[
692
- "The country leaderboard shows how downloads are distributed across different nations, highlighting which countries are leading in model usage and adoption. The metadata includes the ",
693
- html.Span(
694
- "country", className="meta-var"
695
- ),
696
- " and number of ",
697
- html.Span(
698
- "user downloads",
699
- className="meta-var",
700
- ),
701
- ".",
702
- ],
703
- className="tab-description",
704
- ),
705
- html.Div(
706
- dcc.Loading(
707
- id="loading-countries",
708
- type="circle",
709
- color="#AC482A",
710
- children=html.Div(
711
- id="top_countries-table"
712
- ),
713
- ),
714
- className="responsive-table-wrapper", # <-- add wrapper for scroll
715
- ),
716
- html.Button(
717
- id="top_countries-toggle",
718
- children="▼ Show Top 50",
719
- n_clicks=0,
720
- style={**button_style, "border": "none"},
721
- ),
722
- ],
723
- ),
724
- dcc.Tab(
725
- label="Developers",
726
- value="Developers",
727
- style={
728
- "backgroundColor": "transparent",
729
- "border": "none",
730
- "padding": "10px 18px",
731
- "color": "#6B7280",
732
- "fontWeight": "500",
733
- },
734
- selected_style={
735
- "backgroundColor": "transparent",
736
- "border": "none",
737
- "padding": "10px 18px",
738
- "fontWeight": "700",
739
- "borderBottom": "3px solid #082030",
740
- },
741
- children=[
742
- html.Div(
743
- children=[
744
- "The developer leaderboard highlights the most influential model creators on Hugging Face, showcasing which developers have garnered the highest download counts for their models. The metadata includes the ",
745
- html.Span(
746
- "developer", className="meta-var"
747
- ),
748
- ", number of ",
749
- html.Span(
750
- "user downloads",
751
- className="meta-var",
752
- ),
753
- ", and ",
754
- html.Span(
755
- "country", className="meta-var"
756
- ),
757
- ".",
758
- ],
759
- className="tab-description",
760
- ),
761
- html.Div(
762
- dcc.Loading(
763
- id="loading-developers",
764
- type="circle",
765
- color="#AC482A",
766
- children=html.Div(
767
- id="top_developers-table"
768
- ),
769
- ),
770
- className="responsive-table-wrapper",
771
- ),
772
- html.Button(
773
- id="top_developers-toggle",
774
- children="▼ Show Top 50",
775
- n_clicks=0,
776
- style={**button_style, "border": "none"},
777
- ),
778
- ],
779
- ),
780
- dcc.Tab(
781
- label="Models",
782
- value="Models",
783
- style={
784
- "backgroundColor": "transparent",
785
- "border": "none",
786
- "padding": "10px 18px",
787
- "color": "#6B7280",
788
- "fontWeight": "500",
789
- },
790
- selected_style={
791
- "backgroundColor": "transparent",
792
- "border": "none",
793
- "padding": "10px 18px",
794
- "fontWeight": "700",
795
- "borderBottom": "3px solid #082030",
796
- },
797
- children=[
798
- html.Div(
799
- children=[
800
- "The model leaderboard ranks individual models based on their download counts, revealing which models are most popular among users on Hugging Face. The metadata includes the ",
801
- html.Span(
802
- "model name", className="meta-var"
803
- ),
804
- ", number of ",
805
- html.Span(
806
- "user downloads",
807
- className="meta-var",
808
- ),
809
- ", ",
810
- html.Span(
811
- "developer", className="meta-var"
812
- ),
813
- ", and ",
814
- html.Span(
815
- "modality", className="meta-var"
816
- ),
817
- " (the input and output types of the model).",
818
- ],
819
- className="tab-description",
820
- ),
821
- html.Div(
822
- dcc.Loading(
823
- id="loading-models",
824
- type="circle",
825
- color="#AC482A",
826
- children=html.Div(
827
- id="top_models-table"
828
- ),
829
- ),
830
- className="responsive-table-wrapper",
831
- ),
832
- html.Button(
833
- id="top_models-toggle",
834
- children="▼ Show Top 50",
835
- n_clicks=0,
836
- style={**button_style, "border": "none"},
837
- ),
838
- ],
839
- ),
840
- ],
841
- ),
842
- ],
843
- style={
844
- "borderRadius": "18px",
845
- "padding": "32px",
846
- "marginTop": "12px",
847
- "marginBottom": "12px",
848
- "marginLeft": "50px",
849
- "marginRight": "50px",
850
- },
851
- className="responsive-tabs", # <-- add class
852
- ),
853
- ],
854
- style={
855
- "fontFamily": "Inter",
856
- "backgroundColor": "#ffffff",
857
- "minHeight": "100vh",
858
- },
859
- ),
860
  ],
861
  )
862
 
 
 
 
863
 
864
- # Callbacks for interactivity
865
- # -- helper utilities to consolidate duplicated callback logic --
866
def _get_filtered_top_n_from_duckdb(
    slider_value, group_col, top_n, view="all_downloads"
):
    """
    Run the date-window leaderboard query on a fresh DuckDB connection.

    The window is taken from ``slider_value`` when it holds exactly two Unix
    timestamps (seconds). Otherwise it runs from 1970-01-01 to the module-level
    ``end_dt``, or to the current time when ``end_dt`` is not defined.

    The SQL is produced by ``build_leaderboard_query`` and the result is
    returned as a DataFrame. The connection is always closed before returning.
    """
    # A new connection (with the parquet-backed views) is used for every call.
    connection = create_fresh_duckdb_with_views()

    try:
        has_window = bool(slider_value) and len(slider_value) == 2
        if has_window:
            window_start = pd.to_datetime(slider_value[0], unit="s")
            window_end = pd.to_datetime(slider_value[1], unit="s")
        else:
            window_start = pd.to_datetime("1970-01-01")
            try:
                window_end = end_dt  # may be defined from initial load
            except NameError:
                window_end = pd.Timestamp.now()

        sql = build_leaderboard_query(
            group_col, top_n, str(window_start), str(window_end), view=view
        )
        return connection.execute(sql).fetchdf()
    finally:
        connection.close()
906
-
907
-
908
def _get_filtered_top_n_alltime_from_duckdb(
    slider_value, group_col, top_n, view="all_downloads"
):
    """
    Run the all-time (single cut-off date) leaderboard query on a fresh connection.

    ``slider_value`` is a single Unix timestamp in seconds. When it is ``None``
    the module-level ``end_dt`` is used, or the current time if ``end_dt`` is
    not defined.

    The SQL is produced by ``build_leaderboard_query`` with ``date_str`` and the
    result is returned as a DataFrame. The connection is always closed.
    """
    # A new connection (with the parquet-backed views) is used for every call.
    connection = create_fresh_duckdb_with_views()

    try:
        if slider_value is None:
            try:
                cutoff = end_dt
            except NameError:
                cutoff = pd.Timestamp.now()
        else:
            cutoff = pd.to_datetime(slider_value, unit="s")

        sql = build_leaderboard_query(
            group_col, top_n, date_str=str(cutoff), view=view
        )
        return connection.execute(sql).fetchdf()
    finally:
        connection.close()
944
-
945
-
946
def _leaderboard_callback_logic(
    n_clicks,
    slider_value,
    current_label,
    group_col,
    filename,
    default_label="▼ Show Top 50",
    chip_color="#F0F9FF",
    view="all_downloads",
    derived_author_toggle=True,
    is_alltime=False,
):
    """
    Shared body of the leaderboard callbacks: pick the row count, query, render.

    Args:
        n_clicks: Click count of the "show more" button; 0 or ``None`` means
            it has not been clicked yet.
        slider_value: Value forwarded to the query helper (a two-element range,
            or a single timestamp when ``is_alltime`` is true).
        current_label: Current button label; ``None`` is replaced by
            ``default_label``.
        group_col: Grouping column passed to the query and post-processing.
        filename: Passed through to ``render_table_content``.
        default_label: Label assumed when ``current_label`` is ``None``.
        chip_color: Passed through to ``render_table_content``.
        view: Name of the DuckDB view to query.
        derived_author_toggle: Passed through to ``get_top_n_leaderboard``.
        is_alltime: Selects the all-time query helper instead of the
            date-window one.

    Returns:
        A ``(content, new_label)`` tuple: the rendered table (or a "no data"
        message) and the label the toggle button should show next.
    """
    if current_label is None:
        current_label = default_label

    # The label states what the NEXT click will show, so the current label
    # determines how many rows this invocation renders.
    # NOTE(review): the label advances on every invocation with n_clicks > 0.
    # If callers also run this for slider/toggle changes, the row count would
    # cycle without a click - confirm against the registered callbacks.
    if not n_clicks:
        top_n, new_label = 10, current_label
    elif "Show Top 50" in current_label:
        top_n, new_label = 50, "▼ Show Top 100"
    elif "Show Top 100" in current_label:
        top_n, new_label = 100, "▲ Show Less"
    else:
        top_n, new_label = 10, "▼ Show Top 50"

    def no_data_message():
        # Shown both when the query and when post-processing yields no rows.
        return html.Div(
            "No data found in this time range. Try broadening the download date range.",
            style={"padding": "18px", "fontSize": "16px", "color": "#082030"},
        )

    # Filtering and aggregation happen in DuckDB; choose the matching helper.
    fetch = (
        _get_filtered_top_n_alltime_from_duckdb
        if is_alltime
        else _get_filtered_top_n_from_duckdb
    )
    df_filtered = fetch(slider_value, group_col, top_n, view=view)
    if df_filtered is None or df_filtered.empty:
        return no_data_message(), new_label

    df, download_df = get_top_n_leaderboard(
        df_filtered, group_col, top_n, derived_author_toggle=derived_author_toggle
    )
    if df is None or getattr(df, "empty", False):
        return no_data_message(), new_label

    table = render_table_content(
        df, download_df, chip_color=chip_color, filename=filename
    )
    return table, new_label
1008
-
1009
-
1010
- # -- end helpers --
1011
-
1012
-
1013
- # --- Callback to store model attribution type ---
1014
  @app.callback(
1015
  Output("model-attribution-type", "data"),
1016
  Input("model-attribution-segmented", "value"),
@@ -1018,8 +91,7 @@ def _leaderboard_callback_logic(
1018
  def update_model_attribution_type(selected_value):
1019
  return selected_value
1020
 
1021
-
1022
- # --- Callback to toggle between RangeSlider and Slider ---
1023
  @app.callback(
1024
  Output("slider-container", "children"),
1025
  Output("slider-description", "children"),
@@ -1030,48 +102,28 @@ def update_model_attribution_type(selected_value):
1030
  def toggle_time_slider(is_alltime, range_value, alltime_value):
1031
  if is_alltime:
1032
  description = "Select a specific date to view all-time cumulative downloads up to that point."
1033
- # Recreate the all-time slider with current stored value
1034
- slider = dmc.Slider(
1035
- id="time-slider-alltime",
1036
- min=start_ts,
1037
- max=end_ts,
1038
- value=alltime_value if alltime_value is not None else end_ts,
1039
- step=24 * 60 * 60,
1040
- color="#AC482A",
1041
- size="md",
1042
- radius="xl",
1043
- marks=marks,
1044
- style={"width": "95%", "paddingLeft": "60px"},
1045
- label=None,
1046
- showLabelOnHover=False,
1047
- labelTransitionProps={"transition": "fade", "duration": 150},
1048
- thumbChildren=get_thumb_label_single(alltime_value if alltime_value is not None else end_ts),
1049
  )
1050
  return [slider], description
1051
  else:
1052
  description = "Adjust the slider to filter leaderboard results by the difference in downloads within the time range."
1053
- # Recreate the range slider with current stored value
1054
  slider_val = range_value if range_value is not None else [start_ts, end_ts]
1055
- slider = dmc.RangeSlider(
1056
- id="time-slider",
1057
- min=start_ts,
1058
- max=end_ts,
1059
- value=slider_val,
1060
- step=24 * 60 * 60,
1061
- color="#AC482A",
1062
- size="md",
1063
- radius="xl",
1064
- marks=marks,
1065
- style={"width": "95%", "paddingLeft": "60px"},
1066
- label=None,
1067
- showLabelOnHover=False,
1068
- labelTransitionProps={"transition": "fade", "duration": 150},
1069
- thumbChildren=get_thumb_labels(slider_val),
1070
  )
1071
  return [slider], description
1072
 
1073
-
1074
- # --- Callbacks to sync slider values to stores ---
1075
  @app.callback(
1076
  Output("time-slider-value", "data"),
1077
  Input("time-slider", "value"),
@@ -1079,7 +131,7 @@ def toggle_time_slider(is_alltime, range_value, alltime_value):
1079
  def sync_time_slider_value(value):
1080
  return value
1081
 
1082
-
1083
  @app.callback(
1084
  Output("time-slider-alltime-value", "data"),
1085
  Input("time-slider-alltime", "value"),
@@ -1087,8 +139,7 @@ def sync_time_slider_value(value):
1087
  def sync_time_slider_alltime_value(value):
1088
  return value
1089
 
1090
-
1091
- # Callbacks for interactivity (modularized)
1092
  @app.callback(
1093
  Output("top_countries-table", "children"),
1094
  Output("top_countries-toggle", "children"),
@@ -1106,7 +157,7 @@ def update_top_countries(
1106
  # Use the appropriate slider value based on the toggle
1107
  active_slider_value = slider_alltime_value if is_alltime else slider_value
1108
 
1109
- return _leaderboard_callback_logic(
1110
  n_clicks,
1111
  active_slider_value,
1112
  current_label,
@@ -1119,7 +170,7 @@ def update_top_countries(
1119
  is_alltime=is_alltime,
1120
  )
1121
 
1122
-
1123
  @app.callback(
1124
  Output("top_developers-table", "children"),
1125
  Output("top_developers-toggle", "children"),
@@ -1139,20 +190,20 @@ def update_top_developers(
1139
  # Use the appropriate slider value based on the toggle
1140
  active_slider_value = slider_alltime_value if is_alltime else slider_value
1141
 
1142
- return _leaderboard_callback_logic(
1143
  n_clicks,
1144
  active_slider_value,
1145
  current_label,
1146
  group_col=group_col,
1147
  filename="top_developers",
1148
  default_label="▼ Show Top 50",
1149
- chip_color="#FFF0E6",
1150
  view=selected_view,
1151
  derived_author_toggle=(attribution_type == "original_creator"),
1152
  is_alltime=is_alltime,
1153
  )
1154
 
1155
-
1156
  @app.callback(
1157
  Output("top_models-table", "children"),
1158
  Output("top_models-toggle", "children"),
@@ -1170,7 +221,7 @@ def update_top_models(
1170
  # Use the appropriate slider value based on the toggle
1171
  active_slider_value = slider_alltime_value if is_alltime else slider_value
1172
 
1173
- return _leaderboard_callback_logic(
1174
  n_clicks,
1175
  active_slider_value,
1176
  current_label,
@@ -1183,7 +234,7 @@ def update_top_models(
1183
  is_alltime=is_alltime,
1184
  )
1185
 
1186
-
1187
  @app.callback(
1188
  Output("time-slider", "thumbChildren"),
1189
  Input("time-slider", "value"),
@@ -1191,7 +242,7 @@ def update_top_models(
1191
  def update_thumb_labels(values):
1192
  return get_thumb_labels(values)
1193
 
1194
-
1195
  @app.callback(
1196
  Output("time-slider-alltime", "thumbChildren"),
1197
  Input("time-slider-alltime", "value"),
@@ -1199,8 +250,7 @@ def update_thumb_labels(values):
1199
  def update_thumb_label_alltime(value):
1200
  return get_thumb_label_single(value)
1201
 
1202
-
1203
- # --- Add callback to update selected view based on segmented control ---
1204
  @app.callback(
1205
  Output("selected-view", "data"),
1206
  Input("segmented", "value"),
@@ -1210,7 +260,6 @@ def update_selected_view(seg_value):
1210
  return "one_year_rolling"
1211
  return "all_downloads"
1212
 
1213
-
1214
  # Run the app
1215
  if __name__ == "__main__":
1216
  app.run(debug=True)
 
1
  from dash import Dash, html, dcc, Input, Output, State
2
  import pandas as pd
3
  import dash_mantine_components as dmc
 
4
  import time
5
+
6
+ from config import DATASET_ID
7
+ from data_utils import create_fresh_duckdb_with_views, get_last_updated
8
  from graphs.leaderboard import (
9
+ leaderboard_callback_logic,
10
+ )
11
+ from helpers import build_slider_marks, get_thumb_label_single, get_thumb_labels
12
+ from layout_components import (
13
+ build_main_layout,
14
+ build_range_slider,
15
+ build_single_slider,
16
  )
 
17
 
18
  # Initialize the app
19
  app = Dash(suppress_callback_exceptions=True) # suppress callback exceptions for multi-page layout
20
  server = app.server
21
 
22
+ # Load dataset and determine time range
23
+ print(f"Attempting to connect to dataset from Hugging Face Hub: {DATASET_ID}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  try:
25
  overall_start_time = time.time()
26
 
 
43
  # Create a dcc slider for time range selection by year (readable marks)
44
  start_ts = int(start_dt.timestamp())
45
  end_ts = int(end_dt.timestamp())
46
+ marks = build_slider_marks(start_dt, end_dt)
47
+
48
+ time_slider = build_range_slider(
49
+ start_ts,
50
+ end_ts,
51
+ [start_ts, end_ts],
52
+ marks,
53
+ thumb_children=get_thumb_labels([start_ts, end_ts]),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
  )
55
 
56
+ time_slider_alltime = build_single_slider(
57
+ start_ts,
58
+ end_ts,
59
+ end_ts,
60
+ marks,
61
+ thumb_children=get_thumb_label_single(end_ts),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  )
63
 
64
+ last_updated_label = get_last_updated()
65
+
66
+ # Define the app layout
67
  app.layout = dmc.MantineProvider(
68
  theme={
69
  "colorScheme": "light",
 
75
  dcc.Store(id="model-attribution-type", data="uploader"),
76
  dcc.Store(id="time-slider-value", data=[start_ts, end_ts]),
77
  dcc.Store(id="time-slider-alltime-value", data=end_ts),
78
+ build_main_layout(last_updated_label, time_slider),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  ],
80
  )
81
 
82
+ # ----------
83
+ # Callbacks
84
+ # ----------
85
 
86
+ # Update model attribution type based on user selection
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
  @app.callback(
88
  Output("model-attribution-type", "data"),
89
  Input("model-attribution-segmented", "value"),
 
91
  def update_model_attribution_type(selected_value):
92
  return selected_value
93
 
94
+ # Toggle between range slider and all-time slider
 
95
  @app.callback(
96
  Output("slider-container", "children"),
97
  Output("slider-description", "children"),
 
102
  def toggle_time_slider(is_alltime, range_value, alltime_value):
103
  if is_alltime:
104
  description = "Select a specific date to view all-time cumulative downloads up to that point."
105
+ slider_value = alltime_value if alltime_value is not None else end_ts
106
+ slider = build_single_slider(
107
+ start_ts,
108
+ end_ts,
109
+ slider_value,
110
+ marks,
111
+ thumb_children=get_thumb_label_single(slider_value),
 
 
 
 
 
 
 
 
 
112
  )
113
  return [slider], description
114
  else:
115
  description = "Adjust the slider to filter leaderboard results by the difference in downloads within the time range."
 
116
  slider_val = range_value if range_value is not None else [start_ts, end_ts]
117
+ slider = build_range_slider(
118
+ start_ts,
119
+ end_ts,
120
+ slider_val,
121
+ marks,
122
+ thumb_children=get_thumb_labels(slider_val),
 
 
 
 
 
 
 
 
 
123
  )
124
  return [slider], description
125
 
126
+ # Sync slider values to dcc.Store components
 
127
  @app.callback(
128
  Output("time-slider-value", "data"),
129
  Input("time-slider", "value"),
 
131
  def sync_time_slider_value(value):
132
  return value
133
 
134
+ # Sync all-time slider value to dcc.Store component
135
  @app.callback(
136
  Output("time-slider-alltime-value", "data"),
137
  Input("time-slider-alltime", "value"),
 
139
  def sync_time_slider_alltime_value(value):
140
  return value
141
 
142
+ # Update Top Countries leaderboard
 
143
  @app.callback(
144
  Output("top_countries-table", "children"),
145
  Output("top_countries-toggle", "children"),
 
157
  # Use the appropriate slider value based on the toggle
158
  active_slider_value = slider_alltime_value if is_alltime else slider_value
159
 
160
+ return leaderboard_callback_logic(
161
  n_clicks,
162
  active_slider_value,
163
  current_label,
 
170
  is_alltime=is_alltime,
171
  )
172
 
173
+ # Update Top Developers leaderboard
174
  @app.callback(
175
  Output("top_developers-table", "children"),
176
  Output("top_developers-toggle", "children"),
 
190
  # Use the appropriate slider value based on the toggle
191
  active_slider_value = slider_alltime_value if is_alltime else slider_value
192
 
193
+ return leaderboard_callback_logic(
194
  n_clicks,
195
  active_slider_value,
196
  current_label,
197
  group_col=group_col,
198
  filename="top_developers",
199
  default_label="▼ Show Top 50",
200
+ chip_color="#F0F9FF",
201
  view=selected_view,
202
  derived_author_toggle=(attribution_type == "original_creator"),
203
  is_alltime=is_alltime,
204
  )
205
 
206
+ # Update Top Models leaderboard
207
  @app.callback(
208
  Output("top_models-table", "children"),
209
  Output("top_models-toggle", "children"),
 
221
  # Use the appropriate slider value based on the toggle
222
  active_slider_value = slider_alltime_value if is_alltime else slider_value
223
 
224
+ return leaderboard_callback_logic(
225
  n_clicks,
226
  active_slider_value,
227
  current_label,
 
234
  is_alltime=is_alltime,
235
  )
236
 
237
+ # Update thumb labels for range slider
238
  @app.callback(
239
  Output("time-slider", "thumbChildren"),
240
  Input("time-slider", "value"),
 
242
  def update_thumb_labels(values):
243
  return get_thumb_labels(values)
244
 
245
+ # Update thumb label for all-time slider
246
  @app.callback(
247
  Output("time-slider-alltime", "thumbChildren"),
248
  Input("time-slider-alltime", "value"),
 
250
  def update_thumb_label_alltime(value):
251
  return get_thumb_label_single(value)
252
 
253
+ # Update selected view based on segmented control
 
254
  @app.callback(
255
  Output("selected-view", "data"),
256
  Input("segmented", "value"),
 
260
  return "one_year_rolling"
261
  return "all_downloads"
262
 
 
263
  # Run the app
264
  if __name__ == "__main__":
265
  app.run(debug=True)
config.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Colour constants.
PRIMARY_COLOR = "#AC482A"
DARK_BACKGROUND = "#082030"

# Hugging Face dataset that backs the app. Both parquet URLs are derived from
# DATASET_ID so the id and the URLs cannot drift apart.
DATASET_ID = "mmpr/open_model_evolution_data"
_HF_RESOLVE_BASE = f"https://huggingface.co/datasets/{DATASET_ID}/resolve/main"
# Source of the `all_downloads` DuckDB view.
HF_PARQUET_URL_1 = f"{_HF_RESOLVE_BASE}/all_downloads_with_annotations.parquet"
# Source of the `one_year_rolling` DuckDB view.
HF_PARQUET_URL_2 = f"{_HF_RESOLVE_BASE}/one_year_rolling.parquet"

# Inline style dict for buttons (camelCase CSS property names).
BUTTON_STYLE = {
    "display": "inline-block",
    "marginBottom": "10px",
    "marginRight": "15px",
    "marginTop": "30px",
    "padding": "6px 16px",
    "backgroundColor": DARK_BACKGROUND,
    "color": "white",
    "borderRadius": "6px",
    "textDecoration": "none",
    "fontWeight": "bold",
    "fontSize": "14px",
}

# Author name -> icon asset path used when rendering author chips.
COMPANY_ICON_MAP = {
    "google": "../assets/icons/google.png",
    "distilbert": "../assets/images/hf.svg",
    "sentence-transformers": "../assets/images/hf.svg",
    "facebook": "../assets/icons/meta.png",
    "openai": "../assets/icons/openai.png",
    "amazon": "../assets/icons/amazon.png",
    "microsoft": "../assets/icons/microsoft.png",
}

# Emoji used in place of a flag when no country flag can be resolved.
COUNTRY_EMOJI_FALLBACK = {
    "User": "👤",
    "Organization": "🏢",
    "Model": "📦",
}

# Grouping column -> metadata columns collected for that grouping.
META_COLS_MAP = {
    "org_country_single": ["org_country_single", "total_downloads"],
    "author": [
        "org_country_single",
        "author",
        "total_downloads",
    ],
    "derived_author": [
        "org_country_single",
        "derived_author",
        "total_downloads",
    ],
    "model": [
        "org_country_single",
        "author",
        "derived_author",
        "merged_modality",
        "total_downloads",
    ],
}
data_utils.py ADDED
@@ -0,0 +1,237 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import duckdb
2
+ import pandas as pd
3
+
4
+ from config import DATASET_ID, HF_PARQUET_URL_1, HF_PARQUET_URL_2
5
+
6
+
7
def create_fresh_duckdb_with_views(parquet_url_1: str = HF_PARQUET_URL_1, parquet_url_2: str = HF_PARQUET_URL_2):
    """Return a fresh in-memory DuckDB connection with parquet-backed views.

    Two views are (re)created on the new connection:

    * ``all_downloads``    -> ``read_parquet(parquet_url_1)``
    * ``one_year_rolling`` -> ``read_parquet(parquet_url_2)``

    The function never raises because of a failed setup step: the connection
    is returned even when the views could not be created. In that case the
    failure is printed so that a later "table does not exist" error can be
    traced back to its cause.

    Args:
        parquet_url_1: Parquet location backing the ``all_downloads`` view.
        parquet_url_2: Parquet location backing the ``one_year_rolling`` view.

    Returns:
        An open DuckDB connection. The caller must close it.
    """
    local_con = duckdb.connect(database=":memory:", read_only=False)

    # Best effort: install/load httpfs if necessary; errors are ignored
    # because the extension may already be available.
    try:
        local_con.execute("INSTALL httpfs;")
        local_con.execute("LOAD httpfs;")
    except Exception:
        pass

    # Best effort: disable the HTTP metadata/object caches. Failures here are
    # deliberately ignored.
    try:
        local_con.execute("SET enable_http_metadata_cache = false;")
        local_con.execute("SET enable_object_cache = false;")
    except Exception:
        pass

    try:
        # Double any single quote so the URL stays a valid SQL string literal.
        url_1 = str(parquet_url_1).replace("'", "''")
        url_2 = str(parquet_url_2).replace("'", "''")
        local_con.execute(
            f"""
            CREATE OR REPLACE VIEW all_downloads AS
            SELECT * FROM read_parquet('{url_1}')
            """
        )
        local_con.execute(
            f"""
            CREATE OR REPLACE VIEW one_year_rolling AS
            SELECT * FROM read_parquet('{url_2}')
            """
        )
    except Exception as exc:
        # Still hand back the connection, but report the failure instead of
        # swallowing it silently.
        print(f"Error creating DuckDB views: {exc}")
    return local_con
38
+
39
+
40
def get_last_updated():
    """Return the newest ``time`` value in ``all_downloads`` as a display string.

    The value is formatted with ``"%b %d, %Y"``. ``"N/A"`` is returned when the
    maximum is null or when anything goes wrong while connecting, querying or
    formatting.
    """
    fallback = "N/A"
    try:
        connection = create_fresh_duckdb_with_views()
        try:
            frame = connection.execute("SELECT MAX(time) as max_time FROM all_downloads").fetchdf()
        finally:
            # Always release the per-call connection, even if the query failed.
            connection.close()

        latest = frame["max_time"].iloc[0]
        if not pd.isnull(latest):
            return pd.to_datetime(latest).strftime("%b %d, %Y")
        return fallback
    except Exception:
        return fallback
56
+
57
+
58
# SQL expression that folds raw org_country_single values into the leaderboard
# buckets: 'HF' is counted as the United States and the international/online
# variants collapse into a single label.
_COUNTRY_NORMALIZATION_SQL = """CASE
            WHEN org_country_single IN ('HF', 'United States of America') THEN 'United States of America'
            WHEN org_country_single IN ('International', 'Online', 'Online?') THEN 'International/Online'
            ELSE org_country_single
        END"""

# Extra CTEs used only when grouping by derived_author: for each derived
# author, pick the country values of the row with the highest downloadsAllTime.
_DERIVED_AUTHOR_LOOKUP_CTES = """
    author_country_lookup AS (
        SELECT DISTINCT
            derived_author,
            FIRST_VALUE(org_country_single) OVER (PARTITION BY derived_author ORDER BY downloadsAllTime DESC) AS derived_author_country
        FROM base_data
        WHERE derived_author IS NOT NULL
    ),

    author_merged_country_lookup AS (
        SELECT DISTINCT
            derived_author,
            FIRST_VALUE(merged_country_groups_single) OVER (PARTITION BY derived_author ORDER BY downloadsAllTime DESC) AS derived_author_merged_country
        FROM base_data
        WHERE derived_author IS NOT NULL
    ),
"""


def _downloads_window_sql(start_str, end_str, date_str):
    """Return the ``(base_where, downloads_calc)`` SQL fragments for the mode."""
    if date_str is not None:
        # All-time mode: cumulative downloads as of date_str.
        return (
            f"WHERE time <= '{date_str}'",
            f"COALESCE(MAX(CASE WHEN time <= '{date_str}' THEN downloadsAllTime END), 0) AS total_downloads",
        )
    # Time-range mode: cumulative value at the end of the window minus the
    # cumulative value strictly before its start.
    return (
        "",
        f"""COALESCE(MAX(CASE WHEN time <= '{end_str}' THEN downloadsAllTime END), 0)
            - COALESCE(MAX(CASE WHEN time < '{start_str}' THEN downloadsAllTime END), 0)
            AS total_downloads""",
    )


def build_leaderboard_query(
    group_col,
    top_n,
    start_str=None,
    end_str=None,
    date_str=None,
    view="all_downloads",
):
    """Build the SQL query string for leaderboard data.

    Supports two modes:

    1. Time-range mode: pass ``start_str`` and ``end_str`` to get the
       downloads delta inside that window.
    2. All-time mode: pass ``date_str`` to get cumulative downloads as of
       that date. ``date_str`` takes precedence when it is not ``None``.

    NOTE: ``group_col``, ``view`` and the date strings are interpolated
    directly into the SQL text, so only trusted, internally generated values
    may be passed.

    Args:
        group_col: Column to group by (e.g. ``'author'``, ``'derived_author'``,
            ``'org_country_single'``). ``'org_country_single'`` is grouped on
            its normalised form; ``'derived_author'`` additionally resolves the
            country columns through per-author lookups.
        top_n: Number of top entries wanted; the query returns up to
            ``top_n * 10`` model-level rows.
        start_str: Start date string (time-range mode).
        end_str: End date string (time-range mode).
        date_str: Specific date string (all-time mode).
        view: DuckDB view name (``'all_downloads'`` or ``'one_year_rolling'``).

    Returns:
        The SQL query string.
    """
    if group_col == "org_country_single":
        group_expr = _COUNTRY_NORMALIZATION_SQL
    else:
        group_expr = group_col

    base_where, downloads_calc = _downloads_window_sql(start_str, end_str, date_str)

    if group_col == "derived_author":
        # Country columns come from the per-author lookups instead of the
        # model's own row.
        lookup_ctes = _DERIVED_AUTHOR_LOOKUP_CTES
        select_cols = """mm.model,
        mm.group_key,
        acl.derived_author_country AS org_country_single,
        amc.derived_author_merged_country AS merged_country_groups_single,
        mm.author,
        mm.derived_author,
        mm.merged_modality,"""
        lookup_joins = """LEFT JOIN author_country_lookup acl ON mm.group_key = acl.derived_author
    LEFT JOIN author_merged_country_lookup amc ON mm.group_key = amc.derived_author"""
    else:
        lookup_ctes = ""
        select_cols = """mm.model,
        mm.group_key,
        mm.org_country_single,
        mm.author,
        mm.derived_author,
        mm.merged_country_groups_single,
        mm.merged_modality,"""
        lookup_joins = ""

    return f"""
    WITH base_data AS (
        SELECT
            {group_expr} AS group_key,
            {_COUNTRY_NORMALIZATION_SQL} AS org_country_single,
            author,
            derived_author,
            merged_country_groups_single,
            merged_modality,
            model,
            time,
            downloadsAllTime
        FROM {view}
        {base_where}
    ),
    {lookup_ctes}
    model_metrics AS (
        SELECT
            model,
            group_key,
            ANY_VALUE(org_country_single) AS org_country_single,
            ANY_VALUE(author) AS author,
            ANY_VALUE(derived_author) AS derived_author,
            ANY_VALUE(merged_country_groups_single) AS merged_country_groups_single,
            ANY_VALUE(merged_modality) AS merged_modality,
            {downloads_calc}
        FROM base_data
        GROUP BY model, group_key
    ),

    total_downloads_cte AS (
        SELECT SUM(total_downloads) AS total_downloads_all FROM model_metrics
    )

    SELECT
        {select_cols}
        mm.total_downloads,
        CASE WHEN td.total_downloads_all = 0 THEN 0 ELSE ROUND(mm.total_downloads * 100.0 / td.total_downloads_all, 2) END AS percent_of_total
    FROM model_metrics mm
    {lookup_joins}
    CROSS JOIN total_downloads_cte td
    WHERE mm.total_downloads > 0
    ORDER BY mm.total_downloads DESC
    LIMIT {top_n * 10};
    """
215
+
216
+
217
def get_top_n_from_duckdb(con, group_col, top_n=10, time_filter=None, view="all_downloads"):
    """Fetch model-level leaderboard rows with per-model ``total_downloads``.

    Args:
        con: Accepted for interface compatibility but not used; every call
            opens and closes its own connection.
        group_col: Grouping column forwarded to ``build_leaderboard_query``.
        top_n: Number of top entries, forwarded to the query builder.
        time_filter: Optional two-item sequence of epoch seconds
            ``(start, end)``. Anything else selects 1970-01-01 up to now.
        view: Name of the DuckDB view to query.

    Returns:
        The query result as a DataFrame, or an empty DataFrame if the query
        fails (the error is printed).
    """
    if not time_filter or len(time_filter) != 2:
        # No usable window supplied: cover everything up to the present.
        window_start = pd.to_datetime("1970-01-01")
        window_end = pd.Timestamp.now()
    else:
        window_start = pd.to_datetime(time_filter[0], unit="s")
        window_end = pd.to_datetime(time_filter[1], unit="s")

    sql = build_leaderboard_query(group_col, top_n, str(window_start), str(window_end), view=view)

    fresh_con = create_fresh_duckdb_with_views()
    try:
        frame = fresh_con.execute(sql).fetchdf()
    except Exception as exc:
        print(f"Error querying DuckDB: {exc}")
        frame = pd.DataFrame()
    finally:
        fresh_con.close()
    return frame
graphs/leaderboard.py CHANGED
@@ -1,63 +1,162 @@
 
1
  import pandas as pd
2
  from dash import html
3
  from dash_iconify import DashIconify
4
  import dash_mantine_components as dmc
5
- import base64
6
  import countryflag
7
- import duckdb
8
-
9
- button_style = {
10
- "display": "inline-block",
11
- "marginBottom": "10px",
12
- "marginRight": "15px",
13
- "marginTop": "30px",
14
- "padding": "6px 16px",
15
- "backgroundColor": "#082030",
16
- "color": "white",
17
- "borderRadius": "6px",
18
- "textDecoration": "none",
19
- "fontWeight": "bold",
20
- "fontSize": "14px",
21
- }
22
-
23
- company_icon_map = {
24
- "google": "../assets/icons/google.png",
25
- "distilbert": "../assets/images/hf.svg",
26
- "sentence-transformers": "../assets/images/hf.svg",
27
- "facebook": "../assets/icons/meta.png",
28
- "openai": "../assets/icons/openai.png",
29
- "amazon": "../assets/icons/amazon.png",
30
- "microsoft": "../assets/icons/microsoft.png",
31
- }
32
-
33
- country_emoji_fallback = {
34
- "User": "👤",
35
- "Organization": "🏢",
36
- "Model": "📦",
37
- }
38
-
39
- meta_cols_map = {
40
- "org_country_single": ["org_country_single", "total_downloads"],
41
- "author": [
42
- "org_country_single",
43
- "author",
44
- "total_downloads",
45
- ],
46
- "derived_author": [
47
- "org_country_single",
48
- "derived_author",
49
- "total_downloads",
50
- ],
51
- "model": [
52
- "org_country_single",
53
- "author",
54
- "derived_author",
55
- "merged_modality",
56
- "total_downloads",
57
- ],
58
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
 
60
 
 
 
 
 
61
  # Chip renderer
62
  def chip(text, bg_color="#F0F0F0"):
63
  return html.Span(
@@ -368,7 +467,7 @@ def get_top_n_leaderboard(filtered_df, group_col, top_n=10, derived_author_toggl
368
  download_top["% of total"] = download_top["% of total"].round(2)
369
 
370
  # All relevant metadata columns for the grouping
371
- meta_cols = meta_cols_map.get(group_col, [])
372
 
373
  # Collect metadata per group by inspecting the underlying model-level rows
374
  meta_map = {}
@@ -398,15 +497,15 @@ def get_top_n_leaderboard(filtered_df, group_col, top_n=10, derived_author_toggl
398
  try:
399
  flag_emoji = countryflag.getflag(c)
400
  if not flag_emoji or flag_emoji == c:
401
- flag_emoji = country_emoji_fallback.get(c, "🌍")
402
  except Exception:
403
- flag_emoji = country_emoji_fallback.get(c, "🌍")
404
  chips.append((flag_emoji, c, "country"))
405
 
406
  # Author - use derived_author_toggle to determine which column
407
  author_key = "derived_author" if derived_author_toggle else "author"
408
  for a in meta.get(author_key, []):
409
- icon = company_icon_map.get(a, "")
410
  if icon == "":
411
  if meta.get("merged_country_groups_single", ["User"])[0] != "User":
412
  icon = "🏢"
@@ -459,230 +558,6 @@ def get_top_n_leaderboard(filtered_df, group_col, top_n=10, derived_author_toggl
459
  return display_for_render, download_top
460
 
461
 
462
- # Add dataset URLs used to create views when running queries from this module
463
- hf_parquet_url_1 = "https://huggingface.co/datasets/emsesc/open_model_evolution_data/resolve/main/all_downloads_with_annotations.parquet"
464
- hf_parquet_url_2 = "https://huggingface.co/datasets/emsesc/open_model_evolution_data/resolve/main/one_year_rolling.parquet"
465
-
466
-
467
- def create_fresh_duckdb_with_views():
468
- """
469
- Returns a fresh in-memory DuckDB connection with httpfs enabled and the
470
- all_downloads / one_year_rolling views created from the remote parquet URLs.
471
- Caller must close the returned connection.
472
- """
473
- local_con = duckdb.connect(database=":memory:", read_only=False)
474
- try:
475
- try:
476
- local_con.execute("INSTALL httpfs;")
477
- local_con.execute("LOAD httpfs;")
478
- except Exception:
479
- pass
480
- try:
481
- local_con.execute("SET enable_http_metadata_cache = false;")
482
- local_con.execute("SET enable_object_cache = false;")
483
- except Exception:
484
- pass
485
-
486
- local_con.execute(f"""
487
- CREATE OR REPLACE VIEW all_downloads AS
488
- SELECT * FROM read_parquet('{hf_parquet_url_1}')
489
- """)
490
- local_con.execute(f"""
491
- CREATE OR REPLACE VIEW one_year_rolling AS
492
- SELECT * FROM read_parquet('{hf_parquet_url_2}')
493
- """)
494
- except Exception:
495
- pass
496
- return local_con
497
-
498
-
499
- def build_leaderboard_query(
500
- group_col,
501
- top_n,
502
- start_str=None,
503
- end_str=None,
504
- date_str=None,
505
- view="all_downloads",
506
- ):
507
- """
508
- Build and return the SQL query string for leaderboard data.
509
-
510
- Supports two modes:
511
- 1. Time-range mode: Pass start_str and end_str to get downloads delta
512
- 2. All-time mode: Pass date_str to get cumulative downloads as of that date
513
-
514
- Args:
515
- group_col: Column to group by (e.g., 'author', 'derived_author', 'org_country_single')
516
- top_n: Number of top entries to return
517
- start_str: Start date string (for time-range mode)
518
- end_str: End date string (for time-range mode)
519
- date_str: Specific date string (for all-time mode)
520
- view: DuckDB view name ('all_downloads' or 'one_year_rolling')
521
-
522
- Returns:
523
- SQL query string
524
- """
525
- # Determine mode
526
- is_alltime = date_str is not None
527
-
528
- # handle country grouping normalization
529
- if group_col == "org_country_single":
530
- group_expr = """CASE
531
- WHEN org_country_single IN ('HF', 'United States of America') THEN 'United States of America'
532
- WHEN org_country_single IN ('International', 'Online', 'Online?') THEN 'International/Online'
533
- ELSE org_country_single
534
- END"""
535
- else:
536
- group_expr = group_col
537
-
538
- # Determine WHERE clause and total_downloads calculation
539
- if is_alltime:
540
- base_where = f"WHERE time <= '{date_str}'"
541
- downloads_calc = f"COALESCE(MAX(CASE WHEN time <= '{date_str}' THEN downloadsAllTime END), 0) AS total_downloads"
542
- else:
543
- base_where = ""
544
- downloads_calc = f"""COALESCE(MAX(CASE WHEN time <= '{end_str}' THEN downloadsAllTime END), 0)
545
- - COALESCE(MAX(CASE WHEN time < '{start_str}' THEN downloadsAllTime END), 0)
546
- AS total_downloads"""
547
-
548
- # Derived-author special-case (uses author-derived lookups)
549
- if group_col == "derived_author":
550
- return f"""
551
- WITH base_data AS (
552
- SELECT
553
- {group_expr} AS group_key,
554
- CASE
555
- WHEN org_country_single IN ('HF', 'United States of America') THEN 'United States of America'
556
- WHEN org_country_single IN ('International', 'Online', 'Online?') THEN 'International/Online'
557
- ELSE org_country_single
558
- END AS org_country_single,
559
- author,
560
- derived_author,
561
- merged_country_groups_single,
562
- merged_modality,
563
- model,
564
- time,
565
- downloadsAllTime
566
- FROM {view}
567
- {base_where}
568
- ),
569
-
570
- author_country_lookup AS (
571
- SELECT DISTINCT
572
- derived_author,
573
- FIRST_VALUE(org_country_single) OVER (PARTITION BY derived_author ORDER BY downloadsAllTime DESC) AS derived_author_country
574
- FROM base_data
575
- WHERE derived_author IS NOT NULL
576
- ),
577
-
578
- author_merged_country_lookup AS (
579
- SELECT DISTINCT
580
- derived_author,
581
- FIRST_VALUE(merged_country_groups_single) OVER (PARTITION BY derived_author ORDER BY downloadsAllTime DESC) AS derived_author_merged_country
582
- FROM base_data
583
- WHERE derived_author IS NOT NULL
584
- ),
585
-
586
- model_metrics AS (
587
- SELECT
588
- model,
589
- group_key,
590
- ANY_VALUE(org_country_single) AS org_country_single,
591
- ANY_VALUE(author) AS author,
592
- ANY_VALUE(derived_author) AS derived_author,
593
- ANY_VALUE(merged_country_groups_single) AS merged_country_groups_single,
594
- ANY_VALUE(merged_modality) AS merged_modality,
595
- {downloads_calc}
596
- FROM base_data
597
- GROUP BY model, group_key
598
- ),
599
-
600
- total_downloads_cte AS (
601
- SELECT SUM(total_downloads) AS total_downloads_all FROM model_metrics
602
- )
603
-
604
- SELECT
605
- mm.model,
606
- mm.group_key,
607
- acl.derived_author_country AS org_country_single,
608
- amc.derived_author_merged_country AS merged_country_groups_single,
609
- mm.author,
610
- mm.derived_author,
611
- mm.merged_modality,
612
- mm.total_downloads,
613
- CASE WHEN td.total_downloads_all = 0 THEN 0 ELSE ROUND(mm.total_downloads * 100.0 / td.total_downloads_all, 2) END AS percent_of_total
614
- FROM model_metrics mm
615
- LEFT JOIN author_country_lookup acl ON mm.group_key = acl.derived_author
616
- LEFT JOIN author_merged_country_lookup amc ON mm.group_key = amc.derived_author
617
- CROSS JOIN total_downloads_cte td
618
- WHERE mm.total_downloads > 0
619
- ORDER BY mm.total_downloads DESC
620
- LIMIT {top_n * 10};
621
- """
622
-
623
- # Generic grouping SQL
624
- return f"""
625
- WITH base_data AS (
626
- SELECT
627
- {group_expr} AS group_key,
628
- CASE
629
- WHEN org_country_single IN ('HF', 'United States of America') THEN 'United States of America'
630
- WHEN org_country_single IN ('International', 'Online', 'Online?') THEN 'International/Online'
631
- ELSE org_country_single
632
- END AS org_country_single,
633
- author,
634
- derived_author,
635
- merged_country_groups_single,
636
- merged_modality,
637
- model,
638
- time,
639
- downloadsAllTime
640
- FROM {view}
641
- {base_where}
642
- ),
643
-
644
- model_metrics AS (
645
- SELECT
646
- model,
647
- group_key,
648
- ANY_VALUE(org_country_single) AS org_country_single,
649
- ANY_VALUE(author) AS author,
650
- ANY_VALUE(derived_author) AS derived_author,
651
- ANY_VALUE(merged_country_groups_single) AS merged_country_groups_single,
652
- ANY_VALUE(merged_modality) AS merged_modality,
653
- {downloads_calc}
654
- FROM base_data
655
- GROUP BY model, group_key
656
- ),
657
-
658
- total_downloads_cte AS (
659
- SELECT SUM(total_downloads) AS total_downloads_all FROM model_metrics
660
- )
661
-
662
- SELECT
663
- mm.model,
664
- mm.group_key,
665
- mm.org_country_single,
666
- mm.author,
667
- mm.derived_author,
668
- mm.merged_country_groups_single,
669
- mm.merged_modality,
670
- mm.total_downloads,
671
- CASE WHEN td.total_downloads_all = 0 THEN 0 ELSE ROUND(mm.total_downloads * 100.0 / td.total_downloads_all, 2) END AS percent_of_total
672
- FROM model_metrics mm
673
- CROSS JOIN total_downloads_cte td
674
- WHERE mm.total_downloads > 0
675
- ORDER BY mm.total_downloads DESC
676
- LIMIT {top_n * 10};
677
- """
678
-
679
-
680
- def build_leaderboard_query_alltime(group_col, top_n, date_str, view="all_downloads"):
681
- """
682
- Deprecated: Use build_leaderboard_query with date_str parameter instead.
683
- Build and return the SQL query string for all-time downloads at a specific date.
684
- """
685
- return build_leaderboard_query(group_col, top_n, date_str=date_str, view=view)
686
 
687
 
688
  def get_top_n_from_duckdb(
@@ -718,15 +593,3 @@ def get_top_n_from_duckdb(
718
  return pd.DataFrame()
719
  finally:
720
  conn_local.close()
721
-
722
-
723
- def format_large_number(n):
724
- """Shorten large numbers, e.g. 5,000,000 -> '5 million'."""
725
- if n >= 1_000_000_000:
726
- return f"{n / 1_000_000_000:.1f} billion"
727
- elif n >= 1_000_000:
728
- return f"{n / 1_000_000:.1f} million"
729
- elif n >= 1_000:
730
- return f"{n / 1_000:.1f}k"
731
- else:
732
- return str(int(n))
 
1
+ import base64
2
  import pandas as pd
3
  from dash import html
4
  from dash_iconify import DashIconify
5
  import dash_mantine_components as dmc
 
6
  import countryflag
7
+
8
+ from config import COMPANY_ICON_MAP, COUNTRY_EMOJI_FALLBACK, META_COLS_MAP
9
+ from data_utils import build_leaderboard_query, create_fresh_duckdb_with_views
10
+ from helpers import format_large_number
11
+
12
+
13
+ # =============================
14
+ # Leaderboard Data Fetching
15
+ # =============================
16
+
17
def get_filtered_top_n_from_duckdb(
    slider_value, group_col, top_n, view="all_downloads"
):
    """
    Run the shared leaderboard query for a download-date window.

    A brand-new DuckDB connection (with the parquet-backed views) is opened for
    every call and is always closed before returning.

    Args:
        slider_value: Two-element sequence of epoch seconds (start, end). Any
            other value (empty, None, wrong length) selects the full range from
            1970-01-01 up to the current time.
        group_col: Grouping column forwarded to build_leaderboard_query.
        top_n: Requested leaderboard size forwarded to build_leaderboard_query.
        view: Name of the view to query.

    Returns:
        DataFrame produced by the query: model-level rows carrying group_key,
        the metadata columns, total_downloads for the window and
        percent_of_total.
    """
    connection = create_fresh_duckdb_with_views()

    try:
        if slider_value and len(slider_value) == 2:
            # Slider thumbs are epoch seconds
            window_start = pd.to_datetime(slider_value[0], unit="s")
            window_end = pd.to_datetime(slider_value[1], unit="s")
        else:
            # No usable window supplied: span everything up to now
            window_start = pd.to_datetime("1970-01-01")
            window_end = pd.Timestamp.now()

        sql = build_leaderboard_query(
            group_col, top_n, str(window_start), str(window_end), view=view
        )
        return connection.execute(sql).fetchdf()
    finally:
        connection.close()
52
+
53
+
54
def get_filtered_top_n_alltime_from_duckdb(
    slider_value, group_col, top_n, view="all_downloads"
):
    """
    Run the shared leaderboard query in all-time (cumulative) mode.

    A brand-new DuckDB connection (with the parquet-backed views) is opened for
    every call and is always closed before returning.

    Args:
        slider_value: Single epoch-seconds value selecting the cut-off date;
            None means "now".
        group_col: Grouping column forwarded to build_leaderboard_query.
        top_n: Requested leaderboard size forwarded to build_leaderboard_query.
        view: Name of the view to query.

    Returns:
        DataFrame produced by the query: model-level rows carrying group_key,
        the metadata columns, cumulative total_downloads up to the selected
        date and percent_of_total.
    """
    connection = create_fresh_duckdb_with_views()

    try:
        # All-time mode passes one slider value rather than a (start, end) pair
        cutoff = (
            pd.Timestamp.now()
            if slider_value is None
            else pd.to_datetime(slider_value, unit="s")
        )

        sql = build_leaderboard_query(
            group_col, top_n, date_str=str(cutoff), view=view
        )
        return connection.execute(sql).fetchdf()
    finally:
        connection.close()
86
+
87
+
88
def leaderboard_callback_logic(
    n_clicks,
    slider_value,
    current_label,
    group_col,
    filename,
    default_label="▼ Show Top 50",
    chip_color="#F0F9FF",
    view="all_downloads",
    derived_author_toggle=True,
    is_alltime=False,
):
    """
    Shared body of the leaderboard callbacks.

    Works out how many rows to show from the toggle button state, fetches the
    matching rows from DuckDB (windowed or all-time), and renders the table.

    Returns:
        Tuple of (table_content, new_label). table_content is a placeholder
        message when either the query or the post-processing yields no rows.
    """

    def no_data_message():
        # Same placeholder for "query empty" and "processing produced nothing"
        return html.Div(
            "No data found in this time range. Try broadening the download date range.",
            style={"padding": "18px", "fontSize": "16px", "color": "#082030"},
        )

    # First load: the button has no label yet
    label = default_label if current_label is None else current_label

    # The button cycles 10 -> 50 -> 100 -> 10; its label names the NEXT step
    if n_clicks == 0:
        top_n, new_label = 10, label
    elif "Show Top 50" in label:
        top_n, new_label = 50, "▼ Show Top 100"
    elif "Show Top 100" in label:
        top_n, new_label = 100, "▲ Show Less"
    else:
        top_n, new_label = 10, "▼ Show Top 50"

    # Pick the cumulative or the date-window query
    fetch_rows = (
        get_filtered_top_n_alltime_from_duckdb
        if is_alltime
        else get_filtered_top_n_from_duckdb
    )
    df_filtered = fetch_rows(slider_value, group_col, top_n, view=view)

    if df_filtered is None or df_filtered.empty:
        return no_data_message(), new_label

    # Aggregate the model-level rows into leaderboard rows
    df, download_df = get_top_n_leaderboard(
        df_filtered, group_col, top_n, derived_author_toggle=derived_author_toggle
    )

    if df is None or (hasattr(df, "empty") and df.empty):
        return no_data_message(), new_label

    table = render_table_content(
        df, download_df, chip_color=chip_color, filename=filename
    )
    return table, new_label
154
 
155
 
156
+ # =============================
157
+ # UI Rendering Components
158
+ # =============================
159
+
160
  # Chip renderer
161
  def chip(text, bg_color="#F0F0F0"):
162
  return html.Span(
 
467
  download_top["% of total"] = download_top["% of total"].round(2)
468
 
469
  # All relevant metadata columns for the grouping
470
+ meta_cols = META_COLS_MAP.get(group_col, [])
471
 
472
  # Collect metadata per group by inspecting the underlying model-level rows
473
  meta_map = {}
 
497
  try:
498
  flag_emoji = countryflag.getflag(c)
499
  if not flag_emoji or flag_emoji == c:
500
+ flag_emoji = COUNTRY_EMOJI_FALLBACK.get(c, "🌍")
501
  except Exception:
502
+ flag_emoji = COUNTRY_EMOJI_FALLBACK.get(c, "🌍")
503
  chips.append((flag_emoji, c, "country"))
504
 
505
  # Author - use derived_author_toggle to determine which column
506
  author_key = "derived_author" if derived_author_toggle else "author"
507
  for a in meta.get(author_key, []):
508
+ icon = COMPANY_ICON_MAP.get(a, "")
509
  if icon == "":
510
  if meta.get("merged_country_groups_single", ["User"])[0] != "User":
511
  icon = "🏢"
 
558
  return display_for_render, download_top
559
 
560
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
561
 
562
 
563
  def get_top_n_from_duckdb(
 
593
  return pd.DataFrame()
594
  finally:
595
  conn_local.close()
 
 
 
 
 
 
 
 
 
 
 
 
helpers.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from dash import html
3
+
4
def ordinal(n: int) -> str:
    """Return *n* followed by its English ordinal suffix (1 -> '1st', 12 -> '12th')."""
    # 10-20 (in the last two digits) always take "th"; this covers 11th-13th
    in_teens = 10 <= n % 100 <= 20
    suffix = "th" if in_teens else {1: "st", 2: "nd", 3: "rd"}.get(n % 10, "th")
    return f"{n}{suffix}"
11
+
12
+
13
def format_date(dt: pd.Timestamp) -> str:
    """Render a Timestamp as '<abbreviated month> <ordinal day>, <year>'."""
    month = dt.strftime("%b")
    day = ordinal(dt.day)
    return f"{month} {day}, {dt.year}"
16
+
17
+
18
def build_slider_marks(start_dt: pd.Timestamp, end_dt: pd.Timestamp):
    """
    Build the two end-point marks shared by the range and all-time sliders.

    Each mark is a dict with the integer epoch-seconds "value" of the
    timestamp and a "label" of the form 'Mon YYYY'.
    """
    return [
        {"value": int(moment.timestamp()), "label": moment.strftime("%b %Y")}
        for moment in (start_dt, end_dt)
    ]
24
+
25
def get_thumb_labels(values):
    """
    Build the two date labels attached to the range-slider thumbs.

    values holds the two thumb positions in epoch seconds. When the thumbs are
    less than roughly four months apart, the first label is moved above the
    track so the two labels do not overlap; otherwise both sit below it.
    """
    thumbs_are_close = abs(values[1] - values[0]) < 4 * 30 * 86400  # 4 months

    base_style = {
        "background": "#fff",
        "color": "#082030",
        "fontWeight": "bold",
        "fontSize": "13px",
        "borderRadius": "8px",
        "padding": "2px 8px",
        "boxShadow": "0 1px 4px rgba(8,32,48,0.10)",
        "position": "absolute",
        "left": "50%",
        "transform": "translateX(-50%)",
        "whiteSpace": "nowrap",
        "zIndex": 100,
    }

    # Only the first label ever moves; the second always stays below the track
    first_style = {**base_style, "top": "-38px" if thumbs_are_close else "14px"}
    second_style = {**base_style, "top": "14px"}

    first_text = pd.to_datetime(values[0], unit="s").strftime("%b %d, %Y")
    second_text = pd.to_datetime(values[1], unit="s").strftime("%b %d, %Y")

    return [
        html.Div(first_text, style=first_style),
        html.Div(second_text, style=second_style),
    ]
60
+
61
+
62
def get_thumb_label_single(value):
    """
    Build the date label attached to the all-time slider thumb.

    value is the thumb position in epoch seconds; the result is a one-element
    list so it can be used directly as the slider's thumb children.
    """
    date_text = pd.to_datetime(value, unit="s").strftime("%b %d, %Y")
    thumb_style = dict(
        background="#fff",
        color="#082030",
        fontWeight="bold",
        fontSize="13px",
        borderRadius="8px",
        padding="2px 8px",
        boxShadow="0 1px 4px rgba(8,32,48,0.10)",
        position="absolute",
        left="50%",
        transform="translateX(-50%)",
        whiteSpace="nowrap",
        zIndex=100,
        top="14px",
    )
    return [html.Div(date_text, style=thumb_style)]
80
+
81
+
82
def format_large_number(n: int) -> str:
    """
    Shorten large numbers for display.

    Values are scaled to one decimal place, e.g. 5,000,000 -> '5.0 million',
    1,500,000,000 -> '1.5 billion', 1,234 -> '1.2k'. Anything below 1,000 is
    returned as a plain integer string.
    """
    scales = (
        (1_000_000_000, " billion"),
        (1_000_000, " million"),
        (1_000, "k"),
    )
    # Largest unit first, so the first match is the right one
    for threshold, suffix in scales:
        if n >= threshold:
            return f"{n / threshold:.1f}{suffix}"
    return str(int(n))
layout_components.py ADDED
@@ -0,0 +1,603 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dash import html, dcc
2
+ import dash_mantine_components as dmc
3
+ from dash_iconify import DashIconify
4
+
5
+ from config import BUTTON_STYLE, DARK_BACKGROUND, PRIMARY_COLOR
6
+
7
+
8
def build_header(last_updated: str) -> html.Div:
    """Top header: live badge with the last-updated text, plus partner/paper links."""
    live_badge = html.Span(
        [
            html.Span(className="live-dot"),
            html.Span("LIVE", className="live-label"),
        ],
        className="live-row",
    )
    updated_text = html.Span(
        f"Last updated: {last_updated}", className="last-updated"
    )
    status_row = html.Div(
        [live_badge, updated_text],
        className="header-status-row",
    )

    dpi_link = html.A(
        children=[
            html.Img(
                src="assets/images/dpi.svg",
                className="header-logo-img",
            ),
            "Data Provenance Initiative",
        ],
        href="https://www.dataprovenance.org/",
        target="_blank",
        className="no-bg-link header-link",
    )
    hf_link = html.A(
        children=[
            html.Img(
                src="assets/images/hf.svg",
                className="header-logo-img",
            ),
            html.Span("Hugging Face", className="hf-brand-text"),
        ],
        href="https://huggingface.co/",
        target="_blank",
        className="no-bg-link header-link",
    )
    paper_link = html.A(
        children=[html.Span("Read the paper", className="paper-text")],
        href="https://arxiv.org/abs/2512.03073",
        target="_blank",
        className="no-bg-link header-link paper-link",
    )
    links_row = html.Div(
        [dpi_link, hf_link, paper_link],
        className="header-links-row",
    )

    return html.Div(
        [status_row, links_row],
        style={
            "display": "flex",
            "justifyContent": "space-between",
            "alignItems": "center",
            "padding": "18px 24px",
            "gap": "24px",
            "backgroundColor": DARK_BACKGROUND,
        },
        className="responsive-header",
    )
73
+
74
+
75
def build_range_slider(start_ts: int, end_ts: int, value, marks, thumb_children=None):
    """
    Create the two-thumb slider (id "time-slider") used for date-window selection.

    start_ts / end_ts bound the slider; the step is one day expressed in
    seconds. The built-in label is disabled; thumb_children is passed through
    as the slider's thumb children.
    """
    one_day = 24 * 60 * 60
    slider_props = {
        "id": "time-slider",
        "min": start_ts,
        "max": end_ts,
        "value": value,
        "step": one_day,
        "color": PRIMARY_COLOR,
        "size": "md",
        "radius": "xl",
        "marks": marks,
        "style": {"width": "95%", "paddingLeft": "60px"},
        "label": None,
        "showLabelOnHover": False,
        "labelTransitionProps": {"transition": "fade", "duration": 150},
        "thumbChildren": thumb_children,
    }
    return dmc.RangeSlider(**slider_props)
93
+
94
+
95
def build_single_slider(start_ts: int, end_ts: int, value, marks, thumb_children=None):
    """
    Create the one-thumb slider (id "time-slider-alltime") used for all-time selection.

    start_ts / end_ts bound the slider; the step is one day expressed in
    seconds. The built-in label is disabled; thumb_children is passed through
    as the slider's thumb children.
    """
    one_day = 24 * 60 * 60
    slider_props = {
        "id": "time-slider-alltime",
        "min": start_ts,
        "max": end_ts,
        "value": value,
        "step": one_day,
        "color": PRIMARY_COLOR,
        "size": "md",
        "radius": "xl",
        "marks": marks,
        "style": {"width": "95%", "paddingLeft": "60px"},
        "label": None,
        "showLabelOnHover": False,
        "labelTransitionProps": {"transition": "fade", "duration": 150},
        "thumbChildren": thumb_children,
    }
    return dmc.Slider(**slider_props)
113
+
114
+
115
def build_alert_and_title() -> html.Div:
    """Dismissible data-source notice stacked above the page title."""
    notice_color = "#1A5F8D"

    dataset_link = html.A(
        "public Hugging Face",
        href="https://huggingface.co/datasets/hfmlsoc/hub_weekly_snapshots",
        target="_blank",
        style={
            "color": notice_color,
            "fontWeight": "bold",
            "textDecoration": "underline",
        },
    )
    notice = dmc.Alert(
        icon=DashIconify(
            icon="mdi:information-outline",
            width=18,
            height=18,
            style={"color": notice_color},
        ),
        children=[
            "Note: This dashboard uses ",
            dataset_link,
            " download data, which is less precise than data analyzed in the paper.",
        ],
        color="blue",
        radius="md",
        variant="light",
        withCloseButton=True,
        style={
            "marginTop": "16px",
            "marginBottom": "8px",
            "fontSize": "15px",
            "fontWeight": "500",
            "marginLeft": "auto",
            "marginRight": "auto",
        },
    )
    title = html.Span(
        "The Open Model Leaderboard",
        style={
            "fontSize": 40,
            "fontWeight": "700",
            "textAlign": "center",
            "marginTop": "20px",
            "marginBottom": "20px",
        },
    )

    return html.Div(
        children=[notice, title],
        style={
            "display": "flex",
            "flexDirection": "column",
            "alignItems": "center",
            "justifyContent": "center",
            "gap": "12px",
            "marginTop": "20px",
            "marginBottom": "20px",
        },
        className="responsive-title-row",
    )
175
+
176
+
177
def build_intro_paragraph() -> html.Div:
    """Introductory paragraph shown under the title, ending with a link to the paper."""
    intro_text = "This leaderboard assesses concentrations of power in the open model ecosystem through ranking user downloads across three groups: countries, developers, and models. Explore how user downloads of models are distributed among these groups and identify key players shaping the open model ecosystem on Hugging Face. This dashboard accompanies the paper titled "
    paper_link = html.A(
        "Economies of Open Intelligence: Tracing Power & Participation in the Model Ecosystem.",
        href="https://arxiv.org/abs/2512.03073",
        target="_blank",
        style={
            "color": PRIMARY_COLOR,
            "fontWeight": "700",
            "textDecoration": "underline",
        },
    )
    paragraph_style = {
        "fontSize": 14,
        "marginTop": 18,
        "marginBottom": 12,
        "marginLeft": 100,
        "marginRight": 100,
        "textAlign": "center",
    }
    return html.Div(
        children=[intro_text, paper_link],
        style=paragraph_style,
        className="responsive-intro",
    )
203
+
204
+
205
def _info_hover_card(text: str):
    """Small info icon that reveals *text* in a hover card."""
    return dmc.HoverCard(
        width=260,
        shadow="md",
        position="top",
        children=[
            dmc.HoverCardTarget(
                html.Span(
                    DashIconify(
                        icon="mdi:information-outline",
                        width=16,
                        height=16,
                        style={
                            "marginLeft": "6px",
                            "color": PRIMARY_COLOR,
                            "verticalAlign": "middle",
                        },
                    ),
                    style={"cursor": "pointer"},
                )
            ),
            dmc.HoverCardDropdown(
                dmc.Text(
                    text,
                    size="sm",
                    style={"maxWidth": "240px"},
                )
            ),
        ],
    )


def _build_download_filters_column() -> html.Div:
    """Left column: download-view and model-attribution segmented controls."""
    return html.Div(
        [
            html.Div(
                html.Span(
                    [
                        "Download View",
                        _info_hover_card(
                            "We believe this filter isolates more authentic usage, mitigating the impact of automatic software downloads for older models."
                        ),
                    ],
                    className="filter-label-row",
                ),
                className="filter-label-container",
            ),
            html.Div(
                [
                    dmc.SegmentedControl(
                        id="segmented",
                        value="all-downloads",
                        color=PRIMARY_COLOR,
                        transitionDuration=200,
                        data=[
                            {
                                "value": "all-downloads",
                                "label": "All Downloads",
                            },
                            {
                                "value": "filtered-downloads",
                                "label": html.Span(["Filtered Downloads"]),
                            },
                        ],
                        mb=10,
                    ),
                ],
                className="filter-segmented-row",
            ),
            html.Div(
                "Choose whether to count all downloads, or only downloads up to one year from model creation.",
                className="filter-description",
            ),
            html.Div(
                [
                    html.Div("Model Attribution", className="filter-label"),
                    dmc.SegmentedControl(
                        id="model-attribution-segmented",
                        value="uploader",
                        color=PRIMARY_COLOR,
                        transitionDuration=200,
                        data=[
                            {"value": "uploader", "label": "Model Uploader"},
                            {
                                "value": "original_creator",
                                "label": "Original Model Creator",
                            },
                        ],
                        mb=10,
                    ),
                    html.Div(
                        "Toggle between having downloads attributed to the account that uploaded the model, or the account that uploaded the model that this was originally derived from.",
                        className="filter-description",
                    ),
                ],
                style={"marginTop": "10px"},
            ),
            html.Span(
                id="global-toggle-status", className="global-toggle-status"
            ),
        ],
        className="main-content-left",
    )


def _build_tip_section() -> html.Div:
    """'Tip' box suggesting which toggles to compare."""
    return html.Div(
        [
            html.Div(
                [
                    DashIconify(
                        icon="mdi:lightbulb-on-outline",
                        width=20,
                        height=20,
                        style={
                            "marginRight": "8px",
                            "color": DARK_BACKGROUND,
                        },
                    ),
                    html.Span("Tip"),
                ],
                className="tip-title",
            ),
            html.Div(
                [
                    "Try switching between ",
                    html.Span("All Downloads", className="tip-highlight"),
                    " and ",
                    html.Span("Filtered Downloads", className="tip-highlight"),
                    " to compare net popularity (but many duplicate, unused downloads) versus more immediate interest as models are released. ",
                    "You can also toggle between ",
                    html.Span("Model Uploader", className="tip-highlight"),
                    " and ",
                    html.Span(
                        "Original Model Creator",
                        className="tip-highlight",
                    ),
                    " to see how attribution affects perceived popularity.",
                ],
                className="tip-description",
            ),
        ],
        className="tip-section",
    )


def _build_date_range_column(time_slider_component) -> html.Div:
    """Right column: all-time switch, the time slider container, and the tip box."""
    return html.Div(
        [
            html.Div(
                [
                    html.Span("Download Date Range", className="filter-label"),
                    _info_hover_card(
                        "Toggle between viewing downloads between a date range or all-time downloads at a single date."
                    ),
                ],
                className="filter-label-row",
            ),
            dmc.Switch(
                id="time-range-toggle",
                label="All-time",
                checked=False,
                color=PRIMARY_COLOR,
                style={"marginBottom": "12px"},
            ),
            dcc.Loading(
                id="loading-slider",
                type="circle",
                color=PRIMARY_COLOR,
                # The slider lives in its own container (id "slider-container")
                children=html.Div(
                    id="slider-container", children=[time_slider_component]
                ),
            ),
            html.Div(
                id="slider-description",
                children="Adjust the time range to filter leaderboard results by when models were downloaded by users.",
                className="filter-description filter-description-margin",
            ),
            _build_tip_section(),
        ],
        className="main-content-right",
    )


def build_filter_controls(time_slider_component) -> html.Div:
    """
    Filter controls block with segmented controls and sliders.

    Args:
        time_slider_component: The slider component initially placed inside
            the "slider-container" div.

    Returns:
        A two-column html.Div: download-view / attribution controls on the
        left, date-range controls and the tip box on the right. Component ids,
        class names and texts are identical to the previous inline version;
        only the construction is split into private helpers.
    """
    return html.Div(
        children=[
            _build_download_filters_column(),
            _build_date_range_column(time_slider_component),
        ],
        style={
            "display": "flex",
            "gap": "24px",
            "padding": "32px",
            "alignItems": "flex-start",
            "marginLeft": "100px",
            "marginRight": "100px",
            "backgroundColor": "#FFFBF9",
            "borderRadius": "18px",
        },
        className="responsive-main-content",
    )
422
+
423
+
424
def _meta_var(text: str) -> html.Span:
    """Inline highlighted metadata term used in tab descriptions."""
    return html.Span(text, className="meta-var")


def _build_leaderboard_tab(label: str, slug: str, description) -> dcc.Tab:
    """
    Build one leaderboard tab.

    Args:
        label: Tab label, also used as the tab value.
        slug: Lower-case name used to derive the component ids
            ("loading-<slug>", "top_<slug>-table", "top_<slug>-toggle").
        description: Children (strings / components) of the description div.
    """
    return dcc.Tab(
        label=label,
        value=label,
        style={
            "backgroundColor": "transparent",
            "border": "none",
            "padding": "10px 18px",
            "color": "#6B7280",
            "fontWeight": "500",
        },
        selected_style={
            "backgroundColor": "transparent",
            "border": "none",
            "padding": "10px 18px",
            "fontWeight": "700",
            "borderBottom": "3px solid #082030",
        },
        children=[
            html.Div(
                children=description,
                className="tab-description",
            ),
            html.Div(
                dcc.Loading(
                    id=f"loading-{slug}",
                    type="circle",
                    color=PRIMARY_COLOR,
                    children=html.Div(id=f"top_{slug}-table"),
                ),
                className="responsive-table-wrapper",
            ),
            html.Button(
                id=f"top_{slug}-toggle",
                children="▼ Show Top 50",
                n_clicks=0,
                style={**BUTTON_STYLE, "border": "none"},
            ),
        ],
    )


def build_leaderboard_tabs() -> html.Div:
    """
    Tabbed leaderboard section.

    Returns an html.Div wrapping a dcc.Tabs (id "leaderboard-tabs", default
    "Countries") with three tabs: Countries, Developers and Models. Each tab
    holds a description, a loading-wrapped table container and a toggle
    button. Component ids, class names and texts are identical to the previous
    copy-pasted version; the tabs are now produced by one private helper.
    """
    countries_tab = _build_leaderboard_tab(
        "Countries",
        "countries",
        [
            "The country leaderboard shows how downloads are distributed across different nations, highlighting which countries are leading in model usage and adoption. The metadata includes the ",
            _meta_var("country"),
            " and number of ",
            _meta_var("user downloads"),
            ".",
        ],
    )
    developers_tab = _build_leaderboard_tab(
        "Developers",
        "developers",
        [
            "The developer leaderboard highlights the most influential model creators on Hugging Face, showcasing which developers have garnered the highest download counts for their models. The metadata includes the ",
            _meta_var("developer"),
            ", number of ",
            _meta_var("user downloads"),
            ", and ",
            _meta_var("country"),
            ".",
        ],
    )
    models_tab = _build_leaderboard_tab(
        "Models",
        "models",
        [
            "The model leaderboard ranks individual models based on their download counts, revealing which models are most popular among users on Hugging Face. The metadata includes the ",
            _meta_var("model name"),
            ", number of ",
            _meta_var("user downloads"),
            ", ",
            _meta_var("developer"),
            ", and ",
            _meta_var("modality"),
            " (the input and output types of the model).",
        ],
    )

    return html.Div(
        [
            dcc.Tabs(
                id="leaderboard-tabs",
                value="Countries",
                children=[countries_tab, developers_tab, models_tab],
            ),
        ],
        style={
            "borderRadius": "18px",
            "padding": "32px",
            "marginTop": "12px",
            "marginBottom": "12px",
            "marginLeft": "50px",
            "marginRight": "50px",
        },
        className="responsive-tabs",
    )
586
+
587
+
588
def build_main_layout(last_updated: str, time_slider_component) -> html.Div:
    """
    Assemble the whole page from its sections, top to bottom.

    last_updated is shown in the header; time_slider_component is the slider
    initially placed in the filter controls.
    """
    sections = [
        build_header(last_updated),
        build_alert_and_title(),
        build_intro_paragraph(),
        build_filter_controls(time_slider_component),
        build_leaderboard_tabs(),
    ]
    page_style = {
        "fontFamily": "Inter",
        "backgroundColor": "#ffffff",
        "minHeight": "100vh",
    }
    return html.Div(sections, style=page_style)