Spaces:
Sleeping
Sleeping
cleanup app.py into modularized components
Browse files- app.py +60 -1011
- config.py +56 -0
- data_utils.py +237 -0
- graphs/leaderboard.py +156 -293
- helpers.py +90 -0
- layout_components.py +603 -0
app.py
CHANGED
|
@@ -1,94 +1,26 @@
|
|
| 1 |
from dash import Dash, html, dcc, Input, Output, State
|
| 2 |
import pandas as pd
|
| 3 |
import dash_mantine_components as dmc
|
| 4 |
-
import duckdb
|
| 5 |
import time
|
|
|
|
|
|
|
|
|
|
| 6 |
from graphs.leaderboard import (
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
|
|
|
|
|
|
|
|
|
| 11 |
)
|
| 12 |
-
from dash_iconify import DashIconify
|
| 13 |
|
| 14 |
# Initialize the app
|
| 15 |
app = Dash(suppress_callback_exceptions=True) # suppress callback exceptions for multi-page layout
|
| 16 |
server = app.server
|
| 17 |
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
HF_DATASET_ID = "mmpr/open_model_evolution_data"
|
| 21 |
-
hf_parquet_url_1 = "https://huggingface.co/datasets/emsesc/open_model_evolution_data/resolve/main/all_downloads_with_annotations.parquet"
|
| 22 |
-
hf_parquet_url_2 = "https://huggingface.co/datasets/emsesc/open_model_evolution_data/resolve/main/one_year_rolling.parquet"
|
| 23 |
-
|
| 24 |
-
# Helper: create a fresh in-memory DuckDB connection and (re)create parquet-backed views.
|
| 25 |
-
def create_fresh_duckdb_with_views():
    """
    Open a new in-memory DuckDB connection and define the parquet-backed views.

    The httpfs extension is installed/loaded, the HTTP metadata and object
    caches are switched off, and the ``all_downloads`` / ``one_year_rolling``
    views are created over the remote parquet URLs held in the module-level
    ``hf_parquet_url_1`` / ``hf_parquet_url_2`` variables.

    Every step after opening the connection is best-effort: a failing
    statement is logged and skipped rather than raised, so a connection is
    always returned. If a view could not be created, the first query against
    it will raise in the caller.

    Returns:
        The new DuckDB connection. The caller must close it.
    """
    import logging  # function-scope import keeps this block self-contained

    log = logging.getLogger(__name__)
    local_con = duckdb.connect(database=":memory:", read_only=False)

    # Each statement gets its own try so that one failure (for example
    # INSTALL) no longer prevents the next one (LOAD) from being attempted.
    setup_statements = (
        "INSTALL httpfs;",
        "LOAD httpfs;",
        # keep HF Spaces behavior consistent: no HTTP/object caching
        "SET enable_http_metadata_cache = false;",
        "SET enable_object_cache = false;",
    )
    for statement in setup_statements:
        try:
            local_con.execute(statement)
        except Exception:
            log.debug(
                "DuckDB setup statement skipped: %s", statement, exc_info=True
            )

    # Create the views referencing the remote parquet files. The views are
    # independent of each other, so a failure on one does not skip the other.
    view_sources = (
        ("all_downloads", hf_parquet_url_1),
        ("one_year_rolling", hf_parquet_url_2),
    )
    for view_name, parquet_url in view_sources:
        try:
            local_con.execute(f"""
                CREATE OR REPLACE VIEW {view_name} AS
                SELECT * FROM read_parquet('{parquet_url}')
            """)
        except Exception:
            # Still hand the connection back; the caller handles/closes it.
            log.warning(
                "Could not create DuckDB view %r", view_name, exc_info=True
            )

    return local_con
|
| 60 |
-
|
| 61 |
-
# Query for most recent date in all_downloads
|
| 62 |
-
def get_last_updated():
    """
    Return the most recent ``time`` value in ``all_downloads`` as display text.

    A fresh DuckDB connection is opened for the lookup and closed again
    before the result is formatted.

    Returns:
        The date formatted like ``"Oct 08, 2025"`` (``%b %d, %Y``), or
        ``"N/A"`` when the maximum is null or anything in the lookup raises.
    """
    fallback = "N/A"
    try:
        db = create_fresh_duckdb_with_views()
        try:
            frame = db.execute(
                "SELECT MAX(time) as max_time FROM all_downloads"
            ).fetchdf()
        finally:
            # Always release the connection, even if the query failed.
            db.close()

        latest = frame["max_time"].iloc[0]
        if not pd.isnull(latest):
            return pd.to_datetime(latest).strftime("%b %d, %Y")
        return fallback
    except Exception:
        # Deliberate catch-all: any failure in the lookup yields the placeholder.
        return fallback
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
# DuckDB connection (global)
|
| 80 |
-
con = duckdb.connect(database=":memory:", read_only=False)
|
| 81 |
-
|
| 82 |
-
# disable all caching so HF Spaces always read latest parquet
|
| 83 |
-
con.execute("SET enable_http_metadata_cache = false;")
|
| 84 |
-
con.execute("SET enable_object_cache = false;")
|
| 85 |
-
|
| 86 |
-
# Load parquet files from Hugging Face using DuckDB
|
| 87 |
-
HF_DATASET_ID = "mmpr/open_model_evolution_data"
|
| 88 |
-
hf_parquet_url_1 = "https://huggingface.co/datasets/mmpr/open_model_evolution_data/resolve/main/all_downloads_with_annotations.parquet"
|
| 89 |
-
hf_parquet_url_2 = "https://huggingface.co/datasets/mmpr/open_model_evolution_data/resolve/main/one_year_rolling.parquet"
|
| 90 |
-
|
| 91 |
-
print(f"Attempting to connect to dataset from Hugging Face Hub: {HF_DATASET_ID}")
|
| 92 |
try:
|
| 93 |
overall_start_time = time.time()
|
| 94 |
|
|
@@ -111,153 +43,27 @@ except Exception as e:
|
|
| 111 |
# Create a dcc slider for time range selection by year (readable marks)
|
| 112 |
start_ts = int(start_dt.timestamp())
|
| 113 |
end_ts = int(end_dt.timestamp())
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
return f"{n}{suffix}"
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
def format_date(dt):
    """
    Format a date for display, e.g. "Oct 8th, 2025".

    The abbreviated month comes from ``strftime("%b")``; the day is rendered
    by the ``ordinal`` helper defined alongside this function, followed by a
    comma and the year.
    """
    month_abbr = dt.strftime("%b")
    day_text = ordinal(dt.day)
    return f"{month_abbr} {day_text}, {dt.year}"
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
marks = []
|
| 131 |
-
# Add start label (e.g. "Jan 2020")
|
| 132 |
-
marks.append({"value": start_ts, "label": start_dt.strftime("%b %Y")})
|
| 133 |
-
# Add yearly marks between start and end (e.g. "2021", "2022")
|
| 134 |
-
# for yr in range(start_dt.year, end_dt.year + 1):
|
| 135 |
-
# yr_ts = int(pd.Timestamp(year=yr, month=1, day=1).timestamp())
|
| 136 |
-
# start_yr = int(pd.Timestamp(year=start_dt.year, month=1, day=1).timestamp())
|
| 137 |
-
# if yr_ts != start_yr and yr_ts != end_ts:
|
| 138 |
-
# marks.append({"value": yr_ts, "label": str(yr)})
|
| 139 |
-
# Add end label (e.g. "Dec 2024")
|
| 140 |
-
marks.append({"value": end_ts, "label": end_dt.strftime("%b %Y")})
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
def get_thumb_labels(values):
    """
    Build the two floating date labels attached to the range-slider thumbs.

    Args:
        values: Sequence whose first two items are the thumb positions as
            epoch seconds (they are parsed with ``unit="s"``).

    Returns:
        A list of two ``html.Div`` elements, one per thumb, each containing
        the formatted date and an absolutely positioned label style.
    """
    # Thumbs closer than roughly four months (4 * 30 days, in seconds) would
    # make side-by-side labels collide.
    four_months_seconds = 4 * 30 * 86400
    thumbs_are_close = abs(values[1] - values[0]) < four_months_seconds

    shared_style = {
        "background": "#fff",
        "color": "#082030",
        "fontWeight": "bold",
        "fontSize": "13px",
        "borderRadius": "8px",
        "padding": "2px 8px",
        "boxShadow": "0 1px 4px rgba(8,32,48,0.10)",
        "position": "absolute",
        "left": "50%",
        "transform": "translateX(-50%)",
        "whiteSpace": "nowrap",
        "zIndex": 100,
    }

    # When the thumbs are close, lift the first label above the slider;
    # otherwise both labels sit just below it. The second label never moves.
    first_top = "-38px" if thumbs_are_close else "14px"
    label_tops = (first_top, "14px")
    thumb_positions = (values[0], values[1])

    return [
        html.Div(
            format_date(pd.to_datetime(position, unit="s")),
            style={**shared_style, "top": top_offset},
        )
        for position, top_offset in zip(thumb_positions, label_tops)
    ]
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
# Create a dcc slider for time range selection by year
|
| 198 |
-
time_slider = dmc.RangeSlider(
|
| 199 |
-
id="time-slider",
|
| 200 |
-
min=start_ts,
|
| 201 |
-
max=end_ts,
|
| 202 |
-
value=[
|
| 203 |
-
start_ts,
|
| 204 |
-
end_ts,
|
| 205 |
-
],
|
| 206 |
-
step=24 * 60 * 60,
|
| 207 |
-
color="#AC482A",
|
| 208 |
-
size="md",
|
| 209 |
-
radius="xl",
|
| 210 |
-
marks=marks,
|
| 211 |
-
style={"width": "95%", "paddingLeft": "60px"}, # updated paddingLeft
|
| 212 |
-
label=None,
|
| 213 |
-
showLabelOnHover=False,
|
| 214 |
-
labelTransitionProps={"transition": "fade", "duration": 150},
|
| 215 |
-
thumbChildren=get_thumb_labels([start_ts, end_ts]),
|
| 216 |
)
|
| 217 |
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
"fontWeight": "bold",
|
| 225 |
-
"fontSize": "13px",
|
| 226 |
-
"borderRadius": "8px",
|
| 227 |
-
"padding": "2px 8px",
|
| 228 |
-
"boxShadow": "0 1px 4px rgba(8,32,48,0.10)",
|
| 229 |
-
"position": "absolute",
|
| 230 |
-
"left": "50%",
|
| 231 |
-
"transform": "translateX(-50%)",
|
| 232 |
-
"whiteSpace": "nowrap",
|
| 233 |
-
"zIndex": 100,
|
| 234 |
-
"top": "14px",
|
| 235 |
-
}
|
| 236 |
-
return [
|
| 237 |
-
html.Div(
|
| 238 |
-
format_date(pd.to_datetime(value, unit="s")),
|
| 239 |
-
style=label_style,
|
| 240 |
-
)
|
| 241 |
-
]
|
| 242 |
-
|
| 243 |
-
time_slider_alltime = dmc.Slider(
|
| 244 |
-
id="time-slider-alltime",
|
| 245 |
-
min=start_ts,
|
| 246 |
-
max=end_ts,
|
| 247 |
-
value=end_ts,
|
| 248 |
-
step=24 * 60 * 60,
|
| 249 |
-
color="#AC482A",
|
| 250 |
-
size="md",
|
| 251 |
-
radius="xl",
|
| 252 |
-
marks=marks,
|
| 253 |
-
style={"width": "95%", "paddingLeft": "60px"},
|
| 254 |
-
label=None,
|
| 255 |
-
showLabelOnHover=False,
|
| 256 |
-
labelTransitionProps={"transition": "fade", "duration": 150},
|
| 257 |
-
thumbChildren=get_thumb_label_single(end_ts),
|
| 258 |
)
|
| 259 |
|
| 260 |
-
|
|
|
|
|
|
|
| 261 |
app.layout = dmc.MantineProvider(
|
| 262 |
theme={
|
| 263 |
"colorScheme": "light",
|
|
@@ -269,748 +75,15 @@ app.layout = dmc.MantineProvider(
|
|
| 269 |
dcc.Store(id="model-attribution-type", data="uploader"),
|
| 270 |
dcc.Store(id="time-slider-value", data=[start_ts, end_ts]),
|
| 271 |
dcc.Store(id="time-slider-alltime-value", data=end_ts),
|
| 272 |
-
|
| 273 |
-
[
|
| 274 |
-
# Header
|
| 275 |
-
html.Div(
|
| 276 |
-
[
|
| 277 |
-
html.Div(
|
| 278 |
-
[
|
| 279 |
-
html.Span(
|
| 280 |
-
[
|
| 281 |
-
html.Span(
|
| 282 |
-
className="live-dot",
|
| 283 |
-
),
|
| 284 |
-
html.Span(
|
| 285 |
-
"LIVE",
|
| 286 |
-
className="live-label",
|
| 287 |
-
),
|
| 288 |
-
],
|
| 289 |
-
className="live-row",
|
| 290 |
-
),
|
| 291 |
-
html.Span(
|
| 292 |
-
f"Last updated: {get_last_updated()}",
|
| 293 |
-
className="last-updated",
|
| 294 |
-
),
|
| 295 |
-
],
|
| 296 |
-
className="header-status-row",
|
| 297 |
-
),
|
| 298 |
-
html.Div(
|
| 299 |
-
[
|
| 300 |
-
html.A(
|
| 301 |
-
children=[
|
| 302 |
-
html.Img(
|
| 303 |
-
src="assets/images/dpi.svg",
|
| 304 |
-
className="header-logo-img",
|
| 305 |
-
),
|
| 306 |
-
"Data Provenance Initiative",
|
| 307 |
-
],
|
| 308 |
-
href="https://www.dataprovenance.org/",
|
| 309 |
-
target="_blank",
|
| 310 |
-
className="no-bg-link header-link",
|
| 311 |
-
),
|
| 312 |
-
html.A(
|
| 313 |
-
children=[
|
| 314 |
-
html.Img(
|
| 315 |
-
src="assets/images/hf.svg",
|
| 316 |
-
className="header-logo-img",
|
| 317 |
-
),
|
| 318 |
-
html.Span(
|
| 319 |
-
"Hugging Face",
|
| 320 |
-
className="hf-brand-text",
|
| 321 |
-
),
|
| 322 |
-
],
|
| 323 |
-
href="https://huggingface.co/",
|
| 324 |
-
target="_blank",
|
| 325 |
-
className="no-bg-link header-link",
|
| 326 |
-
),
|
| 327 |
-
html.A(
|
| 328 |
-
children=[
|
| 329 |
-
html.Span(
|
| 330 |
-
"Read the paper",
|
| 331 |
-
className="paper-text",
|
| 332 |
-
),
|
| 333 |
-
],
|
| 334 |
-
href="https://arxiv.org/abs/2512.03073",
|
| 335 |
-
target="_blank",
|
| 336 |
-
className="no-bg-link header-link paper-link",
|
| 337 |
-
),
|
| 338 |
-
],
|
| 339 |
-
className="header-links-row",
|
| 340 |
-
),
|
| 341 |
-
],
|
| 342 |
-
style={
|
| 343 |
-
"display": "flex",
|
| 344 |
-
"justifyContent": "space-between",
|
| 345 |
-
"alignItems": "center",
|
| 346 |
-
"padding": "18px 24px",
|
| 347 |
-
"gap": "24px",
|
| 348 |
-
"backgroundColor": "#082030", # restored dark background
|
| 349 |
-
},
|
| 350 |
-
className="responsive-header", # <-- add class
|
| 351 |
-
),
|
| 352 |
-
html.Div(
|
| 353 |
-
children=[
|
| 354 |
-
dmc.Alert(
|
| 355 |
-
# add an icon to the alert
|
| 356 |
-
icon=DashIconify(
|
| 357 |
-
icon="mdi:information-outline",
|
| 358 |
-
width=18,
|
| 359 |
-
height=18,
|
| 360 |
-
style={"color": "#1A5F8D"},
|
| 361 |
-
),
|
| 362 |
-
children=[
|
| 363 |
-
"Note: This dashboard uses ",
|
| 364 |
-
html.A(
|
| 365 |
-
"public Hugging Face",
|
| 366 |
-
href="https://huggingface.co/datasets/hfmlsoc/hub_weekly_snapshots",
|
| 367 |
-
target="_blank",
|
| 368 |
-
style={
|
| 369 |
-
"color": "#1A5F8D",
|
| 370 |
-
"fontWeight": "bold",
|
| 371 |
-
"textDecoration": "underline",
|
| 372 |
-
},
|
| 373 |
-
),
|
| 374 |
-
" download data, which is less precise than data analyzed in the paper.",
|
| 375 |
-
],
|
| 376 |
-
color="blue",
|
| 377 |
-
radius="md",
|
| 378 |
-
variant="light",
|
| 379 |
-
withCloseButton=True,
|
| 380 |
-
style={
|
| 381 |
-
"marginTop": "16px",
|
| 382 |
-
"marginBottom": "8px",
|
| 383 |
-
"fontSize": "15px",
|
| 384 |
-
"fontWeight": "500",
|
| 385 |
-
"marginLeft": "auto",
|
| 386 |
-
"marginRight": "auto",
|
| 387 |
-
},
|
| 388 |
-
),
|
| 389 |
-
html.Span(
|
| 390 |
-
"The Open Model Leaderboard",
|
| 391 |
-
style={
|
| 392 |
-
"fontSize": 40,
|
| 393 |
-
"fontWeight": "700",
|
| 394 |
-
"textAlign": "center",
|
| 395 |
-
"marginTop": "20px",
|
| 396 |
-
"marginBottom": "20px",
|
| 397 |
-
},
|
| 398 |
-
),
|
| 399 |
-
],
|
| 400 |
-
style={
|
| 401 |
-
"display": "flex",
|
| 402 |
-
"flexDirection": "column",
|
| 403 |
-
"alignItems": "center",
|
| 404 |
-
"justifyContent": "center",
|
| 405 |
-
"gap": "12px",
|
| 406 |
-
"marginTop": "20px",
|
| 407 |
-
"marginBottom": "20px",
|
| 408 |
-
},
|
| 409 |
-
className="responsive-title-row", # <-- add class
|
| 410 |
-
),
|
| 411 |
-
html.Div(
|
| 412 |
-
children=[
|
| 413 |
-
"This leaderboard assesses concentrations of power in the open model ecosystem through ranking user downloads across three groups: countries, developers, and models. Explore how user downloads of models are distributed among these groups and identify key players shaping the open model ecosystem on Hugging Face. This dashboard accompanies the paper titled ",
|
| 414 |
-
html.A(
|
| 415 |
-
"Economies of Open Intelligence: Tracing Power & Participation in the Model Ecosystem.",
|
| 416 |
-
href="https://arxiv.org/abs/2512.03073",
|
| 417 |
-
target="_blank",
|
| 418 |
-
style={
|
| 419 |
-
"color": "#AC482A",
|
| 420 |
-
"fontWeight": "700",
|
| 421 |
-
"textDecoration": "underline",
|
| 422 |
-
},
|
| 423 |
-
),
|
| 424 |
-
],
|
| 425 |
-
style={
|
| 426 |
-
"fontSize": 14,
|
| 427 |
-
"marginTop": 18,
|
| 428 |
-
"marginBottom": 12,
|
| 429 |
-
"marginLeft": 100,
|
| 430 |
-
"marginRight": 100,
|
| 431 |
-
"textAlign": "center",
|
| 432 |
-
},
|
| 433 |
-
className="responsive-intro", # <-- add class
|
| 434 |
-
),
|
| 435 |
-
html.Div(
|
| 436 |
-
children=[
|
| 437 |
-
html.Div(
|
| 438 |
-
[
|
| 439 |
-
html.Div(
|
| 440 |
-
html.Span(
|
| 441 |
-
[
|
| 442 |
-
"Download View",
|
| 443 |
-
dmc.HoverCard(
|
| 444 |
-
width=260,
|
| 445 |
-
shadow="md",
|
| 446 |
-
position="top",
|
| 447 |
-
children=[
|
| 448 |
-
dmc.HoverCardTarget(
|
| 449 |
-
html.Span(
|
| 450 |
-
DashIconify(
|
| 451 |
-
icon="mdi:information-outline",
|
| 452 |
-
width=16,
|
| 453 |
-
height=16,
|
| 454 |
-
style={
|
| 455 |
-
"marginLeft": "6px",
|
| 456 |
-
"color": "#AC482A",
|
| 457 |
-
"verticalAlign": "middle",
|
| 458 |
-
},
|
| 459 |
-
),
|
| 460 |
-
style={"cursor": "pointer"},
|
| 461 |
-
)
|
| 462 |
-
),
|
| 463 |
-
dmc.HoverCardDropdown(
|
| 464 |
-
dmc.Text(
|
| 465 |
-
"We believe this filter isolates more authentic usage, mitigating the impact of automatic software downloads for older models.",
|
| 466 |
-
size="sm",
|
| 467 |
-
style={"maxWidth": "240px"},
|
| 468 |
-
)
|
| 469 |
-
),
|
| 470 |
-
],
|
| 471 |
-
),
|
| 472 |
-
],
|
| 473 |
-
className="filter-label-row",
|
| 474 |
-
),
|
| 475 |
-
className="filter-label-container",
|
| 476 |
-
),
|
| 477 |
-
html.Div(
|
| 478 |
-
[
|
| 479 |
-
dmc.SegmentedControl(
|
| 480 |
-
id="segmented",
|
| 481 |
-
value="all-downloads",
|
| 482 |
-
color="#AC482A",
|
| 483 |
-
transitionDuration=200,
|
| 484 |
-
data=[
|
| 485 |
-
{
|
| 486 |
-
"value": "all-downloads",
|
| 487 |
-
"label": "All Downloads",
|
| 488 |
-
},
|
| 489 |
-
{
|
| 490 |
-
"value": "filtered-downloads",
|
| 491 |
-
"label": html.Span(
|
| 492 |
-
["Filtered Downloads"]
|
| 493 |
-
),
|
| 494 |
-
},
|
| 495 |
-
],
|
| 496 |
-
mb=10,
|
| 497 |
-
),
|
| 498 |
-
],
|
| 499 |
-
className="filter-segmented-row",
|
| 500 |
-
),
|
| 501 |
-
html.Div(
|
| 502 |
-
"Choose whether to count all downloads, or only downloads up to one year from model creation.",
|
| 503 |
-
className="filter-description",
|
| 504 |
-
),
|
| 505 |
-
html.Div(
|
| 506 |
-
[
|
| 507 |
-
html.Div(
|
| 508 |
-
"Model Attribution",
|
| 509 |
-
className="filter-label",
|
| 510 |
-
),
|
| 511 |
-
dmc.SegmentedControl(
|
| 512 |
-
id="model-attribution-segmented",
|
| 513 |
-
value="uploader",
|
| 514 |
-
color="#AC482A",
|
| 515 |
-
transitionDuration=200,
|
| 516 |
-
data=[
|
| 517 |
-
{
|
| 518 |
-
"value": "uploader",
|
| 519 |
-
"label": "Model Uploader",
|
| 520 |
-
},
|
| 521 |
-
{
|
| 522 |
-
"value": "original_creator",
|
| 523 |
-
"label": "Original Model Creator",
|
| 524 |
-
},
|
| 525 |
-
],
|
| 526 |
-
mb=10,
|
| 527 |
-
),
|
| 528 |
-
html.Div(
|
| 529 |
-
"Toggle between having downloads attributed to the account that uploaded the model, or the account that uploaded the model that this was originally derived from.",
|
| 530 |
-
className="filter-description",
|
| 531 |
-
),
|
| 532 |
-
],
|
| 533 |
-
style={"marginTop": "10px"},
|
| 534 |
-
),
|
| 535 |
-
html.Span(
|
| 536 |
-
id="global-toggle-status",
|
| 537 |
-
className="global-toggle-status",
|
| 538 |
-
),
|
| 539 |
-
],
|
| 540 |
-
className="main-content-left",
|
| 541 |
-
),
|
| 542 |
-
html.Div(
|
| 543 |
-
[
|
| 544 |
-
html.Div(
|
| 545 |
-
[
|
| 546 |
-
html.Span(
|
| 547 |
-
"Download Date Range",
|
| 548 |
-
className="filter-label",
|
| 549 |
-
),
|
| 550 |
-
dmc.HoverCard(
|
| 551 |
-
width=260,
|
| 552 |
-
shadow="md",
|
| 553 |
-
position="top",
|
| 554 |
-
children=[
|
| 555 |
-
dmc.HoverCardTarget(
|
| 556 |
-
html.Span(
|
| 557 |
-
DashIconify(
|
| 558 |
-
icon="mdi:information-outline",
|
| 559 |
-
width=16,
|
| 560 |
-
height=16,
|
| 561 |
-
style={
|
| 562 |
-
"marginLeft": "6px",
|
| 563 |
-
"color": "#AC482A",
|
| 564 |
-
"verticalAlign": "middle",
|
| 565 |
-
},
|
| 566 |
-
),
|
| 567 |
-
style={"cursor": "pointer"},
|
| 568 |
-
)
|
| 569 |
-
),
|
| 570 |
-
dmc.HoverCardDropdown(
|
| 571 |
-
dmc.Text(
|
| 572 |
-
"Toggle between viewing downloads between a date range or all-time downloads at a single date.",
|
| 573 |
-
size="sm",
|
| 574 |
-
style={"maxWidth": "240px"},
|
| 575 |
-
)
|
| 576 |
-
),
|
| 577 |
-
],
|
| 578 |
-
),
|
| 579 |
-
],
|
| 580 |
-
className="filter-label-row",
|
| 581 |
-
),
|
| 582 |
-
dmc.Switch(
|
| 583 |
-
id="time-range-toggle",
|
| 584 |
-
label="All-time",
|
| 585 |
-
checked=False,
|
| 586 |
-
color="#AC482A",
|
| 587 |
-
style={"marginBottom": "12px"},
|
| 588 |
-
),
|
| 589 |
-
dcc.Loading(
|
| 590 |
-
id="loading-slider",
|
| 591 |
-
type="circle",
|
| 592 |
-
color="#AC482A",
|
| 593 |
-
children=html.Div(
|
| 594 |
-
id="slider-container",
|
| 595 |
-
children=[time_slider],
|
| 596 |
-
),
|
| 597 |
-
),
|
| 598 |
-
html.Div(
|
| 599 |
-
id="slider-description",
|
| 600 |
-
children="Adjust the time range to filter leaderboard results by when models were downloaded by users.",
|
| 601 |
-
className="filter-description filter-description-margin",
|
| 602 |
-
),
|
| 603 |
-
html.Div(
|
| 604 |
-
[
|
| 605 |
-
html.Div(
|
| 606 |
-
[
|
| 607 |
-
DashIconify(
|
| 608 |
-
icon="mdi:lightbulb-on-outline",
|
| 609 |
-
width=20,
|
| 610 |
-
height=20,
|
| 611 |
-
style={
|
| 612 |
-
"marginRight": "8px",
|
| 613 |
-
"color": "#082030",
|
| 614 |
-
},
|
| 615 |
-
),
|
| 616 |
-
html.Span("Tip"),
|
| 617 |
-
],
|
| 618 |
-
className="tip-title",
|
| 619 |
-
),
|
| 620 |
-
html.Div(
|
| 621 |
-
[
|
| 622 |
-
"Try switching between ",
|
| 623 |
-
html.Span(
|
| 624 |
-
"All Downloads",
|
| 625 |
-
className="tip-highlight",
|
| 626 |
-
),
|
| 627 |
-
" and ",
|
| 628 |
-
html.Span(
|
| 629 |
-
"Filtered Downloads",
|
| 630 |
-
className="tip-highlight",
|
| 631 |
-
),
|
| 632 |
-
" to compare net popularity (but many duplicate, unused downloads) versus more immediate interest as models are released. ",
|
| 633 |
-
"You can also toggle between ",
|
| 634 |
-
html.Span(
|
| 635 |
-
"Model Uploader",
|
| 636 |
-
className="tip-highlight",
|
| 637 |
-
),
|
| 638 |
-
" and ",
|
| 639 |
-
html.Span(
|
| 640 |
-
"Original Model Creator",
|
| 641 |
-
className="tip-highlight",
|
| 642 |
-
),
|
| 643 |
-
" to see how attribution affects perceived popularity.",
|
| 644 |
-
],
|
| 645 |
-
className="tip-description",
|
| 646 |
-
),
|
| 647 |
-
],
|
| 648 |
-
className="tip-section",
|
| 649 |
-
),
|
| 650 |
-
],
|
| 651 |
-
className="main-content-right",
|
| 652 |
-
),
|
| 653 |
-
],
|
| 654 |
-
style={
|
| 655 |
-
"display": "flex",
|
| 656 |
-
"gap": "24px",
|
| 657 |
-
"padding": "32px",
|
| 658 |
-
"alignItems": "flex-start",
|
| 659 |
-
"marginLeft": "100px",
|
| 660 |
-
"marginRight": "100px",
|
| 661 |
-
"backgroundColor": "#FFFBF9",
|
| 662 |
-
"borderRadius": "18px",
|
| 663 |
-
},
|
| 664 |
-
className="responsive-main-content", # <-- add class
|
| 665 |
-
),
|
| 666 |
-
html.Div(
|
| 667 |
-
[
|
| 668 |
-
dcc.Tabs(
|
| 669 |
-
id="leaderboard-tabs",
|
| 670 |
-
value="Countries",
|
| 671 |
-
children=[
|
| 672 |
-
dcc.Tab(
|
| 673 |
-
label="Countries",
|
| 674 |
-
value="Countries",
|
| 675 |
-
style={
|
| 676 |
-
"backgroundColor": "transparent",
|
| 677 |
-
"border": "none",
|
| 678 |
-
"padding": "10px 18px",
|
| 679 |
-
"color": "#6B7280",
|
| 680 |
-
"fontWeight": "500",
|
| 681 |
-
},
|
| 682 |
-
selected_style={
|
| 683 |
-
"backgroundColor": "transparent",
|
| 684 |
-
"border": "none",
|
| 685 |
-
"padding": "10px 18px",
|
| 686 |
-
"fontWeight": "700",
|
| 687 |
-
"borderBottom": "3px solid #082030",
|
| 688 |
-
},
|
| 689 |
-
children=[
|
| 690 |
-
html.Div(
|
| 691 |
-
children=[
|
| 692 |
-
"The country leaderboard shows how downloads are distributed across different nations, highlighting which countries are leading in model usage and adoption. The metadata includes the ",
|
| 693 |
-
html.Span(
|
| 694 |
-
"country", className="meta-var"
|
| 695 |
-
),
|
| 696 |
-
" and number of ",
|
| 697 |
-
html.Span(
|
| 698 |
-
"user downloads",
|
| 699 |
-
className="meta-var",
|
| 700 |
-
),
|
| 701 |
-
".",
|
| 702 |
-
],
|
| 703 |
-
className="tab-description",
|
| 704 |
-
),
|
| 705 |
-
html.Div(
|
| 706 |
-
dcc.Loading(
|
| 707 |
-
id="loading-countries",
|
| 708 |
-
type="circle",
|
| 709 |
-
color="#AC482A",
|
| 710 |
-
children=html.Div(
|
| 711 |
-
id="top_countries-table"
|
| 712 |
-
),
|
| 713 |
-
),
|
| 714 |
-
className="responsive-table-wrapper", # <-- add wrapper for scroll
|
| 715 |
-
),
|
| 716 |
-
html.Button(
|
| 717 |
-
id="top_countries-toggle",
|
| 718 |
-
children="▼ Show Top 50",
|
| 719 |
-
n_clicks=0,
|
| 720 |
-
style={**button_style, "border": "none"},
|
| 721 |
-
),
|
| 722 |
-
],
|
| 723 |
-
),
|
| 724 |
-
dcc.Tab(
|
| 725 |
-
label="Developers",
|
| 726 |
-
value="Developers",
|
| 727 |
-
style={
|
| 728 |
-
"backgroundColor": "transparent",
|
| 729 |
-
"border": "none",
|
| 730 |
-
"padding": "10px 18px",
|
| 731 |
-
"color": "#6B7280",
|
| 732 |
-
"fontWeight": "500",
|
| 733 |
-
},
|
| 734 |
-
selected_style={
|
| 735 |
-
"backgroundColor": "transparent",
|
| 736 |
-
"border": "none",
|
| 737 |
-
"padding": "10px 18px",
|
| 738 |
-
"fontWeight": "700",
|
| 739 |
-
"borderBottom": "3px solid #082030",
|
| 740 |
-
},
|
| 741 |
-
children=[
|
| 742 |
-
html.Div(
|
| 743 |
-
children=[
|
| 744 |
-
"The developer leaderboard highlights the most influential model creators on Hugging Face, showcasing which developers have garnered the highest download counts for their models. The metadata includes the ",
|
| 745 |
-
html.Span(
|
| 746 |
-
"developer", className="meta-var"
|
| 747 |
-
),
|
| 748 |
-
", number of ",
|
| 749 |
-
html.Span(
|
| 750 |
-
"user downloads",
|
| 751 |
-
className="meta-var",
|
| 752 |
-
),
|
| 753 |
-
", and ",
|
| 754 |
-
html.Span(
|
| 755 |
-
"country", className="meta-var"
|
| 756 |
-
),
|
| 757 |
-
".",
|
| 758 |
-
],
|
| 759 |
-
className="tab-description",
|
| 760 |
-
),
|
| 761 |
-
html.Div(
|
| 762 |
-
dcc.Loading(
|
| 763 |
-
id="loading-developers",
|
| 764 |
-
type="circle",
|
| 765 |
-
color="#AC482A",
|
| 766 |
-
children=html.Div(
|
| 767 |
-
id="top_developers-table"
|
| 768 |
-
),
|
| 769 |
-
),
|
| 770 |
-
className="responsive-table-wrapper",
|
| 771 |
-
),
|
| 772 |
-
html.Button(
|
| 773 |
-
id="top_developers-toggle",
|
| 774 |
-
children="▼ Show Top 50",
|
| 775 |
-
n_clicks=0,
|
| 776 |
-
style={**button_style, "border": "none"},
|
| 777 |
-
),
|
| 778 |
-
],
|
| 779 |
-
),
|
| 780 |
-
dcc.Tab(
|
| 781 |
-
label="Models",
|
| 782 |
-
value="Models",
|
| 783 |
-
style={
|
| 784 |
-
"backgroundColor": "transparent",
|
| 785 |
-
"border": "none",
|
| 786 |
-
"padding": "10px 18px",
|
| 787 |
-
"color": "#6B7280",
|
| 788 |
-
"fontWeight": "500",
|
| 789 |
-
},
|
| 790 |
-
selected_style={
|
| 791 |
-
"backgroundColor": "transparent",
|
| 792 |
-
"border": "none",
|
| 793 |
-
"padding": "10px 18px",
|
| 794 |
-
"fontWeight": "700",
|
| 795 |
-
"borderBottom": "3px solid #082030",
|
| 796 |
-
},
|
| 797 |
-
children=[
|
| 798 |
-
html.Div(
|
| 799 |
-
children=[
|
| 800 |
-
"The model leaderboard ranks individual models based on their download counts, revealing which models are most popular among users on Hugging Face. The metadata includes the ",
|
| 801 |
-
html.Span(
|
| 802 |
-
"model name", className="meta-var"
|
| 803 |
-
),
|
| 804 |
-
", number of ",
|
| 805 |
-
html.Span(
|
| 806 |
-
"user downloads",
|
| 807 |
-
className="meta-var",
|
| 808 |
-
),
|
| 809 |
-
", ",
|
| 810 |
-
html.Span(
|
| 811 |
-
"developer", className="meta-var"
|
| 812 |
-
),
|
| 813 |
-
", and ",
|
| 814 |
-
html.Span(
|
| 815 |
-
"modality", className="meta-var"
|
| 816 |
-
),
|
| 817 |
-
" (the input and output types of the model).",
|
| 818 |
-
],
|
| 819 |
-
className="tab-description",
|
| 820 |
-
),
|
| 821 |
-
html.Div(
|
| 822 |
-
dcc.Loading(
|
| 823 |
-
id="loading-models",
|
| 824 |
-
type="circle",
|
| 825 |
-
color="#AC482A",
|
| 826 |
-
children=html.Div(
|
| 827 |
-
id="top_models-table"
|
| 828 |
-
),
|
| 829 |
-
),
|
| 830 |
-
className="responsive-table-wrapper",
|
| 831 |
-
),
|
| 832 |
-
html.Button(
|
| 833 |
-
id="top_models-toggle",
|
| 834 |
-
children="▼ Show Top 50",
|
| 835 |
-
n_clicks=0,
|
| 836 |
-
style={**button_style, "border": "none"},
|
| 837 |
-
),
|
| 838 |
-
],
|
| 839 |
-
),
|
| 840 |
-
],
|
| 841 |
-
),
|
| 842 |
-
],
|
| 843 |
-
style={
|
| 844 |
-
"borderRadius": "18px",
|
| 845 |
-
"padding": "32px",
|
| 846 |
-
"marginTop": "12px",
|
| 847 |
-
"marginBottom": "12px",
|
| 848 |
-
"marginLeft": "50px",
|
| 849 |
-
"marginRight": "50px",
|
| 850 |
-
},
|
| 851 |
-
className="responsive-tabs", # <-- add class
|
| 852 |
-
),
|
| 853 |
-
],
|
| 854 |
-
style={
|
| 855 |
-
"fontFamily": "Inter",
|
| 856 |
-
"backgroundColor": "#ffffff",
|
| 857 |
-
"minHeight": "100vh",
|
| 858 |
-
},
|
| 859 |
-
),
|
| 860 |
],
|
| 861 |
)
|
| 862 |
|
|
|
|
|
|
|
|
|
|
| 863 |
|
| 864 |
-
#
|
| 865 |
-
# -- helper utilities to consolidate duplicated callback logic --
|
| 866 |
-
def _get_filtered_top_n_from_duckdb(
    slider_value, group_col, top_n, view="all_downloads"
):
    """
    Run the leaderboard query for a download-date window and return the rows.

    Args:
        slider_value: Two-item sequence of epoch seconds ``[start, end]``.
            Anything falsy or not of length 2 falls back to the full range:
            1970-01-01 up to the module-level ``end_dt`` (or the current
            time when ``end_dt`` is not defined).
        group_col: Grouping column passed through to
            ``build_leaderboard_query``.
        top_n: Number of top entries requested from the query builder.
        view: Name of the DuckDB view to query.

    Returns:
        The query result as a DataFrame. The columns are determined by
        ``build_leaderboard_query`` (not visible here); expected to include
        group_key, org_country_single, author, derived_author,
        merged_country_groups_single, merged_modality, model,
        total_downloads and percent_of_total — TODO confirm against the
        query builder.
    """
    # A brand-new connection (with the parquet-backed views) for every call.
    db = create_fresh_duckdb_with_views()
    try:
        if not slider_value or len(slider_value) != 2:
            window_start = pd.to_datetime("1970-01-01")
            try:
                # end_dt may have been defined by the initial data load
                window_end = end_dt
            except NameError:
                window_end = pd.Timestamp.now()
        else:
            window_start = pd.to_datetime(slider_value[0], unit="s")
            window_end = pd.to_datetime(slider_value[1], unit="s")

        sql = build_leaderboard_query(
            group_col, top_n, str(window_start), str(window_end), view=view
        )
        return db.execute(sql).fetchdf()
    finally:
        db.close()
|
| 906 |
-
|
| 907 |
-
|
| 908 |
-
def _get_filtered_top_n_alltime_from_duckdb(
    slider_value, group_col, top_n, view="all_downloads"
):
    """
    Run the all-time leaderboard query as of a single date and return the rows.

    Args:
        slider_value: A single epoch-seconds value (all-time mode passes one
            value rather than a range). ``None`` falls back to the
            module-level ``end_dt``, or the current time when ``end_dt`` is
            not defined.
        group_col: Grouping column passed through to
            ``build_leaderboard_query``.
        top_n: Number of top entries requested from the query builder.
        view: Name of the DuckDB view to query.

    Returns:
        The query result as a DataFrame. The columns are determined by
        ``build_leaderboard_query`` (not visible here); expected to include
        group_key, org_country_single, author, derived_author,
        merged_country_groups_single, merged_modality, model,
        total_downloads (cumulative up to the selected date) and
        percent_of_total — TODO confirm against the query builder.
    """
    # A brand-new connection (with the parquet-backed views) for every call.
    db = create_fresh_duckdb_with_views()
    try:
        if slider_value is None:
            try:
                # end_dt may have been defined by the initial data load
                as_of = end_dt
            except NameError:
                as_of = pd.Timestamp.now()
        else:
            as_of = pd.to_datetime(slider_value, unit="s")

        sql = build_leaderboard_query(
            group_col, top_n, date_str=str(as_of), view=view
        )
        return db.execute(sql).fetchdf()
    finally:
        db.close()
|
| 944 |
-
|
| 945 |
-
|
| 946 |
-
def _leaderboard_callback_logic(
    n_clicks,
    slider_value,
    current_label,
    group_col,
    filename,
    default_label="▼ Show Top 50",
    chip_color="#F0F9FF",
    view="all_downloads",
    derived_author_toggle=True,
    is_alltime=False,
):
    """
    Shared body of the leaderboard callbacks.

    Works out how many rows to show from the toggle button state, fetches the
    matching rows from DuckDB, and renders them.

    Returns:
        A ``(content, new_label)`` tuple: ``content`` is either the rendered
        table or a "no data" message, ``new_label`` is the next toggle label.
    """

    def _no_data_message():
        # Shown whenever either the SQL step or the post-processing step
        # yields nothing.
        return html.Div(
            "No data found in this time range. Try broadening the download date range.",
            style={"padding": "18px", "fontSize": "16px", "color": "#082030"},
        )

    # First load: there is no label yet, so start from the default one.
    label = default_label if current_label is None else current_label

    # Toggle state machine: 10 -> 50 -> 100 -> back to 10.
    if n_clicks == 0:
        top_n, new_label = 10, label
    elif "Show Top 50" in label:
        top_n, new_label = 50, "▼ Show Top 100"
    elif "Show Top 100" in label:
        top_n, new_label = 100, "▲ Show Less"
    else:
        top_n, new_label = 10, "▼ Show Top 50"

    # Cumulative ("all-time") mode and date-range mode use different queries.
    fetch_rows = (
        _get_filtered_top_n_alltime_from_duckdb
        if is_alltime
        else _get_filtered_top_n_from_duckdb
    )
    df_filtered = fetch_rows(slider_value, group_col, top_n, view=view)

    if df_filtered is None or df_filtered.empty:
        return _no_data_message(), new_label

    # Aggregate the already-filtered rows into the leaderboard frames.
    df, download_df = get_top_n_leaderboard(
        df_filtered, group_col, top_n, derived_author_toggle=derived_author_toggle
    )

    if df is None or getattr(df, "empty", False):
        return _no_data_message(), new_label

    table = render_table_content(
        df, download_df, chip_color=chip_color, filename=filename
    )
    return table, new_label
|
| 1008 |
-
|
| 1009 |
-
|
| 1010 |
-
# -- end helpers --
|
| 1011 |
-
|
| 1012 |
-
|
| 1013 |
-
# --- Callback to store model attribution type ---
|
| 1014 |
@app.callback(
|
| 1015 |
Output("model-attribution-type", "data"),
|
| 1016 |
Input("model-attribution-segmented", "value"),
|
|
@@ -1018,8 +91,7 @@ def _leaderboard_callback_logic(
|
|
| 1018 |
def update_model_attribution_type(selected_value):
|
| 1019 |
return selected_value
|
| 1020 |
|
| 1021 |
-
|
| 1022 |
-
# --- Callback to toggle between RangeSlider and Slider ---
|
| 1023 |
@app.callback(
|
| 1024 |
Output("slider-container", "children"),
|
| 1025 |
Output("slider-description", "children"),
|
|
@@ -1030,48 +102,28 @@ def update_model_attribution_type(selected_value):
|
|
| 1030 |
def toggle_time_slider(is_alltime, range_value, alltime_value):
|
| 1031 |
if is_alltime:
|
| 1032 |
description = "Select a specific date to view all-time cumulative downloads up to that point."
|
| 1033 |
-
|
| 1034 |
-
slider =
|
| 1035 |
-
|
| 1036 |
-
|
| 1037 |
-
|
| 1038 |
-
|
| 1039 |
-
|
| 1040 |
-
color="#AC482A",
|
| 1041 |
-
size="md",
|
| 1042 |
-
radius="xl",
|
| 1043 |
-
marks=marks,
|
| 1044 |
-
style={"width": "95%", "paddingLeft": "60px"},
|
| 1045 |
-
label=None,
|
| 1046 |
-
showLabelOnHover=False,
|
| 1047 |
-
labelTransitionProps={"transition": "fade", "duration": 150},
|
| 1048 |
-
thumbChildren=get_thumb_label_single(alltime_value if alltime_value is not None else end_ts),
|
| 1049 |
)
|
| 1050 |
return [slider], description
|
| 1051 |
else:
|
| 1052 |
description = "Adjust the slider to filter leaderboard results by the difference in downloads within the time range."
|
| 1053 |
-
# Recreate the range slider with current stored value
|
| 1054 |
slider_val = range_value if range_value is not None else [start_ts, end_ts]
|
| 1055 |
-
slider =
|
| 1056 |
-
|
| 1057 |
-
|
| 1058 |
-
|
| 1059 |
-
|
| 1060 |
-
|
| 1061 |
-
color="#AC482A",
|
| 1062 |
-
size="md",
|
| 1063 |
-
radius="xl",
|
| 1064 |
-
marks=marks,
|
| 1065 |
-
style={"width": "95%", "paddingLeft": "60px"},
|
| 1066 |
-
label=None,
|
| 1067 |
-
showLabelOnHover=False,
|
| 1068 |
-
labelTransitionProps={"transition": "fade", "duration": 150},
|
| 1069 |
-
thumbChildren=get_thumb_labels(slider_val),
|
| 1070 |
)
|
| 1071 |
return [slider], description
|
| 1072 |
|
| 1073 |
-
|
| 1074 |
-
# --- Callbacks to sync slider values to stores ---
|
| 1075 |
@app.callback(
|
| 1076 |
Output("time-slider-value", "data"),
|
| 1077 |
Input("time-slider", "value"),
|
|
@@ -1079,7 +131,7 @@ def toggle_time_slider(is_alltime, range_value, alltime_value):
|
|
| 1079 |
def sync_time_slider_value(value):
|
| 1080 |
return value
|
| 1081 |
|
| 1082 |
-
|
| 1083 |
@app.callback(
|
| 1084 |
Output("time-slider-alltime-value", "data"),
|
| 1085 |
Input("time-slider-alltime", "value"),
|
|
@@ -1087,8 +139,7 @@ def sync_time_slider_value(value):
|
|
| 1087 |
def sync_time_slider_alltime_value(value):
|
| 1088 |
return value
|
| 1089 |
|
| 1090 |
-
|
| 1091 |
-
# Callbacks for interactivity (modularized)
|
| 1092 |
@app.callback(
|
| 1093 |
Output("top_countries-table", "children"),
|
| 1094 |
Output("top_countries-toggle", "children"),
|
|
@@ -1106,7 +157,7 @@ def update_top_countries(
|
|
| 1106 |
# Use the appropriate slider value based on the toggle
|
| 1107 |
active_slider_value = slider_alltime_value if is_alltime else slider_value
|
| 1108 |
|
| 1109 |
-
return
|
| 1110 |
n_clicks,
|
| 1111 |
active_slider_value,
|
| 1112 |
current_label,
|
|
@@ -1119,7 +170,7 @@ def update_top_countries(
|
|
| 1119 |
is_alltime=is_alltime,
|
| 1120 |
)
|
| 1121 |
|
| 1122 |
-
|
| 1123 |
@app.callback(
|
| 1124 |
Output("top_developers-table", "children"),
|
| 1125 |
Output("top_developers-toggle", "children"),
|
|
@@ -1139,20 +190,20 @@ def update_top_developers(
|
|
| 1139 |
# Use the appropriate slider value based on the toggle
|
| 1140 |
active_slider_value = slider_alltime_value if is_alltime else slider_value
|
| 1141 |
|
| 1142 |
-
return
|
| 1143 |
n_clicks,
|
| 1144 |
active_slider_value,
|
| 1145 |
current_label,
|
| 1146 |
group_col=group_col,
|
| 1147 |
filename="top_developers",
|
| 1148 |
default_label="▼ Show Top 50",
|
| 1149 |
-
chip_color="#
|
| 1150 |
view=selected_view,
|
| 1151 |
derived_author_toggle=(attribution_type == "original_creator"),
|
| 1152 |
is_alltime=is_alltime,
|
| 1153 |
)
|
| 1154 |
|
| 1155 |
-
|
| 1156 |
@app.callback(
|
| 1157 |
Output("top_models-table", "children"),
|
| 1158 |
Output("top_models-toggle", "children"),
|
|
@@ -1170,7 +221,7 @@ def update_top_models(
|
|
| 1170 |
# Use the appropriate slider value based on the toggle
|
| 1171 |
active_slider_value = slider_alltime_value if is_alltime else slider_value
|
| 1172 |
|
| 1173 |
-
return
|
| 1174 |
n_clicks,
|
| 1175 |
active_slider_value,
|
| 1176 |
current_label,
|
|
@@ -1183,7 +234,7 @@ def update_top_models(
|
|
| 1183 |
is_alltime=is_alltime,
|
| 1184 |
)
|
| 1185 |
|
| 1186 |
-
|
| 1187 |
@app.callback(
|
| 1188 |
Output("time-slider", "thumbChildren"),
|
| 1189 |
Input("time-slider", "value"),
|
|
@@ -1191,7 +242,7 @@ def update_top_models(
|
|
| 1191 |
def update_thumb_labels(values):
|
| 1192 |
return get_thumb_labels(values)
|
| 1193 |
|
| 1194 |
-
|
| 1195 |
@app.callback(
|
| 1196 |
Output("time-slider-alltime", "thumbChildren"),
|
| 1197 |
Input("time-slider-alltime", "value"),
|
|
@@ -1199,8 +250,7 @@ def update_thumb_labels(values):
|
|
| 1199 |
def update_thumb_label_alltime(value):
|
| 1200 |
return get_thumb_label_single(value)
|
| 1201 |
|
| 1202 |
-
|
| 1203 |
-
# --- Add callback to update selected view based on segmented control ---
|
| 1204 |
@app.callback(
|
| 1205 |
Output("selected-view", "data"),
|
| 1206 |
Input("segmented", "value"),
|
|
@@ -1210,7 +260,6 @@ def update_selected_view(seg_value):
|
|
| 1210 |
return "one_year_rolling"
|
| 1211 |
return "all_downloads"
|
| 1212 |
|
| 1213 |
-
|
| 1214 |
# Run the app
|
| 1215 |
if __name__ == "__main__":
|
| 1216 |
app.run(debug=True)
|
|
|
|
| 1 |
from dash import Dash, html, dcc, Input, Output, State
|
| 2 |
import pandas as pd
|
| 3 |
import dash_mantine_components as dmc
|
|
|
|
| 4 |
import time
|
| 5 |
+
|
| 6 |
+
from config import DATASET_ID
|
| 7 |
+
from data_utils import create_fresh_duckdb_with_views, get_last_updated
|
| 8 |
from graphs.leaderboard import (
|
| 9 |
+
leaderboard_callback_logic,
|
| 10 |
+
)
|
| 11 |
+
from helpers import build_slider_marks, get_thumb_label_single, get_thumb_labels
|
| 12 |
+
from layout_components import (
|
| 13 |
+
build_main_layout,
|
| 14 |
+
build_range_slider,
|
| 15 |
+
build_single_slider,
|
| 16 |
)
|
|
|
|
| 17 |
|
| 18 |
# Initialize the app
|
| 19 |
app = Dash(suppress_callback_exceptions=True) # suppress callback exceptions for multi-page layout
|
| 20 |
server = app.server
|
| 21 |
|
| 22 |
+
# Load dataset and determine time range
|
| 23 |
+
print(f"Attempting to connect to dataset from Hugging Face Hub: {DATASET_ID}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
try:
|
| 25 |
overall_start_time = time.time()
|
| 26 |
|
|
|
|
| 43 |
# Create a dcc slider for time range selection by year (readable marks)
|
| 44 |
start_ts = int(start_dt.timestamp())
|
| 45 |
end_ts = int(end_dt.timestamp())
|
| 46 |
+
marks = build_slider_marks(start_dt, end_dt)
|
| 47 |
+
|
| 48 |
+
time_slider = build_range_slider(
|
| 49 |
+
start_ts,
|
| 50 |
+
end_ts,
|
| 51 |
+
[start_ts, end_ts],
|
| 52 |
+
marks,
|
| 53 |
+
thumb_children=get_thumb_labels([start_ts, end_ts]),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
)
|
| 55 |
|
| 56 |
+
time_slider_alltime = build_single_slider(
|
| 57 |
+
start_ts,
|
| 58 |
+
end_ts,
|
| 59 |
+
end_ts,
|
| 60 |
+
marks,
|
| 61 |
+
thumb_children=get_thumb_label_single(end_ts),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
)
|
| 63 |
|
| 64 |
+
last_updated_label = get_last_updated()
|
| 65 |
+
|
| 66 |
+
# Define the app layout
|
| 67 |
app.layout = dmc.MantineProvider(
|
| 68 |
theme={
|
| 69 |
"colorScheme": "light",
|
|
|
|
| 75 |
dcc.Store(id="model-attribution-type", data="uploader"),
|
| 76 |
dcc.Store(id="time-slider-value", data=[start_ts, end_ts]),
|
| 77 |
dcc.Store(id="time-slider-alltime-value", data=end_ts),
|
| 78 |
+
build_main_layout(last_updated_label, time_slider),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
],
|
| 80 |
)
|
| 81 |
|
| 82 |
+
# ----------
|
| 83 |
+
# Callbacks
|
| 84 |
+
# ----------
|
| 85 |
|
| 86 |
+
# Update model attribution type based on user selection
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 87 |
@app.callback(
|
| 88 |
Output("model-attribution-type", "data"),
|
| 89 |
Input("model-attribution-segmented", "value"),
|
|
|
|
| 91 |
def update_model_attribution_type(selected_value):
|
| 92 |
return selected_value
|
| 93 |
|
| 94 |
+
# Toggle between range slider and all-time slider
|
|
|
|
| 95 |
@app.callback(
|
| 96 |
Output("slider-container", "children"),
|
| 97 |
Output("slider-description", "children"),
|
|
|
|
| 102 |
def toggle_time_slider(is_alltime, range_value, alltime_value):
|
| 103 |
if is_alltime:
|
| 104 |
description = "Select a specific date to view all-time cumulative downloads up to that point."
|
| 105 |
+
slider_value = alltime_value if alltime_value is not None else end_ts
|
| 106 |
+
slider = build_single_slider(
|
| 107 |
+
start_ts,
|
| 108 |
+
end_ts,
|
| 109 |
+
slider_value,
|
| 110 |
+
marks,
|
| 111 |
+
thumb_children=get_thumb_label_single(slider_value),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
)
|
| 113 |
return [slider], description
|
| 114 |
else:
|
| 115 |
description = "Adjust the slider to filter leaderboard results by the difference in downloads within the time range."
|
|
|
|
| 116 |
slider_val = range_value if range_value is not None else [start_ts, end_ts]
|
| 117 |
+
slider = build_range_slider(
|
| 118 |
+
start_ts,
|
| 119 |
+
end_ts,
|
| 120 |
+
slider_val,
|
| 121 |
+
marks,
|
| 122 |
+
thumb_children=get_thumb_labels(slider_val),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 123 |
)
|
| 124 |
return [slider], description
|
| 125 |
|
| 126 |
+
# Sync slider values to dcc.Store components
|
|
|
|
| 127 |
@app.callback(
|
| 128 |
Output("time-slider-value", "data"),
|
| 129 |
Input("time-slider", "value"),
|
|
|
|
| 131 |
def sync_time_slider_value(value):
|
| 132 |
return value
|
| 133 |
|
| 134 |
+
# Sync all-time slider value to dcc.Store component
|
| 135 |
@app.callback(
|
| 136 |
Output("time-slider-alltime-value", "data"),
|
| 137 |
Input("time-slider-alltime", "value"),
|
|
|
|
| 139 |
def sync_time_slider_alltime_value(value):
|
| 140 |
return value
|
| 141 |
|
| 142 |
+
# Update Top Countries leaderboard
|
|
|
|
| 143 |
@app.callback(
|
| 144 |
Output("top_countries-table", "children"),
|
| 145 |
Output("top_countries-toggle", "children"),
|
|
|
|
| 157 |
# Use the appropriate slider value based on the toggle
|
| 158 |
active_slider_value = slider_alltime_value if is_alltime else slider_value
|
| 159 |
|
| 160 |
+
return leaderboard_callback_logic(
|
| 161 |
n_clicks,
|
| 162 |
active_slider_value,
|
| 163 |
current_label,
|
|
|
|
| 170 |
is_alltime=is_alltime,
|
| 171 |
)
|
| 172 |
|
| 173 |
+
# Update Top Developers leaderboard
|
| 174 |
@app.callback(
|
| 175 |
Output("top_developers-table", "children"),
|
| 176 |
Output("top_developers-toggle", "children"),
|
|
|
|
| 190 |
# Use the appropriate slider value based on the toggle
|
| 191 |
active_slider_value = slider_alltime_value if is_alltime else slider_value
|
| 192 |
|
| 193 |
+
return leaderboard_callback_logic(
|
| 194 |
n_clicks,
|
| 195 |
active_slider_value,
|
| 196 |
current_label,
|
| 197 |
group_col=group_col,
|
| 198 |
filename="top_developers",
|
| 199 |
default_label="▼ Show Top 50",
|
| 200 |
+
chip_color="#F0F9FF",
|
| 201 |
view=selected_view,
|
| 202 |
derived_author_toggle=(attribution_type == "original_creator"),
|
| 203 |
is_alltime=is_alltime,
|
| 204 |
)
|
| 205 |
|
| 206 |
+
# Update Top Models leaderboard
|
| 207 |
@app.callback(
|
| 208 |
Output("top_models-table", "children"),
|
| 209 |
Output("top_models-toggle", "children"),
|
|
|
|
| 221 |
# Use the appropriate slider value based on the toggle
|
| 222 |
active_slider_value = slider_alltime_value if is_alltime else slider_value
|
| 223 |
|
| 224 |
+
return leaderboard_callback_logic(
|
| 225 |
n_clicks,
|
| 226 |
active_slider_value,
|
| 227 |
current_label,
|
|
|
|
| 234 |
is_alltime=is_alltime,
|
| 235 |
)
|
| 236 |
|
| 237 |
+
# Update thumb labels for range slider
|
| 238 |
@app.callback(
|
| 239 |
Output("time-slider", "thumbChildren"),
|
| 240 |
Input("time-slider", "value"),
|
|
|
|
| 242 |
def update_thumb_labels(values):
|
| 243 |
return get_thumb_labels(values)
|
| 244 |
|
| 245 |
+
# Update thumb label for all-time slider
|
| 246 |
@app.callback(
|
| 247 |
Output("time-slider-alltime", "thumbChildren"),
|
| 248 |
Input("time-slider-alltime", "value"),
|
|
|
|
| 250 |
def update_thumb_label_alltime(value):
|
| 251 |
return get_thumb_label_single(value)
|
| 252 |
|
| 253 |
+
# Update selected view based on segmented control
|
|
|
|
| 254 |
@app.callback(
|
| 255 |
Output("selected-view", "data"),
|
| 256 |
Input("segmented", "value"),
|
|
|
|
| 260 |
return "one_year_rolling"
|
| 261 |
return "all_downloads"
|
| 262 |
|
|
|
|
| 263 |
# Run the app
|
| 264 |
if __name__ == "__main__":
|
| 265 |
app.run(debug=True)
|
config.py
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Colour constants.
PRIMARY_COLOR = "#AC482A"
DARK_BACKGROUND = "#082030"

# Hugging Face dataset that backs the app. The parquet URLs are derived from
# DATASET_ID so the identifier and the files actually read cannot drift apart.
DATASET_ID = "mmpr/open_model_evolution_data"
_HF_RESOLVE_BASE = f"https://huggingface.co/datasets/{DATASET_ID}/resolve/main"
HF_PARQUET_URL_1 = f"{_HF_RESOLVE_BASE}/all_downloads_with_annotations.parquet"
HF_PARQUET_URL_2 = f"{_HF_RESOLVE_BASE}/one_year_rolling.parquet"

# CSS-style properties (camelCase keys) for a button-like element.
BUTTON_STYLE = {
    "display": "inline-block",
    "marginBottom": "10px",
    "marginRight": "15px",
    "marginTop": "30px",
    "padding": "6px 16px",
    "backgroundColor": DARK_BACKGROUND,
    "color": "white",
    "borderRadius": "6px",
    "textDecoration": "none",
    "fontWeight": "bold",
    "fontSize": "14px",
}

# Lower-case name -> icon asset path.
# NOTE(review): presumably keyed by model author/organisation; confirm at the call site.
COMPANY_ICON_MAP = {
    "google": "../assets/icons/google.png",
    "distilbert": "../assets/images/hf.svg",
    "sentence-transformers": "../assets/images/hf.svg",
    "facebook": "../assets/icons/meta.png",
    "openai": "../assets/icons/openai.png",
    "amazon": "../assets/icons/amazon.png",
    "microsoft": "../assets/icons/microsoft.png",
}

# Emoji for labels that are not countries.
# NOTE(review): presumably used when no flag can be resolved; confirm at the call site.
COUNTRY_EMOJI_FALLBACK = {
    "User": "👤",
    "Organization": "🏢",
    "Model": "📦",
}

# Grouping column -> list of column names associated with that grouping.
# NOTE(review): presumably the metadata columns kept per leaderboard; confirm at the call site.
META_COLS_MAP = {
    "org_country_single": ["org_country_single", "total_downloads"],
    "author": [
        "org_country_single",
        "author",
        "total_downloads",
    ],
    "derived_author": [
        "org_country_single",
        "derived_author",
        "total_downloads",
    ],
    "model": [
        "org_country_single",
        "author",
        "derived_author",
        "merged_modality",
        "total_downloads",
    ],
}
|
data_utils.py
ADDED
|
@@ -0,0 +1,237 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import duckdb
|
| 2 |
+
import pandas as pd
|
| 3 |
+
|
| 4 |
+
from config import DATASET_ID, HF_PARQUET_URL_1, HF_PARQUET_URL_2
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
def create_fresh_duckdb_with_views(parquet_url_1: str = HF_PARQUET_URL_1, parquet_url_2: str = HF_PARQUET_URL_2):
    """Return a fresh DuckDB connection with parquet-backed views configured.

    Args:
        parquet_url_1: Parquet location exposed as the ``all_downloads`` view.
        parquet_url_2: Parquet location exposed as the ``one_year_rolling`` view.

    Returns:
        A new in-memory DuckDB connection. The caller must close it.
        Setup is best-effort: a connection is returned even when a view could
        not be created, but the failure is printed so it is not hidden.
    """
    local_con = duckdb.connect(database=":memory:", read_only=False)

    # Optional setup. Each statement gets its own try so that one failure
    # (e.g. INSTALL failing although the extension is already available)
    # does not prevent the following statements from running.
    for statement in (
        "INSTALL httpfs;",
        "LOAD httpfs;",
        "SET enable_http_metadata_cache = false;",
        "SET enable_object_cache = false;",
    ):
        try:
            local_con.execute(statement)
        except Exception:
            # Deliberately ignored: these statements are optional.
            continue

    views = (
        ("all_downloads", parquet_url_1),
        ("one_year_rolling", parquet_url_2),
    )
    for view_name, url in views:
        # Double any single quote so the URL cannot terminate the SQL literal.
        escaped_url = str(url).replace("'", "''")
        try:
            local_con.execute(
                f"""
                CREATE OR REPLACE VIEW {view_name} AS
                SELECT * FROM read_parquet('{escaped_url}')
                """
            )
        except Exception as exc:
            # Still best-effort, but report it: otherwise the first symptom is
            # a "table does not exist" error at query time.
            print(f"Error creating DuckDB view {view_name}: {exc}")

    return local_con
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def get_last_updated():
    """Return the newest ``time`` value in ``all_downloads`` as a display string.

    Returns:
        The maximum ``time`` formatted like ``"Jan 05, 2025"``, or ``"N/A"``
        when the value is null or anything goes wrong along the way.
    """
    fallback = "N/A"
    try:
        connection = create_fresh_duckdb_with_views()
        try:
            frame = connection.execute(
                "SELECT MAX(time) as max_time FROM all_downloads"
            ).fetchdf()
        finally:
            # Close the connection whether or not the query succeeded.
            connection.close()

        latest = frame["max_time"].iloc[0]
        if pd.isnull(latest):
            return fallback
        return pd.to_datetime(latest).strftime("%b %d, %Y")
    except Exception:
        # Any failure (connection, query, parsing) degrades to the fallback
        # label instead of raising.
        return fallback
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
def build_leaderboard_query(
    group_col,
    top_n,
    start_str=None,
    end_str=None,
    date_str=None,
    view="all_downloads",
):
    """Build the SQL query string for leaderboard data.

    Two modes are supported:

    * all-time: ``date_str`` is given; ``total_downloads`` is the maximum
      ``downloadsAllTime`` seen at or before that date.
    * range: ``date_str`` is None; ``total_downloads`` is the maximum at or
      before ``end_str`` minus the maximum strictly before ``start_str``.

    Args:
        group_col: Column to group by. Must be a plain SQL identifier.
        top_n: Leaderboard size; the query returns up to ``top_n * 10`` rows.
        start_str: Range start (range mode only).
        end_str: Range end (range mode only).
        date_str: Cut-off date; its presence selects all-time mode.
        view: View/table to read from. Must be a plain SQL identifier.

    Returns:
        The SQL text, one row per (model, group_key) with ``total_downloads``
        and ``percent_of_total``, ordered by downloads descending.

    Raises:
        ValueError: If ``group_col`` or ``view`` is not a plain identifier, or
            if range mode is requested without both ``start_str`` and ``end_str``.
    """
    import re  # local import keeps this block self-contained

    # group_col and view are spliced into the SQL as identifiers, so anything
    # that is not a bare identifier is rejected up front.
    identifier = re.compile(r"[A-Za-z_][A-Za-z0-9_]*")
    for label, value in (("group_col", group_col), ("view", view)):
        if not isinstance(value, str) or not identifier.fullmatch(value):
            raise ValueError(f"{label} must be a plain SQL identifier, got {value!r}")

    def quote(value):
        # Render a value as a SQL string literal, doubling embedded quotes.
        return "'" + str(value).replace("'", "''") + "'"

    is_alltime = date_str is not None
    if not is_alltime and (start_str is None or end_str is None):
        raise ValueError("start_str and end_str are required when date_str is not given")

    row_limit = int(top_n) * 10

    # Country normalisation shared by the grouping key and the country column.
    country_case = """CASE
                WHEN org_country_single IN ('HF', 'United States of America') THEN 'United States of America'
                WHEN org_country_single IN ('International', 'Online', 'Online?') THEN 'International/Online'
                ELSE org_country_single
            END"""
    group_expr = country_case if group_col == "org_country_single" else group_col

    if is_alltime:
        cutoff = quote(date_str)
        base_where = f"WHERE time <= {cutoff}"
        downloads_calc = (
            f"COALESCE(MAX(CASE WHEN time <= {cutoff} THEN downloadsAllTime END), 0)"
            " AS total_downloads"
        )
    else:
        base_where = ""
        downloads_calc = (
            f"COALESCE(MAX(CASE WHEN time <= {quote(end_str)} THEN downloadsAllTime END), 0)"
            f" - COALESCE(MAX(CASE WHEN time < {quote(start_str)} THEN downloadsAllTime END), 0)"
            " AS total_downloads"
        )

    # CTEs common to both query shapes.
    shared_ctes = f"""base_data AS (
            SELECT
                {group_expr} AS group_key,
                {country_case} AS org_country_single,
                author,
                derived_author,
                merged_country_groups_single,
                merged_modality,
                model,
                time,
                downloadsAllTime
            FROM {view}
            {base_where}
        ),

        model_metrics AS (
            SELECT
                model,
                group_key,
                ANY_VALUE(org_country_single) AS org_country_single,
                ANY_VALUE(author) AS author,
                ANY_VALUE(derived_author) AS derived_author,
                ANY_VALUE(merged_country_groups_single) AS merged_country_groups_single,
                ANY_VALUE(merged_modality) AS merged_modality,
                {downloads_calc}
            FROM base_data
            GROUP BY model, group_key
        ),

        total_downloads_cte AS (
            SELECT SUM(total_downloads) AS total_downloads_all FROM model_metrics
        )"""

    percent_expr = (
        "CASE WHEN td.total_downloads_all = 0 THEN 0 "
        "ELSE ROUND(mm.total_downloads * 100.0 / td.total_downloads_all, 2) END AS percent_of_total"
    )
    tail = f"""WHERE mm.total_downloads > 0
        ORDER BY mm.total_downloads DESC
        LIMIT {row_limit};
    """

    if group_col == "derived_author":
        # For derived authors the country columns come from per-author lookups
        # (the value on the author's highest-downloadsAllTime row).
        return f"""
        WITH {shared_ctes},

        author_country_lookup AS (
            SELECT DISTINCT
                derived_author,
                FIRST_VALUE(org_country_single) OVER (PARTITION BY derived_author ORDER BY downloadsAllTime DESC) AS derived_author_country
            FROM base_data
            WHERE derived_author IS NOT NULL
        ),

        author_merged_country_lookup AS (
            SELECT DISTINCT
                derived_author,
                FIRST_VALUE(merged_country_groups_single) OVER (PARTITION BY derived_author ORDER BY downloadsAllTime DESC) AS derived_author_merged_country
            FROM base_data
            WHERE derived_author IS NOT NULL
        )

        SELECT
            mm.model,
            mm.group_key,
            acl.derived_author_country AS org_country_single,
            amc.derived_author_merged_country AS merged_country_groups_single,
            mm.author,
            mm.derived_author,
            mm.merged_modality,
            mm.total_downloads,
            {percent_expr}
        FROM model_metrics mm
        LEFT JOIN author_country_lookup acl ON mm.group_key = acl.derived_author
        LEFT JOIN author_merged_country_lookup amc ON mm.group_key = amc.derived_author
        CROSS JOIN total_downloads_cte td
        {tail}"""

    return f"""
        WITH {shared_ctes}

        SELECT
            mm.model,
            mm.group_key,
            mm.org_country_single,
            mm.author,
            mm.derived_author,
            mm.merged_country_groups_single,
            mm.merged_modality,
            mm.total_downloads,
            {percent_expr}
        FROM model_metrics mm
        CROSS JOIN total_downloads_cte td
        {tail}"""
|
| 215 |
+
|
| 216 |
+
|
| 217 |
+
def get_top_n_from_duckdb(con, group_col, top_n=10, time_filter=None, view="all_downloads"):
    """Fetch model-level leaderboard rows for a time window from DuckDB.

    The query always runs on a freshly created connection, which is closed
    before returning; the ``con`` argument is accepted but not used here.

    Args:
        con: Not used by this function.
        group_col: Grouping column, forwarded to ``build_leaderboard_query``.
        top_n: Leaderboard size, forwarded to ``build_leaderboard_query``.
        time_filter: Optional two-item sequence of Unix timestamps in seconds
            (start, end). Any other value selects 1970-01-01 through now.
        view: Name of the DuckDB view to query.

    Returns:
        The query result as a DataFrame, or an empty DataFrame if executing
        the query raises.
    """
    if time_filter and len(time_filter) == 2:
        window = (
            pd.to_datetime(time_filter[0], unit="s"),
            pd.to_datetime(time_filter[1], unit="s"),
        )
    else:
        # No usable window supplied: cover everything up to the present.
        window = (pd.to_datetime("1970-01-01"), pd.Timestamp.now())

    sql = build_leaderboard_query(
        group_col, top_n, str(window[0]), str(window[1]), view=view
    )

    fresh_con = create_fresh_duckdb_with_views()
    try:
        frame = fresh_con.execute(sql).fetchdf()
    except Exception as exc:
        # Best effort: report the failure and hand back an empty frame.
        print(f"Error querying DuckDB: {exc}")
        frame = pd.DataFrame()
    finally:
        fresh_con.close()
    return frame
|
graphs/leaderboard.py
CHANGED
|
@@ -1,63 +1,162 @@
|
|
|
|
|
| 1 |
import pandas as pd
|
| 2 |
from dash import html
|
| 3 |
from dash_iconify import DashIconify
|
| 4 |
import dash_mantine_components as dmc
|
| 5 |
-
import base64
|
| 6 |
import countryflag
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
"
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
"
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
|
| 60 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
# Chip renderer
|
| 62 |
def chip(text, bg_color="#F0F0F0"):
|
| 63 |
return html.Span(
|
|
@@ -368,7 +467,7 @@ def get_top_n_leaderboard(filtered_df, group_col, top_n=10, derived_author_toggl
|
|
| 368 |
download_top["% of total"] = download_top["% of total"].round(2)
|
| 369 |
|
| 370 |
# All relevant metadata columns for the grouping
|
| 371 |
-
meta_cols =
|
| 372 |
|
| 373 |
# Collect metadata per group by inspecting the underlying model-level rows
|
| 374 |
meta_map = {}
|
|
@@ -398,15 +497,15 @@ def get_top_n_leaderboard(filtered_df, group_col, top_n=10, derived_author_toggl
|
|
| 398 |
try:
|
| 399 |
flag_emoji = countryflag.getflag(c)
|
| 400 |
if not flag_emoji or flag_emoji == c:
|
| 401 |
-
flag_emoji =
|
| 402 |
except Exception:
|
| 403 |
-
flag_emoji =
|
| 404 |
chips.append((flag_emoji, c, "country"))
|
| 405 |
|
| 406 |
# Author - use derived_author_toggle to determine which column
|
| 407 |
author_key = "derived_author" if derived_author_toggle else "author"
|
| 408 |
for a in meta.get(author_key, []):
|
| 409 |
-
icon =
|
| 410 |
if icon == "":
|
| 411 |
if meta.get("merged_country_groups_single", ["User"])[0] != "User":
|
| 412 |
icon = "🏢"
|
|
@@ -459,230 +558,6 @@ def get_top_n_leaderboard(filtered_df, group_col, top_n=10, derived_author_toggl
|
|
| 459 |
return display_for_render, download_top
|
| 460 |
|
| 461 |
|
| 462 |
-
# Add dataset URLs used to create views when running queries from this module
|
| 463 |
-
hf_parquet_url_1 = "https://huggingface.co/datasets/emsesc/open_model_evolution_data/resolve/main/all_downloads_with_annotations.parquet"
|
| 464 |
-
hf_parquet_url_2 = "https://huggingface.co/datasets/emsesc/open_model_evolution_data/resolve/main/one_year_rolling.parquet"
|
| 465 |
-
|
| 466 |
-
|
| 467 |
-
def create_fresh_duckdb_with_views():
|
| 468 |
-
"""
|
| 469 |
-
Returns a fresh in-memory DuckDB connection with httpfs enabled and the
|
| 470 |
-
all_downloads / one_year_rolling views created from the remote parquet URLs.
|
| 471 |
-
Caller must close the returned connection.
|
| 472 |
-
"""
|
| 473 |
-
local_con = duckdb.connect(database=":memory:", read_only=False)
|
| 474 |
-
try:
|
| 475 |
-
try:
|
| 476 |
-
local_con.execute("INSTALL httpfs;")
|
| 477 |
-
local_con.execute("LOAD httpfs;")
|
| 478 |
-
except Exception:
|
| 479 |
-
pass
|
| 480 |
-
try:
|
| 481 |
-
local_con.execute("SET enable_http_metadata_cache = false;")
|
| 482 |
-
local_con.execute("SET enable_object_cache = false;")
|
| 483 |
-
except Exception:
|
| 484 |
-
pass
|
| 485 |
-
|
| 486 |
-
local_con.execute(f"""
|
| 487 |
-
CREATE OR REPLACE VIEW all_downloads AS
|
| 488 |
-
SELECT * FROM read_parquet('{hf_parquet_url_1}')
|
| 489 |
-
""")
|
| 490 |
-
local_con.execute(f"""
|
| 491 |
-
CREATE OR REPLACE VIEW one_year_rolling AS
|
| 492 |
-
SELECT * FROM read_parquet('{hf_parquet_url_2}')
|
| 493 |
-
""")
|
| 494 |
-
except Exception:
|
| 495 |
-
pass
|
| 496 |
-
return local_con
|
| 497 |
-
|
| 498 |
-
|
| 499 |
-
def build_leaderboard_query(
|
| 500 |
-
group_col,
|
| 501 |
-
top_n,
|
| 502 |
-
start_str=None,
|
| 503 |
-
end_str=None,
|
| 504 |
-
date_str=None,
|
| 505 |
-
view="all_downloads",
|
| 506 |
-
):
|
| 507 |
-
"""
|
| 508 |
-
Build and return the SQL query string for leaderboard data.
|
| 509 |
-
|
| 510 |
-
Supports two modes:
|
| 511 |
-
1. Time-range mode: Pass start_str and end_str to get downloads delta
|
| 512 |
-
2. All-time mode: Pass date_str to get cumulative downloads as of that date
|
| 513 |
-
|
| 514 |
-
Args:
|
| 515 |
-
group_col: Column to group by (e.g., 'author', 'derived_author', 'org_country_single')
|
| 516 |
-
top_n: Number of top entries to return
|
| 517 |
-
start_str: Start date string (for time-range mode)
|
| 518 |
-
end_str: End date string (for time-range mode)
|
| 519 |
-
date_str: Specific date string (for all-time mode)
|
| 520 |
-
view: DuckDB view name ('all_downloads' or 'one_year_rolling')
|
| 521 |
-
|
| 522 |
-
Returns:
|
| 523 |
-
SQL query string
|
| 524 |
-
"""
|
| 525 |
-
# Determine mode
|
| 526 |
-
is_alltime = date_str is not None
|
| 527 |
-
|
| 528 |
-
# handle country grouping normalization
|
| 529 |
-
if group_col == "org_country_single":
|
| 530 |
-
group_expr = """CASE
|
| 531 |
-
WHEN org_country_single IN ('HF', 'United States of America') THEN 'United States of America'
|
| 532 |
-
WHEN org_country_single IN ('International', 'Online', 'Online?') THEN 'International/Online'
|
| 533 |
-
ELSE org_country_single
|
| 534 |
-
END"""
|
| 535 |
-
else:
|
| 536 |
-
group_expr = group_col
|
| 537 |
-
|
| 538 |
-
# Determine WHERE clause and total_downloads calculation
|
| 539 |
-
if is_alltime:
|
| 540 |
-
base_where = f"WHERE time <= '{date_str}'"
|
| 541 |
-
downloads_calc = f"COALESCE(MAX(CASE WHEN time <= '{date_str}' THEN downloadsAllTime END), 0) AS total_downloads"
|
| 542 |
-
else:
|
| 543 |
-
base_where = ""
|
| 544 |
-
downloads_calc = f"""COALESCE(MAX(CASE WHEN time <= '{end_str}' THEN downloadsAllTime END), 0)
|
| 545 |
-
- COALESCE(MAX(CASE WHEN time < '{start_str}' THEN downloadsAllTime END), 0)
|
| 546 |
-
AS total_downloads"""
|
| 547 |
-
|
| 548 |
-
# Derived-author special-case (uses author-derived lookups)
|
| 549 |
-
if group_col == "derived_author":
|
| 550 |
-
return f"""
|
| 551 |
-
WITH base_data AS (
|
| 552 |
-
SELECT
|
| 553 |
-
{group_expr} AS group_key,
|
| 554 |
-
CASE
|
| 555 |
-
WHEN org_country_single IN ('HF', 'United States of America') THEN 'United States of America'
|
| 556 |
-
WHEN org_country_single IN ('International', 'Online', 'Online?') THEN 'International/Online'
|
| 557 |
-
ELSE org_country_single
|
| 558 |
-
END AS org_country_single,
|
| 559 |
-
author,
|
| 560 |
-
derived_author,
|
| 561 |
-
merged_country_groups_single,
|
| 562 |
-
merged_modality,
|
| 563 |
-
model,
|
| 564 |
-
time,
|
| 565 |
-
downloadsAllTime
|
| 566 |
-
FROM {view}
|
| 567 |
-
{base_where}
|
| 568 |
-
),
|
| 569 |
-
|
| 570 |
-
author_country_lookup AS (
|
| 571 |
-
SELECT DISTINCT
|
| 572 |
-
derived_author,
|
| 573 |
-
FIRST_VALUE(org_country_single) OVER (PARTITION BY derived_author ORDER BY downloadsAllTime DESC) AS derived_author_country
|
| 574 |
-
FROM base_data
|
| 575 |
-
WHERE derived_author IS NOT NULL
|
| 576 |
-
),
|
| 577 |
-
|
| 578 |
-
author_merged_country_lookup AS (
|
| 579 |
-
SELECT DISTINCT
|
| 580 |
-
derived_author,
|
| 581 |
-
FIRST_VALUE(merged_country_groups_single) OVER (PARTITION BY derived_author ORDER BY downloadsAllTime DESC) AS derived_author_merged_country
|
| 582 |
-
FROM base_data
|
| 583 |
-
WHERE derived_author IS NOT NULL
|
| 584 |
-
),
|
| 585 |
-
|
| 586 |
-
model_metrics AS (
|
| 587 |
-
SELECT
|
| 588 |
-
model,
|
| 589 |
-
group_key,
|
| 590 |
-
ANY_VALUE(org_country_single) AS org_country_single,
|
| 591 |
-
ANY_VALUE(author) AS author,
|
| 592 |
-
ANY_VALUE(derived_author) AS derived_author,
|
| 593 |
-
ANY_VALUE(merged_country_groups_single) AS merged_country_groups_single,
|
| 594 |
-
ANY_VALUE(merged_modality) AS merged_modality,
|
| 595 |
-
{downloads_calc}
|
| 596 |
-
FROM base_data
|
| 597 |
-
GROUP BY model, group_key
|
| 598 |
-
),
|
| 599 |
-
|
| 600 |
-
total_downloads_cte AS (
|
| 601 |
-
SELECT SUM(total_downloads) AS total_downloads_all FROM model_metrics
|
| 602 |
-
)
|
| 603 |
-
|
| 604 |
-
SELECT
|
| 605 |
-
mm.model,
|
| 606 |
-
mm.group_key,
|
| 607 |
-
acl.derived_author_country AS org_country_single,
|
| 608 |
-
amc.derived_author_merged_country AS merged_country_groups_single,
|
| 609 |
-
mm.author,
|
| 610 |
-
mm.derived_author,
|
| 611 |
-
mm.merged_modality,
|
| 612 |
-
mm.total_downloads,
|
| 613 |
-
CASE WHEN td.total_downloads_all = 0 THEN 0 ELSE ROUND(mm.total_downloads * 100.0 / td.total_downloads_all, 2) END AS percent_of_total
|
| 614 |
-
FROM model_metrics mm
|
| 615 |
-
LEFT JOIN author_country_lookup acl ON mm.group_key = acl.derived_author
|
| 616 |
-
LEFT JOIN author_merged_country_lookup amc ON mm.group_key = amc.derived_author
|
| 617 |
-
CROSS JOIN total_downloads_cte td
|
| 618 |
-
WHERE mm.total_downloads > 0
|
| 619 |
-
ORDER BY mm.total_downloads DESC
|
| 620 |
-
LIMIT {top_n * 10};
|
| 621 |
-
"""
|
| 622 |
-
|
| 623 |
-
# Generic grouping SQL
|
| 624 |
-
return f"""
|
| 625 |
-
WITH base_data AS (
|
| 626 |
-
SELECT
|
| 627 |
-
{group_expr} AS group_key,
|
| 628 |
-
CASE
|
| 629 |
-
WHEN org_country_single IN ('HF', 'United States of America') THEN 'United States of America'
|
| 630 |
-
WHEN org_country_single IN ('International', 'Online', 'Online?') THEN 'International/Online'
|
| 631 |
-
ELSE org_country_single
|
| 632 |
-
END AS org_country_single,
|
| 633 |
-
author,
|
| 634 |
-
derived_author,
|
| 635 |
-
merged_country_groups_single,
|
| 636 |
-
merged_modality,
|
| 637 |
-
model,
|
| 638 |
-
time,
|
| 639 |
-
downloadsAllTime
|
| 640 |
-
FROM {view}
|
| 641 |
-
{base_where}
|
| 642 |
-
),
|
| 643 |
-
|
| 644 |
-
model_metrics AS (
|
| 645 |
-
SELECT
|
| 646 |
-
model,
|
| 647 |
-
group_key,
|
| 648 |
-
ANY_VALUE(org_country_single) AS org_country_single,
|
| 649 |
-
ANY_VALUE(author) AS author,
|
| 650 |
-
ANY_VALUE(derived_author) AS derived_author,
|
| 651 |
-
ANY_VALUE(merged_country_groups_single) AS merged_country_groups_single,
|
| 652 |
-
ANY_VALUE(merged_modality) AS merged_modality,
|
| 653 |
-
{downloads_calc}
|
| 654 |
-
FROM base_data
|
| 655 |
-
GROUP BY model, group_key
|
| 656 |
-
),
|
| 657 |
-
|
| 658 |
-
total_downloads_cte AS (
|
| 659 |
-
SELECT SUM(total_downloads) AS total_downloads_all FROM model_metrics
|
| 660 |
-
)
|
| 661 |
-
|
| 662 |
-
SELECT
|
| 663 |
-
mm.model,
|
| 664 |
-
mm.group_key,
|
| 665 |
-
mm.org_country_single,
|
| 666 |
-
mm.author,
|
| 667 |
-
mm.derived_author,
|
| 668 |
-
mm.merged_country_groups_single,
|
| 669 |
-
mm.merged_modality,
|
| 670 |
-
mm.total_downloads,
|
| 671 |
-
CASE WHEN td.total_downloads_all = 0 THEN 0 ELSE ROUND(mm.total_downloads * 100.0 / td.total_downloads_all, 2) END AS percent_of_total
|
| 672 |
-
FROM model_metrics mm
|
| 673 |
-
CROSS JOIN total_downloads_cte td
|
| 674 |
-
WHERE mm.total_downloads > 0
|
| 675 |
-
ORDER BY mm.total_downloads DESC
|
| 676 |
-
LIMIT {top_n * 10};
|
| 677 |
-
"""
|
| 678 |
-
|
| 679 |
-
|
| 680 |
-
def build_leaderboard_query_alltime(group_col, top_n, date_str, view="all_downloads"):
|
| 681 |
-
"""
|
| 682 |
-
Deprecated: Use build_leaderboard_query with date_str parameter instead.
|
| 683 |
-
Build and return the SQL query string for all-time downloads at a specific date.
|
| 684 |
-
"""
|
| 685 |
-
return build_leaderboard_query(group_col, top_n, date_str=date_str, view=view)
|
| 686 |
|
| 687 |
|
| 688 |
def get_top_n_from_duckdb(
|
|
@@ -718,15 +593,3 @@ def get_top_n_from_duckdb(
|
|
| 718 |
return pd.DataFrame()
|
| 719 |
finally:
|
| 720 |
conn_local.close()
|
| 721 |
-
|
| 722 |
-
|
| 723 |
-
def format_large_number(n):
|
| 724 |
-
"""Shorten large numbers, e.g. 5,000,000 -> '5 million'."""
|
| 725 |
-
if n >= 1_000_000_000:
|
| 726 |
-
return f"{n / 1_000_000_000:.1f} billion"
|
| 727 |
-
elif n >= 1_000_000:
|
| 728 |
-
return f"{n / 1_000_000:.1f} million"
|
| 729 |
-
elif n >= 1_000:
|
| 730 |
-
return f"{n / 1_000:.1f}k"
|
| 731 |
-
else:
|
| 732 |
-
return str(int(n))
|
|
|
|
| 1 |
+
import base64
|
| 2 |
import pandas as pd
|
| 3 |
from dash import html
|
| 4 |
from dash_iconify import DashIconify
|
| 5 |
import dash_mantine_components as dmc
|
|
|
|
| 6 |
import countryflag
|
| 7 |
+
|
| 8 |
+
from config import COMPANY_ICON_MAP, COUNTRY_EMOJI_FALLBACK, META_COLS_MAP
|
| 9 |
+
from data_utils import build_leaderboard_query, create_fresh_duckdb_with_views
|
| 10 |
+
from helpers import format_large_number
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
# =============================
|
| 14 |
+
# Leaderboard Data Fetching
|
| 15 |
+
# =============================
|
| 16 |
+
|
| 17 |
+
def get_filtered_top_n_from_duckdb(
    slider_value, group_col, top_n, view="all_downloads"
):
    """Run the time-range leaderboard query on a fresh DuckDB connection.

    Args:
        slider_value: Two-item sequence of Unix timestamps in seconds
            (start, end). Any other value selects 1970-01-01 through now.
        group_col: Grouping column, forwarded to ``build_leaderboard_query``.
        top_n: Leaderboard size, forwarded to ``build_leaderboard_query``.
        view: Name of the DuckDB view to query.

    Returns:
        A DataFrame with the rows produced by the SQL that
        ``build_leaderboard_query`` generates for the requested window.
    """
    # A new connection (with parquet-backed views) is opened for every call
    # and is always closed, even when building or running the query fails.
    fresh_con = create_fresh_duckdb_with_views()
    try:
        if slider_value and len(slider_value) == 2:
            window_start = pd.to_datetime(slider_value[0], unit="s")
            window_end = pd.to_datetime(slider_value[1], unit="s")
        else:
            window_start = pd.to_datetime("1970-01-01")
            window_end = pd.Timestamp.now()

        sql = build_leaderboard_query(
            group_col, top_n, str(window_start), str(window_end), view=view
        )
        return fresh_con.execute(sql).fetchdf()
    finally:
        fresh_con.close()
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
def get_filtered_top_n_alltime_from_duckdb(
    slider_value, group_col, top_n, view="all_downloads"
):
    """Run the all-time (cumulative) leaderboard query as of a single date.

    Args:
        slider_value: Unix timestamp in seconds for the cut-off date, or
            ``None`` to use the current time.
        group_col: Grouping column, forwarded to ``build_leaderboard_query``.
        top_n: Leaderboard size, forwarded to ``build_leaderboard_query``.
        view: Name of the DuckDB view to query.

    Returns:
        A DataFrame with the rows produced by the SQL that
        ``build_leaderboard_query`` generates when given ``date_str``.
    """
    # A new connection (with parquet-backed views) is opened for every call
    # and is always closed, even when building or running the query fails.
    fresh_con = create_fresh_duckdb_with_views()
    try:
        cutoff = (
            pd.to_datetime(slider_value, unit="s")
            if slider_value is not None
            else pd.Timestamp.now()
        )
        sql = build_leaderboard_query(
            group_col, top_n, date_str=str(cutoff), view=view
        )
        return fresh_con.execute(sql).fetchdf()
    finally:
        fresh_con.close()
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
def _no_data_message():
    """Build the placeholder shown when a leaderboard has no rows to display."""
    return html.Div(
        "No data found in this time range. Try broadening the download date range.",
        style={"padding": "18px", "fontSize": "16px", "color": "#082030"},
    )


def leaderboard_callback_logic(
    n_clicks,
    slider_value,
    current_label,
    group_col,
    filename,
    default_label="▼ Show Top 50",
    chip_color="#F0F9FF",
    view="all_downloads",
    derived_author_toggle=True,
    is_alltime=False,
):
    """Produce the leaderboard table and the next toggle-button label.

    Args:
        n_clicks: Click count of the expand/collapse button. ``None`` and 0
            both mean "not clicked yet" and show the top 10.
        slider_value: Slider state passed to the DuckDB query helper: a
            (start, end) pair in range mode, a single value in all-time mode.
        current_label: Current button label; ``None`` is replaced by
            ``default_label``.
        group_col: Column the leaderboard is grouped by.
        filename: Forwarded to ``render_table_content``.
        default_label: Label assumed when ``current_label`` is ``None``.
        chip_color: Forwarded to ``render_table_content``.
        view: Name of the DuckDB view to query.
        derived_author_toggle: Forwarded to ``get_top_n_leaderboard``.
        is_alltime: When true, use the cumulative all-time query instead of
            the time-range query.

    Returns:
        A ``(table_content, new_label)`` tuple for the callback.
    """
    # Normalize label on first load
    if current_label is None:
        current_label = default_label

    # Determine top_n and next label. The button cycles 10 -> 50 -> 100 -> 10.
    # `not n_clicks` (rather than `== 0`) so a None click count is not
    # mistaken for a click.
    if not n_clicks:
        top_n, new_label = 10, current_label
    elif "Show Top 50" in current_label:
        top_n, new_label = 50, "▼ Show Top 100"
    elif "Show Top 100" in current_label:
        top_n, new_label = 100, "▲ Show Less"
    else:
        top_n, new_label = 10, "▼ Show Top 50"

    # Get filtered and aggregated data directly from DuckDB
    fetch = (
        get_filtered_top_n_alltime_from_duckdb
        if is_alltime
        else get_filtered_top_n_from_duckdb
    )
    df_filtered = fetch(slider_value, group_col, top_n, view=view)

    # If the SQL query returned no rows, ask user to broaden date range
    if df_filtered is None or df_filtered.empty:
        return _no_data_message(), new_label

    # Process the already-filtered data - pass derived_author_toggle
    df, download_df = get_top_n_leaderboard(
        df_filtered, group_col, top_n, derived_author_toggle=derived_author_toggle
    )

    # If processing produced no rows, ask user to broaden date range
    if df is None or (hasattr(df, "empty") and df.empty):
        return _no_data_message(), new_label

    table = render_table_content(
        df, download_df, chip_color=chip_color, filename=filename
    )
    return table, new_label
|
| 154 |
|
| 155 |
|
| 156 |
+
# =============================
|
| 157 |
+
# UI Rendering Components
|
| 158 |
+
# =============================
|
| 159 |
+
|
| 160 |
# Chip renderer
|
| 161 |
def chip(text, bg_color="#F0F0F0"):
|
| 162 |
return html.Span(
|
|
|
|
| 467 |
download_top["% of total"] = download_top["% of total"].round(2)
|
| 468 |
|
| 469 |
# All relevant metadata columns for the grouping
|
| 470 |
+
meta_cols = META_COLS_MAP.get(group_col, [])
|
| 471 |
|
| 472 |
# Collect metadata per group by inspecting the underlying model-level rows
|
| 473 |
meta_map = {}
|
|
|
|
| 497 |
try:
|
| 498 |
flag_emoji = countryflag.getflag(c)
|
| 499 |
if not flag_emoji or flag_emoji == c:
|
| 500 |
+
flag_emoji = COUNTRY_EMOJI_FALLBACK.get(c, "🌍")
|
| 501 |
except Exception:
|
| 502 |
+
flag_emoji = COUNTRY_EMOJI_FALLBACK.get(c, "🌍")
|
| 503 |
chips.append((flag_emoji, c, "country"))
|
| 504 |
|
| 505 |
# Author - use derived_author_toggle to determine which column
|
| 506 |
author_key = "derived_author" if derived_author_toggle else "author"
|
| 507 |
for a in meta.get(author_key, []):
|
| 508 |
+
icon = COMPANY_ICON_MAP.get(a, "")
|
| 509 |
if icon == "":
|
| 510 |
if meta.get("merged_country_groups_single", ["User"])[0] != "User":
|
| 511 |
icon = "🏢"
|
|
|
|
| 558 |
return display_for_render, download_top
|
| 559 |
|
| 560 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 561 |
|
| 562 |
|
| 563 |
def get_top_n_from_duckdb(
|
|
|
|
| 593 |
return pd.DataFrame()
|
| 594 |
finally:
|
| 595 |
conn_local.close()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
helpers.py
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
from dash import html
|
| 3 |
+
|
| 4 |
+
def ordinal(n: int) -> str:
    """Return ``n`` followed by its English ordinal suffix (e.g. 1 -> "1st").

    Numbers whose last two digits are 10-20 take "th" (11th, 12th, 13th);
    otherwise the last digit selects "st", "nd", "rd" or "th". The suffix is
    chosen from the magnitude of ``n``, so negative inputs mirror their
    positive counterparts (-1 -> "-1st").
    """
    # Python's % follows the sign of the divisor, so -1 % 10 == 9; work on
    # the magnitude to pick the suffix.
    magnitude = abs(n)
    if 10 <= magnitude % 100 <= 20:
        suffix = "th"
    else:
        suffix = {1: "st", 2: "nd", 3: "rd"}.get(magnitude % 10, "th")
    return f"{n}{suffix}"
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def format_date(dt: pd.Timestamp) -> str:
    """Render a timestamp as "<abbreviated month> <ordinal day>, <year>".

    The month abbreviation comes from ``strftime("%b")`` and the day is
    passed through ``ordinal`` to get its suffix.
    """
    month_abbrev = dt.strftime("%b")
    day_with_suffix = ordinal(dt.day)
    return f"{month_abbrev} {day_with_suffix}, {dt.year}"
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def build_slider_marks(start_dt: pd.Timestamp, end_dt: pd.Timestamp):
    """Build the two end-point marks shared by the range and all-time sliders.

    Returns a two-item list (start first, then end). Each mark is a dict
    whose "value" is the timestamp truncated to whole seconds and whose
    "label" is the "%b %Y" rendering of the same timestamp.
    """
    return [
        {"value": int(endpoint.timestamp()), "label": endpoint.strftime("%b %Y")}
        for endpoint in (start_dt, end_dt)
    ]
|
| 24 |
+
|
| 25 |
+
def get_thumb_labels(values):
    """Build the two date labels drawn on the range slider's thumbs.

    ``values`` holds the two thumb positions as Unix timestamps in seconds.
    When the thumbs are less than four 30-day months apart, the first label
    is placed above the track ("-38px") instead of at the usual "14px"
    offset; the second label always uses "14px".

    Returns a two-item list of ``html.Div`` elements, one per thumb.
    """
    base_style = {
        "background": "#fff",
        "color": "#082030",
        "fontWeight": "bold",
        "fontSize": "13px",
        "borderRadius": "8px",
        "padding": "2px 8px",
        "boxShadow": "0 1px 4px rgba(8,32,48,0.10)",
        "position": "absolute",
        "left": "50%",
        "transform": "translateX(-50%)",
        "whiteSpace": "nowrap",
        "zIndex": 100,
    }

    four_months_seconds = 4 * 30 * 86400  # 4 months
    thumbs_are_close = abs(values[1] - values[0]) < four_months_seconds

    # Only the first label moves when the thumbs are close together.
    first_style = {**base_style, "top": "-38px" if thumbs_are_close else "14px"}
    second_style = {**base_style, "top": "14px"}

    return [
        html.Div(pd.to_datetime(stamp, unit="s").strftime("%b %d, %Y"), style=style)
        for stamp, style in ((values[0], first_style), (values[1], second_style))
    ]
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
def get_thumb_label_single(value):
    """Build the date label drawn on the all-time slider's single thumb.

    ``value`` is the thumb position as a Unix timestamp in seconds. Returns
    a one-item list holding an ``html.Div`` with the "%b %d, %Y" date text.
    """
    date_text = pd.to_datetime(value, unit="s").strftime("%b %d, %Y")
    thumb_style = {
        "background": "#fff",
        "color": "#082030",
        "fontWeight": "bold",
        "fontSize": "13px",
        "borderRadius": "8px",
        "padding": "2px 8px",
        "boxShadow": "0 1px 4px rgba(8,32,48,0.10)",
        "position": "absolute",
        "left": "50%",
        "transform": "translateX(-50%)",
        "whiteSpace": "nowrap",
        "zIndex": 100,
        "top": "14px",
    }
    return [html.Div(date_text, style=thumb_style)]
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
def format_large_number(n: int) -> str:
    """Abbreviate a number with a "k", " million" or " billion" suffix.

    Values whose magnitude is below 1,000 are returned as a plain integer
    string. Larger values are scaled and shown with one decimal place, e.g.
    1_500 -> "1.5k", 5_000_000 -> "5.0 million", 2_300_000_000 -> "2.3 billion".

    A value that would round to "1000.0" of its unit is promoted to the next
    unit (999_999 -> "1.0 million", not "1000.0k"). Negative values are
    abbreviated by magnitude with a leading minus sign.
    """
    magnitude = abs(n)
    if magnitude < 1_000:
        return str(int(n))

    sign = "-" if n < 0 else ""
    units = ((1_000, "k"), (1_000_000, " million"), (1_000_000_000, " billion"))

    text, suffix = "", ""
    for factor, unit_suffix in units:
        text, suffix = f"{magnitude / factor:.1f}", unit_suffix
        # Decide on the *rounded* text so boundary values such as 999_999
        # move up a unit instead of printing "1000.0k".
        if float(text) < 1000:
            break
    return f"{sign}{text}{suffix}"
|
layout_components.py
ADDED
|
@@ -0,0 +1,603 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from dash import html, dcc
|
| 2 |
+
import dash_mantine_components as dmc
|
| 3 |
+
from dash_iconify import DashIconify
|
| 4 |
+
|
| 5 |
+
from config import BUTTON_STYLE, DARK_BACKGROUND, PRIMARY_COLOR
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def build_header(last_updated: str) -> html.Div:
    """Build the page header: a LIVE badge with the update time, plus links.

    ``last_updated`` is interpolated into the "Last updated: ..." text. The
    right-hand side links to the Data Provenance Initiative, Hugging Face and
    the paper; every link opens in a new tab.
    """
    live_badge = html.Span(
        [
            html.Span(className="live-dot"),
            html.Span("LIVE", className="live-label"),
        ],
        className="live-row",
    )
    updated_note = html.Span(
        f"Last updated: {last_updated}", className="last-updated"
    )
    status_row = html.Div(
        [live_badge, updated_note],
        className="header-status-row",
    )

    dpi_link = html.A(
        children=[
            html.Img(
                src="assets/images/dpi.svg",
                className="header-logo-img",
            ),
            "Data Provenance Initiative",
        ],
        href="https://www.dataprovenance.org/",
        target="_blank",
        className="no-bg-link header-link",
    )
    hf_link = html.A(
        children=[
            html.Img(
                src="assets/images/hf.svg",
                className="header-logo-img",
            ),
            html.Span("Hugging Face", className="hf-brand-text"),
        ],
        href="https://huggingface.co/",
        target="_blank",
        className="no-bg-link header-link",
    )
    paper_link = html.A(
        children=[html.Span("Read the paper", className="paper-text")],
        href="https://arxiv.org/abs/2512.03073",
        target="_blank",
        className="no-bg-link header-link paper-link",
    )
    links_row = html.Div(
        [dpi_link, hf_link, paper_link],
        className="header-links-row",
    )

    header_style = {
        "display": "flex",
        "justifyContent": "space-between",
        "alignItems": "center",
        "padding": "18px 24px",
        "gap": "24px",
        "backgroundColor": DARK_BACKGROUND,
    }
    return html.Div(
        [status_row, links_row],
        style=header_style,
        className="responsive-header",
    )
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
def build_range_slider(start_ts: int, end_ts: int, value, marks, thumb_children=None):
    """Return the ``dmc.RangeSlider`` registered under the id ``time-slider``.

    Args:
        start_ts: Passed as the slider's ``min``.
        end_ts: Passed as the slider's ``max``.
        value: Initial selection, forwarded unchanged.
        marks: Mark definitions, forwarded unchanged.
        thumb_children: Optional content forwarded as ``thumbChildren``.
    """
    # 86,400 per step. NOTE(review): the ``*_ts`` names suggest Unix seconds,
    # which would make one step a day -- confirm against the caller.
    day_step = 24 * 60 * 60

    slider_props = {
        "id": "time-slider",
        "min": start_ts,
        "max": end_ts,
        "value": value,
        "step": day_step,
        "color": PRIMARY_COLOR,
        "size": "md",
        "radius": "xl",
        "marks": marks,
        "style": {"width": "95%", "paddingLeft": "60px"},
        "label": None,
        "showLabelOnHover": False,
        "labelTransitionProps": {"transition": "fade", "duration": 150},
        "thumbChildren": thumb_children,
    }
    return dmc.RangeSlider(**slider_props)
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
def build_single_slider(start_ts: int, end_ts: int, value, marks, thumb_children=None):
    """Return the ``dmc.Slider`` registered under the id ``time-slider-alltime``.

    Args:
        start_ts: Passed as the slider's ``min``.
        end_ts: Passed as the slider's ``max``.
        value: Initial position, forwarded unchanged.
        marks: Mark definitions, forwarded unchanged.
        thumb_children: Optional content forwarded as ``thumbChildren``.
    """
    # 86,400 per step. NOTE(review): the ``*_ts`` names suggest Unix seconds,
    # which would make one step a day -- confirm against the caller.
    day_step = 24 * 60 * 60

    slider_props = {
        "id": "time-slider-alltime",
        "min": start_ts,
        "max": end_ts,
        "value": value,
        "step": day_step,
        "color": PRIMARY_COLOR,
        "size": "md",
        "radius": "xl",
        "marks": marks,
        "style": {"width": "95%", "paddingLeft": "60px"},
        "label": None,
        "showLabelOnHover": False,
        "labelTransitionProps": {"transition": "fade", "duration": 150},
        "thumbChildren": thumb_children,
    }
    return dmc.Slider(**slider_props)
|
| 113 |
+
|
| 114 |
+
|
| 115 |
+
def build_alert_and_title() -> html.Div:
    """Build the dismissible data-source notice followed by the page title.

    Returns:
        An ``html.Div`` with class ``responsive-title-row`` containing a
        ``dmc.Alert`` and a span with the text "The Open Model Leaderboard".
    """
    # Same accent colour for the alert icon and the link inside the alert.
    notice_accent = "#1A5F8D"

    source_link = html.A(
        "public Hugging Face",
        href="https://huggingface.co/datasets/hfmlsoc/hub_weekly_snapshots",
        target="_blank",
        # The link opens a new tab; deny the opened page access to
        # ``window.opener``.
        rel="noopener noreferrer",
        style={
            "color": notice_accent,
            "fontWeight": "bold",
            "textDecoration": "underline",
        },
    )

    notice = dmc.Alert(
        icon=DashIconify(
            icon="mdi:information-outline",
            width=18,
            height=18,
            style={"color": notice_accent},
        ),
        children=[
            "Note: This dashboard uses ",
            source_link,
            " download data, which is less precise than data analyzed in the paper.",
        ],
        color="blue",
        radius="md",
        variant="light",
        withCloseButton=True,
        style={
            "marginTop": "16px",
            "marginBottom": "8px",
            "fontSize": "15px",
            "fontWeight": "500",
            "marginLeft": "auto",
            "marginRight": "auto",
        },
    )

    title = html.Span(
        "The Open Model Leaderboard",
        style={
            "fontSize": 40,
            "fontWeight": "700",
            "textAlign": "center",
            "marginTop": "20px",
            "marginBottom": "20px",
        },
    )

    return html.Div(
        children=[notice, title],
        style={
            "display": "flex",
            "flexDirection": "column",
            "alignItems": "center",
            "justifyContent": "center",
            "gap": "12px",
            "marginTop": "20px",
            "marginBottom": "20px",
        },
        className="responsive-title-row",
    )
|
| 175 |
+
|
| 176 |
+
|
| 177 |
+
def build_intro_paragraph() -> html.Div:
    """Build the introductory paragraph, ending in a link to the paper.

    Returns:
        An ``html.Div`` with class ``responsive-intro`` whose children are the
        intro text followed by an anchor to the arXiv abstract.
    """
    paper_link = html.A(
        "Economies of Open Intelligence: Tracing Power & Participation in the Model Ecosystem.",
        href="https://arxiv.org/abs/2512.03073",
        target="_blank",
        # The link opens a new tab; deny the opened page access to
        # ``window.opener``.
        rel="noopener noreferrer",
        style={
            "color": PRIMARY_COLOR,
            "fontWeight": "700",
            "textDecoration": "underline",
        },
    )

    return html.Div(
        children=[
            "This leaderboard assesses concentrations of power in the open model ecosystem through ranking user downloads across three groups: countries, developers, and models. Explore how user downloads of models are distributed among these groups and identify key players shaping the open model ecosystem on Hugging Face. This dashboard accompanies the paper titled ",
            paper_link,
        ],
        style={
            "fontSize": 14,
            "marginTop": 18,
            "marginBottom": 12,
            "marginLeft": 100,
            "marginRight": 100,
            "textAlign": "center",
        },
        className="responsive-intro",
    )
|
| 203 |
+
|
| 204 |
+
|
| 205 |
+
def build_filter_controls(time_slider_component) -> html.Div:
    """Build the filter panel: download-view and attribution toggles plus date controls.

    Args:
        time_slider_component: Component rendered as the only child of the
            ``slider-container`` div.

    Returns:
        An ``html.Div`` with class ``responsive-main-content`` containing the
        ``main-content-left`` and ``main-content-right`` sections.
    """

    def info_hover_card(help_text):
        # Info icon that reveals ``help_text`` in a hover card.
        info_icon = DashIconify(
            icon="mdi:information-outline",
            width=16,
            height=16,
            style={
                "marginLeft": "6px",
                "color": PRIMARY_COLOR,
                "verticalAlign": "middle",
            },
        )
        return dmc.HoverCard(
            width=260,
            shadow="md",
            position="top",
            children=[
                dmc.HoverCardTarget(
                    html.Span(info_icon, style={"cursor": "pointer"})
                ),
                dmc.HoverCardDropdown(
                    dmc.Text(help_text, size="sm", style={"maxWidth": "240px"})
                ),
            ],
        )

    def highlighted(text):
        # Emphasised phrase inside the tip text.
        return html.Span(text, className="tip-highlight")

    # --- Left section: download view and model attribution -----------------
    download_view_label = html.Div(
        html.Span(
            [
                "Download View",
                info_hover_card(
                    "We believe this filter isolates more authentic usage, mitigating the impact of automatic software downloads for older models."
                ),
            ],
            className="filter-label-row",
        ),
        className="filter-label-container",
    )

    download_view_control = html.Div(
        [
            dmc.SegmentedControl(
                id="segmented",
                value="all-downloads",
                color=PRIMARY_COLOR,
                transitionDuration=200,
                data=[
                    {"value": "all-downloads", "label": "All Downloads"},
                    {
                        "value": "filtered-downloads",
                        "label": html.Span(["Filtered Downloads"]),
                    },
                ],
                mb=10,
            ),
        ],
        className="filter-segmented-row",
    )

    download_view_help = html.Div(
        "Choose whether to count all downloads, or only downloads up to one year from model creation.",
        className="filter-description",
    )

    attribution_block = html.Div(
        [
            html.Div("Model Attribution", className="filter-label"),
            dmc.SegmentedControl(
                id="model-attribution-segmented",
                value="uploader",
                color=PRIMARY_COLOR,
                transitionDuration=200,
                data=[
                    {"value": "uploader", "label": "Model Uploader"},
                    {"value": "original_creator", "label": "Original Model Creator"},
                ],
                mb=10,
            ),
            html.Div(
                "Toggle between having downloads attributed to the account that uploaded the model, or the account that uploaded the model that this was originally derived from.",
                className="filter-description",
            ),
        ],
        style={"marginTop": "10px"},
    )

    left_section = html.Div(
        [
            download_view_label,
            download_view_control,
            download_view_help,
            attribution_block,
            html.Span(id="global-toggle-status", className="global-toggle-status"),
        ],
        className="main-content-left",
    )

    # --- Right section: date range controls and usage tip ------------------
    date_range_label = html.Div(
        [
            html.Span("Download Date Range", className="filter-label"),
            info_hover_card(
                "Toggle between viewing downloads between a date range or all-time downloads at a single date."
            ),
        ],
        className="filter-label-row",
    )

    all_time_switch = dmc.Switch(
        id="time-range-toggle",
        label="All-time",
        checked=False,
        color=PRIMARY_COLOR,
        style={"marginBottom": "12px"},
    )

    slider_area = dcc.Loading(
        id="loading-slider",
        type="circle",
        color=PRIMARY_COLOR,
        children=html.Div(id="slider-container", children=[time_slider_component]),
    )

    slider_help = html.Div(
        id="slider-description",
        children="Adjust the time range to filter leaderboard results by when models were downloaded by users.",
        className="filter-description filter-description-margin",
    )

    tip_title = html.Div(
        [
            DashIconify(
                icon="mdi:lightbulb-on-outline",
                width=20,
                height=20,
                style={"marginRight": "8px", "color": DARK_BACKGROUND},
            ),
            html.Span("Tip"),
        ],
        className="tip-title",
    )

    tip_body = html.Div(
        [
            "Try switching between ",
            highlighted("All Downloads"),
            " and ",
            highlighted("Filtered Downloads"),
            " to compare net popularity (but many duplicate, unused downloads) versus more immediate interest as models are released. ",
            "You can also toggle between ",
            highlighted("Model Uploader"),
            " and ",
            highlighted("Original Model Creator"),
            " to see how attribution affects perceived popularity.",
        ],
        className="tip-description",
    )

    right_section = html.Div(
        [
            date_range_label,
            all_time_switch,
            slider_area,
            slider_help,
            html.Div([tip_title, tip_body], className="tip-section"),
        ],
        className="main-content-right",
    )

    return html.Div(
        children=[left_section, right_section],
        style={
            "display": "flex",
            "gap": "24px",
            "padding": "32px",
            "alignItems": "flex-start",
            "marginLeft": "100px",
            "marginRight": "100px",
            "backgroundColor": "#FFFBF9",
            "borderRadius": "18px",
        },
        className="responsive-main-content",
    )
|
| 422 |
+
|
| 423 |
+
|
| 424 |
+
def build_leaderboard_tabs() -> html.Div:
    """Build the tabbed leaderboard section (Countries, Developers, Models).

    Each tab holds a description, a loading wrapper around an empty table
    container, and a toggle button. Component ids follow one pattern per tab,
    e.g. ``loading-countries``, ``top_countries-table`` and
    ``top_countries-toggle``.

    Returns:
        An ``html.Div`` with class ``responsive-tabs`` wrapping a ``dcc.Tabs``
        with id ``leaderboard-tabs`` and ``"Countries"`` selected initially.
    """

    def meta(text):
        # Inline highlight for a metadata field name in a tab description.
        return html.Span(text, className="meta-var")

    def leaderboard_tab(name, description):
        # One tab; ``name`` is both label and value, and its lowercase form
        # is the slug used in the component ids.
        slug = name.lower()
        return dcc.Tab(
            label=name,
            value=name,
            style={
                "backgroundColor": "transparent",
                "border": "none",
                "padding": "10px 18px",
                "color": "#6B7280",
                "fontWeight": "500",
            },
            selected_style={
                "backgroundColor": "transparent",
                "border": "none",
                "padding": "10px 18px",
                "fontWeight": "700",
                "borderBottom": "3px solid #082030",
            },
            children=[
                html.Div(children=description, className="tab-description"),
                html.Div(
                    dcc.Loading(
                        id=f"loading-{slug}",
                        type="circle",
                        color=PRIMARY_COLOR,
                        children=html.Div(id=f"top_{slug}-table"),
                    ),
                    className="responsive-table-wrapper",
                ),
                html.Button(
                    id=f"top_{slug}-toggle",
                    # U+25BC (down-pointing triangle) written as an escape so
                    # it cannot be mis-decoded; the literal previously held
                    # the cp1252 mojibake of that character's UTF-8 bytes.
                    children="\u25bc Show Top 50",
                    n_clicks=0,
                    style={**BUTTON_STYLE, "border": "none"},
                ),
            ],
        )

    countries_description = [
        "The country leaderboard shows how downloads are distributed across different nations, highlighting which countries are leading in model usage and adoption. The metadata includes the ",
        meta("country"),
        " and number of ",
        meta("user downloads"),
        ".",
    ]
    developers_description = [
        "The developer leaderboard highlights the most influential model creators on Hugging Face, showcasing which developers have garnered the highest download counts for their models. The metadata includes the ",
        meta("developer"),
        ", number of ",
        meta("user downloads"),
        ", and ",
        meta("country"),
        ".",
    ]
    models_description = [
        "The model leaderboard ranks individual models based on their download counts, revealing which models are most popular among users on Hugging Face. The metadata includes the ",
        meta("model name"),
        ", number of ",
        meta("user downloads"),
        ", ",
        meta("developer"),
        ", and ",
        meta("modality"),
        " (the input and output types of the model).",
    ]

    return html.Div(
        [
            dcc.Tabs(
                id="leaderboard-tabs",
                value="Countries",
                children=[
                    leaderboard_tab("Countries", countries_description),
                    leaderboard_tab("Developers", developers_description),
                    leaderboard_tab("Models", models_description),
                ],
            ),
        ],
        style={
            "borderRadius": "18px",
            "padding": "32px",
            "marginTop": "12px",
            "marginBottom": "12px",
            "marginLeft": "50px",
            "marginRight": "50px",
        },
        className="responsive-tabs",
    )
|
| 586 |
+
|
| 587 |
+
|
| 588 |
+
def build_main_layout(last_updated: str, time_slider_component) -> html.Div:
    """Assemble the whole page from the individual section builders.

    Args:
        last_updated: Forwarded to ``build_header``.
        time_slider_component: Forwarded to ``build_filter_controls``.

    Returns:
        An ``html.Div`` whose children are, in order: header, alert and title,
        intro paragraph, filter controls and leaderboard tabs.
    """
    page_sections = [
        build_header(last_updated),
        build_alert_and_title(),
        build_intro_paragraph(),
        build_filter_controls(time_slider_component),
        build_leaderboard_tabs(),
    ]
    page_style = {
        "fontFamily": "Inter",
        "backgroundColor": "#ffffff",
        "minHeight": "100vh",
    }
    return html.Div(page_sections, style=page_style)
|