README / index.html
Reubencf's picture
Bundle D3 + GSAP locally, move inline script to app.js
c65dcab verified
raw
history blame
12.7 kB
<!DOCTYPE html>
<html lang="en">
<head>
<!-- Document metadata. The charset declaration comes first so the parser
     decodes everything after it (including the title) as UTF-8. -->
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<!-- The separator is U+00B7 MIDDLE DOT; it had been corrupted into the CJK
     character U+8DEF by a UTF-8 -> GBK mis-decoding. -->
<title>ReubenDataLab · Dataset Explorer</title>

<!-- Web fonts: open connections to both Google Fonts origins early, then
     request the Geist and Google Sans families with display=swap. -->
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=Geist:wght@100..900&family=Google+Sans:ital,opsz,wght@0,17..18,400..700;1,17..18,400..700&display=swap" rel="stylesheet">

<!-- Locally bundled libraries. Deferred scripts run in document order after
     parsing, so d3 is evaluated before the three d3-* plugins listed below it,
     and all of these run before the deferred app.js at the end of <body>. -->
<script src="vendor/d3.min.js" defer></script>
<script src="vendor/d3-weighted-voronoi.js" defer></script>
<script src="vendor/d3-voronoi-map.js" defer></script>
<script src="vendor/d3-voronoi-treemap.js" defer></script>
<script src="vendor/gsap.min.js" defer></script>
<style>
/* Design tokens.
   Surface/text colours are consumed by the rules below via var().
   NOTE(review): --divider and --palette-1..12 are not referenced by any rule
   in this stylesheet; presumably they are read from script — confirm. */
:root {
  --bg: #000000;
  --fg: #ffffff;
  --muted: #8a8a94;
  --card: #141414;
  --card-alt: #1c1c1e;
  --border: #262626;
  --divider: #2e2e2e;
  --tooltip-bg: rgba(20, 20, 20, 0.96);
  --palette-1: #3b82f6;
  --palette-2: #10b981;
  --palette-3: #ef4444;
  --palette-4: #f59e0b;
  --palette-5: #8b5cf6;
  --palette-6: #ec4899;
  --palette-7: #06b6d4;
  --palette-8: #84cc16;
  --palette-9: #f97316;
  --palette-10: #14b8a6;
  --palette-11: #a855f7;
  --palette-12: #eab308;
}

/* Size every element by its border box. */
* {
  box-sizing: border-box;
}

/* Page canvas: dark background, light text, Geist-first font stack. */
html,
body {
  margin: 0;
  padding: 0;
  background: var(--bg);
  color: var(--fg);
  font-family: "Geist", "Google Sans", -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
  font-weight: 400;
  min-height: 100vh;
  -webkit-font-smoothing: antialiased;
  letter-spacing: 0.005em;
}

/* Links take the text colour, carry no underline, and dim on hover. */
a {
  color: var(--fg);
  text-decoration: none;
}

a:hover {
  opacity: 0.7;
}
/* Page header: a centred column that holds the banner image. */
header {
  max-width: 1440px;
  margin: 0 auto;
  padding: 32px 24px 8px;
  text-align: center;
}

/* Banner image: fluid up to 900px wide, centred, rounded corners. */
.hero-img {
  display: block;
  max-width: 900px;
  width: 100%;
  height: auto;
  margin: 0 auto;
  border-radius: 14px;
}

/* Headline figures: five equal grid columns (collapsed to two by the
   max-width: 900px media query further down the stylesheet). */
.hero-stats {
  max-width: 1440px;
  margin: 24px auto 0;
  padding: 0 24px;
  display: grid;
  grid-template-columns: repeat(5, 1fr);
  gap: 14px;
}

/* One figure tile. */
.stat {
  background: var(--card);
  border: 1px solid var(--border);
  border-radius: 16px;
  padding: 18px 14px;
  text-align: center;
}

/* The large number inside a tile. */
.stat .num {
  display: block;
  font-size: 1.75rem;
  font-weight: 700;
  color: var(--fg);
  letter-spacing: -0.015em;
  line-height: 1.05;
}

/* Smaller, lighter suffix nested inside the number.
   NOTE(review): no .decimal element exists in the static markup; presumably
   it is generated by script — confirm. */
.stat .num .decimal {
  font-size: 0.55em;
  font-weight: 500;
  opacity: 0.75;
  margin-left: 1px;
}

/* Upper-case caption under the number. */
.stat .lbl {
  display: block;
  font-size: 0.68rem;
  color: var(--muted);
  text-transform: uppercase;
  letter-spacing: 0.13em;
  margin-top: 8px;
  font-weight: 500;
}

/* Faint secondary line under the caption, kept in normal case. */
.stat .sub {
  display: block;
  font-size: 0.6rem;
  color: var(--muted);
  font-weight: 400;
  letter-spacing: 0.04em;
  margin-top: 4px;
  opacity: 0.65;
  text-transform: none;
}
/* Two-column grid holding the main chart cards (single column under the
   max-width: 780px media query further down the stylesheet). */
.charts {
  max-width: 1440px;
  margin: 0 auto;
  display: grid;
  grid-template-columns: 1fr 1fr;
  gap: 24px;
  padding: 24px;
}

/* Card surface shared by every chart panel. */
.chart-card {
  background: var(--card);
  border: 1px solid var(--border);
  border-radius: 20px;
  padding: 24px 20px 16px;
}

.chart-card h2 {
  text-align: center;
  margin: 0 0 4px;
  font-size: 1.1rem;
  font-weight: 600;
  color: var(--fg);
  letter-spacing: -0.005em;
}

.chart-card .subtitle {
  text-align: center;
  margin: 0 0 14px;
  font-size: 0.82rem;
  color: var(--muted);
  font-weight: 400;
}

/* Donut container: a square box (aspect-ratio 1) that also acts as the
   positioning context for the absolutely placed centre overlay. */
.donut-wrap {
  position: relative;
  width: 100%;
  max-width: 560px;
  aspect-ratio: 1;
  margin: 0 auto;
}

.donut-wrap.small {
  max-width: 400px;
}

/* The SVG fills the square; overflow stays visible so hover effects drawn
   outside the viewport are not clipped. */
.donut-svg {
  width: 100%;
  height: 100%;
  display: block;
  overflow: visible;
}

/* Slices brighten and gain a soft glow on hover. */
.donut-slice {
  cursor: pointer;
  transition: filter 0.2s ease;
}

.donut-slice:hover {
  filter: brightness(1.25) drop-shadow(0 0 10px rgba(255,255,255,0.15));
}

/* Overlay covering the whole donut box; percentage padding keeps its
   content inside the hole, and pointer events pass through to the SVG. */
.donut-center {
  position: absolute;
  inset: 0;
  display: flex;
  flex-direction: column;
  align-items: center;
  justify-content: center;
  pointer-events: none;
  padding: 18%;
  text-align: center;
}

.donut-center.small {
  padding: 22%;
}

/* Content of the centre overlay.
   NOTE(review): .center-item, .center-label, .center-number and
   .center-divider do not occur in the static markup; presumably they are
   generated by script — confirm. */
.center-item {
  width: 100%;
}

.center-label {
  font-size: 0.65rem;
  font-weight: 500;
  color: var(--muted);
  letter-spacing: 0.18em;
  text-transform: uppercase;
  display: flex;
  align-items: center;
  justify-content: center;
  gap: 6px;
}

.center-label .icon {
  font-size: 0.85rem;
  opacity: 0.9;
}

/* Viewport-scaled figure, clamped between 1.8rem and 2.75rem. */
.center-number {
  font-size: clamp(1.8rem, 4.5vw, 2.75rem);
  font-weight: 700;
  color: var(--fg);
  line-height: 1;
  letter-spacing: -0.03em;
  margin: 4px 0;
}

.center-number .decimal {
  font-size: 0.55em;
  font-weight: 500;
  color: var(--fg);
  opacity: 0.72;
  margin-left: 1px;
}

/* Hairline separator: all borders removed, then only the top one restored. */
.center-divider {
  width: 42%;
  border: none;
  border-top: 1px solid rgba(255, 255, 255, 0.08);
  margin: 10px auto;
}
/* Full-width row that holds the details card. */
.details {
  max-width: 1440px;
  margin: 0 auto 32px;
  padding: 0 24px;
}

.details-card {
  background: var(--card);
  border: 1px solid var(--border);
  border-radius: 20px;
  padding: 26px 28px;
  min-height: 140px;
}

/* Card heading: a flex row so a swatch, title and link sit on one line.
   NOTE(review): the card is empty in the static markup, so everything styled
   from here down to .schema-list code is presumably injected by script —
   confirm. */
.details-card h3 {
  margin: 0 0 8px;
  font-size: 1.35rem;
  color: var(--fg);
  display: flex;
  align-items: center;
  gap: 12px;
  font-weight: 600;
  letter-spacing: -0.01em;
}

/* Round 14px colour dot. */
.details-card h3 .swatch {
  display: inline-block;
  width: 14px;
  height: 14px;
  border-radius: 50%;
}

.details-card h3 a {
  color: var(--fg);
  font-size: 1.05rem;
  opacity: 0.85;
}

.details-card h3 a:hover {
  opacity: 1;
  text-decoration: underline;
}

.details-card .tagline {
  color: var(--muted);
  font-size: 0.95rem;
  margin: 0 0 18px;
}

/* Key/value grid: as many columns of at least 220px as will fit. */
.kv-grid {
  display: grid;
  grid-template-columns: repeat(auto-fit, minmax(220px, 1fr));
  gap: 12px 24px;
}

.kv .k {
  color: var(--muted);
  font-size: 0.75rem;
  text-transform: uppercase;
  letter-spacing: 0.1em;
  margin-bottom: 4px;
  font-weight: 500;
}

.kv .v {
  color: var(--fg);
  font-size: 0.9rem;
}

.kv .v a {
  border-bottom: 1px dashed var(--muted);
}

.kv .v strong {
  font-weight: 600;
}

/* Wrapping row of monospace chips. */
.schema-list {
  display: flex;
  flex-wrap: wrap;
  gap: 6px;
  margin-top: 6px;
}

.schema-list code {
  background: var(--card-alt);
  color: var(--fg);
  padding: 3px 8px;
  border-radius: 6px;
  font-size: 0.78rem;
  font-family: "SF Mono", Consolas, monospace;
  border: 1px solid var(--border);
}

/* Secondary chart row: one narrow and one wide column (1fr : 2fr). */
.extras {
  max-width: 1440px;
  margin: 8px auto 0;
  padding: 0 24px 24px;
  display: grid;
  grid-template-columns: 1fr 2fr;
  gap: 24px;
}

/* Treemap host: fixed 900px tall, and the positioning context for the
   absolutely placed tooltip nested inside it. */
.plot-treemap {
  width: 100%;
  height: 900px;
  position: relative;
}

.plot-treemap svg {
  width: 100%;
  height: 100%;
  display: block;
}
/* Voronoi cells brighten and glow on hover. */
.voronoi-cell {
  cursor: pointer;
  transition: filter 0.18s ease, opacity 0.18s ease;
}

.voronoi-cell:hover {
  filter: brightness(1.35) drop-shadow(0 0 8px rgba(255,255,255,0.35));
}

/* Cell labels: centred white text that never intercepts the pointer. */
.voronoi-label {
  font-family: "Geist", "Google Sans", sans-serif;
  font-weight: 600;
  fill: #ffffff;
  pointer-events: none;
  text-anchor: middle;
  user-select: none;
}

.voronoi-label .code {
  font-weight: 400;
  opacity: 0.8;
  fill: #ffffff;
}

/* Tooltip chrome common to both tooltips. They start fully transparent
   (opacity 0) and fade via the opacity transition. */
.voronoi-tooltip,
.donut-tooltip {
  pointer-events: none;
  background: var(--tooltip-bg);
  border: 1px solid var(--border);
  border-radius: 10px;
  padding: 10px 14px;
  font-size: 0.85rem;
  color: var(--fg);
  box-shadow: 0 12px 32px rgba(0,0,0,0.7);
  opacity: 0;
  transition: opacity 0.12s ease;
  white-space: nowrap;
  font-family: "Geist", sans-serif;
}

/* Treemap tooltip: positioned relative to its .plot-treemap ancestor. */
.voronoi-tooltip {
  position: absolute;
  z-index: 20;
}

/* Donut tooltip: positioned relative to the viewport, stacked higher. */
.donut-tooltip {
  position: fixed;
  z-index: 50;
}

/* Treemap tooltip content. */
.voronoi-tooltip .t-name {
  font-weight: 700;
  color: var(--fg);
  font-size: 0.95rem;
}

.voronoi-tooltip .t-code {
  color: var(--muted);
  font-size: 0.72rem;
  margin-left: 4px;
}

.voronoi-tooltip .t-rows {
  color: var(--fg);
  font-weight: 600;
  margin-top: 4px;
  opacity: 0.9;
}

/* Donut tooltip content. */
.donut-tooltip .t-name {
  font-weight: 700;
  font-size: 0.95rem;
}

.donut-tooltip .t-meta {
  color: var(--muted);
  font-size: 0.78rem;
  margin-top: 4px;
}
/* Page footer: small, muted, centred credits line. */
footer {
  max-width: 1440px;
  margin: 0 auto 32px;
  padding: 0 24px;
  text-align: center;
  color: var(--muted);
  font-size: 0.8rem;
  font-weight: 400;
}

footer a {
  border-bottom: 1px dashed var(--muted);
}

/* At 900px and below: stat tiles drop from five columns to two, and the
   secondary chart row stacks into one column. */
@media (max-width: 900px) {
  .hero-stats {
    grid-template-columns: repeat(2, 1fr);
  }

  .extras {
    grid-template-columns: 1fr;
  }
}

/* At 780px and below: the main chart cards stack into one column. */
@media (max-width: 780px) {
  .charts {
    grid-template-columns: 1fr;
  }
}
</style>
</head>
<body>
<!-- Page banner: a single image whose alt text carries the site name. -->
<header>
  <img class="hero-img" src="Reubensdataset.png" alt="Reuben's Data Lab">
</header>
<!-- Headline figures. Each tile holds a number, a caption and a sub-line.
     The .num spans ship empty and keep their figure in data-value;
     NOTE(review): presumably app.js reads data-value and renders the number —
     confirm against the script. -->
<section class="hero-stats">
  <div class="stat">
    <span class="num" data-value="12"></span>
    <span class="lbl">Raw datasets</span>
    <span class="sub">in four HF collections</span>
  </div>

  <div class="stat">
    <span class="num" data-value="14.8M"></span>
    <span class="lbl">Total rows</span>
    <span class="sub">every row, every dataset</span>
  </div>

  <div class="stat">
    <span class="num" data-value="130+"></span>
    <span class="lbl">Languages</span>
    <span class="sub">many rarely seen online</span>
  </div>

  <div class="stat">
    <span class="num" data-value="4"></span>
    <span class="lbl">Modalities</span>
    <span class="sub">audio, text, images, code</span>
  </div>

  <div class="stat">
    <span class="num" data-value="17"></span>
    <span class="lbl">Days to build</span>
    <span class="sub">April 8 to April 24, 2026</span>
  </div>
</section>
<!-- Main chart row: two cards, each with a heading, a subtitle, and a donut
     box that pairs an empty <svg> with an empty centre overlay.
     NOTE(review): both are presumably filled in by app.js via their ids —
     confirm against the script. -->
<section class="charts">
  <div class="chart-card">
    <h2>Raw corpus</h2>
    <div class="subtitle">Every dataset I've created in the <a href="https://huggingface.co/ReubenDataLab/collections" target="_blank" rel="noopener">ReubenDataLab collections</a></div>
    <div class="donut-wrap">
      <svg class="donut-svg" id="chart-raw"></svg>
      <div class="donut-center" id="center-raw"></div>
    </div>
  </div>

  <div class="chart-card">
    <h2>Adaption-remastered</h2>
    <div class="subtitle">Improved datasets after running them through <a href="https://adaptionlabs.ai" target="_blank" rel="noopener">adaptionlabs.ai</a></div>
    <div class="donut-wrap">
      <svg class="donut-svg" id="chart-adaption"></svg>
      <div class="donut-center" id="center-adaption"></div>
    </div>
  </div>
</section>
<!-- Details panel: ships empty and hidden through an inline display: none.
     NOTE(review): presumably app.js fills it and toggles style.display when a
     chart slice is selected — confirm before replacing the inline style. -->
<div class="details">
  <div class="details-card" id="details-card" style="display: none;"></div>
</div>
<!-- Secondary chart row: a small modality donut beside the language treemap.
     The treemap host already contains its own tooltip element, which the
     stylesheet positions absolutely inside the host. -->
<section class="extras">
  <div class="chart-card">
    <h2>Modality split</h2>
    <div class="subtitle">Share of the corpus by data type</div>
    <div class="donut-wrap small">
      <svg class="donut-svg" id="chart-modality"></svg>
      <div class="donut-center small" id="center-modality"></div>
    </div>
  </div>

  <div class="chart-card">
    <h2>Languages across the corpus</h2>
    <div class="subtitle">Every language that appears in any raw dataset, sized (log-scale) by total row count. Hover for exact numbers.</div>
    <div class="plot-treemap" id="chart-treemap">
      <div class="voronoi-tooltip" id="voronoi-tooltip"></div>
    </div>
  </div>
</section>
<!-- Single tooltip element for the donut charts; the stylesheet gives it
     position: fixed and an initial opacity of 0. -->
<div class="donut-tooltip" id="donut-tooltip"></div>
<!-- Credits line. The separators are U+00B7 MIDDLE DOT (they had been
     corrupted into the CJK character U+8DEF). Both links open in a new tab,
     so they carry rel="noopener" like the other target="_blank" links on
     this page. -->
<footer>
  Data self-reported from HF dataset pages · Built for the
  <a href="https://www.adaptionlabs.ai/blog/the-uncharted-data-challenge" target="_blank" rel="noopener">Uncharted Data Challenge</a>
  · Author <a href="https://huggingface.co/Reubencf" target="_blank" rel="noopener">@Reubencf</a>
</footer>
<!-- Page script. It is deferred and comes after the deferred vendor scripts
     in the head, so it executes once they have all been evaluated. -->
<script defer src="app.js"></script>
</body>
</html>