An, Duo committed on
Commit ·
227eeac
1
Parent(s): bc9ddee
Enhance Colab badge functionality: Update script to add 'Open in Colab' badges to both template and solution notebooks. Modify badge generation to accommodate different notebook directories and ensure proper processing of both templates and solutions.
Browse files. This view is limited to 50 files because it contains too many changes. See raw diff
- scripts/add_colab_badges.py +19 -11
- scripts/add_colab_torch_judge_install.py +79 -0
- solutions/01_relu_solution.ipynb +45 -29
- solutions/02_softmax_solution.ipynb +16 -1
- solutions/03_linear_solution.ipynb +16 -1
- solutions/04_layernorm_solution.ipynb +16 -1
- solutions/05_attention_solution.ipynb +16 -0
- solutions/06_multihead_attention_solution.ipynb +117 -102
- solutions/07_batchnorm_solution.ipynb +129 -114
- solutions/08_rmsnorm_solution.ipynb +16 -1
- solutions/09_causal_attention_solution.ipynb +16 -1
- solutions/10_gqa_solution.ipynb +16 -1
- solutions/11_sliding_window_solution.ipynb +16 -1
- solutions/12_linear_attention_solution.ipynb +16 -1
- solutions/13_gpt2_block_solution.ipynb +16 -1
- solutions/14_kv_cache_solution.ipynb +18 -3
- solutions/15_mlp_solution.ipynb +18 -3
- solutions/16_cross_entropy_solution.ipynb +16 -1
- solutions/17_dropout_solution.ipynb +16 -1
- solutions/18_embedding_solution.ipynb +16 -1
- solutions/19_gelu_solution.ipynb +16 -1
- solutions/20_weight_init_solution.ipynb +16 -1
- solutions/21_gradient_clipping_solution.ipynb +16 -1
- solutions/22_conv2d_solution.ipynb +16 -1
- solutions/23_cross_attention_solution.ipynb +16 -1
- solutions/24_rope_solution.ipynb +16 -1
- solutions/25_flash_attention_solution.ipynb +16 -1
- solutions/26_lora_solution.ipynb +16 -1
- solutions/27_vit_patch_solution.ipynb +16 -1
- solutions/28_moe_solution.ipynb +16 -1
- solutions/29_adam_solution.ipynb +16 -1
- solutions/30_cosine_lr_solution.ipynb +16 -1
- solutions/31_gradient_accumulation_solution.ipynb +16 -1
- solutions/32_topk_sampling_solution.ipynb +16 -1
- solutions/33_beam_search_solution.ipynb +16 -1
- solutions/34_speculative_decoding_solution.ipynb +16 -1
- solutions/35_bpe_solution.ipynb +16 -1
- solutions/36_int8_quantization_solution.ipynb +16 -1
- solutions/37_dpo_loss_solution.ipynb +16 -1
- solutions/38_grpo_loss_solution.ipynb +15 -0
- solutions/39_ppo_loss_solution.ipynb +15 -1
- solutions/40_linear_regression_solution.ipynb +137 -122
- templates/00_welcome.ipynb +102 -6
- templates/01_relu.ipynb +14 -0
- templates/02_softmax.ipynb +14 -0
- templates/03_linear.ipynb +14 -0
- templates/04_layernorm.ipynb +14 -0
- templates/05_attention.ipynb +14 -0
- templates/06_multihead_attention.ipynb +15 -0
- templates/07_batchnorm.ipynb +142 -128
scripts/add_colab_badges.py
CHANGED
|
@@ -1,27 +1,29 @@
|
|
| 1 |
#!/usr/bin/env python3
|
| 2 |
-
"""Add 'Open in Colab' badges to all template notebooks."""
|
| 3 |
|
| 4 |
import json
|
| 5 |
from pathlib import Path
|
| 6 |
|
| 7 |
REPO = "duoan/TorchCode"
|
| 8 |
BRANCH = "master"
|
| 9 |
-
|
|
|
|
|
|
|
| 10 |
BADGE_IMG = "https://colab.research.google.com/assets/colab-badge.svg"
|
| 11 |
|
| 12 |
|
| 13 |
-
def colab_url(filename: str) -> str:
|
| 14 |
return (
|
| 15 |
f"https://colab.research.google.com/github/{REPO}"
|
| 16 |
-
f"/blob/{BRANCH}/templates/{filename}"
|
| 17 |
)
|
| 18 |
|
| 19 |
|
| 20 |
-
def badge_markdown(filename: str) -> str:
|
| 21 |
-
return f"[![Open In Colab]({BADGE_IMG})]({colab_url(filename)})"
|
| 22 |
|
| 23 |
|
| 24 |
-
def process_notebook(path: Path) -> bool:
|
| 25 |
with open(path, "r", encoding="utf-8") as f:
|
| 26 |
nb = json.load(f)
|
| 27 |
|
|
@@ -34,7 +36,7 @@ def process_notebook(path: Path) -> bool:
|
|
| 34 |
if "colab-badge.svg" in flat:
|
| 35 |
return False
|
| 36 |
|
| 37 |
-
badge = badge_markdown(path.name)
|
| 38 |
cells[0]["source"] = [badge + "\n\n"] + (
|
| 39 |
source_lines if isinstance(source_lines, list) else [source_lines]
|
| 40 |
)
|
|
@@ -49,11 +51,17 @@ def process_notebook(path: Path) -> bool:
|
|
| 49 |
def main() -> None:
|
| 50 |
updated = 0
|
| 51 |
for nb_path in sorted(TEMPLATES_DIR.glob("*.ipynb")):
|
| 52 |
-
if process_notebook(nb_path):
|
| 53 |
-
print(f" ✅ {nb_path.name}")
|
| 54 |
updated += 1
|
| 55 |
else:
|
| 56 |
-
print(f" ⏭️ {nb_path.name} (already has badge or skipped)")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
print(f"\nDone — updated {updated} notebooks.")
|
| 58 |
|
| 59 |
|
|
|
|
| 1 |
#!/usr/bin/env python3
|
| 2 |
+
"""Add 'Open in Colab' badges to all template and solution notebooks."""
|
| 3 |
|
| 4 |
import json
|
| 5 |
from pathlib import Path
|
| 6 |
|
| 7 |
REPO = "duoan/TorchCode"
|
| 8 |
BRANCH = "master"
|
| 9 |
+
ROOT = Path(__file__).resolve().parent.parent
|
| 10 |
+
TEMPLATES_DIR = ROOT / "templates"
|
| 11 |
+
SOLUTIONS_DIR = ROOT / "solutions"
|
| 12 |
BADGE_IMG = "https://colab.research.google.com/assets/colab-badge.svg"
|
| 13 |
|
| 14 |
|
| 15 |
+
def colab_url(filename: str, folder: str) -> str:
    """Build the Colab link that opens one notebook of this repository.

    Args:
        filename: Notebook file name, e.g. ``"01_relu.ipynb"``.
        folder: Repository folder that contains the notebook
            (the callers in this script pass ``"templates"`` or
            ``"solutions"``).

    Returns:
        The ``colab.research.google.com/github/...`` URL pointing at the
        notebook on ``BRANCH`` of ``REPO``.
    """
    repo_prefix = f"https://colab.research.google.com/github/{REPO}"
    notebook_suffix = f"/blob/{BRANCH}/{folder}/{filename}"
    return repo_prefix + notebook_suffix
|
| 20 |
|
| 21 |
|
| 22 |
+
def badge_markdown(filename: str, folder: str) -> str:
    """Return the Markdown for a clickable "Open in Colab" badge.

    The badge is an image (``BADGE_IMG``) wrapped in a link to the notebook's
    Colab URL. The image part matters: ``process_notebook`` detects an
    existing badge by looking for ``colab-badge.svg`` in the first cell, so a
    bare ``[](url)`` link would be re-added on every run and would render as
    an invisible link.

    Args:
        filename: Notebook file name, e.g. ``"01_relu.ipynb"``.
        folder: Repository folder that contains the notebook.

    Returns:
        A single-line Markdown snippet of the form
        ``[![Open In Colab](<badge image>)](<colab url>)``.
    """
    return f"[![Open In Colab]({BADGE_IMG})]({colab_url(filename, folder)})"
|
| 24 |
|
| 25 |
|
| 26 |
+
def process_notebook(path: Path, folder: str) -> bool:
|
| 27 |
with open(path, "r", encoding="utf-8") as f:
|
| 28 |
nb = json.load(f)
|
| 29 |
|
|
|
|
| 36 |
if "colab-badge.svg" in flat:
|
| 37 |
return False
|
| 38 |
|
| 39 |
+
badge = badge_markdown(path.name, folder)
|
| 40 |
cells[0]["source"] = [badge + "\n\n"] + (
|
| 41 |
source_lines if isinstance(source_lines, list) else [source_lines]
|
| 42 |
)
|
|
|
|
| 51 |
def main() -> None:
    """Add the Colab badge to every template and solution notebook.

    Walks ``TEMPLATES_DIR`` first and ``SOLUTIONS_DIR`` second, each in sorted
    file-name order, prints one status line per notebook and finishes with
    the number of notebooks that were actually modified.
    """
    # One loop for both folders instead of two copy-pasted loops; the folder
    # name is needed both for the badge URL and for the status line.
    targets = (
        ("templates", TEMPLATES_DIR),
        ("solutions", SOLUTIONS_DIR),
    )

    updated = 0
    for folder, directory in targets:
        for nb_path in sorted(directory.glob("*.ipynb")):
            if process_notebook(nb_path, folder):
                print(f" ✅ {folder}/{nb_path.name}")
                updated += 1
            else:
                print(f" ⏭️ {folder}/{nb_path.name} (already has badge or skipped)")
    print(f"\nDone — updated {updated} notebooks.")
|
| 66 |
|
| 67 |
|
scripts/add_colab_torch_judge_install.py
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""Add Colab-only pip install of torch-judge to all notebooks that use torch_judge."""
|
| 3 |
+
|
| 4 |
+
import json
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
|
| 7 |
+
# Source lines of the code cell inserted into each notebook. The cell only
# installs torch-judge when `google.colab` is importable; everywhere else the
# ImportError branch makes it a no-op. The cell body uses a standard
# four-space indent so the generated notebook code is conventionally
# formatted.
INSTALL_CELL_SOURCE = [
    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
    "try:\n",
    "    import google.colab\n",
    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
    "except ImportError:\n",
    "    pass\n",
]

# Substring that identifies an already-inserted install cell; it is matched
# together with "torch-judge" so the script can be re-run safely.
MARKER = "get_ipython().run_line_magic('pip', 'install"
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def has_torch_judge(nb: dict) -> bool:
    """Tell whether any cell of the notebook mentions ``torch_judge``.

    Args:
        nb: Parsed notebook JSON. A missing ``"cells"`` key is treated as an
            empty notebook.

    Returns:
        True if the text ``torch_judge`` occurs in the source of at least one
        cell, otherwise False.
    """

    def _cell_text(cell: dict) -> str:
        # A cell source is either a list of lines or a single string.
        source = cell.get("source", [])
        return "".join(source) if isinstance(source, list) else str(source)

    return any("torch_judge" in _cell_text(cell) for cell in nb.get("cells", []))
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def already_has_install(nb: dict) -> bool:
    """Tell whether the notebook already contains the install cell.

    A cell counts as the install cell when its source contains both
    ``MARKER`` (the ``run_line_magic('pip', 'install`` call) and the package
    name ``torch-judge``.

    Args:
        nb: Parsed notebook JSON. A missing ``"cells"`` key is treated as an
            empty notebook.

    Returns:
        True if such a cell exists, otherwise False.
    """
    for entry in nb.get("cells", []):
        raw = entry.get("source", [])
        # A cell source is either a list of lines or a single string.
        if isinstance(raw, list):
            text = "".join(raw)
        else:
            text = str(raw)

        if MARKER not in text:
            continue
        if "torch-judge" in text:
            return True
    return False
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
def process_notebook(path: Path) -> bool:
    """Insert the Colab-only torch-judge install cell into one notebook.

    The notebook is rewritten in place only when it uses ``torch_judge`` and
    does not already contain the install cell, so running the script twice
    is harmless.

    Args:
        path: Path of the ``.ipynb`` file to process.

    Returns:
        True if the file was modified, False if it was left untouched.
    """
    import uuid  # local import: only needed to mint an id for the new cell

    with open(path, "r", encoding="utf-8") as f:
        nb = json.load(f)

    if not has_torch_judge(nb):
        return False
    if already_has_install(nb):
        return False

    cells = nb["cells"]
    if not cells:
        return False

    # Insert install cell at index 1 (after first cell, usually markdown title)
    install_cell = {
        "cell_type": "code",
        "metadata": {},
        # Copy the template so the notebook dict never aliases (and can never
        # mutate) the module-level INSTALL_CELL_SOURCE list.
        "source": list(INSTALL_CELL_SOURCE),
        "outputs": [],
        "execution_count": None,
    }

    # The nbformat 4.5+ schema requires every cell to carry a unique "id".
    # Only add one when the notebook declares such a version, so notebooks in
    # older formats are not given a field their schema does not define.
    declared_version = (nb.get("nbformat", 0), nb.get("nbformat_minor", 0))
    if declared_version >= (4, 5):
        taken_ids = {cell.get("id") for cell in cells}
        new_id = uuid.uuid4().hex[:8]
        while new_id in taken_ids:
            new_id = uuid.uuid4().hex[:8]
        install_cell["id"] = new_id

    cells.insert(1, install_cell)

    with open(path, "w", encoding="utf-8") as f:
        json.dump(nb, f, ensure_ascii=False, indent=1)
        f.write("\n")

    return True
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
def main() -> None:
    """Run the install-cell insertion over all template and solution notebooks.

    The repository root is taken to be the parent of this script's directory.
    Templates are processed before solutions, each in sorted path order; a
    line is printed for every notebook that was changed, followed by a total.
    """
    repo_root = Path(__file__).resolve().parent.parent
    patterns = ["templates/*.ipynb", "solutions/*.ipynb"]

    changed = 0
    for pattern in patterns:
        notebooks = sorted(repo_root.glob(pattern))
        for notebook in notebooks:
            if not process_notebook(notebook):
                continue
            print(f" + {notebook.relative_to(repo_root)}")
            changed += 1
    print(f"Updated {changed} notebooks.")


if __name__ == "__main__":
    main()
|
solutions/01_relu_solution.ipynb
CHANGED
|
@@ -1,73 +1,89 @@
|
|
| 1 |
{
|
| 2 |
-
"nbformat": 4,
|
| 3 |
-
"nbformat_minor": 5,
|
| 4 |
-
"metadata": {
|
| 5 |
-
"kernelspec": {
|
| 6 |
-
"display_name": "Python 3",
|
| 7 |
-
"language": "python",
|
| 8 |
-
"name": "python3"
|
| 9 |
-
},
|
| 10 |
-
"language_info": {
|
| 11 |
-
"name": "python",
|
| 12 |
-
"version": "3.11.0"
|
| 13 |
-
}
|
| 14 |
-
},
|
| 15 |
"cells": [
|
| 16 |
{
|
| 17 |
"cell_type": "markdown",
|
|
|
|
| 18 |
"metadata": {},
|
| 19 |
"source": [
|
|
|
|
|
|
|
| 20 |
"# 🟢 Solution: Implement ReLU\n",
|
| 21 |
"\n",
|
| 22 |
"Reference solution for the ReLU activation function.\n",
|
| 23 |
"\n",
|
| 24 |
"$$\\text{ReLU}(x) = \\max(0, x)$$"
|
| 25 |
-
]
|
| 26 |
-
"outputs": []
|
| 27 |
},
|
| 28 |
{
|
| 29 |
"cell_type": "code",
|
|
|
|
| 30 |
"metadata": {},
|
|
|
|
| 31 |
"source": [
|
| 32 |
-
"
|
| 33 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
"outputs": [],
|
| 35 |
-
"
|
|
|
|
|
|
|
| 36 |
},
|
| 37 |
{
|
| 38 |
"cell_type": "code",
|
|
|
|
| 39 |
"metadata": {},
|
|
|
|
| 40 |
"source": [
|
| 41 |
"# ✅ SOLUTION\n",
|
| 42 |
"\n",
|
| 43 |
"def relu(x: torch.Tensor) -> torch.Tensor:\n",
|
| 44 |
" return x * (x > 0).float()"
|
| 45 |
-
]
|
| 46 |
-
"outputs": [],
|
| 47 |
-
"execution_count": null
|
| 48 |
},
|
| 49 |
{
|
| 50 |
"cell_type": "code",
|
|
|
|
| 51 |
"metadata": {},
|
|
|
|
| 52 |
"source": [
|
| 53 |
"# Verify\n",
|
| 54 |
"x = torch.tensor([-2., -1., 0., 1., 2.])\n",
|
| 55 |
"print(\"Input: \", x)\n",
|
| 56 |
"print(\"Output:\", relu(x))"
|
| 57 |
-
]
|
| 58 |
-
"outputs": [],
|
| 59 |
-
"execution_count": null
|
| 60 |
},
|
| 61 |
{
|
| 62 |
"cell_type": "code",
|
|
|
|
| 63 |
"metadata": {},
|
|
|
|
| 64 |
"source": [
|
| 65 |
"# Run judge\n",
|
| 66 |
"from torch_judge import check\n",
|
| 67 |
"check(\"relu\")"
|
| 68 |
-
]
|
| 69 |
-
"outputs": [],
|
| 70 |
-
"execution_count": null
|
| 71 |
}
|
| 72 |
-
]
|
| 73 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
{
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
"cells": [
|
| 3 |
{
|
| 4 |
"cell_type": "markdown",
|
| 5 |
+
"id": "0556419b",
|
| 6 |
"metadata": {},
|
| 7 |
"source": [
|
| 8 |
+
"[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/01_relu_solution.ipynb)\n",
|
| 9 |
+
"\n",
|
| 10 |
"# 🟢 Solution: Implement ReLU\n",
|
| 11 |
"\n",
|
| 12 |
"Reference solution for the ReLU activation function.\n",
|
| 13 |
"\n",
|
| 14 |
"$$\\text{ReLU}(x) = \\max(0, x)$$"
|
| 15 |
+
]
|
|
|
|
| 16 |
},
|
| 17 |
{
|
| 18 |
"cell_type": "code",
|
| 19 |
+
"execution_count": null,
|
| 20 |
"metadata": {},
|
| 21 |
+
"outputs": [],
|
| 22 |
"source": [
|
| 23 |
+
"# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
|
| 24 |
+
"try:\n",
|
| 25 |
+
" import google.colab\n",
|
| 26 |
+
" get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
|
| 27 |
+
"except ImportError:\n",
|
| 28 |
+
" pass\n"
|
| 29 |
+
]
|
| 30 |
+
},
|
| 31 |
+
{
|
| 32 |
+
"cell_type": "code",
|
| 33 |
+
"execution_count": null,
|
| 34 |
+
"metadata": {},
|
| 35 |
"outputs": [],
|
| 36 |
+
"source": [
|
| 37 |
+
"import torch"
|
| 38 |
+
]
|
| 39 |
},
|
| 40 |
{
|
| 41 |
"cell_type": "code",
|
| 42 |
+
"execution_count": null,
|
| 43 |
"metadata": {},
|
| 44 |
+
"outputs": [],
|
| 45 |
"source": [
|
| 46 |
"# ✅ SOLUTION\n",
|
| 47 |
"\n",
|
| 48 |
"def relu(x: torch.Tensor) -> torch.Tensor:\n",
|
| 49 |
" return x * (x > 0).float()"
|
| 50 |
+
]
|
|
|
|
|
|
|
| 51 |
},
|
| 52 |
{
|
| 53 |
"cell_type": "code",
|
| 54 |
+
"execution_count": null,
|
| 55 |
"metadata": {},
|
| 56 |
+
"outputs": [],
|
| 57 |
"source": [
|
| 58 |
"# Verify\n",
|
| 59 |
"x = torch.tensor([-2., -1., 0., 1., 2.])\n",
|
| 60 |
"print(\"Input: \", x)\n",
|
| 61 |
"print(\"Output:\", relu(x))"
|
| 62 |
+
]
|
|
|
|
|
|
|
| 63 |
},
|
| 64 |
{
|
| 65 |
"cell_type": "code",
|
| 66 |
+
"execution_count": null,
|
| 67 |
"metadata": {},
|
| 68 |
+
"outputs": [],
|
| 69 |
"source": [
|
| 70 |
"# Run judge\n",
|
| 71 |
"from torch_judge import check\n",
|
| 72 |
"check(\"relu\")"
|
| 73 |
+
]
|
|
|
|
|
|
|
| 74 |
}
|
| 75 |
+
],
|
| 76 |
+
"metadata": {
|
| 77 |
+
"kernelspec": {
|
| 78 |
+
"display_name": "Python 3",
|
| 79 |
+
"language": "python",
|
| 80 |
+
"name": "python3"
|
| 81 |
+
},
|
| 82 |
+
"language_info": {
|
| 83 |
+
"name": "python",
|
| 84 |
+
"version": "3.11.0"
|
| 85 |
+
}
|
| 86 |
+
},
|
| 87 |
+
"nbformat": 4,
|
| 88 |
+
"nbformat_minor": 5
|
| 89 |
+
}
|
solutions/02_softmax_solution.ipynb
CHANGED
|
@@ -17,6 +17,7 @@
|
|
| 17 |
"cell_type": "markdown",
|
| 18 |
"metadata": {},
|
| 19 |
"source": [
|
|
|
|
| 20 |
"# 🟢 Solution: Implement Softmax\n",
|
| 21 |
"\n",
|
| 22 |
"Reference solution for the numerically-stable Softmax function.\n",
|
|
@@ -25,6 +26,20 @@
|
|
| 25 |
],
|
| 26 |
"outputs": []
|
| 27 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
{
|
| 29 |
"cell_type": "code",
|
| 30 |
"metadata": {},
|
|
@@ -73,4 +88,4 @@
|
|
| 73 |
"execution_count": null
|
| 74 |
}
|
| 75 |
]
|
| 76 |
-
}
|
|
|
|
| 17 |
"cell_type": "markdown",
|
| 18 |
"metadata": {},
|
| 19 |
"source": [
|
| 20 |
+
"[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/02_softmax_solution.ipynb)\n\n",
|
| 21 |
"# 🟢 Solution: Implement Softmax\n",
|
| 22 |
"\n",
|
| 23 |
"Reference solution for the numerically-stable Softmax function.\n",
|
|
|
|
| 26 |
],
|
| 27 |
"outputs": []
|
| 28 |
},
|
| 29 |
+
{
|
| 30 |
+
"cell_type": "code",
|
| 31 |
+
"metadata": {},
|
| 32 |
+
"source": [
|
| 33 |
+
"# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
|
| 34 |
+
"try:\n",
|
| 35 |
+
" import google.colab\n",
|
| 36 |
+
" get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
|
| 37 |
+
"except ImportError:\n",
|
| 38 |
+
" pass\n"
|
| 39 |
+
],
|
| 40 |
+
"outputs": [],
|
| 41 |
+
"execution_count": null
|
| 42 |
+
},
|
| 43 |
{
|
| 44 |
"cell_type": "code",
|
| 45 |
"metadata": {},
|
|
|
|
| 88 |
"execution_count": null
|
| 89 |
}
|
| 90 |
]
|
| 91 |
+
}
|
solutions/03_linear_solution.ipynb
CHANGED
|
@@ -17,12 +17,27 @@
|
|
| 17 |
"cell_type": "markdown",
|
| 18 |
"metadata": {},
|
| 19 |
"source": [
|
|
|
|
| 20 |
"# 🟡 Solution: Simple Linear Layer\n",
|
| 21 |
"\n",
|
| 22 |
"Reference solution for a fully-connected linear layer: **y = xW^T + b**"
|
| 23 |
],
|
| 24 |
"outputs": []
|
| 25 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
{
|
| 27 |
"cell_type": "code",
|
| 28 |
"metadata": {},
|
|
@@ -77,4 +92,4 @@
|
|
| 77 |
"execution_count": null
|
| 78 |
}
|
| 79 |
]
|
| 80 |
-
}
|
|
|
|
| 17 |
"cell_type": "markdown",
|
| 18 |
"metadata": {},
|
| 19 |
"source": [
|
| 20 |
+
"[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/03_linear_solution.ipynb)\n\n",
|
| 21 |
"# 🟡 Solution: Simple Linear Layer\n",
|
| 22 |
"\n",
|
| 23 |
"Reference solution for a fully-connected linear layer: **y = xW^T + b**"
|
| 24 |
],
|
| 25 |
"outputs": []
|
| 26 |
},
|
| 27 |
+
{
|
| 28 |
+
"cell_type": "code",
|
| 29 |
+
"metadata": {},
|
| 30 |
+
"source": [
|
| 31 |
+
"# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
|
| 32 |
+
"try:\n",
|
| 33 |
+
" import google.colab\n",
|
| 34 |
+
" get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
|
| 35 |
+
"except ImportError:\n",
|
| 36 |
+
" pass\n"
|
| 37 |
+
],
|
| 38 |
+
"outputs": [],
|
| 39 |
+
"execution_count": null
|
| 40 |
+
},
|
| 41 |
{
|
| 42 |
"cell_type": "code",
|
| 43 |
"metadata": {},
|
|
|
|
| 92 |
"execution_count": null
|
| 93 |
}
|
| 94 |
]
|
| 95 |
+
}
|
solutions/04_layernorm_solution.ipynb
CHANGED
|
@@ -17,6 +17,7 @@
|
|
| 17 |
"cell_type": "markdown",
|
| 18 |
"metadata": {},
|
| 19 |
"source": [
|
|
|
|
| 20 |
"# 🟡 Solution: Implement LayerNorm\n",
|
| 21 |
"\n",
|
| 22 |
"Reference solution for Layer Normalization.\n",
|
|
@@ -25,6 +26,20 @@
|
|
| 25 |
],
|
| 26 |
"outputs": []
|
| 27 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
{
|
| 29 |
"cell_type": "code",
|
| 30 |
"metadata": {},
|
|
@@ -76,4 +91,4 @@
|
|
| 76 |
"execution_count": null
|
| 77 |
}
|
| 78 |
]
|
| 79 |
-
}
|
|
|
|
| 17 |
"cell_type": "markdown",
|
| 18 |
"metadata": {},
|
| 19 |
"source": [
|
| 20 |
+
"[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/04_layernorm_solution.ipynb)\n\n",
|
| 21 |
"# 🟡 Solution: Implement LayerNorm\n",
|
| 22 |
"\n",
|
| 23 |
"Reference solution for Layer Normalization.\n",
|
|
|
|
| 26 |
],
|
| 27 |
"outputs": []
|
| 28 |
},
|
| 29 |
+
{
|
| 30 |
+
"cell_type": "code",
|
| 31 |
+
"metadata": {},
|
| 32 |
+
"source": [
|
| 33 |
+
"# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
|
| 34 |
+
"try:\n",
|
| 35 |
+
" import google.colab\n",
|
| 36 |
+
" get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
|
| 37 |
+
"except ImportError:\n",
|
| 38 |
+
" pass\n"
|
| 39 |
+
],
|
| 40 |
+
"outputs": [],
|
| 41 |
+
"execution_count": null
|
| 42 |
+
},
|
| 43 |
{
|
| 44 |
"cell_type": "code",
|
| 45 |
"metadata": {},
|
|
|
|
| 91 |
"execution_count": null
|
| 92 |
}
|
| 93 |
]
|
| 94 |
+
}
|
solutions/05_attention_solution.ipynb
CHANGED
|
@@ -5,6 +5,7 @@
|
|
| 5 |
"id": "5f63d076",
|
| 6 |
"metadata": {},
|
| 7 |
"source": [
|
|
|
|
| 8 |
"# 🔴 Solution: Softmax Attention\n",
|
| 9 |
"\n",
|
| 10 |
"Reference solution for the core Transformer attention mechanism.\n",
|
|
@@ -12,6 +13,21 @@
|
|
| 12 |
"$$\\text{Attention}(Q, K, V) = \\text{softmax}\\!\\left(\\frac{QK^T}{\\sqrt{d_k}}\\right)V$$"
|
| 13 |
]
|
| 14 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
{
|
| 16 |
"cell_type": "code",
|
| 17 |
"execution_count": null,
|
|
|
|
| 5 |
"id": "5f63d076",
|
| 6 |
"metadata": {},
|
| 7 |
"source": [
|
| 8 |
+
"[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/05_attention_solution.ipynb)\n\n",
|
| 9 |
"# 🔴 Solution: Softmax Attention\n",
|
| 10 |
"\n",
|
| 11 |
"Reference solution for the core Transformer attention mechanism.\n",
|
|
|
|
| 13 |
"$$\\text{Attention}(Q, K, V) = \\text{softmax}\\!\\left(\\frac{QK^T}{\\sqrt{d_k}}\\right)V$$"
|
| 14 |
]
|
| 15 |
},
|
| 16 |
+
{
|
| 17 |
+
"cell_type": "code",
|
| 18 |
+
"execution_count": null,
|
| 19 |
+
"id": "ce663fb0",
|
| 20 |
+
"metadata": {},
|
| 21 |
+
"outputs": [],
|
| 22 |
+
"source": [
|
| 23 |
+
"# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
|
| 24 |
+
"try:\n",
|
| 25 |
+
" import google.colab\n",
|
| 26 |
+
" get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
|
| 27 |
+
"except ImportError:\n",
|
| 28 |
+
" pass\n"
|
| 29 |
+
]
|
| 30 |
+
},
|
| 31 |
{
|
| 32 |
"cell_type": "code",
|
| 33 |
"execution_count": null,
|
solutions/06_multihead_attention_solution.ipynb
CHANGED
|
@@ -1,105 +1,120 @@
|
|
| 1 |
{
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
{
|
| 15 |
-
"cell_type": "code",
|
| 16 |
-
"execution_count": null,
|
| 17 |
-
"metadata": {},
|
| 18 |
-
"outputs": [],
|
| 19 |
-
"source": [
|
| 20 |
-
"import torch\n",
|
| 21 |
-
"import torch.nn as nn\n",
|
| 22 |
-
"import math"
|
| 23 |
-
]
|
| 24 |
-
},
|
| 25 |
-
{
|
| 26 |
-
"cell_type": "code",
|
| 27 |
-
"execution_count": null,
|
| 28 |
-
"id": "46b73737",
|
| 29 |
-
"metadata": {},
|
| 30 |
-
"outputs": [],
|
| 31 |
-
"source": [
|
| 32 |
-
"# ✅ SOLUTION\n",
|
| 33 |
-
"\n",
|
| 34 |
-
"class MultiHeadAttention:\n",
|
| 35 |
-
" def __init__(self, d_model: int, num_heads: int):\n",
|
| 36 |
-
" self.num_heads = num_heads\n",
|
| 37 |
-
" self.d_k = d_model // num_heads\n",
|
| 38 |
-
"\n",
|
| 39 |
-
" self.W_q = nn.Linear(d_model, d_model)\n",
|
| 40 |
-
" self.W_k = nn.Linear(d_model, d_model)\n",
|
| 41 |
-
" self.W_v = nn.Linear(d_model, d_model)\n",
|
| 42 |
-
" self.W_o = nn.Linear(d_model, d_model)\n",
|
| 43 |
-
"\n",
|
| 44 |
-
" def forward(self, Q, K, V):\n",
|
| 45 |
-
" B, S_q, _ = Q.shape\n",
|
| 46 |
-
" S_k = K.shape[1]\n",
|
| 47 |
-
"\n",
|
| 48 |
-
" q = self.W_q(Q).view(B, S_q, self.num_heads, self.d_k).transpose(1, 2)\n",
|
| 49 |
-
" k = self.W_k(K).view(B, S_k, self.num_heads, self.d_k).transpose(1, 2)\n",
|
| 50 |
-
" v = self.W_v(V).view(B, S_k, self.num_heads, self.d_k).transpose(1, 2)\n",
|
| 51 |
-
"\n",
|
| 52 |
-
" scores = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(self.d_k)\n",
|
| 53 |
-
" weights = torch.softmax(scores, dim=-1)\n",
|
| 54 |
-
" attn = torch.matmul(weights, v)\n",
|
| 55 |
-
"\n",
|
| 56 |
-
" out = attn.transpose(1, 2).contiguous().view(B, S_q, -1)\n",
|
| 57 |
-
" return self.W_o(out)"
|
| 58 |
-
]
|
| 59 |
-
},
|
| 60 |
-
{
|
| 61 |
-
"cell_type": "code",
|
| 62 |
-
"execution_count": null,
|
| 63 |
-
"metadata": {},
|
| 64 |
-
"outputs": [],
|
| 65 |
-
"source": [
|
| 66 |
-
"# Verify\n",
|
| 67 |
-
"torch.manual_seed(0)\n",
|
| 68 |
-
"mha = MultiHeadAttention(d_model=32, num_heads=4)\n",
|
| 69 |
-
"x = torch.randn(2, 6, 32)\n",
|
| 70 |
-
"out = mha.forward(x, x, x)\n",
|
| 71 |
-
"print(\"Self-attn shape:\", out.shape)\n",
|
| 72 |
-
"\n",
|
| 73 |
-
"Q = torch.randn(1, 3, 32)\n",
|
| 74 |
-
"K = torch.randn(1, 7, 32)\n",
|
| 75 |
-
"V = torch.randn(1, 7, 32)\n",
|
| 76 |
-
"out2 = mha.forward(Q, K, V)\n",
|
| 77 |
-
"print(\"Cross-attn shape:\", out2.shape)"
|
| 78 |
-
]
|
| 79 |
-
},
|
| 80 |
-
{
|
| 81 |
-
"cell_type": "code",
|
| 82 |
-
"execution_count": null,
|
| 83 |
-
"metadata": {},
|
| 84 |
-
"outputs": [],
|
| 85 |
-
"source": [
|
| 86 |
-
"# Run judge\n",
|
| 87 |
-
"from torch_judge import check\n",
|
| 88 |
-
"check(\"mha\")"
|
| 89 |
-
]
|
| 90 |
-
}
|
| 91 |
-
],
|
| 92 |
-
"metadata": {
|
| 93 |
-
"kernelspec": {
|
| 94 |
-
"display_name": "Python 3",
|
| 95 |
-
"language": "python",
|
| 96 |
-
"name": "python3"
|
| 97 |
-
},
|
| 98 |
-
"language_info": {
|
| 99 |
-
"name": "python",
|
| 100 |
-
"version": "3.11.0"
|
| 101 |
-
}
|
| 102 |
},
|
| 103 |
-
|
| 104 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "markdown",
|
| 5 |
+
"metadata": {},
|
| 6 |
+
"source": [
|
| 7 |
+
"[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/06_multihead_attention_solution.ipynb)\n\n",
|
| 8 |
+
"# 🔴 Solution: Multi-Head Attention\n",
|
| 9 |
+
"\n",
|
| 10 |
+
"Reference solution for the Multi-Head Attention mechanism.\n",
|
| 11 |
+
"\n",
|
| 12 |
+
"$$\\text{MultiHead}(Q, K, V) = \\text{Concat}(\\text{head}_1, \\dots, \\text{head}_h) W^O$$"
|
| 13 |
+
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
},
|
| 15 |
+
{
|
| 16 |
+
"cell_type": "code",
|
| 17 |
+
"metadata": {},
|
| 18 |
+
"source": [
|
| 19 |
+
"# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
|
| 20 |
+
"try:\n",
|
| 21 |
+
" import google.colab\n",
|
| 22 |
+
" get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
|
| 23 |
+
"except ImportError:\n",
|
| 24 |
+
" pass\n"
|
| 25 |
+
],
|
| 26 |
+
"outputs": [],
|
| 27 |
+
"execution_count": null
|
| 28 |
+
},
|
| 29 |
+
{
|
| 30 |
+
"cell_type": "code",
|
| 31 |
+
"execution_count": null,
|
| 32 |
+
"metadata": {},
|
| 33 |
+
"outputs": [],
|
| 34 |
+
"source": [
|
| 35 |
+
"import torch\n",
|
| 36 |
+
"import torch.nn as nn\n",
|
| 37 |
+
"import math"
|
| 38 |
+
]
|
| 39 |
+
},
|
| 40 |
+
{
|
| 41 |
+
"cell_type": "code",
|
| 42 |
+
"execution_count": null,
|
| 43 |
+
"id": "46b73737",
|
| 44 |
+
"metadata": {},
|
| 45 |
+
"outputs": [],
|
| 46 |
+
"source": [
|
| 47 |
+
"# ✅ SOLUTION\n",
|
| 48 |
+
"\n",
|
| 49 |
+
"class MultiHeadAttention:\n",
|
| 50 |
+
" def __init__(self, d_model: int, num_heads: int):\n",
|
| 51 |
+
" self.num_heads = num_heads\n",
|
| 52 |
+
" self.d_k = d_model // num_heads\n",
|
| 53 |
+
"\n",
|
| 54 |
+
" self.W_q = nn.Linear(d_model, d_model)\n",
|
| 55 |
+
" self.W_k = nn.Linear(d_model, d_model)\n",
|
| 56 |
+
" self.W_v = nn.Linear(d_model, d_model)\n",
|
| 57 |
+
" self.W_o = nn.Linear(d_model, d_model)\n",
|
| 58 |
+
"\n",
|
| 59 |
+
" def forward(self, Q, K, V):\n",
|
| 60 |
+
" B, S_q, _ = Q.shape\n",
|
| 61 |
+
" S_k = K.shape[1]\n",
|
| 62 |
+
"\n",
|
| 63 |
+
" q = self.W_q(Q).view(B, S_q, self.num_heads, self.d_k).transpose(1, 2)\n",
|
| 64 |
+
" k = self.W_k(K).view(B, S_k, self.num_heads, self.d_k).transpose(1, 2)\n",
|
| 65 |
+
" v = self.W_v(V).view(B, S_k, self.num_heads, self.d_k).transpose(1, 2)\n",
|
| 66 |
+
"\n",
|
| 67 |
+
" scores = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(self.d_k)\n",
|
| 68 |
+
" weights = torch.softmax(scores, dim=-1)\n",
|
| 69 |
+
" attn = torch.matmul(weights, v)\n",
|
| 70 |
+
"\n",
|
| 71 |
+
" out = attn.transpose(1, 2).contiguous().view(B, S_q, -1)\n",
|
| 72 |
+
" return self.W_o(out)"
|
| 73 |
+
]
|
| 74 |
+
},
|
| 75 |
+
{
|
| 76 |
+
"cell_type": "code",
|
| 77 |
+
"execution_count": null,
|
| 78 |
+
"metadata": {},
|
| 79 |
+
"outputs": [],
|
| 80 |
+
"source": [
|
| 81 |
+
"# Verify\n",
|
| 82 |
+
"torch.manual_seed(0)\n",
|
| 83 |
+
"mha = MultiHeadAttention(d_model=32, num_heads=4)\n",
|
| 84 |
+
"x = torch.randn(2, 6, 32)\n",
|
| 85 |
+
"out = mha.forward(x, x, x)\n",
|
| 86 |
+
"print(\"Self-attn shape:\", out.shape)\n",
|
| 87 |
+
"\n",
|
| 88 |
+
"Q = torch.randn(1, 3, 32)\n",
|
| 89 |
+
"K = torch.randn(1, 7, 32)\n",
|
| 90 |
+
"V = torch.randn(1, 7, 32)\n",
|
| 91 |
+
"out2 = mha.forward(Q, K, V)\n",
|
| 92 |
+
"print(\"Cross-attn shape:\", out2.shape)"
|
| 93 |
+
]
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"cell_type": "code",
|
| 97 |
+
"execution_count": null,
|
| 98 |
+
"metadata": {},
|
| 99 |
+
"outputs": [],
|
| 100 |
+
"source": [
|
| 101 |
+
"# Run judge\n",
|
| 102 |
+
"from torch_judge import check\n",
|
| 103 |
+
"check(\"mha\")"
|
| 104 |
+
]
|
| 105 |
+
}
|
| 106 |
+
],
|
| 107 |
+
"metadata": {
|
| 108 |
+
"kernelspec": {
|
| 109 |
+
"display_name": "Python 3",
|
| 110 |
+
"language": "python",
|
| 111 |
+
"name": "python3"
|
| 112 |
+
},
|
| 113 |
+
"language_info": {
|
| 114 |
+
"name": "python",
|
| 115 |
+
"version": "3.11.0"
|
| 116 |
+
}
|
| 117 |
+
},
|
| 118 |
+
"nbformat": 4,
|
| 119 |
+
"nbformat_minor": 5
|
| 120 |
}
|
solutions/07_batchnorm_solution.ipynb
CHANGED
|
@@ -1,117 +1,132 @@
|
|
| 1 |
{
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
{
|
| 14 |
-
"cell_type": "code",
|
| 15 |
-
"execution_count": null,
|
| 16 |
-
"metadata": {},
|
| 17 |
-
"outputs": [],
|
| 18 |
-
"source": [
|
| 19 |
-
"import torch"
|
| 20 |
-
]
|
| 21 |
-
},
|
| 22 |
-
{
|
| 23 |
-
"cell_type": "code",
|
| 24 |
-
"execution_count": null,
|
| 25 |
-
"id": "70488b9f",
|
| 26 |
-
"metadata": {},
|
| 27 |
-
"outputs": [],
|
| 28 |
-
"source": [
|
| 29 |
-
"# ✅ SOLUTION\n",
|
| 30 |
-
"\n",
|
| 31 |
-
"import torch\n",
|
| 32 |
-
"\n",
|
| 33 |
-
"def my_batch_norm(\n",
|
| 34 |
-
" x,\n",
|
| 35 |
-
" gamma,\n",
|
| 36 |
-
" beta,\n",
|
| 37 |
-
" running_mean,\n",
|
| 38 |
-
" running_var,\n",
|
| 39 |
-
" eps=1e-5,\n",
|
| 40 |
-
" momentum=0.1,\n",
|
| 41 |
-
" training=True,\n",
|
| 42 |
-
"):\n",
|
| 43 |
-
" \"\"\"BatchNorm with train/eval behavior and running stats.\n",
|
| 44 |
-
"\n",
|
| 45 |
-
" - Training: use batch stats, update running_mean / running_var in-place.\n",
|
| 46 |
-
" - Inference: use running_mean / running_var as-is.\n",
|
| 47 |
-
" \"\"\"\n",
|
| 48 |
-
" if training:\n",
|
| 49 |
-
" batch_mean = x.mean(dim=0)\n",
|
| 50 |
-
" batch_var = x.var(dim=0, unbiased=False)\n",
|
| 51 |
-
"\n",
|
| 52 |
-
" # Update running statistics in-place. Detach to avoid tracking gradients.\n",
|
| 53 |
-
" running_mean.mul_(1 - momentum).add_(momentum * batch_mean.detach())\n",
|
| 54 |
-
" running_var.mul_(1 - momentum).add_(momentum * batch_var.detach())\n",
|
| 55 |
-
"\n",
|
| 56 |
-
" mean = batch_mean\n",
|
| 57 |
-
" var = batch_var\n",
|
| 58 |
-
" else:\n",
|
| 59 |
-
" mean = running_mean\n",
|
| 60 |
-
" var = running_var\n",
|
| 61 |
-
"\n",
|
| 62 |
-
" x_norm = (x - mean) / torch.sqrt(var + eps)\n",
|
| 63 |
-
" return gamma * x_norm + beta"
|
| 64 |
-
]
|
| 65 |
-
},
|
| 66 |
-
{
|
| 67 |
-
"cell_type": "code",
|
| 68 |
-
"execution_count": null,
|
| 69 |
-
"id": "dbd7bb4e",
|
| 70 |
-
"metadata": {},
|
| 71 |
-
"outputs": [],
|
| 72 |
-
"source": [
|
| 73 |
-
"# Verify\n",
|
| 74 |
-
"x = torch.randn(8, 4)\n",
|
| 75 |
-
"gamma = torch.ones(4)\n",
|
| 76 |
-
"beta = torch.zeros(4)\n",
|
| 77 |
-
"\n",
|
| 78 |
-
"running_mean = torch.zeros(4)\n",
|
| 79 |
-
"running_var = torch.ones(4)\n",
|
| 80 |
-
"\n",
|
| 81 |
-
"# Training behavior: normalize with batch stats and update running stats\n",
|
| 82 |
-
"out_train = my_batch_norm(x, gamma, beta, running_mean, running_var, training=True)\n",
|
| 83 |
-
"print(\"[Train] Column means:\", out_train.mean(dim=0))\n",
|
| 84 |
-
"print(\"[Train] Column stds: \", out_train.std(dim=0))\n",
|
| 85 |
-
"print(\"Updated running_mean:\", running_mean)\n",
|
| 86 |
-
"print(\"Updated running_var:\", running_var)\n",
|
| 87 |
-
"\n",
|
| 88 |
-
"# Inference behavior: use running_mean / running_var only\n",
|
| 89 |
-
"out_eval = my_batch_norm(x, gamma, beta, running_mean, running_var, training=False)\n",
|
| 90 |
-
"print(\"[Eval] Column means (using running stats):\", out_eval.mean(dim=0))"
|
| 91 |
-
]
|
| 92 |
-
},
|
| 93 |
-
{
|
| 94 |
-
"cell_type": "code",
|
| 95 |
-
"execution_count": null,
|
| 96 |
-
"metadata": {},
|
| 97 |
-
"outputs": [],
|
| 98 |
-
"source": [
|
| 99 |
-
"from torch_judge import check\n",
|
| 100 |
-
"check('batchnorm')"
|
| 101 |
-
]
|
| 102 |
-
}
|
| 103 |
-
],
|
| 104 |
-
"metadata": {
|
| 105 |
-
"kernelspec": {
|
| 106 |
-
"display_name": "Python 3",
|
| 107 |
-
"language": "python",
|
| 108 |
-
"name": "python3"
|
| 109 |
-
},
|
| 110 |
-
"language_info": {
|
| 111 |
-
"name": "python",
|
| 112 |
-
"version": "3.11.0"
|
| 113 |
-
}
|
| 114 |
},
|
| 115 |
-
|
| 116 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "markdown",
|
| 5 |
+
"id": "ffd42526",
|
| 6 |
+
"metadata": {},
|
| 7 |
+
"source": [
|
| 8 |
+
"[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/07_batchnorm_solution.ipynb)\n\n",
|
| 9 |
+
"# 🟡 Solution: Implement BatchNorm\n",
|
| 10 |
+
"\n",
|
| 11 |
+
"Reference solution for Batch Normalization with both **training** and **inference** behavior, including running mean/variance updates."
|
| 12 |
+
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
},
|
| 14 |
+
{
|
| 15 |
+
"cell_type": "code",
|
| 16 |
+
"metadata": {},
|
| 17 |
+
"source": [
|
| 18 |
+
"# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
|
| 19 |
+
"try:\n",
|
| 20 |
+
" import google.colab\n",
|
| 21 |
+
" get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
|
| 22 |
+
"except ImportError:\n",
|
| 23 |
+
" pass\n"
|
| 24 |
+
],
|
| 25 |
+
"outputs": [],
|
| 26 |
+
"execution_count": null
|
| 27 |
+
},
|
| 28 |
+
{
|
| 29 |
+
"cell_type": "code",
|
| 30 |
+
"execution_count": null,
|
| 31 |
+
"metadata": {},
|
| 32 |
+
"outputs": [],
|
| 33 |
+
"source": [
|
| 34 |
+
"import torch"
|
| 35 |
+
]
|
| 36 |
+
},
|
| 37 |
+
{
|
| 38 |
+
"cell_type": "code",
|
| 39 |
+
"execution_count": null,
|
| 40 |
+
"id": "70488b9f",
|
| 41 |
+
"metadata": {},
|
| 42 |
+
"outputs": [],
|
| 43 |
+
"source": [
|
| 44 |
+
"# ✅ SOLUTION\n",
|
| 45 |
+
"\n",
|
| 46 |
+
"import torch\n",
|
| 47 |
+
"\n",
|
| 48 |
+
"def my_batch_norm(\n",
|
| 49 |
+
" x,\n",
|
| 50 |
+
" gamma,\n",
|
| 51 |
+
" beta,\n",
|
| 52 |
+
" running_mean,\n",
|
| 53 |
+
" running_var,\n",
|
| 54 |
+
" eps=1e-5,\n",
|
| 55 |
+
" momentum=0.1,\n",
|
| 56 |
+
" training=True,\n",
|
| 57 |
+
"):\n",
|
| 58 |
+
" \"\"\"BatchNorm with train/eval behavior and running stats.\n",
|
| 59 |
+
"\n",
|
| 60 |
+
" - Training: use batch stats, update running_mean / running_var in-place.\n",
|
| 61 |
+
" - Inference: use running_mean / running_var as-is.\n",
|
| 62 |
+
" \"\"\"\n",
|
| 63 |
+
" if training:\n",
|
| 64 |
+
" batch_mean = x.mean(dim=0)\n",
|
| 65 |
+
" batch_var = x.var(dim=0, unbiased=False)\n",
|
| 66 |
+
"\n",
|
| 67 |
+
" # Update running statistics in-place. Detach to avoid tracking gradients.\n",
|
| 68 |
+
" running_mean.mul_(1 - momentum).add_(momentum * batch_mean.detach())\n",
|
| 69 |
+
" running_var.mul_(1 - momentum).add_(momentum * batch_var.detach())\n",
|
| 70 |
+
"\n",
|
| 71 |
+
" mean = batch_mean\n",
|
| 72 |
+
" var = batch_var\n",
|
| 73 |
+
" else:\n",
|
| 74 |
+
" mean = running_mean\n",
|
| 75 |
+
" var = running_var\n",
|
| 76 |
+
"\n",
|
| 77 |
+
" x_norm = (x - mean) / torch.sqrt(var + eps)\n",
|
| 78 |
+
" return gamma * x_norm + beta"
|
| 79 |
+
]
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"cell_type": "code",
|
| 83 |
+
"execution_count": null,
|
| 84 |
+
"id": "dbd7bb4e",
|
| 85 |
+
"metadata": {},
|
| 86 |
+
"outputs": [],
|
| 87 |
+
"source": [
|
| 88 |
+
"# Verify\n",
|
| 89 |
+
"x = torch.randn(8, 4)\n",
|
| 90 |
+
"gamma = torch.ones(4)\n",
|
| 91 |
+
"beta = torch.zeros(4)\n",
|
| 92 |
+
"\n",
|
| 93 |
+
"running_mean = torch.zeros(4)\n",
|
| 94 |
+
"running_var = torch.ones(4)\n",
|
| 95 |
+
"\n",
|
| 96 |
+
"# Training behavior: normalize with batch stats and update running stats\n",
|
| 97 |
+
"out_train = my_batch_norm(x, gamma, beta, running_mean, running_var, training=True)\n",
|
| 98 |
+
"print(\"[Train] Column means:\", out_train.mean(dim=0))\n",
|
| 99 |
+
"print(\"[Train] Column stds: \", out_train.std(dim=0))\n",
|
| 100 |
+
"print(\"Updated running_mean:\", running_mean)\n",
|
| 101 |
+
"print(\"Updated running_var:\", running_var)\n",
|
| 102 |
+
"\n",
|
| 103 |
+
"# Inference behavior: use running_mean / running_var only\n",
|
| 104 |
+
"out_eval = my_batch_norm(x, gamma, beta, running_mean, running_var, training=False)\n",
|
| 105 |
+
"print(\"[Eval] Column means (using running stats):\", out_eval.mean(dim=0))"
|
| 106 |
+
]
|
| 107 |
+
},
|
| 108 |
+
{
|
| 109 |
+
"cell_type": "code",
|
| 110 |
+
"execution_count": null,
|
| 111 |
+
"metadata": {},
|
| 112 |
+
"outputs": [],
|
| 113 |
+
"source": [
|
| 114 |
+
"from torch_judge import check\n",
|
| 115 |
+
"check('batchnorm')"
|
| 116 |
+
]
|
| 117 |
+
}
|
| 118 |
+
],
|
| 119 |
+
"metadata": {
|
| 120 |
+
"kernelspec": {
|
| 121 |
+
"display_name": "Python 3",
|
| 122 |
+
"language": "python",
|
| 123 |
+
"name": "python3"
|
| 124 |
+
},
|
| 125 |
+
"language_info": {
|
| 126 |
+
"name": "python",
|
| 127 |
+
"version": "3.11.0"
|
| 128 |
+
}
|
| 129 |
+
},
|
| 130 |
+
"nbformat": 4,
|
| 131 |
+
"nbformat_minor": 5
|
| 132 |
}
|
solutions/08_rmsnorm_solution.ipynb
CHANGED
|
@@ -17,12 +17,27 @@
|
|
| 17 |
"cell_type": "markdown",
|
| 18 |
"metadata": {},
|
| 19 |
"source": [
|
|
|
|
| 20 |
"# 🟡 Solution: Implement RMSNorm\n",
|
| 21 |
"\n",
|
| 22 |
"Reference solution for Root Mean Square Normalization."
|
| 23 |
],
|
| 24 |
"outputs": []
|
| 25 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
{
|
| 27 |
"cell_type": "code",
|
| 28 |
"metadata": {},
|
|
@@ -67,4 +82,4 @@
|
|
| 67 |
"execution_count": null
|
| 68 |
}
|
| 69 |
]
|
| 70 |
-
}
|
|
|
|
| 17 |
"cell_type": "markdown",
|
| 18 |
"metadata": {},
|
| 19 |
"source": [
|
| 20 |
+
"[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/08_rmsnorm_solution.ipynb)\n\n",
|
| 21 |
"# 🟡 Solution: Implement RMSNorm\n",
|
| 22 |
"\n",
|
| 23 |
"Reference solution for Root Mean Square Normalization."
|
| 24 |
],
|
| 25 |
"outputs": []
|
| 26 |
},
|
| 27 |
+
{
|
| 28 |
+
"cell_type": "code",
|
| 29 |
+
"metadata": {},
|
| 30 |
+
"source": [
|
| 31 |
+
"# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
|
| 32 |
+
"try:\n",
|
| 33 |
+
" import google.colab\n",
|
| 34 |
+
" get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
|
| 35 |
+
"except ImportError:\n",
|
| 36 |
+
" pass\n"
|
| 37 |
+
],
|
| 38 |
+
"outputs": [],
|
| 39 |
+
"execution_count": null
|
| 40 |
+
},
|
| 41 |
{
|
| 42 |
"cell_type": "code",
|
| 43 |
"metadata": {},
|
|
|
|
| 82 |
"execution_count": null
|
| 83 |
}
|
| 84 |
]
|
| 85 |
+
}
|
solutions/09_causal_attention_solution.ipynb
CHANGED
|
@@ -17,12 +17,27 @@
|
|
| 17 |
"cell_type": "markdown",
|
| 18 |
"metadata": {},
|
| 19 |
"source": [
|
|
|
|
| 20 |
"# 🔴 Solution: Causal Self-Attention\n",
|
| 21 |
"\n",
|
| 22 |
"Reference solution — softmax attention with an upper-triangular mask."
|
| 23 |
],
|
| 24 |
"outputs": []
|
| 25 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
{
|
| 27 |
"cell_type": "code",
|
| 28 |
"metadata": {},
|
|
@@ -77,4 +92,4 @@
|
|
| 77 |
"execution_count": null
|
| 78 |
}
|
| 79 |
]
|
| 80 |
-
}
|
|
|
|
| 17 |
"cell_type": "markdown",
|
| 18 |
"metadata": {},
|
| 19 |
"source": [
|
| 20 |
+
"[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/09_causal_attention_solution.ipynb)\n\n",
|
| 21 |
"# 🔴 Solution: Causal Self-Attention\n",
|
| 22 |
"\n",
|
| 23 |
"Reference solution — softmax attention with an upper-triangular mask."
|
| 24 |
],
|
| 25 |
"outputs": []
|
| 26 |
},
|
| 27 |
+
{
|
| 28 |
+
"cell_type": "code",
|
| 29 |
+
"metadata": {},
|
| 30 |
+
"source": [
|
| 31 |
+
"# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
|
| 32 |
+
"try:\n",
|
| 33 |
+
" import google.colab\n",
|
| 34 |
+
" get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
|
| 35 |
+
"except ImportError:\n",
|
| 36 |
+
" pass\n"
|
| 37 |
+
],
|
| 38 |
+
"outputs": [],
|
| 39 |
+
"execution_count": null
|
| 40 |
+
},
|
| 41 |
{
|
| 42 |
"cell_type": "code",
|
| 43 |
"metadata": {},
|
|
|
|
| 92 |
"execution_count": null
|
| 93 |
}
|
| 94 |
]
|
| 95 |
+
}
|
solutions/10_gqa_solution.ipynb
CHANGED
|
@@ -17,12 +17,27 @@
|
|
| 17 |
"cell_type": "markdown",
|
| 18 |
"metadata": {},
|
| 19 |
"source": [
|
|
|
|
| 20 |
"# 🔴 Solution: Grouped Query Attention\n",
|
| 21 |
"\n",
|
| 22 |
"Reference solution for GQA — MHA with shared KV heads."
|
| 23 |
],
|
| 24 |
"outputs": []
|
| 25 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
{
|
| 27 |
"cell_type": "code",
|
| 28 |
"metadata": {},
|
|
@@ -88,4 +103,4 @@
|
|
| 88 |
"execution_count": null
|
| 89 |
}
|
| 90 |
]
|
| 91 |
-
}
|
|
|
|
| 17 |
"cell_type": "markdown",
|
| 18 |
"metadata": {},
|
| 19 |
"source": [
|
| 20 |
+
"[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/10_gqa_solution.ipynb)\n\n",
|
| 21 |
"# 🔴 Solution: Grouped Query Attention\n",
|
| 22 |
"\n",
|
| 23 |
"Reference solution for GQA — MHA with shared KV heads."
|
| 24 |
],
|
| 25 |
"outputs": []
|
| 26 |
},
|
| 27 |
+
{
|
| 28 |
+
"cell_type": "code",
|
| 29 |
+
"metadata": {},
|
| 30 |
+
"source": [
|
| 31 |
+
"# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
|
| 32 |
+
"try:\n",
|
| 33 |
+
" import google.colab\n",
|
| 34 |
+
" get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
|
| 35 |
+
"except ImportError:\n",
|
| 36 |
+
" pass\n"
|
| 37 |
+
],
|
| 38 |
+
"outputs": [],
|
| 39 |
+
"execution_count": null
|
| 40 |
+
},
|
| 41 |
{
|
| 42 |
"cell_type": "code",
|
| 43 |
"metadata": {},
|
|
|
|
| 103 |
"execution_count": null
|
| 104 |
}
|
| 105 |
]
|
| 106 |
+
}
|
solutions/11_sliding_window_solution.ipynb
CHANGED
|
@@ -17,12 +17,27 @@
|
|
| 17 |
"cell_type": "markdown",
|
| 18 |
"metadata": {},
|
| 19 |
"source": [
|
|
|
|
| 20 |
"# 🔴 Solution: Sliding Window Attention\n",
|
| 21 |
"\n",
|
| 22 |
"Reference solution — softmax attention with a band mask."
|
| 23 |
],
|
| 24 |
"outputs": []
|
| 25 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
{
|
| 27 |
"cell_type": "code",
|
| 28 |
"metadata": {},
|
|
@@ -73,4 +88,4 @@
|
|
| 73 |
"execution_count": null
|
| 74 |
}
|
| 75 |
]
|
| 76 |
-
}
|
|
|
|
| 17 |
"cell_type": "markdown",
|
| 18 |
"metadata": {},
|
| 19 |
"source": [
|
| 20 |
+
"[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/11_sliding_window_solution.ipynb)\n\n",
|
| 21 |
"# 🔴 Solution: Sliding Window Attention\n",
|
| 22 |
"\n",
|
| 23 |
"Reference solution — softmax attention with a band mask."
|
| 24 |
],
|
| 25 |
"outputs": []
|
| 26 |
},
|
| 27 |
+
{
|
| 28 |
+
"cell_type": "code",
|
| 29 |
+
"metadata": {},
|
| 30 |
+
"source": [
|
| 31 |
+
"# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
|
| 32 |
+
"try:\n",
|
| 33 |
+
" import google.colab\n",
|
| 34 |
+
" get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
|
| 35 |
+
"except ImportError:\n",
|
| 36 |
+
" pass\n"
|
| 37 |
+
],
|
| 38 |
+
"outputs": [],
|
| 39 |
+
"execution_count": null
|
| 40 |
+
},
|
| 41 |
{
|
| 42 |
"cell_type": "code",
|
| 43 |
"metadata": {},
|
|
|
|
| 88 |
"execution_count": null
|
| 89 |
}
|
| 90 |
]
|
| 91 |
+
}
|
solutions/12_linear_attention_solution.ipynb
CHANGED
|
@@ -17,12 +17,27 @@
|
|
| 17 |
"cell_type": "markdown",
|
| 18 |
"metadata": {},
|
| 19 |
"source": [
|
|
|
|
| 20 |
"# 🔴 Solution: Linear Self-Attention\n",
|
| 21 |
"\n",
|
| 22 |
"Reference solution — kernel-based attention with elu+1 feature map."
|
| 23 |
],
|
| 24 |
"outputs": []
|
| 25 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
{
|
| 27 |
"cell_type": "code",
|
| 28 |
"metadata": {},
|
|
@@ -72,4 +87,4 @@
|
|
| 72 |
"execution_count": null
|
| 73 |
}
|
| 74 |
]
|
| 75 |
-
}
|
|
|
|
| 17 |
"cell_type": "markdown",
|
| 18 |
"metadata": {},
|
| 19 |
"source": [
|
| 20 |
+
"[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/12_linear_attention_solution.ipynb)\n\n",
|
| 21 |
"# 🔴 Solution: Linear Self-Attention\n",
|
| 22 |
"\n",
|
| 23 |
"Reference solution — kernel-based attention with elu+1 feature map."
|
| 24 |
],
|
| 25 |
"outputs": []
|
| 26 |
},
|
| 27 |
+
{
|
| 28 |
+
"cell_type": "code",
|
| 29 |
+
"metadata": {},
|
| 30 |
+
"source": [
|
| 31 |
+
"# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
|
| 32 |
+
"try:\n",
|
| 33 |
+
" import google.colab\n",
|
| 34 |
+
" get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
|
| 35 |
+
"except ImportError:\n",
|
| 36 |
+
" pass\n"
|
| 37 |
+
],
|
| 38 |
+
"outputs": [],
|
| 39 |
+
"execution_count": null
|
| 40 |
+
},
|
| 41 |
{
|
| 42 |
"cell_type": "code",
|
| 43 |
"metadata": {},
|
|
|
|
| 87 |
"execution_count": null
|
| 88 |
}
|
| 89 |
]
|
| 90 |
+
}
|
solutions/13_gpt2_block_solution.ipynb
CHANGED
|
@@ -17,12 +17,27 @@
|
|
| 17 |
"cell_type": "markdown",
|
| 18 |
"metadata": {},
|
| 19 |
"source": [
|
|
|
|
| 20 |
"# 🔴 Solution: GPT-2 Transformer Block\n",
|
| 21 |
"\n",
|
| 22 |
"Reference solution — pre-norm, causal self-attention, 4x MLP with GELU."
|
| 23 |
],
|
| 24 |
"outputs": []
|
| 25 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
{
|
| 27 |
"cell_type": "code",
|
| 28 |
"metadata": {},
|
|
@@ -102,4 +117,4 @@
|
|
| 102 |
"execution_count": null
|
| 103 |
}
|
| 104 |
]
|
| 105 |
-
}
|
|
|
|
| 17 |
"cell_type": "markdown",
|
| 18 |
"metadata": {},
|
| 19 |
"source": [
|
| 20 |
+
"[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/13_gpt2_block_solution.ipynb)\n\n",
|
| 21 |
"# 🔴 Solution: GPT-2 Transformer Block\n",
|
| 22 |
"\n",
|
| 23 |
"Reference solution — pre-norm, causal self-attention, 4x MLP with GELU."
|
| 24 |
],
|
| 25 |
"outputs": []
|
| 26 |
},
|
| 27 |
+
{
|
| 28 |
+
"cell_type": "code",
|
| 29 |
+
"metadata": {},
|
| 30 |
+
"source": [
|
| 31 |
+
"# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
|
| 32 |
+
"try:\n",
|
| 33 |
+
" import google.colab\n",
|
| 34 |
+
" get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
|
| 35 |
+
"except ImportError:\n",
|
| 36 |
+
" pass\n"
|
| 37 |
+
],
|
| 38 |
+
"outputs": [],
|
| 39 |
+
"execution_count": null
|
| 40 |
+
},
|
| 41 |
{
|
| 42 |
"cell_type": "code",
|
| 43 |
"metadata": {},
|
|
|
|
| 117 |
"execution_count": null
|
| 118 |
}
|
| 119 |
]
|
| 120 |
+
}
|
solutions/14_kv_cache_solution.ipynb
CHANGED
|
@@ -4,12 +4,27 @@
|
|
| 4 |
"cell_type": "markdown",
|
| 5 |
"metadata": {},
|
| 6 |
"source": [
|
| 7 |
-
"
|
|
|
|
| 8 |
"\n",
|
| 9 |
-
"Reference solution
|
| 10 |
],
|
| 11 |
"outputs": []
|
| 12 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
{
|
| 14 |
"cell_type": "code",
|
| 15 |
"metadata": {},
|
|
@@ -26,7 +41,7 @@
|
|
| 26 |
"metadata": {},
|
| 27 |
"outputs": [],
|
| 28 |
"source": [
|
| 29 |
-
"#
|
| 30 |
"\n",
|
| 31 |
"class KVCacheAttention(nn.Module):\n",
|
| 32 |
" def __init__(self, d_model, num_heads):\n",
|
|
|
|
| 4 |
"cell_type": "markdown",
|
| 5 |
"metadata": {},
|
| 6 |
"source": [
|
| 7 |
+
"[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/14_kv_cache_solution.ipynb)\n\n",
|
| 8 |
+
"# 🔴 Solution: KV Cache Attention\n",
|
| 9 |
"\n",
|
| 10 |
+
"Reference solution — multi-head attention with KV caching for autoregressive inference."
|
| 11 |
],
|
| 12 |
"outputs": []
|
| 13 |
},
|
| 14 |
+
{
|
| 15 |
+
"cell_type": "code",
|
| 16 |
+
"metadata": {},
|
| 17 |
+
"source": [
|
| 18 |
+
"# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
|
| 19 |
+
"try:\n",
|
| 20 |
+
" import google.colab\n",
|
| 21 |
+
" get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
|
| 22 |
+
"except ImportError:\n",
|
| 23 |
+
" pass\n"
|
| 24 |
+
],
|
| 25 |
+
"outputs": [],
|
| 26 |
+
"execution_count": null
|
| 27 |
+
},
|
| 28 |
{
|
| 29 |
"cell_type": "code",
|
| 30 |
"metadata": {},
|
|
|
|
| 41 |
"metadata": {},
|
| 42 |
"outputs": [],
|
| 43 |
"source": [
|
| 44 |
+
"# ✅ SOLUTION\n",
|
| 45 |
"\n",
|
| 46 |
"class KVCacheAttention(nn.Module):\n",
|
| 47 |
" def __init__(self, d_model, num_heads):\n",
|
solutions/15_mlp_solution.ipynb
CHANGED
|
@@ -4,12 +4,27 @@
|
|
| 4 |
"cell_type": "markdown",
|
| 5 |
"metadata": {},
|
| 6 |
"source": [
|
| 7 |
-
"
|
|
|
|
| 8 |
"\n",
|
| 9 |
-
"Reference solution
|
| 10 |
],
|
| 11 |
"outputs": []
|
| 12 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
{
|
| 14 |
"cell_type": "code",
|
| 15 |
"metadata": {},
|
|
@@ -26,7 +41,7 @@
|
|
| 26 |
"metadata": {},
|
| 27 |
"outputs": [],
|
| 28 |
"source": [
|
| 29 |
-
"#
|
| 30 |
"\n",
|
| 31 |
"class SwiGLUMLP(nn.Module):\n",
|
| 32 |
" def __init__(self, d_model, d_ff):\n",
|
|
|
|
| 4 |
"cell_type": "markdown",
|
| 5 |
"metadata": {},
|
| 6 |
"source": [
|
| 7 |
+
"[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/15_mlp_solution.ipynb)\n\n",
|
| 8 |
+
"# 🟠 Solution: SwiGLU MLP\n",
|
| 9 |
"\n",
|
| 10 |
+
"Reference solution — gated feed-forward network used in LLaMA, Mistral, and PaLM."
|
| 11 |
],
|
| 12 |
"outputs": []
|
| 13 |
},
|
| 14 |
+
{
|
| 15 |
+
"cell_type": "code",
|
| 16 |
+
"metadata": {},
|
| 17 |
+
"source": [
|
| 18 |
+
"# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
|
| 19 |
+
"try:\n",
|
| 20 |
+
" import google.colab\n",
|
| 21 |
+
" get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
|
| 22 |
+
"except ImportError:\n",
|
| 23 |
+
" pass\n"
|
| 24 |
+
],
|
| 25 |
+
"outputs": [],
|
| 26 |
+
"execution_count": null
|
| 27 |
+
},
|
| 28 |
{
|
| 29 |
"cell_type": "code",
|
| 30 |
"metadata": {},
|
|
|
|
| 41 |
"metadata": {},
|
| 42 |
"outputs": [],
|
| 43 |
"source": [
|
| 44 |
+
"# ✅ SOLUTION\n",
|
| 45 |
"\n",
|
| 46 |
"class SwiGLUMLP(nn.Module):\n",
|
| 47 |
" def __init__(self, d_model, d_ff):\n",
|
solutions/16_cross_entropy_solution.ipynb
CHANGED
|
@@ -4,12 +4,27 @@
|
|
| 4 |
"cell_type": "markdown",
|
| 5 |
"metadata": {},
|
| 6 |
"source": [
|
|
|
|
| 7 |
"# Solution: Cross-Entropy Loss\n",
|
| 8 |
"\n",
|
| 9 |
"Reference solution."
|
| 10 |
],
|
| 11 |
"outputs": []
|
| 12 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
{
|
| 14 |
"cell_type": "code",
|
| 15 |
"metadata": {},
|
|
@@ -69,4 +84,4 @@
|
|
| 69 |
},
|
| 70 |
"nbformat": 4,
|
| 71 |
"nbformat_minor": 4
|
| 72 |
-
}
|
|
|
|
| 4 |
"cell_type": "markdown",
|
| 5 |
"metadata": {},
|
| 6 |
"source": [
|
| 7 |
+
"[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/16_cross_entropy_solution.ipynb)\n\n",
|
| 8 |
"# Solution: Cross-Entropy Loss\n",
|
| 9 |
"\n",
|
| 10 |
"Reference solution."
|
| 11 |
],
|
| 12 |
"outputs": []
|
| 13 |
},
|
| 14 |
+
{
|
| 15 |
+
"cell_type": "code",
|
| 16 |
+
"metadata": {},
|
| 17 |
+
"source": [
|
| 18 |
+
"# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
|
| 19 |
+
"try:\n",
|
| 20 |
+
" import google.colab\n",
|
| 21 |
+
" get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
|
| 22 |
+
"except ImportError:\n",
|
| 23 |
+
" pass\n"
|
| 24 |
+
],
|
| 25 |
+
"outputs": [],
|
| 26 |
+
"execution_count": null
|
| 27 |
+
},
|
| 28 |
{
|
| 29 |
"cell_type": "code",
|
| 30 |
"metadata": {},
|
|
|
|
| 84 |
},
|
| 85 |
"nbformat": 4,
|
| 86 |
"nbformat_minor": 4
|
| 87 |
+
}
|
solutions/17_dropout_solution.ipynb
CHANGED
|
@@ -4,12 +4,27 @@
|
|
| 4 |
"cell_type": "markdown",
|
| 5 |
"metadata": {},
|
| 6 |
"source": [
|
|
|
|
| 7 |
"# Solution: Implement Dropout\n",
|
| 8 |
"\n",
|
| 9 |
"Reference solution."
|
| 10 |
],
|
| 11 |
"outputs": []
|
| 12 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
{
|
| 14 |
"cell_type": "code",
|
| 15 |
"metadata": {},
|
|
@@ -79,4 +94,4 @@
|
|
| 79 |
},
|
| 80 |
"nbformat": 4,
|
| 81 |
"nbformat_minor": 4
|
| 82 |
-
}
|
|
|
|
| 4 |
"cell_type": "markdown",
|
| 5 |
"metadata": {},
|
| 6 |
"source": [
|
| 7 |
+
"[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/17_dropout_solution.ipynb)\n\n",
|
| 8 |
"# Solution: Implement Dropout\n",
|
| 9 |
"\n",
|
| 10 |
"Reference solution."
|
| 11 |
],
|
| 12 |
"outputs": []
|
| 13 |
},
|
| 14 |
+
{
|
| 15 |
+
"cell_type": "code",
|
| 16 |
+
"metadata": {},
|
| 17 |
+
"source": [
|
| 18 |
+
"# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
|
| 19 |
+
"try:\n",
|
| 20 |
+
" import google.colab\n",
|
| 21 |
+
" get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
|
| 22 |
+
"except ImportError:\n",
|
| 23 |
+
" pass\n"
|
| 24 |
+
],
|
| 25 |
+
"outputs": [],
|
| 26 |
+
"execution_count": null
|
| 27 |
+
},
|
| 28 |
{
|
| 29 |
"cell_type": "code",
|
| 30 |
"metadata": {},
|
|
|
|
| 94 |
},
|
| 95 |
"nbformat": 4,
|
| 96 |
"nbformat_minor": 4
|
| 97 |
+
}
|
solutions/18_embedding_solution.ipynb
CHANGED
|
@@ -4,12 +4,27 @@
|
|
| 4 |
"cell_type": "markdown",
|
| 5 |
"metadata": {},
|
| 6 |
"source": [
|
|
|
|
| 7 |
"# Solution: Embedding Layer\n",
|
| 8 |
"\n",
|
| 9 |
"Reference solution."
|
| 10 |
],
|
| 11 |
"outputs": []
|
| 12 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
{
|
| 14 |
"cell_type": "code",
|
| 15 |
"metadata": {},
|
|
@@ -74,4 +89,4 @@
|
|
| 74 |
},
|
| 75 |
"nbformat": 4,
|
| 76 |
"nbformat_minor": 4
|
| 77 |
-
}
|
|
|
|
| 4 |
"cell_type": "markdown",
|
| 5 |
"metadata": {},
|
| 6 |
"source": [
|
| 7 |
+
"[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/18_embedding_solution.ipynb)\n\n",
|
| 8 |
"# Solution: Embedding Layer\n",
|
| 9 |
"\n",
|
| 10 |
"Reference solution."
|
| 11 |
],
|
| 12 |
"outputs": []
|
| 13 |
},
|
| 14 |
+
{
|
| 15 |
+
"cell_type": "code",
|
| 16 |
+
"metadata": {},
|
| 17 |
+
"source": [
|
| 18 |
+
"# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
|
| 19 |
+
"try:\n",
|
| 20 |
+
" import google.colab\n",
|
| 21 |
+
" get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
|
| 22 |
+
"except ImportError:\n",
|
| 23 |
+
" pass\n"
|
| 24 |
+
],
|
| 25 |
+
"outputs": [],
|
| 26 |
+
"execution_count": null
|
| 27 |
+
},
|
| 28 |
{
|
| 29 |
"cell_type": "code",
|
| 30 |
"metadata": {},
|
|
|
|
| 89 |
},
|
| 90 |
"nbformat": 4,
|
| 91 |
"nbformat_minor": 4
|
| 92 |
+
}
|
solutions/19_gelu_solution.ipynb
CHANGED
|
@@ -4,12 +4,27 @@
|
|
| 4 |
"cell_type": "markdown",
|
| 5 |
"metadata": {},
|
| 6 |
"source": [
|
|
|
|
| 7 |
"# Solution: GELU Activation\n",
|
| 8 |
"\n",
|
| 9 |
"Reference solution."
|
| 10 |
],
|
| 11 |
"outputs": []
|
| 12 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
{
|
| 14 |
"cell_type": "code",
|
| 15 |
"metadata": {},
|
|
@@ -68,4 +83,4 @@
|
|
| 68 |
},
|
| 69 |
"nbformat": 4,
|
| 70 |
"nbformat_minor": 4
|
| 71 |
-
}
|
|
|
|
| 4 |
"cell_type": "markdown",
|
| 5 |
"metadata": {},
|
| 6 |
"source": [
|
| 7 |
+
"[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/19_gelu_solution.ipynb)\n\n",
|
| 8 |
"# Solution: GELU Activation\n",
|
| 9 |
"\n",
|
| 10 |
"Reference solution."
|
| 11 |
],
|
| 12 |
"outputs": []
|
| 13 |
},
|
| 14 |
+
{
|
| 15 |
+
"cell_type": "code",
|
| 16 |
+
"metadata": {},
|
| 17 |
+
"source": [
|
| 18 |
+
"# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
|
| 19 |
+
"try:\n",
|
| 20 |
+
" import google.colab\n",
|
| 21 |
+
" get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
|
| 22 |
+
"except ImportError:\n",
|
| 23 |
+
" pass\n"
|
| 24 |
+
],
|
| 25 |
+
"outputs": [],
|
| 26 |
+
"execution_count": null
|
| 27 |
+
},
|
| 28 |
{
|
| 29 |
"cell_type": "code",
|
| 30 |
"metadata": {},
|
|
|
|
| 83 |
},
|
| 84 |
"nbformat": 4,
|
| 85 |
"nbformat_minor": 4
|
| 86 |
+
}
|
solutions/20_weight_init_solution.ipynb
CHANGED
|
@@ -4,12 +4,27 @@
|
|
| 4 |
"cell_type": "markdown",
|
| 5 |
"metadata": {},
|
| 6 |
"source": [
|
|
|
|
| 7 |
"# Solution: Kaiming Initialization\n",
|
| 8 |
"\n",
|
| 9 |
"Reference solution."
|
| 10 |
],
|
| 11 |
"outputs": []
|
| 12 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
{
|
| 14 |
"cell_type": "code",
|
| 15 |
"metadata": {},
|
|
@@ -74,4 +89,4 @@
|
|
| 74 |
},
|
| 75 |
"nbformat": 4,
|
| 76 |
"nbformat_minor": 4
|
| 77 |
-
}
|
|
|
|
| 4 |
"cell_type": "markdown",
|
| 5 |
"metadata": {},
|
| 6 |
"source": [
|
| 7 |
+
"[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/20_weight_init_solution.ipynb)\n\n",
|
| 8 |
"# Solution: Kaiming Initialization\n",
|
| 9 |
"\n",
|
| 10 |
"Reference solution."
|
| 11 |
],
|
| 12 |
"outputs": []
|
| 13 |
},
|
| 14 |
+
{
|
| 15 |
+
"cell_type": "code",
|
| 16 |
+
"metadata": {},
|
| 17 |
+
"source": [
|
| 18 |
+
"# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
|
| 19 |
+
"try:\n",
|
| 20 |
+
" import google.colab\n",
|
| 21 |
+
" get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
|
| 22 |
+
"except ImportError:\n",
|
| 23 |
+
" pass\n"
|
| 24 |
+
],
|
| 25 |
+
"outputs": [],
|
| 26 |
+
"execution_count": null
|
| 27 |
+
},
|
| 28 |
{
|
| 29 |
"cell_type": "code",
|
| 30 |
"metadata": {},
|
|
|
|
| 89 |
},
|
| 90 |
"nbformat": 4,
|
| 91 |
"nbformat_minor": 4
|
| 92 |
+
}
|
solutions/21_gradient_clipping_solution.ipynb
CHANGED
|
@@ -4,12 +4,27 @@
|
|
| 4 |
"cell_type": "markdown",
|
| 5 |
"metadata": {},
|
| 6 |
"source": [
|
|
|
|
| 7 |
"# Solution: Gradient Norm Clipping\n",
|
| 8 |
"\n",
|
| 9 |
"Reference solution."
|
| 10 |
],
|
| 11 |
"outputs": []
|
| 12 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
{
|
| 14 |
"cell_type": "code",
|
| 15 |
"metadata": {},
|
|
@@ -76,4 +91,4 @@
|
|
| 76 |
},
|
| 77 |
"nbformat": 4,
|
| 78 |
"nbformat_minor": 4
|
| 79 |
-
}
|
|
|
|
| 4 |
"cell_type": "markdown",
|
| 5 |
"metadata": {},
|
| 6 |
"source": [
|
| 7 |
+
"[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/21_gradient_clipping_solution.ipynb)\n\n",
|
| 8 |
"# Solution: Gradient Norm Clipping\n",
|
| 9 |
"\n",
|
| 10 |
"Reference solution."
|
| 11 |
],
|
| 12 |
"outputs": []
|
| 13 |
},
|
| 14 |
+
{
|
| 15 |
+
"cell_type": "code",
|
| 16 |
+
"metadata": {},
|
| 17 |
+
"source": [
|
| 18 |
+
"# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
|
| 19 |
+
"try:\n",
|
| 20 |
+
" import google.colab\n",
|
| 21 |
+
" get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
|
| 22 |
+
"except ImportError:\n",
|
| 23 |
+
" pass\n"
|
| 24 |
+
],
|
| 25 |
+
"outputs": [],
|
| 26 |
+
"execution_count": null
|
| 27 |
+
},
|
| 28 |
{
|
| 29 |
"cell_type": "code",
|
| 30 |
"metadata": {},
|
|
|
|
| 91 |
},
|
| 92 |
"nbformat": 4,
|
| 93 |
"nbformat_minor": 4
|
| 94 |
+
}
|
solutions/22_conv2d_solution.ipynb
CHANGED
|
@@ -4,12 +4,27 @@
|
|
| 4 |
"cell_type": "markdown",
|
| 5 |
"metadata": {},
|
| 6 |
"source": [
|
|
|
|
| 7 |
"# Solution: 2D Convolution\n",
|
| 8 |
"\n",
|
| 9 |
"Reference solution."
|
| 10 |
],
|
| 11 |
"outputs": []
|
| 12 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
{
|
| 14 |
"cell_type": "code",
|
| 15 |
"metadata": {},
|
|
@@ -79,4 +94,4 @@
|
|
| 79 |
},
|
| 80 |
"nbformat": 4,
|
| 81 |
"nbformat_minor": 4
|
| 82 |
-
}
|
|
|
|
| 4 |
"cell_type": "markdown",
|
| 5 |
"metadata": {},
|
| 6 |
"source": [
|
| 7 |
+
"[](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/22_conv2d_solution.ipynb)\n\n",
|
| 8 |
"# Solution: 2D Convolution\n",
|
| 9 |
"\n",
|
| 10 |
"Reference solution."
|
| 11 |
],
|
| 12 |
"outputs": []
|
| 13 |
},
|
| 14 |
+
{
|
| 15 |
+
"cell_type": "code",
|
| 16 |
+
"metadata": {},
|
| 17 |
+
"source": [
|
| 18 |
+
"# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
|
| 19 |
+
"try:\n",
|
| 20 |
+
" import google.colab\n",
|
| 21 |
+
" get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
|
| 22 |
+
"except ImportError:\n",
|
| 23 |
+
" pass\n"
|
| 24 |
+
],
|
| 25 |
+
"outputs": [],
|
| 26 |
+
"execution_count": null
|
| 27 |
+
},
|
| 28 |
{
|
| 29 |
"cell_type": "code",
|
| 30 |
"metadata": {},
|
|
|
|
| 94 |
},
|
| 95 |
"nbformat": 4,
|
| 96 |
"nbformat_minor": 4
|
| 97 |
+
}
|
solutions/23_cross_attention_solution.ipynb
CHANGED
|
@@ -4,12 +4,27 @@
|
|
| 4 |
"cell_type": "markdown",
|
| 5 |
"metadata": {},
|
| 6 |
"source": [
|
|
|
|
| 7 |
"# Solution: Multi-Head Cross-Attention\n",
|
| 8 |
"\n",
|
| 9 |
"Reference solution."
|
| 10 |
],
|
| 11 |
"outputs": []
|
| 12 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
{
|
| 14 |
"cell_type": "code",
|
| 15 |
"metadata": {},
|
|
@@ -88,4 +103,4 @@
|
|
| 88 |
},
|
| 89 |
"nbformat": 4,
|
| 90 |
"nbformat_minor": 4
|
| 91 |
-
}
|
|
|
|
| 4 |
"cell_type": "markdown",
|
| 5 |
"metadata": {},
|
| 6 |
"source": [
|
| 7 |
+
"[](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/23_cross_attention_solution.ipynb)\n\n",
|
| 8 |
"# Solution: Multi-Head Cross-Attention\n",
|
| 9 |
"\n",
|
| 10 |
"Reference solution."
|
| 11 |
],
|
| 12 |
"outputs": []
|
| 13 |
},
|
| 14 |
+
{
|
| 15 |
+
"cell_type": "code",
|
| 16 |
+
"metadata": {},
|
| 17 |
+
"source": [
|
| 18 |
+
"# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
|
| 19 |
+
"try:\n",
|
| 20 |
+
" import google.colab\n",
|
| 21 |
+
" get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
|
| 22 |
+
"except ImportError:\n",
|
| 23 |
+
" pass\n"
|
| 24 |
+
],
|
| 25 |
+
"outputs": [],
|
| 26 |
+
"execution_count": null
|
| 27 |
+
},
|
| 28 |
{
|
| 29 |
"cell_type": "code",
|
| 30 |
"metadata": {},
|
|
|
|
| 103 |
},
|
| 104 |
"nbformat": 4,
|
| 105 |
"nbformat_minor": 4
|
| 106 |
+
}
|
solutions/24_rope_solution.ipynb
CHANGED
|
@@ -4,12 +4,27 @@
|
|
| 4 |
"cell_type": "markdown",
|
| 5 |
"metadata": {},
|
| 6 |
"source": [
|
|
|
|
| 7 |
"# Solution: Rotary Position Embedding (RoPE)\n",
|
| 8 |
"\n",
|
| 9 |
"Reference solution."
|
| 10 |
],
|
| 11 |
"outputs": []
|
| 12 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
{
|
| 14 |
"cell_type": "code",
|
| 15 |
"metadata": {},
|
|
@@ -83,4 +98,4 @@
|
|
| 83 |
},
|
| 84 |
"nbformat": 4,
|
| 85 |
"nbformat_minor": 4
|
| 86 |
-
}
|
|
|
|
| 4 |
"cell_type": "markdown",
|
| 5 |
"metadata": {},
|
| 6 |
"source": [
|
| 7 |
+
"[](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/24_rope_solution.ipynb)\n\n",
|
| 8 |
"# Solution: Rotary Position Embedding (RoPE)\n",
|
| 9 |
"\n",
|
| 10 |
"Reference solution."
|
| 11 |
],
|
| 12 |
"outputs": []
|
| 13 |
},
|
| 14 |
+
{
|
| 15 |
+
"cell_type": "code",
|
| 16 |
+
"metadata": {},
|
| 17 |
+
"source": [
|
| 18 |
+
"# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
|
| 19 |
+
"try:\n",
|
| 20 |
+
" import google.colab\n",
|
| 21 |
+
" get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
|
| 22 |
+
"except ImportError:\n",
|
| 23 |
+
" pass\n"
|
| 24 |
+
],
|
| 25 |
+
"outputs": [],
|
| 26 |
+
"execution_count": null
|
| 27 |
+
},
|
| 28 |
{
|
| 29 |
"cell_type": "code",
|
| 30 |
"metadata": {},
|
|
|
|
| 98 |
},
|
| 99 |
"nbformat": 4,
|
| 100 |
"nbformat_minor": 4
|
| 101 |
+
}
|
solutions/25_flash_attention_solution.ipynb
CHANGED
|
@@ -4,12 +4,27 @@
|
|
| 4 |
"cell_type": "markdown",
|
| 5 |
"metadata": {},
|
| 6 |
"source": [
|
|
|
|
| 7 |
"# Solution: Flash Attention (Tiled)\n",
|
| 8 |
"\n",
|
| 9 |
"Reference solution."
|
| 10 |
],
|
| 11 |
"outputs": []
|
| 12 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
{
|
| 14 |
"cell_type": "code",
|
| 15 |
"metadata": {},
|
|
@@ -92,4 +107,4 @@
|
|
| 92 |
},
|
| 93 |
"nbformat": 4,
|
| 94 |
"nbformat_minor": 4
|
| 95 |
-
}
|
|
|
|
| 4 |
"cell_type": "markdown",
|
| 5 |
"metadata": {},
|
| 6 |
"source": [
|
| 7 |
+
"[](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/25_flash_attention_solution.ipynb)\n\n",
|
| 8 |
"# Solution: Flash Attention (Tiled)\n",
|
| 9 |
"\n",
|
| 10 |
"Reference solution."
|
| 11 |
],
|
| 12 |
"outputs": []
|
| 13 |
},
|
| 14 |
+
{
|
| 15 |
+
"cell_type": "code",
|
| 16 |
+
"metadata": {},
|
| 17 |
+
"source": [
|
| 18 |
+
"# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
|
| 19 |
+
"try:\n",
|
| 20 |
+
" import google.colab\n",
|
| 21 |
+
" get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
|
| 22 |
+
"except ImportError:\n",
|
| 23 |
+
" pass\n"
|
| 24 |
+
],
|
| 25 |
+
"outputs": [],
|
| 26 |
+
"execution_count": null
|
| 27 |
+
},
|
| 28 |
{
|
| 29 |
"cell_type": "code",
|
| 30 |
"metadata": {},
|
|
|
|
| 107 |
},
|
| 108 |
"nbformat": 4,
|
| 109 |
"nbformat_minor": 4
|
| 110 |
+
}
|
solutions/26_lora_solution.ipynb
CHANGED
|
@@ -4,12 +4,27 @@
|
|
| 4 |
"cell_type": "markdown",
|
| 5 |
"metadata": {},
|
| 6 |
"source": [
|
|
|
|
| 7 |
"# Solution: LoRA (Low-Rank Adaptation)\n",
|
| 8 |
"\n",
|
| 9 |
"Reference solution."
|
| 10 |
],
|
| 11 |
"outputs": []
|
| 12 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
{
|
| 14 |
"cell_type": "code",
|
| 15 |
"metadata": {},
|
|
@@ -81,4 +96,4 @@
|
|
| 81 |
},
|
| 82 |
"nbformat": 4,
|
| 83 |
"nbformat_minor": 4
|
| 84 |
-
}
|
|
|
|
| 4 |
"cell_type": "markdown",
|
| 5 |
"metadata": {},
|
| 6 |
"source": [
|
| 7 |
+
"[](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/26_lora_solution.ipynb)\n\n",
|
| 8 |
"# Solution: LoRA (Low-Rank Adaptation)\n",
|
| 9 |
"\n",
|
| 10 |
"Reference solution."
|
| 11 |
],
|
| 12 |
"outputs": []
|
| 13 |
},
|
| 14 |
+
{
|
| 15 |
+
"cell_type": "code",
|
| 16 |
+
"metadata": {},
|
| 17 |
+
"source": [
|
| 18 |
+
"# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
|
| 19 |
+
"try:\n",
|
| 20 |
+
" import google.colab\n",
|
| 21 |
+
" get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
|
| 22 |
+
"except ImportError:\n",
|
| 23 |
+
" pass\n"
|
| 24 |
+
],
|
| 25 |
+
"outputs": [],
|
| 26 |
+
"execution_count": null
|
| 27 |
+
},
|
| 28 |
{
|
| 29 |
"cell_type": "code",
|
| 30 |
"metadata": {},
|
|
|
|
| 96 |
},
|
| 97 |
"nbformat": 4,
|
| 98 |
"nbformat_minor": 4
|
| 99 |
+
}
|
solutions/27_vit_patch_solution.ipynb
CHANGED
|
@@ -4,12 +4,27 @@
|
|
| 4 |
"cell_type": "markdown",
|
| 5 |
"metadata": {},
|
| 6 |
"source": [
|
|
|
|
| 7 |
"# Solution: ViT Patch Embedding\n",
|
| 8 |
"\n",
|
| 9 |
"Reference solution."
|
| 10 |
],
|
| 11 |
"outputs": []
|
| 12 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
{
|
| 14 |
"cell_type": "code",
|
| 15 |
"metadata": {},
|
|
@@ -81,4 +96,4 @@
|
|
| 81 |
},
|
| 82 |
"nbformat": 4,
|
| 83 |
"nbformat_minor": 4
|
| 84 |
-
}
|
|
|
|
| 4 |
"cell_type": "markdown",
|
| 5 |
"metadata": {},
|
| 6 |
"source": [
|
| 7 |
+
"[](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/27_vit_patch_solution.ipynb)\n\n",
|
| 8 |
"# Solution: ViT Patch Embedding\n",
|
| 9 |
"\n",
|
| 10 |
"Reference solution."
|
| 11 |
],
|
| 12 |
"outputs": []
|
| 13 |
},
|
| 14 |
+
{
|
| 15 |
+
"cell_type": "code",
|
| 16 |
+
"metadata": {},
|
| 17 |
+
"source": [
|
| 18 |
+
"# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
|
| 19 |
+
"try:\n",
|
| 20 |
+
" import google.colab\n",
|
| 21 |
+
" get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
|
| 22 |
+
"except ImportError:\n",
|
| 23 |
+
" pass\n"
|
| 24 |
+
],
|
| 25 |
+
"outputs": [],
|
| 26 |
+
"execution_count": null
|
| 27 |
+
},
|
| 28 |
{
|
| 29 |
"cell_type": "code",
|
| 30 |
"metadata": {},
|
|
|
|
| 96 |
},
|
| 97 |
"nbformat": 4,
|
| 98 |
"nbformat_minor": 4
|
| 99 |
+
}
|
solutions/28_moe_solution.ipynb
CHANGED
|
@@ -4,12 +4,27 @@
|
|
| 4 |
"cell_type": "markdown",
|
| 5 |
"metadata": {},
|
| 6 |
"source": [
|
|
|
|
| 7 |
"# Solution: Mixture of Experts (MoE)\n",
|
| 8 |
"\n",
|
| 9 |
"Reference solution."
|
| 10 |
],
|
| 11 |
"outputs": []
|
| 12 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
{
|
| 14 |
"cell_type": "code",
|
| 15 |
"metadata": {},
|
|
@@ -94,4 +109,4 @@
|
|
| 94 |
},
|
| 95 |
"nbformat": 4,
|
| 96 |
"nbformat_minor": 4
|
| 97 |
-
}
|
|
|
|
| 4 |
"cell_type": "markdown",
|
| 5 |
"metadata": {},
|
| 6 |
"source": [
|
| 7 |
+
"[](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/28_moe_solution.ipynb)\n\n",
|
| 8 |
"# Solution: Mixture of Experts (MoE)\n",
|
| 9 |
"\n",
|
| 10 |
"Reference solution."
|
| 11 |
],
|
| 12 |
"outputs": []
|
| 13 |
},
|
| 14 |
+
{
|
| 15 |
+
"cell_type": "code",
|
| 16 |
+
"metadata": {},
|
| 17 |
+
"source": [
|
| 18 |
+
"# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
|
| 19 |
+
"try:\n",
|
| 20 |
+
" import google.colab\n",
|
| 21 |
+
" get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
|
| 22 |
+
"except ImportError:\n",
|
| 23 |
+
" pass\n"
|
| 24 |
+
],
|
| 25 |
+
"outputs": [],
|
| 26 |
+
"execution_count": null
|
| 27 |
+
},
|
| 28 |
{
|
| 29 |
"cell_type": "code",
|
| 30 |
"metadata": {},
|
|
|
|
| 109 |
},
|
| 110 |
"nbformat": 4,
|
| 111 |
"nbformat_minor": 4
|
| 112 |
+
}
|
solutions/29_adam_solution.ipynb
CHANGED
|
@@ -4,12 +4,27 @@
|
|
| 4 |
"cell_type": "markdown",
|
| 5 |
"metadata": {},
|
| 6 |
"source": [
|
|
|
|
| 7 |
"# Solution: Adam Optimizer\n",
|
| 8 |
"\n",
|
| 9 |
"Reference solution."
|
| 10 |
],
|
| 11 |
"outputs": []
|
| 12 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
{
|
| 14 |
"cell_type": "code",
|
| 15 |
"metadata": {},
|
|
@@ -97,4 +112,4 @@
|
|
| 97 |
},
|
| 98 |
"nbformat": 4,
|
| 99 |
"nbformat_minor": 4
|
| 100 |
-
}
|
|
|
|
| 4 |
"cell_type": "markdown",
|
| 5 |
"metadata": {},
|
| 6 |
"source": [
|
| 7 |
+
"[](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/29_adam_solution.ipynb)\n\n",
|
| 8 |
"# Solution: Adam Optimizer\n",
|
| 9 |
"\n",
|
| 10 |
"Reference solution."
|
| 11 |
],
|
| 12 |
"outputs": []
|
| 13 |
},
|
| 14 |
+
{
|
| 15 |
+
"cell_type": "code",
|
| 16 |
+
"metadata": {},
|
| 17 |
+
"source": [
|
| 18 |
+
"# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
|
| 19 |
+
"try:\n",
|
| 20 |
+
" import google.colab\n",
|
| 21 |
+
" get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
|
| 22 |
+
"except ImportError:\n",
|
| 23 |
+
" pass\n"
|
| 24 |
+
],
|
| 25 |
+
"outputs": [],
|
| 26 |
+
"execution_count": null
|
| 27 |
+
},
|
| 28 |
{
|
| 29 |
"cell_type": "code",
|
| 30 |
"metadata": {},
|
|
|
|
| 112 |
},
|
| 113 |
"nbformat": 4,
|
| 114 |
"nbformat_minor": 4
|
| 115 |
+
}
|
solutions/30_cosine_lr_solution.ipynb
CHANGED
|
@@ -4,12 +4,27 @@
|
|
| 4 |
"cell_type": "markdown",
|
| 5 |
"metadata": {},
|
| 6 |
"source": [
|
|
|
|
| 7 |
"# Solution: Cosine LR Scheduler with Warmup\n",
|
| 8 |
"\n",
|
| 9 |
"Reference solution."
|
| 10 |
],
|
| 11 |
"outputs": []
|
| 12 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
{
|
| 14 |
"cell_type": "code",
|
| 15 |
"metadata": {},
|
|
@@ -71,4 +86,4 @@
|
|
| 71 |
},
|
| 72 |
"nbformat": 4,
|
| 73 |
"nbformat_minor": 4
|
| 74 |
-
}
|
|
|
|
| 4 |
"cell_type": "markdown",
|
| 5 |
"metadata": {},
|
| 6 |
"source": [
|
| 7 |
+
"[](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/30_cosine_lr_solution.ipynb)\n\n",
|
| 8 |
"# Solution: Cosine LR Scheduler with Warmup\n",
|
| 9 |
"\n",
|
| 10 |
"Reference solution."
|
| 11 |
],
|
| 12 |
"outputs": []
|
| 13 |
},
|
| 14 |
+
{
|
| 15 |
+
"cell_type": "code",
|
| 16 |
+
"metadata": {},
|
| 17 |
+
"source": [
|
| 18 |
+
"# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
|
| 19 |
+
"try:\n",
|
| 20 |
+
" import google.colab\n",
|
| 21 |
+
" get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
|
| 22 |
+
"except ImportError:\n",
|
| 23 |
+
" pass\n"
|
| 24 |
+
],
|
| 25 |
+
"outputs": [],
|
| 26 |
+
"execution_count": null
|
| 27 |
+
},
|
| 28 |
{
|
| 29 |
"cell_type": "code",
|
| 30 |
"metadata": {},
|
|
|
|
| 86 |
},
|
| 87 |
"nbformat": 4,
|
| 88 |
"nbformat_minor": 4
|
| 89 |
+
}
|
solutions/31_gradient_accumulation_solution.ipynb
CHANGED
|
@@ -4,12 +4,27 @@
|
|
| 4 |
"cell_type": "markdown",
|
| 5 |
"metadata": {},
|
| 6 |
"source": [
|
|
|
|
| 7 |
"# Solution: Gradient Accumulation\n",
|
| 8 |
"\n",
|
| 9 |
"Reference solution."
|
| 10 |
],
|
| 11 |
"outputs": []
|
| 12 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
{
|
| 14 |
"cell_type": "code",
|
| 15 |
"metadata": {},
|
|
@@ -78,4 +93,4 @@
|
|
| 78 |
},
|
| 79 |
"nbformat": 4,
|
| 80 |
"nbformat_minor": 4
|
| 81 |
-
}
|
|
|
|
| 4 |
"cell_type": "markdown",
|
| 5 |
"metadata": {},
|
| 6 |
"source": [
|
| 7 |
+
"[](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/31_gradient_accumulation_solution.ipynb)\n\n",
|
| 8 |
"# Solution: Gradient Accumulation\n",
|
| 9 |
"\n",
|
| 10 |
"Reference solution."
|
| 11 |
],
|
| 12 |
"outputs": []
|
| 13 |
},
|
| 14 |
+
{
|
| 15 |
+
"cell_type": "code",
|
| 16 |
+
"metadata": {},
|
| 17 |
+
"source": [
|
| 18 |
+
"# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
|
| 19 |
+
"try:\n",
|
| 20 |
+
" import google.colab\n",
|
| 21 |
+
" get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
|
| 22 |
+
"except ImportError:\n",
|
| 23 |
+
" pass\n"
|
| 24 |
+
],
|
| 25 |
+
"outputs": [],
|
| 26 |
+
"execution_count": null
|
| 27 |
+
},
|
| 28 |
{
|
| 29 |
"cell_type": "code",
|
| 30 |
"metadata": {},
|
|
|
|
| 93 |
},
|
| 94 |
"nbformat": 4,
|
| 95 |
"nbformat_minor": 4
|
| 96 |
+
}
|
solutions/32_topk_sampling_solution.ipynb
CHANGED
|
@@ -4,12 +4,27 @@
|
|
| 4 |
"cell_type": "markdown",
|
| 5 |
"metadata": {},
|
| 6 |
"source": [
|
|
|
|
| 7 |
"# Solution: Top-k / Top-p Sampling\n",
|
| 8 |
"\n",
|
| 9 |
"Reference solution."
|
| 10 |
],
|
| 11 |
"outputs": []
|
| 12 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
{
|
| 14 |
"cell_type": "code",
|
| 15 |
"metadata": {},
|
|
@@ -79,4 +94,4 @@
|
|
| 79 |
},
|
| 80 |
"nbformat": 4,
|
| 81 |
"nbformat_minor": 4
|
| 82 |
-
}
|
|
|
|
| 4 |
"cell_type": "markdown",
|
| 5 |
"metadata": {},
|
| 6 |
"source": [
|
| 7 |
+
"[](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/32_topk_sampling_solution.ipynb)\n\n",
|
| 8 |
"# Solution: Top-k / Top-p Sampling\n",
|
| 9 |
"\n",
|
| 10 |
"Reference solution."
|
| 11 |
],
|
| 12 |
"outputs": []
|
| 13 |
},
|
| 14 |
+
{
|
| 15 |
+
"cell_type": "code",
|
| 16 |
+
"metadata": {},
|
| 17 |
+
"source": [
|
| 18 |
+
"# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
|
| 19 |
+
"try:\n",
|
| 20 |
+
" import google.colab\n",
|
| 21 |
+
" get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
|
| 22 |
+
"except ImportError:\n",
|
| 23 |
+
" pass\n"
|
| 24 |
+
],
|
| 25 |
+
"outputs": [],
|
| 26 |
+
"execution_count": null
|
| 27 |
+
},
|
| 28 |
{
|
| 29 |
"cell_type": "code",
|
| 30 |
"metadata": {},
|
|
|
|
| 94 |
},
|
| 95 |
"nbformat": 4,
|
| 96 |
"nbformat_minor": 4
|
| 97 |
+
}
|
solutions/33_beam_search_solution.ipynb
CHANGED
|
@@ -4,12 +4,27 @@
|
|
| 4 |
"cell_type": "markdown",
|
| 5 |
"metadata": {},
|
| 6 |
"source": [
|
|
|
|
| 7 |
"# Solution: Beam Search Decoding\n",
|
| 8 |
"\n",
|
| 9 |
"Reference solution."
|
| 10 |
],
|
| 11 |
"outputs": []
|
| 12 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
{
|
| 14 |
"cell_type": "code",
|
| 15 |
"metadata": {},
|
|
@@ -88,4 +103,4 @@
|
|
| 88 |
},
|
| 89 |
"nbformat": 4,
|
| 90 |
"nbformat_minor": 4
|
| 91 |
-
}
|
|
|
|
| 4 |
"cell_type": "markdown",
|
| 5 |
"metadata": {},
|
| 6 |
"source": [
|
| 7 |
+
"[](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/33_beam_search_solution.ipynb)\n\n",
|
| 8 |
"# Solution: Beam Search Decoding\n",
|
| 9 |
"\n",
|
| 10 |
"Reference solution."
|
| 11 |
],
|
| 12 |
"outputs": []
|
| 13 |
},
|
| 14 |
+
{
|
| 15 |
+
"cell_type": "code",
|
| 16 |
+
"metadata": {},
|
| 17 |
+
"source": [
|
| 18 |
+
"# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
|
| 19 |
+
"try:\n",
|
| 20 |
+
" import google.colab\n",
|
| 21 |
+
" get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
|
| 22 |
+
"except ImportError:\n",
|
| 23 |
+
" pass\n"
|
| 24 |
+
],
|
| 25 |
+
"outputs": [],
|
| 26 |
+
"execution_count": null
|
| 27 |
+
},
|
| 28 |
{
|
| 29 |
"cell_type": "code",
|
| 30 |
"metadata": {},
|
|
|
|
| 103 |
},
|
| 104 |
"nbformat": 4,
|
| 105 |
"nbformat_minor": 4
|
| 106 |
+
}
|
solutions/34_speculative_decoding_solution.ipynb
CHANGED
|
@@ -4,12 +4,27 @@
|
|
| 4 |
"cell_type": "markdown",
|
| 5 |
"metadata": {},
|
| 6 |
"source": [
|
|
|
|
| 7 |
"# Solution: Speculative Decoding\n",
|
| 8 |
"\n",
|
| 9 |
"Reference solution."
|
| 10 |
],
|
| 11 |
"outputs": []
|
| 12 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
{
|
| 14 |
"cell_type": "code",
|
| 15 |
"metadata": {},
|
|
@@ -84,4 +99,4 @@
|
|
| 84 |
},
|
| 85 |
"nbformat": 4,
|
| 86 |
"nbformat_minor": 4
|
| 87 |
-
}
|
|
|
|
| 4 |
"cell_type": "markdown",
|
| 5 |
"metadata": {},
|
| 6 |
"source": [
|
| 7 |
+
"[](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/34_speculative_decoding_solution.ipynb)\n\n",
|
| 8 |
"# Solution: Speculative Decoding\n",
|
| 9 |
"\n",
|
| 10 |
"Reference solution."
|
| 11 |
],
|
| 12 |
"outputs": []
|
| 13 |
},
|
| 14 |
+
{
|
| 15 |
+
"cell_type": "code",
|
| 16 |
+
"metadata": {},
|
| 17 |
+
"source": [
|
| 18 |
+
"# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
|
| 19 |
+
"try:\n",
|
| 20 |
+
" import google.colab\n",
|
| 21 |
+
" get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
|
| 22 |
+
"except ImportError:\n",
|
| 23 |
+
" pass\n"
|
| 24 |
+
],
|
| 25 |
+
"outputs": [],
|
| 26 |
+
"execution_count": null
|
| 27 |
+
},
|
| 28 |
{
|
| 29 |
"cell_type": "code",
|
| 30 |
"metadata": {},
|
|
|
|
| 99 |
},
|
| 100 |
"nbformat": 4,
|
| 101 |
"nbformat_minor": 4
|
| 102 |
+
}
|
solutions/35_bpe_solution.ipynb
CHANGED
|
@@ -4,12 +4,27 @@
|
|
| 4 |
"cell_type": "markdown",
|
| 5 |
"metadata": {},
|
| 6 |
"source": [
|
|
|
|
| 7 |
"# Solution: Byte-Pair Encoding (BPE)\n",
|
| 8 |
"\n",
|
| 9 |
"Reference solution."
|
| 10 |
],
|
| 11 |
"outputs": []
|
| 12 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
{
|
| 14 |
"cell_type": "code",
|
| 15 |
"metadata": {},
|
|
@@ -113,4 +128,4 @@
|
|
| 113 |
},
|
| 114 |
"nbformat": 4,
|
| 115 |
"nbformat_minor": 4
|
| 116 |
-
}
|
|
|
|
| 4 |
"cell_type": "markdown",
|
| 5 |
"metadata": {},
|
| 6 |
"source": [
|
| 7 |
+
"[](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/35_bpe_solution.ipynb)\n\n",
|
| 8 |
"# Solution: Byte-Pair Encoding (BPE)\n",
|
| 9 |
"\n",
|
| 10 |
"Reference solution."
|
| 11 |
],
|
| 12 |
"outputs": []
|
| 13 |
},
|
| 14 |
+
{
|
| 15 |
+
"cell_type": "code",
|
| 16 |
+
"metadata": {},
|
| 17 |
+
"source": [
|
| 18 |
+
"# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
|
| 19 |
+
"try:\n",
|
| 20 |
+
" import google.colab\n",
|
| 21 |
+
" get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
|
| 22 |
+
"except ImportError:\n",
|
| 23 |
+
" pass\n"
|
| 24 |
+
],
|
| 25 |
+
"outputs": [],
|
| 26 |
+
"execution_count": null
|
| 27 |
+
},
|
| 28 |
{
|
| 29 |
"cell_type": "code",
|
| 30 |
"metadata": {},
|
|
|
|
| 128 |
},
|
| 129 |
"nbformat": 4,
|
| 130 |
"nbformat_minor": 4
|
| 131 |
+
}
|
solutions/36_int8_quantization_solution.ipynb
CHANGED
|
@@ -4,12 +4,27 @@
|
|
| 4 |
"cell_type": "markdown",
|
| 5 |
"metadata": {},
|
| 6 |
"source": [
|
|
|
|
| 7 |
"# Solution: INT8 Quantized Linear\n",
|
| 8 |
"\n",
|
| 9 |
"Reference solution."
|
| 10 |
],
|
| 11 |
"outputs": []
|
| 12 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
{
|
| 14 |
"cell_type": "code",
|
| 15 |
"metadata": {},
|
|
@@ -83,4 +98,4 @@
|
|
| 83 |
},
|
| 84 |
"nbformat": 4,
|
| 85 |
"nbformat_minor": 4
|
| 86 |
-
}
|
|
|
|
| 4 |
"cell_type": "markdown",
|
| 5 |
"metadata": {},
|
| 6 |
"source": [
|
| 7 |
+
"[](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/36_int8_quantization_solution.ipynb)\n\n",
|
| 8 |
"# Solution: INT8 Quantized Linear\n",
|
| 9 |
"\n",
|
| 10 |
"Reference solution."
|
| 11 |
],
|
| 12 |
"outputs": []
|
| 13 |
},
|
| 14 |
+
{
|
| 15 |
+
"cell_type": "code",
|
| 16 |
+
"metadata": {},
|
| 17 |
+
"source": [
|
| 18 |
+
"# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
|
| 19 |
+
"try:\n",
|
| 20 |
+
" import google.colab\n",
|
| 21 |
+
" get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
|
| 22 |
+
"except ImportError:\n",
|
| 23 |
+
" pass\n"
|
| 24 |
+
],
|
| 25 |
+
"outputs": [],
|
| 26 |
+
"execution_count": null
|
| 27 |
+
},
|
| 28 |
{
|
| 29 |
"cell_type": "code",
|
| 30 |
"metadata": {},
|
|
|
|
| 98 |
},
|
| 99 |
"nbformat": 4,
|
| 100 |
"nbformat_minor": 4
|
| 101 |
+
}
|
solutions/37_dpo_loss_solution.ipynb
CHANGED
|
@@ -4,12 +4,27 @@
|
|
| 4 |
"cell_type": "markdown",
|
| 5 |
"metadata": {},
|
| 6 |
"source": [
|
|
|
|
| 7 |
"# Solution: DPO (Direct Preference Optimization) Loss\n",
|
| 8 |
"\n",
|
| 9 |
"Reference solution."
|
| 10 |
],
|
| 11 |
"outputs": []
|
| 12 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
{
|
| 14 |
"cell_type": "code",
|
| 15 |
"metadata": {},
|
|
@@ -73,4 +88,4 @@
|
|
| 73 |
},
|
| 74 |
"nbformat": 4,
|
| 75 |
"nbformat_minor": 4
|
| 76 |
-
}
|
|
|
|
| 4 |
"cell_type": "markdown",
|
| 5 |
"metadata": {},
|
| 6 |
"source": [
|
| 7 |
+
"[](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/37_dpo_loss_solution.ipynb)\n\n",
|
| 8 |
"# Solution: DPO (Direct Preference Optimization) Loss\n",
|
| 9 |
"\n",
|
| 10 |
"Reference solution."
|
| 11 |
],
|
| 12 |
"outputs": []
|
| 13 |
},
|
| 14 |
+
{
|
| 15 |
+
"cell_type": "code",
|
| 16 |
+
"metadata": {},
|
| 17 |
+
"source": [
|
| 18 |
+
"# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
|
| 19 |
+
"try:\n",
|
| 20 |
+
" import google.colab\n",
|
| 21 |
+
" get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
|
| 22 |
+
"except ImportError:\n",
|
| 23 |
+
" pass\n"
|
| 24 |
+
],
|
| 25 |
+
"outputs": [],
|
| 26 |
+
"execution_count": null
|
| 27 |
+
},
|
| 28 |
{
|
| 29 |
"cell_type": "code",
|
| 30 |
"metadata": {},
|
|
|
|
| 88 |
},
|
| 89 |
"nbformat": 4,
|
| 90 |
"nbformat_minor": 4
|
| 91 |
+
}
|
solutions/38_grpo_loss_solution.ipynb
CHANGED
|
@@ -4,11 +4,26 @@
|
|
| 4 |
"cell_type": "markdown",
|
| 5 |
"metadata": {},
|
| 6 |
"source": [
|
|
|
|
| 7 |
"# Solution: GRPO (Group Relative Policy Optimization) Loss\n",
|
| 8 |
"\n",
|
| 9 |
"Reference solution."
|
| 10 |
]
|
| 11 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
{
|
| 13 |
"cell_type": "code",
|
| 14 |
"execution_count": null,
|
|
|
|
| 4 |
"cell_type": "markdown",
|
| 5 |
"metadata": {},
|
| 6 |
"source": [
|
| 7 |
+
"[](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/38_grpo_loss_solution.ipynb)\n\n",
|
| 8 |
"# Solution: GRPO (Group Relative Policy Optimization) Loss\n",
|
| 9 |
"\n",
|
| 10 |
"Reference solution."
|
| 11 |
]
|
| 12 |
},
|
| 13 |
+
{
|
| 14 |
+
"cell_type": "code",
|
| 15 |
+
"metadata": {},
|
| 16 |
+
"source": [
|
| 17 |
+
"# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
|
| 18 |
+
"try:\n",
|
| 19 |
+
" import google.colab\n",
|
| 20 |
+
" get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
|
| 21 |
+
"except ImportError:\n",
|
| 22 |
+
" pass\n"
|
| 23 |
+
],
|
| 24 |
+
"outputs": [],
|
| 25 |
+
"execution_count": null
|
| 26 |
+
},
|
| 27 |
{
|
| 28 |
"cell_type": "code",
|
| 29 |
"execution_count": null,
|
solutions/39_ppo_loss_solution.ipynb
CHANGED
|
@@ -4,11 +4,26 @@
|
|
| 4 |
"cell_type": "markdown",
|
| 5 |
"metadata": {},
|
| 6 |
"source": [
|
|
|
|
| 7 |
"# Solution: PPO Clipped Loss\n",
|
| 8 |
"\n",
|
| 9 |
"Reference solution for the PPO clipped surrogate loss task.\n"
|
| 10 |
]
|
| 11 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
{
|
| 13 |
"cell_type": "code",
|
| 14 |
"execution_count": null,
|
|
@@ -89,4 +104,3 @@
|
|
| 89 |
"nbformat": 4,
|
| 90 |
"nbformat_minor": 5
|
| 91 |
}
|
| 92 |
-
|
|
|
|
| 4 |
"cell_type": "markdown",
|
| 5 |
"metadata": {},
|
| 6 |
"source": [
|
| 7 |
+
"[](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/39_ppo_loss_solution.ipynb)\n\n",
|
| 8 |
"# Solution: PPO Clipped Loss\n",
|
| 9 |
"\n",
|
| 10 |
"Reference solution for the PPO clipped surrogate loss task.\n"
|
| 11 |
]
|
| 12 |
},
|
| 13 |
+
{
|
| 14 |
+
"cell_type": "code",
|
| 15 |
+
"metadata": {},
|
| 16 |
+
"source": [
|
| 17 |
+
"# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
|
| 18 |
+
"try:\n",
|
| 19 |
+
" import google.colab\n",
|
| 20 |
+
" get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
|
| 21 |
+
"except ImportError:\n",
|
| 22 |
+
" pass\n"
|
| 23 |
+
],
|
| 24 |
+
"outputs": [],
|
| 25 |
+
"execution_count": null
|
| 26 |
+
},
|
| 27 |
{
|
| 28 |
"cell_type": "code",
|
| 29 |
"execution_count": null,
|
|
|
|
| 104 |
"nbformat": 4,
|
| 105 |
"nbformat_minor": 5
|
| 106 |
}
|
|
|
solutions/40_linear_regression_solution.ipynb
CHANGED
|
@@ -1,125 +1,140 @@
|
|
| 1 |
{
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
{
|
| 13 |
-
"cell_type": "code",
|
| 14 |
-
"metadata": {},
|
| 15 |
-
"outputs": [],
|
| 16 |
-
"source": [
|
| 17 |
-
"import torch\n",
|
| 18 |
-
"import torch.nn as nn"
|
| 19 |
-
],
|
| 20 |
-
"execution_count": null
|
| 21 |
-
},
|
| 22 |
-
{
|
| 23 |
-
"cell_type": "code",
|
| 24 |
-
"metadata": {},
|
| 25 |
-
"outputs": [],
|
| 26 |
-
"source": [
|
| 27 |
-
"# ✅ SOLUTION\n",
|
| 28 |
-
"\n",
|
| 29 |
-
"class LinearRegression:\n",
|
| 30 |
-
" def closed_form(self, X: torch.Tensor, y: torch.Tensor):\n",
|
| 31 |
-
" \"\"\"Normal equation via augmented matrix.\"\"\"\n",
|
| 32 |
-
" N, D = X.shape\n",
|
| 33 |
-
" # Augment X with ones column for bias\n",
|
| 34 |
-
" X_aug = torch.cat([X, torch.ones(N, 1)], dim=1) # (N, D+1)\n",
|
| 35 |
-
" # Solve (X^T X) theta = X^T y\n",
|
| 36 |
-
" theta = torch.linalg.lstsq(X_aug, y).solution # (D+1,)\n",
|
| 37 |
-
" w = theta[:D]\n",
|
| 38 |
-
" b = theta[D]\n",
|
| 39 |
-
" return w.detach(), b.detach()\n",
|
| 40 |
-
"\n",
|
| 41 |
-
" def gradient_descent(self, X: torch.Tensor, y: torch.Tensor,\n",
|
| 42 |
-
" lr: float = 0.01, steps: int = 1000):\n",
|
| 43 |
-
" \"\"\"Manual gradient computation — no autograd.\"\"\"\n",
|
| 44 |
-
" N, D = X.shape\n",
|
| 45 |
-
" w = torch.zeros(D)\n",
|
| 46 |
-
" b = torch.tensor(0.0)\n",
|
| 47 |
-
"\n",
|
| 48 |
-
" for _ in range(steps):\n",
|
| 49 |
-
" pred = X @ w + b # (N,)\n",
|
| 50 |
-
" error = pred - y # (N,)\n",
|
| 51 |
-
" grad_w = (2.0 / N) * (X.T @ error) # (D,)\n",
|
| 52 |
-
" grad_b = (2.0 / N) * error.sum() # scalar\n",
|
| 53 |
-
" w = w - lr * grad_w\n",
|
| 54 |
-
" b = b - lr * grad_b\n",
|
| 55 |
-
"\n",
|
| 56 |
-
" return w, b\n",
|
| 57 |
-
"\n",
|
| 58 |
-
" def nn_linear(self, X: torch.Tensor, y: torch.Tensor,\n",
|
| 59 |
-
" lr: float = 0.01, steps: int = 1000):\n",
|
| 60 |
-
" \"\"\"PyTorch nn.Linear with autograd training loop.\"\"\"\n",
|
| 61 |
-
" N, D = X.shape\n",
|
| 62 |
-
" layer = nn.Linear(D, 1)\n",
|
| 63 |
-
" optimizer = torch.optim.SGD(layer.parameters(), lr=lr)\n",
|
| 64 |
-
" loss_fn = nn.MSELoss()\n",
|
| 65 |
-
"\n",
|
| 66 |
-
" for _ in range(steps):\n",
|
| 67 |
-
" optimizer.zero_grad()\n",
|
| 68 |
-
" pred = layer(X).squeeze(-1) # (N,)\n",
|
| 69 |
-
" loss = loss_fn(pred, y)\n",
|
| 70 |
-
" loss.backward()\n",
|
| 71 |
-
" optimizer.step()\n",
|
| 72 |
-
"\n",
|
| 73 |
-
" w = layer.weight.data.squeeze(0) # (D,)\n",
|
| 74 |
-
" b = layer.bias.data.squeeze(0) # scalar ()\n",
|
| 75 |
-
" return w, b"
|
| 76 |
-
],
|
| 77 |
-
"execution_count": null
|
| 78 |
-
},
|
| 79 |
-
{
|
| 80 |
-
"cell_type": "code",
|
| 81 |
-
"metadata": {},
|
| 82 |
-
"outputs": [],
|
| 83 |
-
"source": [
|
| 84 |
-
"# Verify\n",
|
| 85 |
-
"torch.manual_seed(42)\n",
|
| 86 |
-
"X = torch.randn(100, 3)\n",
|
| 87 |
-
"true_w = torch.tensor([2.0, -1.0, 0.5])\n",
|
| 88 |
-
"y = X @ true_w + 3.0\n",
|
| 89 |
-
"\n",
|
| 90 |
-
"model = LinearRegression()\n",
|
| 91 |
-
"for name, method in [(\"Closed-form\", model.closed_form),\n",
|
| 92 |
-
" (\"Grad Descent\", lambda X, y: model.gradient_descent(X, y, lr=0.05, steps=2000)),\n",
|
| 93 |
-
" (\"nn.Linear\", lambda X, y: model.nn_linear(X, y, lr=0.05, steps=2000))]:\n",
|
| 94 |
-
" w, b = method(X, y)\n",
|
| 95 |
-
" print(f\"{name:13s} w={w.tolist()} b={b.item():.4f}\")\n",
|
| 96 |
-
"print(f\"{'True':13s} w={true_w.tolist()} b=3.0000\")"
|
| 97 |
-
],
|
| 98 |
-
"execution_count": null
|
| 99 |
-
},
|
| 100 |
-
{
|
| 101 |
-
"cell_type": "code",
|
| 102 |
-
"metadata": {},
|
| 103 |
-
"outputs": [],
|
| 104 |
-
"source": [
|
| 105 |
-
"# ✅ SUBMIT\n",
|
| 106 |
-
"from torch_judge import check\n",
|
| 107 |
-
"check(\"linear_regression\")"
|
| 108 |
-
],
|
| 109 |
-
"execution_count": null
|
| 110 |
-
}
|
| 111 |
-
],
|
| 112 |
-
"metadata": {
|
| 113 |
-
"kernelspec": {
|
| 114 |
-
"display_name": "Python 3",
|
| 115 |
-
"language": "python",
|
| 116 |
-
"name": "python3"
|
| 117 |
-
},
|
| 118 |
-
"language_info": {
|
| 119 |
-
"name": "python",
|
| 120 |
-
"version": "3.11.0"
|
| 121 |
-
}
|
| 122 |
},
|
| 123 |
-
|
| 124 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 125 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "markdown",
|
| 5 |
+
"metadata": {},
|
| 6 |
+
"source": [
|
| 7 |
+
"[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/40_linear_regression_solution.ipynb)\n\n",
|
| 8 |
+
"# 🟡 Solution: Linear Regression\n",
|
| 9 |
+
"\n",
|
| 10 |
+
"Reference solution demonstrating closed-form, gradient descent, and nn.Linear approaches."
|
| 11 |
+
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
},
|
| 13 |
+
{
|
| 14 |
+
"cell_type": "code",
|
| 15 |
+
"metadata": {},
|
| 16 |
+
"source": [
|
| 17 |
+
"# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
|
| 18 |
+
"try:\n",
|
| 19 |
+
" import google.colab\n",
|
| 20 |
+
" get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
|
| 21 |
+
"except ImportError:\n",
|
| 22 |
+
" pass\n"
|
| 23 |
+
],
|
| 24 |
+
"outputs": [],
|
| 25 |
+
"execution_count": null
|
| 26 |
+
},
|
| 27 |
+
{
|
| 28 |
+
"cell_type": "code",
|
| 29 |
+
"metadata": {},
|
| 30 |
+
"outputs": [],
|
| 31 |
+
"source": [
|
| 32 |
+
"import torch\n",
|
| 33 |
+
"import torch.nn as nn"
|
| 34 |
+
],
|
| 35 |
+
"execution_count": null
|
| 36 |
+
},
|
| 37 |
+
{
|
| 38 |
+
"cell_type": "code",
|
| 39 |
+
"metadata": {},
|
| 40 |
+
"outputs": [],
|
| 41 |
+
"source": [
|
| 42 |
+
"# ✅ SOLUTION\n",
|
| 43 |
+
"\n",
|
| 44 |
+
"class LinearRegression:\n",
|
| 45 |
+
" def closed_form(self, X: torch.Tensor, y: torch.Tensor):\n",
|
| 46 |
+
" \"\"\"Normal equation via augmented matrix.\"\"\"\n",
|
| 47 |
+
" N, D = X.shape\n",
|
| 48 |
+
" # Augment X with ones column for bias\n",
|
| 49 |
+
" X_aug = torch.cat([X, torch.ones(N, 1)], dim=1) # (N, D+1)\n",
|
| 50 |
+
"        # Least-squares solve of X_aug @ theta = y; the minimizer satisfies (X^T X) theta = X^T y\n",
|
| 51 |
+
" theta = torch.linalg.lstsq(X_aug, y).solution # (D+1,)\n",
|
| 52 |
+
" w = theta[:D]\n",
|
| 53 |
+
" b = theta[D]\n",
|
| 54 |
+
" return w.detach(), b.detach()\n",
|
| 55 |
+
"\n",
|
| 56 |
+
" def gradient_descent(self, X: torch.Tensor, y: torch.Tensor,\n",
|
| 57 |
+
" lr: float = 0.01, steps: int = 1000):\n",
|
| 58 |
+
" \"\"\"Manual gradient computation — no autograd.\"\"\"\n",
|
| 59 |
+
" N, D = X.shape\n",
|
| 60 |
+
" w = torch.zeros(D)\n",
|
| 61 |
+
" b = torch.tensor(0.0)\n",
|
| 62 |
+
"\n",
|
| 63 |
+
" for _ in range(steps):\n",
|
| 64 |
+
" pred = X @ w + b # (N,)\n",
|
| 65 |
+
" error = pred - y # (N,)\n",
|
| 66 |
+
" grad_w = (2.0 / N) * (X.T @ error) # (D,)\n",
|
| 67 |
+
" grad_b = (2.0 / N) * error.sum() # scalar\n",
|
| 68 |
+
" w = w - lr * grad_w\n",
|
| 69 |
+
" b = b - lr * grad_b\n",
|
| 70 |
+
"\n",
|
| 71 |
+
" return w, b\n",
|
| 72 |
+
"\n",
|
| 73 |
+
" def nn_linear(self, X: torch.Tensor, y: torch.Tensor,\n",
|
| 74 |
+
" lr: float = 0.01, steps: int = 1000):\n",
|
| 75 |
+
" \"\"\"PyTorch nn.Linear with autograd training loop.\"\"\"\n",
|
| 76 |
+
" N, D = X.shape\n",
|
| 77 |
+
" layer = nn.Linear(D, 1)\n",
|
| 78 |
+
" optimizer = torch.optim.SGD(layer.parameters(), lr=lr)\n",
|
| 79 |
+
" loss_fn = nn.MSELoss()\n",
|
| 80 |
+
"\n",
|
| 81 |
+
" for _ in range(steps):\n",
|
| 82 |
+
" optimizer.zero_grad()\n",
|
| 83 |
+
" pred = layer(X).squeeze(-1) # (N,)\n",
|
| 84 |
+
" loss = loss_fn(pred, y)\n",
|
| 85 |
+
" loss.backward()\n",
|
| 86 |
+
" optimizer.step()\n",
|
| 87 |
+
"\n",
|
| 88 |
+
" w = layer.weight.data.squeeze(0) # (D,)\n",
|
| 89 |
+
" b = layer.bias.data.squeeze(0) # scalar ()\n",
|
| 90 |
+
" return w, b"
|
| 91 |
+
],
|
| 92 |
+
"execution_count": null
|
| 93 |
+
},
|
| 94 |
+
{
|
| 95 |
+
"cell_type": "code",
|
| 96 |
+
"metadata": {},
|
| 97 |
+
"outputs": [],
|
| 98 |
+
"source": [
|
| 99 |
+
"# Verify\n",
|
| 100 |
+
"torch.manual_seed(42)\n",
|
| 101 |
+
"X = torch.randn(100, 3)\n",
|
| 102 |
+
"true_w = torch.tensor([2.0, -1.0, 0.5])\n",
|
| 103 |
+
"y = X @ true_w + 3.0\n",
|
| 104 |
+
"\n",
|
| 105 |
+
"model = LinearRegression()\n",
|
| 106 |
+
"for name, method in [(\"Closed-form\", model.closed_form),\n",
|
| 107 |
+
" (\"Grad Descent\", lambda X, y: model.gradient_descent(X, y, lr=0.05, steps=2000)),\n",
|
| 108 |
+
" (\"nn.Linear\", lambda X, y: model.nn_linear(X, y, lr=0.05, steps=2000))]:\n",
|
| 109 |
+
" w, b = method(X, y)\n",
|
| 110 |
+
" print(f\"{name:13s} w={w.tolist()} b={b.item():.4f}\")\n",
|
| 111 |
+
"print(f\"{'True':13s} w={true_w.tolist()} b=3.0000\")"
|
| 112 |
+
],
|
| 113 |
+
"execution_count": null
|
| 114 |
+
},
|
| 115 |
+
{
|
| 116 |
+
"cell_type": "code",
|
| 117 |
+
"metadata": {},
|
| 118 |
+
"outputs": [],
|
| 119 |
+
"source": [
|
| 120 |
+
"# ✅ SUBMIT\n",
|
| 121 |
+
"from torch_judge import check\n",
|
| 122 |
+
"check(\"linear_regression\")"
|
| 123 |
+
],
|
| 124 |
+
"execution_count": null
|
| 125 |
+
}
|
| 126 |
+
],
|
| 127 |
+
"metadata": {
|
| 128 |
+
"kernelspec": {
|
| 129 |
+
"display_name": "Python 3",
|
| 130 |
+
"language": "python",
|
| 131 |
+
"name": "python3"
|
| 132 |
+
},
|
| 133 |
+
"language_info": {
|
| 134 |
+
"name": "python",
|
| 135 |
+
"version": "3.11.0"
|
| 136 |
+
}
|
| 137 |
+
},
|
| 138 |
+
"nbformat": 4,
|
| 139 |
+
"nbformat_minor": 4
|
| 140 |
}
|
templates/00_welcome.ipynb
CHANGED
|
@@ -31,24 +31,120 @@
|
|
| 31 |
"\n",
|
| 32 |
"> 💡 Every notebook also has a **Colab** toolbar button and an **Open in Colab** badge — use them to run problems in Google Colab with zero setup.\n",
|
| 33 |
"\n",
|
| 34 |
-
"## Quick Start"
|
|
|
|
|
|
|
| 35 |
]
|
| 36 |
},
|
| 37 |
{
|
| 38 |
"cell_type": "code",
|
|
|
|
| 39 |
"metadata": {},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
"source": [
|
| 41 |
"from torch_judge import status\n",
|
| 42 |
"status()"
|
| 43 |
-
]
|
| 44 |
-
"execution_count": null,
|
| 45 |
-
"outputs": []
|
| 46 |
},
|
| 47 |
{
|
| 48 |
"cell_type": "markdown",
|
| 49 |
"metadata": {},
|
| 50 |
"source": [
|
| 51 |
-
"## Problem List (40 problems)\n\n### 🧱 Fundamentals — \"Implement X from scratch\"\n\n| # | Problem | Difficulty | Template | Solution |\n|:---:|---------|:----------:|:--------:|:--------:|\n| 1 | ReLU | 🟢 Easy | [Open](01_relu.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/01_relu.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/01_relu.ipynb\" target=\"_blank\">Colab</a> | [Open](01_relu_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/01_relu_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/01_relu_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 2 | Softmax | 🟢 Easy | [Open](02_softmax.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/02_softmax.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/02_softmax.ipynb\" target=\"_blank\">Colab</a> | [Open](02_softmax_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/02_softmax_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/02_softmax_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 16 | Cross-Entropy Loss | 🟢 Easy | [Open](16_cross_entropy.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/16_cross_entropy.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/16_cross_entropy.ipynb\" target=\"_blank\">Colab</a> | [Open](16_cross_entropy_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/16_cross_entropy_solution.ipynb\" target=\"_blank\">GitHub</a> · <a 
href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/16_cross_entropy_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 17 | Dropout | 🟢 Easy | [Open](17_dropout.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/17_dropout.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/17_dropout.ipynb\" target=\"_blank\">Colab</a> | [Open](17_dropout_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/17_dropout_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/17_dropout_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 18 | Embedding | 🟢 Easy | [Open](18_embedding.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/18_embedding.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/18_embedding.ipynb\" target=\"_blank\">Colab</a> | [Open](18_embedding_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/18_embedding_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/18_embedding_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 19 | GELU | 🟢 Easy | [Open](19_gelu.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/19_gelu.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/19_gelu.ipynb\" target=\"_blank\">Colab</a> | [Open](19_gelu_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/19_gelu_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/19_gelu_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 
20 | Kaiming Init | 🟢 Easy | [Open](20_weight_init.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/20_weight_init.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/20_weight_init.ipynb\" target=\"_blank\">Colab</a> | [Open](20_weight_init_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/20_weight_init_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/20_weight_init_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 21 | Gradient Clipping | 🟢 Easy | [Open](21_gradient_clipping.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/21_gradient_clipping.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/21_gradient_clipping.ipynb\" target=\"_blank\">Colab</a> | [Open](21_gradient_clipping_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/21_gradient_clipping_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/21_gradient_clipping_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 31 | Gradient Accumulation | 🟢 Easy | [Open](31_gradient_accumulation.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/31_gradient_accumulation.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/31_gradient_accumulation.ipynb\" target=\"_blank\">Colab</a> | [Open](31_gradient_accumulation_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/31_gradient_accumulation_solution.ipynb\" target=\"_blank\">GitHub</a> · <a 
href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/31_gradient_accumulation_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 3 | Linear Layer | 🟡 Medium | [Open](03_linear.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/03_linear.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/03_linear.ipynb\" target=\"_blank\">Colab</a> | [Open](03_linear_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/03_linear_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/03_linear_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 4 | LayerNorm | 🟡 Medium | [Open](04_layernorm.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/04_layernorm.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/04_layernorm.ipynb\" target=\"_blank\">Colab</a> | [Open](04_layernorm_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/04_layernorm_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/04_layernorm_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 7 | BatchNorm | 🟡 Medium | [Open](07_batchnorm.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/07_batchnorm.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/07_batchnorm.ipynb\" target=\"_blank\">Colab</a> | [Open](07_batchnorm_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/07_batchnorm_solution.ipynb\" target=\"_blank\">GitHub</a> · <a 
href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/07_batchnorm_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 8 | RMSNorm | 🟡 Medium | [Open](08_rmsnorm.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/08_rmsnorm.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/08_rmsnorm.ipynb\" target=\"_blank\">Colab</a> | [Open](08_rmsnorm_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/08_rmsnorm_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/08_rmsnorm_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 15 | SwiGLU MLP | 🟡 Medium | [Open](15_mlp.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/15_mlp.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/15_mlp.ipynb\" target=\"_blank\">Colab</a> | [Open](15_mlp_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/15_mlp_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/15_mlp_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 22 | Conv2d | 🟡 Medium | [Open](22_conv2d.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/22_conv2d.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/22_conv2d.ipynb\" target=\"_blank\">Colab</a> | [Open](22_conv2d_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/22_conv2d_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/22_conv2d_solution.ipynb\" target=\"_blank\">Colab</a> |\n\n### 🧠 Attention 
Mechanisms\n\n| # | Problem | Difficulty | Template | Solution |\n|:---:|---------|:----------:|:--------:|:--------:|\n| 23 | Cross-Attention | 🟡 Medium | [Open](23_cross_attention.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/23_cross_attention.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/23_cross_attention.ipynb\" target=\"_blank\">Colab</a> | [Open](23_cross_attention_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/23_cross_attention_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/23_cross_attention_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 5 | Scaled Dot-Product Attention | 🔴 Hard | [Open](05_attention.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/05_attention.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/05_attention.ipynb\" target=\"_blank\">Colab</a> | [Open](05_attention_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/05_attention_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/05_attention_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 6 | Multi-Head Attention | 🔴 Hard | [Open](06_multihead_attention.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/06_multihead_attention.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/06_multihead_attention.ipynb\" target=\"_blank\">Colab</a> | [Open](06_multihead_attention_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/06_multihead_attention_solution.ipynb\" target=\"_blank\">GitHub</a> · <a 
href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/06_multihead_attention_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 9 | Causal Self-Attention | 🔴 Hard | [Open](09_causal_attention.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/09_causal_attention.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/09_causal_attention.ipynb\" target=\"_blank\">Colab</a> | [Open](09_causal_attention_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/09_causal_attention_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/09_causal_attention_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 10 | Grouped Query Attention | 🔴 Hard | [Open](10_gqa.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/10_gqa.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/10_gqa.ipynb\" target=\"_blank\">Colab</a> | [Open](10_gqa_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/10_gqa_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/10_gqa_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 11 | Sliding Window Attention | 🔴 Hard | [Open](11_sliding_window.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/11_sliding_window.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/11_sliding_window.ipynb\" target=\"_blank\">Colab</a> | [Open](11_sliding_window_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/11_sliding_window_solution.ipynb\" target=\"_blank\">GitHub</a> · <a 
href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/11_sliding_window_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 12 | Linear Attention | 🔴 Hard | [Open](12_linear_attention.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/12_linear_attention.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/12_linear_attention.ipynb\" target=\"_blank\">Colab</a> | [Open](12_linear_attention_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/12_linear_attention_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/12_linear_attention_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 14 | KV Cache Attention | 🔴 Hard | [Open](14_kv_cache.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/14_kv_cache.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/14_kv_cache.ipynb\" target=\"_blank\">Colab</a> | [Open](14_kv_cache_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/14_kv_cache_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/14_kv_cache_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 24 | RoPE | 🔴 Hard | [Open](24_rope.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/24_rope.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/24_rope.ipynb\" target=\"_blank\">Colab</a> | [Open](24_rope_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/24_rope_solution.ipynb\" target=\"_blank\">GitHub</a> · <a 
href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/24_rope_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 25 | Flash Attention | 🔴 Hard | [Open](25_flash_attention.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/25_flash_attention.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/25_flash_attention.ipynb\" target=\"_blank\">Colab</a> | [Open](25_flash_attention_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/25_flash_attention_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/25_flash_attention_solution.ipynb\" target=\"_blank\">Colab</a> |\n\n### 🏗️ Architecture & Adaptation\n\n| # | Problem | Difficulty | Template | Solution |\n|:---:|---------|:----------:|:--------:|:--------:|\n| 26 | LoRA | 🟡 Medium | [Open](26_lora.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/26_lora.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/26_lora.ipynb\" target=\"_blank\">Colab</a> | [Open](26_lora_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/26_lora_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/26_lora_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 27 | ViT Patch Embedding | 🟡 Medium | [Open](27_vit_patch.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/27_vit_patch.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/27_vit_patch.ipynb\" target=\"_blank\">Colab</a> | [Open](27_vit_patch_solution.ipynb) · <a 
href=\"https://github.com/duoan/TorchCode/blob/master/solutions/27_vit_patch_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/27_vit_patch_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 13 | GPT-2 Block | 🔴 Hard | [Open](13_gpt2_block.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/13_gpt2_block.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/13_gpt2_block.ipynb\" target=\"_blank\">Colab</a> | [Open](13_gpt2_block_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/13_gpt2_block_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/13_gpt2_block_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 28 | Mixture of Experts | 🔴 Hard | [Open](28_moe.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/28_moe.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/28_moe.ipynb\" target=\"_blank\">Colab</a> | [Open](28_moe_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/28_moe_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/28_moe_solution.ipynb\" target=\"_blank\">Colab</a> |\n\n### ⚙️ Training & Optimization\n\n| # | Problem | Difficulty | Template | Solution |\n|:---:|---------|:----------:|:--------:|:--------:|\n| 29 | Adam Optimizer | 🟡 Medium | [Open](29_adam.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/29_adam.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/29_adam.ipynb\" target=\"_blank\">Colab</a> | 
[Open](29_adam_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/29_adam_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/29_adam_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 30 | Cosine LR Scheduler | 🟡 Medium | [Open](30_cosine_lr.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/30_cosine_lr.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/30_cosine_lr.ipynb\" target=\"_blank\">Colab</a> | [Open](30_cosine_lr_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/30_cosine_lr_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/30_cosine_lr_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 40 | Linear Regression | 🟡 Medium | [Open](40_linear_regression.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/40_linear_regression.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/40_linear_regression.ipynb\" target=\"_blank\">Colab</a> | [Open](40_linear_regression_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/40_linear_regression_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/40_linear_regression_solution.ipynb\" target=\"_blank\">Colab</a> |\n\n### 🎯 Inference & Decoding\n\n| # | Problem | Difficulty | Template | Solution |\n|:---:|---------|:----------:|:--------:|:--------:|\n| 32 | Top-k / Top-p Sampling | 🟡 Medium | [Open](32_topk_sampling.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/32_topk_sampling.ipynb\" target=\"_blank\">GitHub</a> · <a 
href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/32_topk_sampling.ipynb\" target=\"_blank\">Colab</a> | [Open](32_topk_sampling_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/32_topk_sampling_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/32_topk_sampling_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 33 | Beam Search | 🟡 Medium | [Open](33_beam_search.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/33_beam_search.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/33_beam_search.ipynb\" target=\"_blank\">Colab</a> | [Open](33_beam_search_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/33_beam_search_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/33_beam_search_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 34 | Speculative Decoding | 🔴 Hard | [Open](34_speculative_decoding.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/34_speculative_decoding.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/34_speculative_decoding.ipynb\" target=\"_blank\">Colab</a> | [Open](34_speculative_decoding_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/34_speculative_decoding_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/34_speculative_decoding_solution.ipynb\" target=\"_blank\">Colab</a> |\n\n### 🔬 Advanced\n\n| # | Problem | Difficulty | Template | Solution |\n|:---:|---------|:----------:|:--------:|:--------:|\n| 35 | BPE Tokenizer | 🔴 Hard | 
[Open](35_bpe.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/35_bpe.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/35_bpe.ipynb\" target=\"_blank\">Colab</a> | [Open](35_bpe_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/35_bpe_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/35_bpe_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 36 | INT8 Quantization | 🔴 Hard | [Open](36_int8_quantization.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/36_int8_quantization.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/36_int8_quantization.ipynb\" target=\"_blank\">Colab</a> | [Open](36_int8_quantization_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/36_int8_quantization_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/36_int8_quantization_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 37 | DPO Loss | 🔴 Hard | [Open](37_dpo_loss.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/37_dpo_loss.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/37_dpo_loss.ipynb\" target=\"_blank\">Colab</a> | [Open](37_dpo_loss_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/37_dpo_loss_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/37_dpo_loss_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 38 | GRPO Loss | 🔴 Hard | [Open](38_grpo_loss.ipynb) · <a 
href=\"https://github.com/duoan/TorchCode/blob/master/templates/38_grpo_loss.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/38_grpo_loss.ipynb\" target=\"_blank\">Colab</a> | [Open](38_grpo_loss_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/38_grpo_loss_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/38_grpo_loss_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 39 | PPO Loss | 🔴 Hard | [Open](39_ppo_loss.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/39_ppo_loss.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/39_ppo_loss.ipynb\" target=\"_blank\">Colab</a> | [Open](39_ppo_loss_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/39_ppo_loss_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/39_ppo_loss_solution.ipynb\" target=\"_blank\">Colab</a> |\n\n## Useful Commands\n\n```python\nfrom torch_judge import check, hint, status\n\nstatus() # Progress dashboard\ncheck(\"relu\") # Judge your implementation\nhint(\"causal_attention\") # Get a hint\n```"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
]
|
| 53 |
}
|
| 54 |
],
|
|
@@ -65,4 +161,4 @@
|
|
| 65 |
},
|
| 66 |
"nbformat": 4,
|
| 67 |
"nbformat_minor": 4
|
| 68 |
-
}
|
|
|
|
| 31 |
"\n",
|
| 32 |
"> 💡 Every notebook also has a **Colab** toolbar button and an **Open in Colab** badge — use them to run problems in Google Colab with zero setup.\n",
|
| 33 |
"\n",
|
| 34 |
+
"## Quick Start\n",
|
| 35 |
+
"\n",
|
| 36 |
+
"📖 **Reference solutions in Colab**: [Open in Colab](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/01_relu_solution.ipynb) — Start with ReLU. Or use the **Colab** links in the table below for each solution."
|
| 37 |
]
|
| 38 |
},
|
| 39 |
{
|
| 40 |
"cell_type": "code",
|
| 41 |
+
"execution_count": null,
|
| 42 |
"metadata": {},
|
| 43 |
+
"outputs": [],
|
| 44 |
+
"source": [
|
| 45 |
+
"# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
|
| 46 |
+
"try:\n",
|
| 47 |
+
" import google.colab\n",
|
| 48 |
+
" get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
|
| 49 |
+
"except ImportError:\n",
|
| 50 |
+
" pass\n"
|
| 51 |
+
]
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"cell_type": "code",
|
| 55 |
+
"execution_count": null,
|
| 56 |
+
"metadata": {},
|
| 57 |
+
"outputs": [],
|
| 58 |
"source": [
|
| 59 |
"from torch_judge import status\n",
|
| 60 |
"status()"
|
| 61 |
+
]
|
|
|
|
|
|
|
| 62 |
},
|
| 63 |
{
|
| 64 |
"cell_type": "markdown",
|
| 65 |
"metadata": {},
|
| 66 |
"source": [
|
| 67 |
+
"## Problem List (40 problems)\n",
|
| 68 |
+
"\n",
|
| 69 |
+
"### 🧱 Fundamentals — \"Implement X from scratch\"\n",
|
| 70 |
+
"\n",
|
| 71 |
+
"| # | Problem | Difficulty | Template | Solution |\n",
|
| 72 |
+
"|:---:|---------|:----------:|:--------:|:--------:|\n",
|
| 73 |
+
"| 1 | ReLU | 🟢 Easy | [Open](01_relu.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/01_relu.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/01_relu.ipynb\" target=\"_blank\">Colab</a> | [Open](01_relu_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/01_relu_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/01_relu_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
|
| 74 |
+
"| 2 | Softmax | 🟢 Easy | [Open](02_softmax.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/02_softmax.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/02_softmax.ipynb\" target=\"_blank\">Colab</a> | [Open](02_softmax_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/02_softmax_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/02_softmax_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
|
| 75 |
+
"| 16 | Cross-Entropy Loss | 🟢 Easy | [Open](16_cross_entropy.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/16_cross_entropy.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/16_cross_entropy.ipynb\" target=\"_blank\">Colab</a> | [Open](16_cross_entropy_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/16_cross_entropy_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/16_cross_entropy_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
|
| 76 |
+
"| 17 | Dropout | 🟢 Easy | [Open](17_dropout.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/17_dropout.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/17_dropout.ipynb\" target=\"_blank\">Colab</a> | [Open](17_dropout_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/17_dropout_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/17_dropout_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
|
| 77 |
+
"| 18 | Embedding | 🟢 Easy | [Open](18_embedding.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/18_embedding.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/18_embedding.ipynb\" target=\"_blank\">Colab</a> | [Open](18_embedding_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/18_embedding_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/18_embedding_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
|
| 78 |
+
"| 19 | GELU | 🟢 Easy | [Open](19_gelu.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/19_gelu.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/19_gelu.ipynb\" target=\"_blank\">Colab</a> | [Open](19_gelu_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/19_gelu_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/19_gelu_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
|
| 79 |
+
"| 20 | Kaiming Init | 🟢 Easy | [Open](20_weight_init.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/20_weight_init.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/20_weight_init.ipynb\" target=\"_blank\">Colab</a> | [Open](20_weight_init_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/20_weight_init_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/20_weight_init_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
|
| 80 |
+
"| 21 | Gradient Clipping | 🟢 Easy | [Open](21_gradient_clipping.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/21_gradient_clipping.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/21_gradient_clipping.ipynb\" target=\"_blank\">Colab</a> | [Open](21_gradient_clipping_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/21_gradient_clipping_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/21_gradient_clipping_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
|
| 81 |
+
"| 31 | Gradient Accumulation | 🟢 Easy | [Open](31_gradient_accumulation.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/31_gradient_accumulation.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/31_gradient_accumulation.ipynb\" target=\"_blank\">Colab</a> | [Open](31_gradient_accumulation_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/31_gradient_accumulation_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/31_gradient_accumulation_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
|
| 82 |
+
"| 3 | Linear Layer | 🟡 Medium | [Open](03_linear.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/03_linear.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/03_linear.ipynb\" target=\"_blank\">Colab</a> | [Open](03_linear_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/03_linear_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/03_linear_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
|
| 83 |
+
"| 4 | LayerNorm | 🟡 Medium | [Open](04_layernorm.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/04_layernorm.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/04_layernorm.ipynb\" target=\"_blank\">Colab</a> | [Open](04_layernorm_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/04_layernorm_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/04_layernorm_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
|
| 84 |
+
"| 7 | BatchNorm | 🟡 Medium | [Open](07_batchnorm.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/07_batchnorm.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/07_batchnorm.ipynb\" target=\"_blank\">Colab</a> | [Open](07_batchnorm_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/07_batchnorm_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/07_batchnorm_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
|
| 85 |
+
"| 8 | RMSNorm | 🟡 Medium | [Open](08_rmsnorm.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/08_rmsnorm.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/08_rmsnorm.ipynb\" target=\"_blank\">Colab</a> | [Open](08_rmsnorm_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/08_rmsnorm_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/08_rmsnorm_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
|
| 86 |
+
"| 15 | SwiGLU MLP | 🟡 Medium | [Open](15_mlp.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/15_mlp.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/15_mlp.ipynb\" target=\"_blank\">Colab</a> | [Open](15_mlp_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/15_mlp_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/15_mlp_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
|
| 87 |
+
"| 22 | Conv2d | 🟡 Medium | [Open](22_conv2d.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/22_conv2d.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/22_conv2d.ipynb\" target=\"_blank\">Colab</a> | [Open](22_conv2d_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/22_conv2d_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/22_conv2d_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
|
| 88 |
+
"\n",
|
| 89 |
+
"### 🧠 Attention Mechanisms\n",
|
| 90 |
+
"\n",
|
| 91 |
+
"| # | Problem | Difficulty | Template | Solution |\n",
|
| 92 |
+
"|:---:|---------|:----------:|:--------:|:--------:|\n",
|
| 93 |
+
"| 23 | Cross-Attention | 🟡 Medium | [Open](23_cross_attention.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/23_cross_attention.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/23_cross_attention.ipynb\" target=\"_blank\">Colab</a> | [Open](23_cross_attention_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/23_cross_attention_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/23_cross_attention_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
|
| 94 |
+
"| 5 | Scaled Dot-Product Attention | 🔴 Hard | [Open](05_attention.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/05_attention.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/05_attention.ipynb\" target=\"_blank\">Colab</a> | [Open](05_attention_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/05_attention_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/05_attention_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
|
| 95 |
+
"| 6 | Multi-Head Attention | 🔴 Hard | [Open](06_multihead_attention.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/06_multihead_attention.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/06_multihead_attention.ipynb\" target=\"_blank\">Colab</a> | [Open](06_multihead_attention_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/06_multihead_attention_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/06_multihead_attention_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
|
| 96 |
+
"| 9 | Causal Self-Attention | 🔴 Hard | [Open](09_causal_attention.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/09_causal_attention.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/09_causal_attention.ipynb\" target=\"_blank\">Colab</a> | [Open](09_causal_attention_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/09_causal_attention_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/09_causal_attention_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
|
| 97 |
+
"| 10 | Grouped Query Attention | 🔴 Hard | [Open](10_gqa.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/10_gqa.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/10_gqa.ipynb\" target=\"_blank\">Colab</a> | [Open](10_gqa_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/10_gqa_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/10_gqa_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
|
| 98 |
+
"| 11 | Sliding Window Attention | 🔴 Hard | [Open](11_sliding_window.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/11_sliding_window.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/11_sliding_window.ipynb\" target=\"_blank\">Colab</a> | [Open](11_sliding_window_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/11_sliding_window_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/11_sliding_window_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
|
| 99 |
+
"| 12 | Linear Attention | 🔴 Hard | [Open](12_linear_attention.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/12_linear_attention.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/12_linear_attention.ipynb\" target=\"_blank\">Colab</a> | [Open](12_linear_attention_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/12_linear_attention_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/12_linear_attention_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
|
| 100 |
+
"| 14 | KV Cache Attention | 🔴 Hard | [Open](14_kv_cache.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/14_kv_cache.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/14_kv_cache.ipynb\" target=\"_blank\">Colab</a> | [Open](14_kv_cache_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/14_kv_cache_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/14_kv_cache_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
|
| 101 |
+
"| 24 | RoPE | 🔴 Hard | [Open](24_rope.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/24_rope.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/24_rope.ipynb\" target=\"_blank\">Colab</a> | [Open](24_rope_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/24_rope_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/24_rope_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
|
| 102 |
+
"| 25 | Flash Attention | 🔴 Hard | [Open](25_flash_attention.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/25_flash_attention.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/25_flash_attention.ipynb\" target=\"_blank\">Colab</a> | [Open](25_flash_attention_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/25_flash_attention_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/25_flash_attention_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
|
| 103 |
+
"\n",
|
| 104 |
+
"### 🏗️ Architecture & Adaptation\n",
|
| 105 |
+
"\n",
|
| 106 |
+
"| # | Problem | Difficulty | Template | Solution |\n",
|
| 107 |
+
"|:---:|---------|:----------:|:--------:|:--------:|\n",
|
| 108 |
+
"| 26 | LoRA | 🟡 Medium | [Open](26_lora.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/26_lora.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/26_lora.ipynb\" target=\"_blank\">Colab</a> | [Open](26_lora_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/26_lora_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/26_lora_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
|
| 109 |
+
"| 27 | ViT Patch Embedding | 🟡 Medium | [Open](27_vit_patch.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/27_vit_patch.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/27_vit_patch.ipynb\" target=\"_blank\">Colab</a> | [Open](27_vit_patch_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/27_vit_patch_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/27_vit_patch_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
|
| 110 |
+
"| 13 | GPT-2 Block | 🔴 Hard | [Open](13_gpt2_block.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/13_gpt2_block.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/13_gpt2_block.ipynb\" target=\"_blank\">Colab</a> | [Open](13_gpt2_block_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/13_gpt2_block_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/13_gpt2_block_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
|
| 111 |
+
"| 28 | Mixture of Experts | 🔴 Hard | [Open](28_moe.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/28_moe.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/28_moe.ipynb\" target=\"_blank\">Colab</a> | [Open](28_moe_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/28_moe_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/28_moe_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
|
| 112 |
+
"\n",
|
| 113 |
+
"### ⚙️ Training & Optimization\n",
|
| 114 |
+
"\n",
|
| 115 |
+
"| # | Problem | Difficulty | Template | Solution |\n",
|
| 116 |
+
"|:---:|---------|:----------:|:--------:|:--------:|\n",
|
| 117 |
+
"| 29 | Adam Optimizer | 🟡 Medium | [Open](29_adam.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/29_adam.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/29_adam.ipynb\" target=\"_blank\">Colab</a> | [Open](29_adam_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/29_adam_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/29_adam_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
|
| 118 |
+
"| 30 | Cosine LR Scheduler | 🟡 Medium | [Open](30_cosine_lr.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/30_cosine_lr.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/30_cosine_lr.ipynb\" target=\"_blank\">Colab</a> | [Open](30_cosine_lr_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/30_cosine_lr_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/30_cosine_lr_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
|
| 119 |
+
"| 40 | Linear Regression | 🟡 Medium | [Open](40_linear_regression.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/40_linear_regression.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/40_linear_regression.ipynb\" target=\"_blank\">Colab</a> | [Open](40_linear_regression_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/40_linear_regression_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/40_linear_regression_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
|
| 120 |
+
"\n",
|
| 121 |
+
"### 🎯 Inference & Decoding\n",
|
| 122 |
+
"\n",
|
| 123 |
+
"| # | Problem | Difficulty | Template | Solution |\n",
|
| 124 |
+
"|:---:|---------|:----------:|:--------:|:--------:|\n",
|
| 125 |
+
"| 32 | Top-k / Top-p Sampling | 🟡 Medium | [Open](32_topk_sampling.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/32_topk_sampling.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/32_topk_sampling.ipynb\" target=\"_blank\">Colab</a> | [Open](32_topk_sampling_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/32_topk_sampling_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/32_topk_sampling_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
|
| 126 |
+
"| 33 | Beam Search | 🟡 Medium | [Open](33_beam_search.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/33_beam_search.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/33_beam_search.ipynb\" target=\"_blank\">Colab</a> | [Open](33_beam_search_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/33_beam_search_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/33_beam_search_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
|
| 127 |
+
"| 34 | Speculative Decoding | 🔴 Hard | [Open](34_speculative_decoding.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/34_speculative_decoding.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/34_speculative_decoding.ipynb\" target=\"_blank\">Colab</a> | [Open](34_speculative_decoding_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/34_speculative_decoding_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/34_speculative_decoding_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
|
| 128 |
+
"\n",
|
| 129 |
+
"### 🔬 Advanced\n",
|
| 130 |
+
"\n",
|
| 131 |
+
"| # | Problem | Difficulty | Template | Solution |\n",
|
| 132 |
+
"|:---:|---------|:----------:|:--------:|:--------:|\n",
|
| 133 |
+
"| 35 | BPE Tokenizer | 🔴 Hard | [Open](35_bpe.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/35_bpe.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/35_bpe.ipynb\" target=\"_blank\">Colab</a> | [Open](35_bpe_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/35_bpe_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/35_bpe_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
|
| 134 |
+
"| 36 | INT8 Quantization | 🔴 Hard | [Open](36_int8_quantization.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/36_int8_quantization.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/36_int8_quantization.ipynb\" target=\"_blank\">Colab</a> | [Open](36_int8_quantization_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/36_int8_quantization_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/36_int8_quantization_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
|
| 135 |
+
"| 37 | DPO Loss | 🔴 Hard | [Open](37_dpo_loss.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/37_dpo_loss.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/37_dpo_loss.ipynb\" target=\"_blank\">Colab</a> | [Open](37_dpo_loss_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/37_dpo_loss_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/37_dpo_loss_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
|
| 136 |
+
"| 38 | GRPO Loss | 🔴 Hard | [Open](38_grpo_loss.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/38_grpo_loss.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/38_grpo_loss.ipynb\" target=\"_blank\">Colab</a> | [Open](38_grpo_loss_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/38_grpo_loss_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/38_grpo_loss_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
|
| 137 |
+
"| 39 | PPO Loss | 🔴 Hard | [Open](39_ppo_loss.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/39_ppo_loss.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/39_ppo_loss.ipynb\" target=\"_blank\">Colab</a> | [Open](39_ppo_loss_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/39_ppo_loss_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/39_ppo_loss_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
|
| 138 |
+
"\n",
|
| 139 |
+
"## Useful Commands\n",
|
| 140 |
+
"\n",
|
| 141 |
+
"```python\n",
|
| 142 |
+
"from torch_judge import check, hint, status\n",
|
| 143 |
+
"\n",
|
| 144 |
+
"status() # Progress dashboard\n",
|
| 145 |
+
"check(\"relu\") # Judge your implementation\n",
|
| 146 |
+
"hint(\"causal_attention\") # Get a hint\n",
|
| 147 |
+
"```"
|
| 148 |
]
|
| 149 |
}
|
| 150 |
],
|
|
|
|
| 161 |
},
|
| 162 |
"nbformat": 4,
|
| 163 |
"nbformat_minor": 4
|
| 164 |
+
}
|
templates/01_relu.ipynb
CHANGED
|
@@ -30,6 +30,20 @@
|
|
| 30 |
],
|
| 31 |
"outputs": []
|
| 32 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
{
|
| 34 |
"cell_type": "code",
|
| 35 |
"metadata": {},
|
|
|
|
| 30 |
],
|
| 31 |
"outputs": []
|
| 32 |
},
|
| 33 |
+
{
|
| 34 |
+
"cell_type": "code",
|
| 35 |
+
"metadata": {},
|
| 36 |
+
"source": [
|
| 37 |
+
"# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
|
| 38 |
+
"try:\n",
|
| 39 |
+
" import google.colab\n",
|
| 40 |
+
" get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
|
| 41 |
+
"except ImportError:\n",
|
| 42 |
+
" pass\n"
|
| 43 |
+
],
|
| 44 |
+
"outputs": [],
|
| 45 |
+
"execution_count": null
|
| 46 |
+
},
|
| 47 |
{
|
| 48 |
"cell_type": "code",
|
| 49 |
"metadata": {},
|
templates/02_softmax.ipynb
CHANGED
|
@@ -30,6 +30,20 @@
|
|
| 30 |
],
|
| 31 |
"outputs": []
|
| 32 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
{
|
| 34 |
"cell_type": "code",
|
| 35 |
"metadata": {},
|
|
|
|
| 30 |
],
|
| 31 |
"outputs": []
|
| 32 |
},
|
| 33 |
+
{
|
| 34 |
+
"cell_type": "code",
|
| 35 |
+
"metadata": {},
|
| 36 |
+
"source": [
|
| 37 |
+
"# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
|
| 38 |
+
"try:\n",
|
| 39 |
+
" import google.colab\n",
|
| 40 |
+
" get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
|
| 41 |
+
"except ImportError:\n",
|
| 42 |
+
" pass\n"
|
| 43 |
+
],
|
| 44 |
+
"outputs": [],
|
| 45 |
+
"execution_count": null
|
| 46 |
+
},
|
| 47 |
{
|
| 48 |
"cell_type": "code",
|
| 49 |
"metadata": {},
|
templates/03_linear.ipynb
CHANGED
|
@@ -26,6 +26,20 @@
|
|
| 26 |
],
|
| 27 |
"outputs": []
|
| 28 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
{
|
| 30 |
"cell_type": "code",
|
| 31 |
"metadata": {},
|
|
|
|
| 26 |
],
|
| 27 |
"outputs": []
|
| 28 |
},
|
| 29 |
+
{
|
| 30 |
+
"cell_type": "code",
|
| 31 |
+
"metadata": {},
|
| 32 |
+
"source": [
|
| 33 |
+
"# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
|
| 34 |
+
"try:\n",
|
| 35 |
+
" import google.colab\n",
|
| 36 |
+
" get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
|
| 37 |
+
"except ImportError:\n",
|
| 38 |
+
" pass\n"
|
| 39 |
+
],
|
| 40 |
+
"outputs": [],
|
| 41 |
+
"execution_count": null
|
| 42 |
+
},
|
| 43 |
{
|
| 44 |
"cell_type": "code",
|
| 45 |
"metadata": {},
|
templates/04_layernorm.ipynb
CHANGED
|
@@ -32,6 +32,20 @@
|
|
| 32 |
],
|
| 33 |
"outputs": []
|
| 34 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
{
|
| 36 |
"cell_type": "code",
|
| 37 |
"metadata": {},
|
|
|
|
| 32 |
],
|
| 33 |
"outputs": []
|
| 34 |
},
|
| 35 |
+
{
|
| 36 |
+
"cell_type": "code",
|
| 37 |
+
"metadata": {},
|
| 38 |
+
"source": [
|
| 39 |
+
"# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
|
| 40 |
+
"try:\n",
|
| 41 |
+
" import google.colab\n",
|
| 42 |
+
" get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
|
| 43 |
+
"except ImportError:\n",
|
| 44 |
+
" pass\n"
|
| 45 |
+
],
|
| 46 |
+
"outputs": [],
|
| 47 |
+
"execution_count": null
|
| 48 |
+
},
|
| 49 |
{
|
| 50 |
"cell_type": "code",
|
| 51 |
"metadata": {},
|
templates/05_attention.ipynb
CHANGED
|
@@ -29,6 +29,20 @@
|
|
| 29 |
"- Must handle cross-attention (seq_q ≠ seq_k)"
|
| 30 |
]
|
| 31 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
{
|
| 33 |
"cell_type": "code",
|
| 34 |
"execution_count": null,
|
|
|
|
| 29 |
"- Must handle cross-attention (seq_q ≠ seq_k)"
|
| 30 |
]
|
| 31 |
},
|
| 32 |
+
{
|
| 33 |
+
"cell_type": "code",
|
| 34 |
+
"metadata": {},
|
| 35 |
+
"source": [
|
| 36 |
+
"# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
|
| 37 |
+
"try:\n",
|
| 38 |
+
" import google.colab\n",
|
| 39 |
+
" get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
|
| 40 |
+
"except ImportError:\n",
|
| 41 |
+
" pass\n"
|
| 42 |
+
],
|
| 43 |
+
"outputs": [],
|
| 44 |
+
"execution_count": null
|
| 45 |
+
},
|
| 46 |
{
|
| 47 |
"cell_type": "code",
|
| 48 |
"execution_count": null,
|
templates/06_multihead_attention.ipynb
CHANGED
|
@@ -37,6 +37,21 @@
|
|
| 37 |
"5. Output projection: `self.W_o(concat)`"
|
| 38 |
]
|
| 39 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
{
|
| 41 |
"cell_type": "code",
|
| 42 |
"execution_count": null,
|
|
|
|
| 37 |
"5. Output projection: `self.W_o(concat)`"
|
| 38 |
]
|
| 39 |
},
|
| 40 |
+
{
|
| 41 |
+
"cell_type": "code",
|
| 42 |
+
"execution_count": null,
|
| 43 |
+
"id": "02a059c4",
|
| 44 |
+
"metadata": {},
|
| 45 |
+
"outputs": [],
|
| 46 |
+
"source": [
|
| 47 |
+
"# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
|
| 48 |
+
"try:\n",
|
| 49 |
+
" import google.colab\n",
|
| 50 |
+
" get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
|
| 51 |
+
"except ImportError:\n",
|
| 52 |
+
" pass\n"
|
| 53 |
+
]
|
| 54 |
+
},
|
| 55 |
{
|
| 56 |
"cell_type": "code",
|
| 57 |
"execution_count": null,
|
templates/07_batchnorm.ipynb
CHANGED
|
@@ -1,131 +1,145 @@
|
|
| 1 |
{
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
},
|
| 46 |
-
{
|
| 47 |
-
"cell_type": "code",
|
| 48 |
-
"execution_count": null,
|
| 49 |
-
"metadata": {},
|
| 50 |
-
"outputs": [],
|
| 51 |
-
"source": [
|
| 52 |
-
"import torch"
|
| 53 |
-
]
|
| 54 |
-
},
|
| 55 |
-
{
|
| 56 |
-
"cell_type": "code",
|
| 57 |
-
"execution_count": null,
|
| 58 |
-
"id": "d946ca79",
|
| 59 |
-
"metadata": {},
|
| 60 |
-
"outputs": [],
|
| 61 |
-
"source": [
|
| 62 |
-
"# ✏️ YOUR IMPLEMENTATION HERE\n",
|
| 63 |
-
"\n",
|
| 64 |
-
"def my_batch_norm(\n",
|
| 65 |
-
" x,\n",
|
| 66 |
-
" gamma,\n",
|
| 67 |
-
" beta,\n",
|
| 68 |
-
" running_mean,\n",
|
| 69 |
-
" running_var,\n",
|
| 70 |
-
" eps=1e-5,\n",
|
| 71 |
-
" momentum=0.1,\n",
|
| 72 |
-
" training=True,\n",
|
| 73 |
-
"):\n",
|
| 74 |
-
" pass # Replace this"
|
| 75 |
-
]
|
| 76 |
-
},
|
| 77 |
-
{
|
| 78 |
-
"cell_type": "code",
|
| 79 |
-
"execution_count": null,
|
| 80 |
-
"id": "26b93e71",
|
| 81 |
-
"metadata": {},
|
| 82 |
-
"outputs": [],
|
| 83 |
-
"source": [
|
| 84 |
-
"# 🧪 Debug\n",
|
| 85 |
-
"x = torch.randn(8, 4)\n",
|
| 86 |
-
"gamma = torch.ones(4)\n",
|
| 87 |
-
"beta = torch.zeros(4)\n",
|
| 88 |
-
"\n",
|
| 89 |
-
"# Running stats typically live on the same device and shape as features\n",
|
| 90 |
-
"running_mean = torch.zeros(4)\n",
|
| 91 |
-
"running_var = torch.ones(4)\n",
|
| 92 |
-
"\n",
|
| 93 |
-
"# Training mode: uses batch stats and updates running_mean / running_var\n",
|
| 94 |
-
"out_train = my_batch_norm(x, gamma, beta, running_mean, running_var, training=True)\n",
|
| 95 |
-
"print(\"[Train] Output shape:\", out_train.shape)\n",
|
| 96 |
-
"print(\"[Train] Column means:\", out_train.mean(dim=0)) # should be ~0\n",
|
| 97 |
-
"print(\"[Train] Column stds: \", out_train.std(dim=0)) # should be ~1\n",
|
| 98 |
-
"print(\"Updated running_mean:\", running_mean)\n",
|
| 99 |
-
"print(\"Updated running_var:\", running_var)\n",
|
| 100 |
-
"\n",
|
| 101 |
-
"# Inference mode: uses running_mean / running_var only\n",
|
| 102 |
-
"out_eval = my_batch_norm(x, gamma, beta, running_mean, running_var, training=False)\n",
|
| 103 |
-
"print(\"[Eval] Output shape:\", out_eval.shape)"
|
| 104 |
-
]
|
| 105 |
-
},
|
| 106 |
-
{
|
| 107 |
-
"cell_type": "code",
|
| 108 |
-
"execution_count": null,
|
| 109 |
-
"metadata": {},
|
| 110 |
-
"outputs": [],
|
| 111 |
-
"source": [
|
| 112 |
-
"# ✅ SUBMIT\n",
|
| 113 |
-
"from torch_judge import check\n",
|
| 114 |
-
"check(\"batchnorm\")"
|
| 115 |
-
]
|
| 116 |
-
}
|
| 117 |
-
],
|
| 118 |
-
"metadata": {
|
| 119 |
-
"kernelspec": {
|
| 120 |
-
"display_name": "Python 3",
|
| 121 |
-
"language": "python",
|
| 122 |
-
"name": "python3"
|
| 123 |
-
},
|
| 124 |
-
"language_info": {
|
| 125 |
-
"name": "python",
|
| 126 |
-
"version": "3.11.0"
|
| 127 |
-
}
|
| 128 |
},
|
| 129 |
-
|
| 130 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 131 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "markdown",
|
| 5 |
+
"id": "89fd15cb",
|
| 6 |
+
"metadata": {},
|
| 7 |
+
"source": [
|
| 8 |
+
"[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/07_batchnorm.ipynb)\n",
|
| 9 |
+
"\n",
|
| 10 |
+
"# 🟡 Medium: Implement BatchNorm\n",
|
| 11 |
+
"\n",
|
| 12 |
+
"Implement **Batch Normalization** with both **training** and **inference** behavior.\n",
|
| 13 |
+
"\n",
|
| 14 |
+
"In training mode, use **batch statistics** and update running estimates:\n",
|
| 15 |
+
"\n",
|
| 16 |
+
"$$\\text{BN}(x) = \\gamma \\cdot \\frac{x - \\mu_B}{\\sqrt{\\sigma_B^2 + \\epsilon}} + \\beta$$\n",
|
| 17 |
+
"\n",
|
| 18 |
+
"where $\\mu_B$ and $\\sigma_B^2$ are the mean and variance computed **across the batch** (dim=0).\n",
|
| 19 |
+
"\n",
|
| 20 |
+
"In inference mode, use the provided **running mean/var** instead of current batch stats.\n",
|
| 21 |
+
"\n",
|
| 22 |
+
"### Signature\n",
|
| 23 |
+
"```python\n",
|
| 24 |
+
"def my_batch_norm(\n",
|
| 25 |
+
" x: torch.Tensor,\n",
|
| 26 |
+
" gamma: torch.Tensor,\n",
|
| 27 |
+
" beta: torch.Tensor,\n",
|
| 28 |
+
" running_mean: torch.Tensor,\n",
|
| 29 |
+
" running_var: torch.Tensor,\n",
|
| 30 |
+
" eps: float = 1e-5,\n",
|
| 31 |
+
" momentum: float = 0.1,\n",
|
| 32 |
+
" training: bool = True,\n",
|
| 33 |
+
") -> torch.Tensor:\n",
|
| 34 |
+
" # x: (N, D) — normalize each feature across all samples in the batch\n",
|
| 35 |
+
" # running_mean, running_var: updated in-place during training; used as-is during inference\n",
|
| 36 |
+
"```\n",
|
| 37 |
+
"\n",
|
| 38 |
+
"### Rules\n",
|
| 39 |
+
"- Do **NOT** use `F.batch_norm`, `nn.BatchNorm1d`, etc.\n",
|
| 40 |
+
"- Compute batch mean and variance over `dim=0` with `unbiased=False`\n",
|
| 41 |
+
"- Update running stats like PyTorch: `running = (1 - momentum) * running + momentum * batch_stat`\n",
|
| 42 |
+
"- Use `running_mean` / `running_var` for inference when `training=False`\n",
|
| 43 |
+
"- Must support autograd w.r.t. `x`, `gamma`, `beta` (running statistics should be treated as buffers, not as parameters that require gradients)"
|
| 44 |
+
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
},
|
| 46 |
+
{
|
| 47 |
+
"cell_type": "code",
|
| 48 |
+
"metadata": {},
|
| 49 |
+
"source": [
|
| 50 |
+
"# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
|
| 51 |
+
"try:\n",
|
| 52 |
+
" import google.colab\n",
|
| 53 |
+
" get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
|
| 54 |
+
"except ImportError:\n",
|
| 55 |
+
" pass\n"
|
| 56 |
+
],
|
| 57 |
+
"outputs": [],
|
| 58 |
+
"execution_count": null
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"cell_type": "code",
|
| 62 |
+
"execution_count": null,
|
| 63 |
+
"metadata": {},
|
| 64 |
+
"outputs": [],
|
| 65 |
+
"source": [
|
| 66 |
+
"import torch"
|
| 67 |
+
]
|
| 68 |
+
},
|
| 69 |
+
{
|
| 70 |
+
"cell_type": "code",
|
| 71 |
+
"execution_count": null,
|
| 72 |
+
"id": "d946ca79",
|
| 73 |
+
"metadata": {},
|
| 74 |
+
"outputs": [],
|
| 75 |
+
"source": [
|
| 76 |
+
"# ✏️ YOUR IMPLEMENTATION HERE\n",
|
| 77 |
+
"\n",
|
| 78 |
+
"def my_batch_norm(\n",
|
| 79 |
+
" x,\n",
|
| 80 |
+
" gamma,\n",
|
| 81 |
+
" beta,\n",
|
| 82 |
+
" running_mean,\n",
|
| 83 |
+
" running_var,\n",
|
| 84 |
+
" eps=1e-5,\n",
|
| 85 |
+
" momentum=0.1,\n",
|
| 86 |
+
" training=True,\n",
|
| 87 |
+
"):\n",
|
| 88 |
+
" pass # Replace this"
|
| 89 |
+
]
|
| 90 |
+
},
|
| 91 |
+
{
|
| 92 |
+
"cell_type": "code",
|
| 93 |
+
"execution_count": null,
|
| 94 |
+
"id": "26b93e71",
|
| 95 |
+
"metadata": {},
|
| 96 |
+
"outputs": [],
|
| 97 |
+
"source": [
|
| 98 |
+
"# 🧪 Debug\n",
|
| 99 |
+
"x = torch.randn(8, 4)\n",
|
| 100 |
+
"gamma = torch.ones(4)\n",
|
| 101 |
+
"beta = torch.zeros(4)\n",
|
| 102 |
+
"\n",
|
| 103 |
+
"# Running stats typically live on the same device and shape as features\n",
|
| 104 |
+
"running_mean = torch.zeros(4)\n",
|
| 105 |
+
"running_var = torch.ones(4)\n",
|
| 106 |
+
"\n",
|
| 107 |
+
"# Training mode: uses batch stats and updates running_mean / running_var\n",
|
| 108 |
+
"out_train = my_batch_norm(x, gamma, beta, running_mean, running_var, training=True)\n",
|
| 109 |
+
"print(\"[Train] Output shape:\", out_train.shape)\n",
|
| 110 |
+
"print(\"[Train] Column means:\", out_train.mean(dim=0)) # should be ~0\n",
|
| 111 |
+
"print(\"[Train] Column stds: \", out_train.std(dim=0)) # should be ~1\n",
|
| 112 |
+
"print(\"Updated running_mean:\", running_mean)\n",
|
| 113 |
+
"print(\"Updated running_var:\", running_var)\n",
|
| 114 |
+
"\n",
|
| 115 |
+
"# Inference mode: uses running_mean / running_var only\n",
|
| 116 |
+
"out_eval = my_batch_norm(x, gamma, beta, running_mean, running_var, training=False)\n",
|
| 117 |
+
"print(\"[Eval] Output shape:\", out_eval.shape)"
|
| 118 |
+
]
|
| 119 |
+
},
|
| 120 |
+
{
|
| 121 |
+
"cell_type": "code",
|
| 122 |
+
"execution_count": null,
|
| 123 |
+
"metadata": {},
|
| 124 |
+
"outputs": [],
|
| 125 |
+
"source": [
|
| 126 |
+
"# ✅ SUBMIT\n",
|
| 127 |
+
"from torch_judge import check\n",
|
| 128 |
+
"check(\"batchnorm\")"
|
| 129 |
+
]
|
| 130 |
+
}
|
| 131 |
+
],
|
| 132 |
+
"metadata": {
|
| 133 |
+
"kernelspec": {
|
| 134 |
+
"display_name": "Python 3",
|
| 135 |
+
"language": "python",
|
| 136 |
+
"name": "python3"
|
| 137 |
+
},
|
| 138 |
+
"language_info": {
|
| 139 |
+
"name": "python",
|
| 140 |
+
"version": "3.11.0"
|
| 141 |
+
}
|
| 142 |
+
},
|
| 143 |
+
"nbformat": 4,
|
| 144 |
+
"nbformat_minor": 5
|
| 145 |
}
|