An, Duo committed on
Commit
227eeac
·
1 Parent(s): bc9ddee

Enhance Colab badge functionality: Update script to add 'Open in Colab' badges to both template and solution notebooks. Modify badge generation to accommodate different notebook directories and ensure proper processing of both templates and solutions.

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. scripts/add_colab_badges.py +19 -11
  2. scripts/add_colab_torch_judge_install.py +79 -0
  3. solutions/01_relu_solution.ipynb +45 -29
  4. solutions/02_softmax_solution.ipynb +16 -1
  5. solutions/03_linear_solution.ipynb +16 -1
  6. solutions/04_layernorm_solution.ipynb +16 -1
  7. solutions/05_attention_solution.ipynb +16 -0
  8. solutions/06_multihead_attention_solution.ipynb +117 -102
  9. solutions/07_batchnorm_solution.ipynb +129 -114
  10. solutions/08_rmsnorm_solution.ipynb +16 -1
  11. solutions/09_causal_attention_solution.ipynb +16 -1
  12. solutions/10_gqa_solution.ipynb +16 -1
  13. solutions/11_sliding_window_solution.ipynb +16 -1
  14. solutions/12_linear_attention_solution.ipynb +16 -1
  15. solutions/13_gpt2_block_solution.ipynb +16 -1
  16. solutions/14_kv_cache_solution.ipynb +18 -3
  17. solutions/15_mlp_solution.ipynb +18 -3
  18. solutions/16_cross_entropy_solution.ipynb +16 -1
  19. solutions/17_dropout_solution.ipynb +16 -1
  20. solutions/18_embedding_solution.ipynb +16 -1
  21. solutions/19_gelu_solution.ipynb +16 -1
  22. solutions/20_weight_init_solution.ipynb +16 -1
  23. solutions/21_gradient_clipping_solution.ipynb +16 -1
  24. solutions/22_conv2d_solution.ipynb +16 -1
  25. solutions/23_cross_attention_solution.ipynb +16 -1
  26. solutions/24_rope_solution.ipynb +16 -1
  27. solutions/25_flash_attention_solution.ipynb +16 -1
  28. solutions/26_lora_solution.ipynb +16 -1
  29. solutions/27_vit_patch_solution.ipynb +16 -1
  30. solutions/28_moe_solution.ipynb +16 -1
  31. solutions/29_adam_solution.ipynb +16 -1
  32. solutions/30_cosine_lr_solution.ipynb +16 -1
  33. solutions/31_gradient_accumulation_solution.ipynb +16 -1
  34. solutions/32_topk_sampling_solution.ipynb +16 -1
  35. solutions/33_beam_search_solution.ipynb +16 -1
  36. solutions/34_speculative_decoding_solution.ipynb +16 -1
  37. solutions/35_bpe_solution.ipynb +16 -1
  38. solutions/36_int8_quantization_solution.ipynb +16 -1
  39. solutions/37_dpo_loss_solution.ipynb +16 -1
  40. solutions/38_grpo_loss_solution.ipynb +15 -0
  41. solutions/39_ppo_loss_solution.ipynb +15 -1
  42. solutions/40_linear_regression_solution.ipynb +137 -122
  43. templates/00_welcome.ipynb +102 -6
  44. templates/01_relu.ipynb +14 -0
  45. templates/02_softmax.ipynb +14 -0
  46. templates/03_linear.ipynb +14 -0
  47. templates/04_layernorm.ipynb +14 -0
  48. templates/05_attention.ipynb +14 -0
  49. templates/06_multihead_attention.ipynb +15 -0
  50. templates/07_batchnorm.ipynb +142 -128
scripts/add_colab_badges.py CHANGED
@@ -1,27 +1,29 @@
1
  #!/usr/bin/env python3
2
- """Add 'Open in Colab' badges to all template notebooks."""
3
 
4
  import json
5
  from pathlib import Path
6
 
7
  REPO = "duoan/TorchCode"
8
  BRANCH = "master"
9
- TEMPLATES_DIR = Path(__file__).resolve().parent.parent / "templates"
 
 
10
  BADGE_IMG = "https://colab.research.google.com/assets/colab-badge.svg"
11
 
12
 
13
- def colab_url(filename: str) -> str:
14
  return (
15
  f"https://colab.research.google.com/github/{REPO}"
16
- f"/blob/{BRANCH}/templates/{filename}"
17
  )
18
 
19
 
20
- def badge_markdown(filename: str) -> str:
21
- return f"[![Open In Colab]({BADGE_IMG})]({colab_url(filename)})"
22
 
23
 
24
- def process_notebook(path: Path) -> bool:
25
  with open(path, "r", encoding="utf-8") as f:
26
  nb = json.load(f)
27
 
@@ -34,7 +36,7 @@ def process_notebook(path: Path) -> bool:
34
  if "colab-badge.svg" in flat:
35
  return False
36
 
37
- badge = badge_markdown(path.name)
38
  cells[0]["source"] = [badge + "\n\n"] + (
39
  source_lines if isinstance(source_lines, list) else [source_lines]
40
  )
@@ -49,11 +51,17 @@ def process_notebook(path: Path) -> bool:
49
  def main() -> None:
50
  updated = 0
51
  for nb_path in sorted(TEMPLATES_DIR.glob("*.ipynb")):
52
- if process_notebook(nb_path):
53
- print(f" ✅ {nb_path.name}")
54
  updated += 1
55
  else:
56
- print(f" ⏭️ {nb_path.name} (already has badge or skipped)")
 
 
 
 
 
 
57
  print(f"\nDone — updated {updated} notebooks.")
58
 
59
 
 
1
  #!/usr/bin/env python3
2
+ """Add 'Open in Colab' badges to all template and solution notebooks."""
3
 
4
  import json
5
  from pathlib import Path
6
 
7
  REPO = "duoan/TorchCode"
8
  BRANCH = "master"
9
+ ROOT = Path(__file__).resolve().parent.parent
10
+ TEMPLATES_DIR = ROOT / "templates"
11
+ SOLUTIONS_DIR = ROOT / "solutions"
12
  BADGE_IMG = "https://colab.research.google.com/assets/colab-badge.svg"
13
 
14
 
15
+ def colab_url(filename: str, folder: str) -> str:
16
  return (
17
  f"https://colab.research.google.com/github/{REPO}"
18
+ f"/blob/{BRANCH}/{folder}/{filename}"
19
  )
20
 
21
 
22
+ def badge_markdown(filename: str, folder: str) -> str:
23
+ return f"[![Open In Colab]({BADGE_IMG})]({colab_url(filename, folder)})"
24
 
25
 
26
+ def process_notebook(path: Path, folder: str) -> bool:
27
  with open(path, "r", encoding="utf-8") as f:
28
  nb = json.load(f)
29
 
 
36
  if "colab-badge.svg" in flat:
37
  return False
38
 
39
+ badge = badge_markdown(path.name, folder)
40
  cells[0]["source"] = [badge + "\n\n"] + (
41
  source_lines if isinstance(source_lines, list) else [source_lines]
42
  )
 
51
  def main() -> None:
52
  updated = 0
53
  for nb_path in sorted(TEMPLATES_DIR.glob("*.ipynb")):
54
+ if process_notebook(nb_path, "templates"):
55
+ print(f" ✅ templates/{nb_path.name}")
56
  updated += 1
57
  else:
58
+ print(f" ⏭️ templates/{nb_path.name} (already has badge or skipped)")
59
+ for nb_path in sorted(SOLUTIONS_DIR.glob("*.ipynb")):
60
+ if process_notebook(nb_path, "solutions"):
61
+ print(f" ✅ solutions/{nb_path.name}")
62
+ updated += 1
63
+ else:
64
+ print(f" ⏭️ solutions/{nb_path.name} (already has badge or skipped)")
65
  print(f"\nDone — updated {updated} notebooks.")
66
 
67
 
scripts/add_colab_torch_judge_install.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """Add Colab-only pip install of torch-judge to all notebooks that use torch_judge."""
3
+
4
+ import json
5
+ from pathlib import Path
6
+
7
+ INSTALL_CELL_SOURCE = [
8
+ "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
9
+ "try:\n",
10
+ " import google.colab\n",
11
+ " get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
12
+ "except ImportError:\n",
13
+ " pass\n",
14
+ ]
15
+
16
+ MARKER = "get_ipython().run_line_magic('pip', 'install"
17
+
18
+
19
+ def has_torch_judge(nb: dict) -> bool:
20
+ for cell in nb.get("cells", []):
21
+ src = cell.get("source", [])
22
+ flat = "".join(src) if isinstance(src, list) else str(src)
23
+ if "torch_judge" in flat:
24
+ return True
25
+ return False
26
+
27
+
28
+ def already_has_install(nb: dict) -> bool:
29
+ for cell in nb.get("cells", []):
30
+ src = cell.get("source", [])
31
+ flat = "".join(src) if isinstance(src, list) else str(src)
32
+ if MARKER in flat and "torch-judge" in flat:
33
+ return True
34
+ return False
35
+
36
+
37
+ def process_notebook(path: Path) -> bool:
38
+ with open(path, "r", encoding="utf-8") as f:
39
+ nb = json.load(f)
40
+
41
+ if not has_torch_judge(nb):
42
+ return False
43
+ if already_has_install(nb):
44
+ return False
45
+
46
+ cells = nb["cells"]
47
+ if not cells:
48
+ return False
49
+
50
+ # Insert install cell at index 1 (after first cell, usually markdown title)
51
+ install_cell = {
52
+ "cell_type": "code",
53
+ "metadata": {},
54
+ "source": INSTALL_CELL_SOURCE,
55
+ "outputs": [],
56
+ "execution_count": None,
57
+ }
58
+ cells.insert(1, install_cell)
59
+
60
+ with open(path, "w", encoding="utf-8") as f:
61
+ json.dump(nb, f, ensure_ascii=False, indent=1)
62
+ f.write("\n")
63
+
64
+ return True
65
+
66
+
67
+ def main() -> None:
68
+ root = Path(__file__).resolve().parent.parent
69
+ updated = 0
70
+ for pattern in ["templates/*.ipynb", "solutions/*.ipynb"]:
71
+ for path in sorted(root.glob(pattern)):
72
+ if process_notebook(path):
73
+ print(f" + {path.relative_to(root)}")
74
+ updated += 1
75
+ print(f"Updated {updated} notebooks.")
76
+
77
+
78
+ if __name__ == "__main__":
79
+ main()
solutions/01_relu_solution.ipynb CHANGED
@@ -1,73 +1,89 @@
1
  {
2
- "nbformat": 4,
3
- "nbformat_minor": 5,
4
- "metadata": {
5
- "kernelspec": {
6
- "display_name": "Python 3",
7
- "language": "python",
8
- "name": "python3"
9
- },
10
- "language_info": {
11
- "name": "python",
12
- "version": "3.11.0"
13
- }
14
- },
15
  "cells": [
16
  {
17
  "cell_type": "markdown",
 
18
  "metadata": {},
19
  "source": [
 
 
20
  "# 🟢 Solution: Implement ReLU\n",
21
  "\n",
22
  "Reference solution for the ReLU activation function.\n",
23
  "\n",
24
  "$$\\text{ReLU}(x) = \\max(0, x)$$"
25
- ],
26
- "outputs": []
27
  },
28
  {
29
  "cell_type": "code",
 
30
  "metadata": {},
 
31
  "source": [
32
- "import torch"
33
- ],
 
 
 
 
 
 
 
 
 
 
34
  "outputs": [],
35
- "execution_count": null
 
 
36
  },
37
  {
38
  "cell_type": "code",
 
39
  "metadata": {},
 
40
  "source": [
41
  "# ✅ SOLUTION\n",
42
  "\n",
43
  "def relu(x: torch.Tensor) -> torch.Tensor:\n",
44
  " return x * (x > 0).float()"
45
- ],
46
- "outputs": [],
47
- "execution_count": null
48
  },
49
  {
50
  "cell_type": "code",
 
51
  "metadata": {},
 
52
  "source": [
53
  "# Verify\n",
54
  "x = torch.tensor([-2., -1., 0., 1., 2.])\n",
55
  "print(\"Input: \", x)\n",
56
  "print(\"Output:\", relu(x))"
57
- ],
58
- "outputs": [],
59
- "execution_count": null
60
  },
61
  {
62
  "cell_type": "code",
 
63
  "metadata": {},
 
64
  "source": [
65
  "# Run judge\n",
66
  "from torch_judge import check\n",
67
  "check(\"relu\")"
68
- ],
69
- "outputs": [],
70
- "execution_count": null
71
  }
72
- ]
73
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  {
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  "cells": [
3
  {
4
  "cell_type": "markdown",
5
+ "id": "0556419b",
6
  "metadata": {},
7
  "source": [
8
+ "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/01_relu_solution.ipynb)\n",
9
+ "\n",
10
  "# 🟢 Solution: Implement ReLU\n",
11
  "\n",
12
  "Reference solution for the ReLU activation function.\n",
13
  "\n",
14
  "$$\\text{ReLU}(x) = \\max(0, x)$$"
15
+ ]
 
16
  },
17
  {
18
  "cell_type": "code",
19
+ "execution_count": null,
20
  "metadata": {},
21
+ "outputs": [],
22
  "source": [
23
+ "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
24
+ "try:\n",
25
+ " import google.colab\n",
26
+ " get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
27
+ "except ImportError:\n",
28
+ " pass\n"
29
+ ]
30
+ },
31
+ {
32
+ "cell_type": "code",
33
+ "execution_count": null,
34
+ "metadata": {},
35
  "outputs": [],
36
+ "source": [
37
+ "import torch"
38
+ ]
39
  },
40
  {
41
  "cell_type": "code",
42
+ "execution_count": null,
43
  "metadata": {},
44
+ "outputs": [],
45
  "source": [
46
  "# ✅ SOLUTION\n",
47
  "\n",
48
  "def relu(x: torch.Tensor) -> torch.Tensor:\n",
49
  " return x * (x > 0).float()"
50
+ ]
 
 
51
  },
52
  {
53
  "cell_type": "code",
54
+ "execution_count": null,
55
  "metadata": {},
56
+ "outputs": [],
57
  "source": [
58
  "# Verify\n",
59
  "x = torch.tensor([-2., -1., 0., 1., 2.])\n",
60
  "print(\"Input: \", x)\n",
61
  "print(\"Output:\", relu(x))"
62
+ ]
 
 
63
  },
64
  {
65
  "cell_type": "code",
66
+ "execution_count": null,
67
  "metadata": {},
68
+ "outputs": [],
69
  "source": [
70
  "# Run judge\n",
71
  "from torch_judge import check\n",
72
  "check(\"relu\")"
73
+ ]
 
 
74
  }
75
+ ],
76
+ "metadata": {
77
+ "kernelspec": {
78
+ "display_name": "Python 3",
79
+ "language": "python",
80
+ "name": "python3"
81
+ },
82
+ "language_info": {
83
+ "name": "python",
84
+ "version": "3.11.0"
85
+ }
86
+ },
87
+ "nbformat": 4,
88
+ "nbformat_minor": 5
89
+ }
solutions/02_softmax_solution.ipynb CHANGED
@@ -17,6 +17,7 @@
17
  "cell_type": "markdown",
18
  "metadata": {},
19
  "source": [
 
20
  "# 🟢 Solution: Implement Softmax\n",
21
  "\n",
22
  "Reference solution for the numerically-stable Softmax function.\n",
@@ -25,6 +26,20 @@
25
  ],
26
  "outputs": []
27
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  {
29
  "cell_type": "code",
30
  "metadata": {},
@@ -73,4 +88,4 @@
73
  "execution_count": null
74
  }
75
  ]
76
- }
 
17
  "cell_type": "markdown",
18
  "metadata": {},
19
  "source": [
20
+ "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/02_softmax_solution.ipynb)\n\n",
21
  "# 🟢 Solution: Implement Softmax\n",
22
  "\n",
23
  "Reference solution for the numerically-stable Softmax function.\n",
 
26
  ],
27
  "outputs": []
28
  },
29
+ {
30
+ "cell_type": "code",
31
+ "metadata": {},
32
+ "source": [
33
+ "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
34
+ "try:\n",
35
+ " import google.colab\n",
36
+ " get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
37
+ "except ImportError:\n",
38
+ " pass\n"
39
+ ],
40
+ "outputs": [],
41
+ "execution_count": null
42
+ },
43
  {
44
  "cell_type": "code",
45
  "metadata": {},
 
88
  "execution_count": null
89
  }
90
  ]
91
+ }
solutions/03_linear_solution.ipynb CHANGED
@@ -17,12 +17,27 @@
17
  "cell_type": "markdown",
18
  "metadata": {},
19
  "source": [
 
20
  "# 🟡 Solution: Simple Linear Layer\n",
21
  "\n",
22
  "Reference solution for a fully-connected linear layer: **y = xW^T + b**"
23
  ],
24
  "outputs": []
25
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  {
27
  "cell_type": "code",
28
  "metadata": {},
@@ -77,4 +92,4 @@
77
  "execution_count": null
78
  }
79
  ]
80
- }
 
17
  "cell_type": "markdown",
18
  "metadata": {},
19
  "source": [
20
+ "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/03_linear_solution.ipynb)\n\n",
21
  "# 🟡 Solution: Simple Linear Layer\n",
22
  "\n",
23
  "Reference solution for a fully-connected linear layer: **y = xW^T + b**"
24
  ],
25
  "outputs": []
26
  },
27
+ {
28
+ "cell_type": "code",
29
+ "metadata": {},
30
+ "source": [
31
+ "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
32
+ "try:\n",
33
+ " import google.colab\n",
34
+ " get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
35
+ "except ImportError:\n",
36
+ " pass\n"
37
+ ],
38
+ "outputs": [],
39
+ "execution_count": null
40
+ },
41
  {
42
  "cell_type": "code",
43
  "metadata": {},
 
92
  "execution_count": null
93
  }
94
  ]
95
+ }
solutions/04_layernorm_solution.ipynb CHANGED
@@ -17,6 +17,7 @@
17
  "cell_type": "markdown",
18
  "metadata": {},
19
  "source": [
 
20
  "# 🟡 Solution: Implement LayerNorm\n",
21
  "\n",
22
  "Reference solution for Layer Normalization.\n",
@@ -25,6 +26,20 @@
25
  ],
26
  "outputs": []
27
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  {
29
  "cell_type": "code",
30
  "metadata": {},
@@ -76,4 +91,4 @@
76
  "execution_count": null
77
  }
78
  ]
79
- }
 
17
  "cell_type": "markdown",
18
  "metadata": {},
19
  "source": [
20
+ "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/04_layernorm_solution.ipynb)\n\n",
21
  "# 🟡 Solution: Implement LayerNorm\n",
22
  "\n",
23
  "Reference solution for Layer Normalization.\n",
 
26
  ],
27
  "outputs": []
28
  },
29
+ {
30
+ "cell_type": "code",
31
+ "metadata": {},
32
+ "source": [
33
+ "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
34
+ "try:\n",
35
+ " import google.colab\n",
36
+ " get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
37
+ "except ImportError:\n",
38
+ " pass\n"
39
+ ],
40
+ "outputs": [],
41
+ "execution_count": null
42
+ },
43
  {
44
  "cell_type": "code",
45
  "metadata": {},
 
91
  "execution_count": null
92
  }
93
  ]
94
+ }
solutions/05_attention_solution.ipynb CHANGED
@@ -5,6 +5,7 @@
5
  "id": "5f63d076",
6
  "metadata": {},
7
  "source": [
 
8
  "# 🔴 Solution: Softmax Attention\n",
9
  "\n",
10
  "Reference solution for the core Transformer attention mechanism.\n",
@@ -12,6 +13,21 @@
12
  "$$\\text{Attention}(Q, K, V) = \\text{softmax}\\!\\left(\\frac{QK^T}{\\sqrt{d_k}}\\right)V$$"
13
  ]
14
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  {
16
  "cell_type": "code",
17
  "execution_count": null,
 
5
  "id": "5f63d076",
6
  "metadata": {},
7
  "source": [
8
+ "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/05_attention_solution.ipynb)\n\n",
9
  "# 🔴 Solution: Softmax Attention\n",
10
  "\n",
11
  "Reference solution for the core Transformer attention mechanism.\n",
 
13
  "$$\\text{Attention}(Q, K, V) = \\text{softmax}\\!\\left(\\frac{QK^T}{\\sqrt{d_k}}\\right)V$$"
14
  ]
15
  },
16
+ {
17
+ "cell_type": "code",
18
+ "execution_count": null,
19
+ "id": "ce663fb0",
20
+ "metadata": {},
21
+ "outputs": [],
22
+ "source": [
23
+ "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
24
+ "try:\n",
25
+ " import google.colab\n",
26
+ " get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
27
+ "except ImportError:\n",
28
+ " pass\n"
29
+ ]
30
+ },
31
  {
32
  "cell_type": "code",
33
  "execution_count": null,
solutions/06_multihead_attention_solution.ipynb CHANGED
@@ -1,105 +1,120 @@
1
  {
2
- "cells": [
3
- {
4
- "cell_type": "markdown",
5
- "metadata": {},
6
- "source": [
7
- "# 🔴 Solution: Multi-Head Attention\n",
8
- "\n",
9
- "Reference solution for the Multi-Head Attention mechanism.\n",
10
- "\n",
11
- "$$\\text{MultiHead}(Q, K, V) = \\text{Concat}(\\text{head}_1, \\dots, \\text{head}_h) W^O$$"
12
- ]
13
- },
14
- {
15
- "cell_type": "code",
16
- "execution_count": null,
17
- "metadata": {},
18
- "outputs": [],
19
- "source": [
20
- "import torch\n",
21
- "import torch.nn as nn\n",
22
- "import math"
23
- ]
24
- },
25
- {
26
- "cell_type": "code",
27
- "execution_count": null,
28
- "id": "46b73737",
29
- "metadata": {},
30
- "outputs": [],
31
- "source": [
32
- "# ✅ SOLUTION\n",
33
- "\n",
34
- "class MultiHeadAttention:\n",
35
- " def __init__(self, d_model: int, num_heads: int):\n",
36
- " self.num_heads = num_heads\n",
37
- " self.d_k = d_model // num_heads\n",
38
- "\n",
39
- " self.W_q = nn.Linear(d_model, d_model)\n",
40
- " self.W_k = nn.Linear(d_model, d_model)\n",
41
- " self.W_v = nn.Linear(d_model, d_model)\n",
42
- " self.W_o = nn.Linear(d_model, d_model)\n",
43
- "\n",
44
- " def forward(self, Q, K, V):\n",
45
- " B, S_q, _ = Q.shape\n",
46
- " S_k = K.shape[1]\n",
47
- "\n",
48
- " q = self.W_q(Q).view(B, S_q, self.num_heads, self.d_k).transpose(1, 2)\n",
49
- " k = self.W_k(K).view(B, S_k, self.num_heads, self.d_k).transpose(1, 2)\n",
50
- " v = self.W_v(V).view(B, S_k, self.num_heads, self.d_k).transpose(1, 2)\n",
51
- "\n",
52
- " scores = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(self.d_k)\n",
53
- " weights = torch.softmax(scores, dim=-1)\n",
54
- " attn = torch.matmul(weights, v)\n",
55
- "\n",
56
- " out = attn.transpose(1, 2).contiguous().view(B, S_q, -1)\n",
57
- " return self.W_o(out)"
58
- ]
59
- },
60
- {
61
- "cell_type": "code",
62
- "execution_count": null,
63
- "metadata": {},
64
- "outputs": [],
65
- "source": [
66
- "# Verify\n",
67
- "torch.manual_seed(0)\n",
68
- "mha = MultiHeadAttention(d_model=32, num_heads=4)\n",
69
- "x = torch.randn(2, 6, 32)\n",
70
- "out = mha.forward(x, x, x)\n",
71
- "print(\"Self-attn shape:\", out.shape)\n",
72
- "\n",
73
- "Q = torch.randn(1, 3, 32)\n",
74
- "K = torch.randn(1, 7, 32)\n",
75
- "V = torch.randn(1, 7, 32)\n",
76
- "out2 = mha.forward(Q, K, V)\n",
77
- "print(\"Cross-attn shape:\", out2.shape)"
78
- ]
79
- },
80
- {
81
- "cell_type": "code",
82
- "execution_count": null,
83
- "metadata": {},
84
- "outputs": [],
85
- "source": [
86
- "# Run judge\n",
87
- "from torch_judge import check\n",
88
- "check(\"mha\")"
89
- ]
90
- }
91
- ],
92
- "metadata": {
93
- "kernelspec": {
94
- "display_name": "Python 3",
95
- "language": "python",
96
- "name": "python3"
97
- },
98
- "language_info": {
99
- "name": "python",
100
- "version": "3.11.0"
101
- }
102
  },
103
- "nbformat": 4,
104
- "nbformat_minor": 5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
  }
 
1
  {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {},
6
+ "source": [
7
+ "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/06_multihead_attention_solution.ipynb)\n\n",
8
+ "# 🔴 Solution: Multi-Head Attention\n",
9
+ "\n",
10
+ "Reference solution for the Multi-Head Attention mechanism.\n",
11
+ "\n",
12
+ "$$\\text{MultiHead}(Q, K, V) = \\text{Concat}(\\text{head}_1, \\dots, \\text{head}_h) W^O$$"
13
+ ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  },
15
+ {
16
+ "cell_type": "code",
17
+ "metadata": {},
18
+ "source": [
19
+ "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
20
+ "try:\n",
21
+ " import google.colab\n",
22
+ " get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
23
+ "except ImportError:\n",
24
+ " pass\n"
25
+ ],
26
+ "outputs": [],
27
+ "execution_count": null
28
+ },
29
+ {
30
+ "cell_type": "code",
31
+ "execution_count": null,
32
+ "metadata": {},
33
+ "outputs": [],
34
+ "source": [
35
+ "import torch\n",
36
+ "import torch.nn as nn\n",
37
+ "import math"
38
+ ]
39
+ },
40
+ {
41
+ "cell_type": "code",
42
+ "execution_count": null,
43
+ "id": "46b73737",
44
+ "metadata": {},
45
+ "outputs": [],
46
+ "source": [
47
+ "# ✅ SOLUTION\n",
48
+ "\n",
49
+ "class MultiHeadAttention:\n",
50
+ " def __init__(self, d_model: int, num_heads: int):\n",
51
+ " self.num_heads = num_heads\n",
52
+ " self.d_k = d_model // num_heads\n",
53
+ "\n",
54
+ " self.W_q = nn.Linear(d_model, d_model)\n",
55
+ " self.W_k = nn.Linear(d_model, d_model)\n",
56
+ " self.W_v = nn.Linear(d_model, d_model)\n",
57
+ " self.W_o = nn.Linear(d_model, d_model)\n",
58
+ "\n",
59
+ " def forward(self, Q, K, V):\n",
60
+ " B, S_q, _ = Q.shape\n",
61
+ " S_k = K.shape[1]\n",
62
+ "\n",
63
+ " q = self.W_q(Q).view(B, S_q, self.num_heads, self.d_k).transpose(1, 2)\n",
64
+ " k = self.W_k(K).view(B, S_k, self.num_heads, self.d_k).transpose(1, 2)\n",
65
+ " v = self.W_v(V).view(B, S_k, self.num_heads, self.d_k).transpose(1, 2)\n",
66
+ "\n",
67
+ " scores = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(self.d_k)\n",
68
+ " weights = torch.softmax(scores, dim=-1)\n",
69
+ " attn = torch.matmul(weights, v)\n",
70
+ "\n",
71
+ " out = attn.transpose(1, 2).contiguous().view(B, S_q, -1)\n",
72
+ " return self.W_o(out)"
73
+ ]
74
+ },
75
+ {
76
+ "cell_type": "code",
77
+ "execution_count": null,
78
+ "metadata": {},
79
+ "outputs": [],
80
+ "source": [
81
+ "# Verify\n",
82
+ "torch.manual_seed(0)\n",
83
+ "mha = MultiHeadAttention(d_model=32, num_heads=4)\n",
84
+ "x = torch.randn(2, 6, 32)\n",
85
+ "out = mha.forward(x, x, x)\n",
86
+ "print(\"Self-attn shape:\", out.shape)\n",
87
+ "\n",
88
+ "Q = torch.randn(1, 3, 32)\n",
89
+ "K = torch.randn(1, 7, 32)\n",
90
+ "V = torch.randn(1, 7, 32)\n",
91
+ "out2 = mha.forward(Q, K, V)\n",
92
+ "print(\"Cross-attn shape:\", out2.shape)"
93
+ ]
94
+ },
95
+ {
96
+ "cell_type": "code",
97
+ "execution_count": null,
98
+ "metadata": {},
99
+ "outputs": [],
100
+ "source": [
101
+ "# Run judge\n",
102
+ "from torch_judge import check\n",
103
+ "check(\"mha\")"
104
+ ]
105
+ }
106
+ ],
107
+ "metadata": {
108
+ "kernelspec": {
109
+ "display_name": "Python 3",
110
+ "language": "python",
111
+ "name": "python3"
112
+ },
113
+ "language_info": {
114
+ "name": "python",
115
+ "version": "3.11.0"
116
+ }
117
+ },
118
+ "nbformat": 4,
119
+ "nbformat_minor": 5
120
  }
solutions/07_batchnorm_solution.ipynb CHANGED
@@ -1,117 +1,132 @@
1
  {
2
- "cells": [
3
- {
4
- "cell_type": "markdown",
5
- "id": "ffd42526",
6
- "metadata": {},
7
- "source": [
8
- "# 🟡 Solution: Implement BatchNorm\n",
9
- "\n",
10
- "Reference solution for Batch Normalization with both **training** and **inference** behavior, including running mean/variance updates."
11
- ]
12
- },
13
- {
14
- "cell_type": "code",
15
- "execution_count": null,
16
- "metadata": {},
17
- "outputs": [],
18
- "source": [
19
- "import torch"
20
- ]
21
- },
22
- {
23
- "cell_type": "code",
24
- "execution_count": null,
25
- "id": "70488b9f",
26
- "metadata": {},
27
- "outputs": [],
28
- "source": [
29
- "# ✅ SOLUTION\n",
30
- "\n",
31
- "import torch\n",
32
- "\n",
33
- "def my_batch_norm(\n",
34
- " x,\n",
35
- " gamma,\n",
36
- " beta,\n",
37
- " running_mean,\n",
38
- " running_var,\n",
39
- " eps=1e-5,\n",
40
- " momentum=0.1,\n",
41
- " training=True,\n",
42
- "):\n",
43
- " \"\"\"BatchNorm with train/eval behavior and running stats.\n",
44
- "\n",
45
- " - Training: use batch stats, update running_mean / running_var in-place.\n",
46
- " - Inference: use running_mean / running_var as-is.\n",
47
- " \"\"\"\n",
48
- " if training:\n",
49
- " batch_mean = x.mean(dim=0)\n",
50
- " batch_var = x.var(dim=0, unbiased=False)\n",
51
- "\n",
52
- " # Update running statistics in-place. Detach to avoid tracking gradients.\n",
53
- " running_mean.mul_(1 - momentum).add_(momentum * batch_mean.detach())\n",
54
- " running_var.mul_(1 - momentum).add_(momentum * batch_var.detach())\n",
55
- "\n",
56
- " mean = batch_mean\n",
57
- " var = batch_var\n",
58
- " else:\n",
59
- " mean = running_mean\n",
60
- " var = running_var\n",
61
- "\n",
62
- " x_norm = (x - mean) / torch.sqrt(var + eps)\n",
63
- " return gamma * x_norm + beta"
64
- ]
65
- },
66
- {
67
- "cell_type": "code",
68
- "execution_count": null,
69
- "id": "dbd7bb4e",
70
- "metadata": {},
71
- "outputs": [],
72
- "source": [
73
- "# Verify\n",
74
- "x = torch.randn(8, 4)\n",
75
- "gamma = torch.ones(4)\n",
76
- "beta = torch.zeros(4)\n",
77
- "\n",
78
- "running_mean = torch.zeros(4)\n",
79
- "running_var = torch.ones(4)\n",
80
- "\n",
81
- "# Training behavior: normalize with batch stats and update running stats\n",
82
- "out_train = my_batch_norm(x, gamma, beta, running_mean, running_var, training=True)\n",
83
- "print(\"[Train] Column means:\", out_train.mean(dim=0))\n",
84
- "print(\"[Train] Column stds: \", out_train.std(dim=0))\n",
85
- "print(\"Updated running_mean:\", running_mean)\n",
86
- "print(\"Updated running_var:\", running_var)\n",
87
- "\n",
88
- "# Inference behavior: use running_mean / running_var only\n",
89
- "out_eval = my_batch_norm(x, gamma, beta, running_mean, running_var, training=False)\n",
90
- "print(\"[Eval] Column means (using running stats):\", out_eval.mean(dim=0))"
91
- ]
92
- },
93
- {
94
- "cell_type": "code",
95
- "execution_count": null,
96
- "metadata": {},
97
- "outputs": [],
98
- "source": [
99
- "from torch_judge import check\n",
100
- "check('batchnorm')"
101
- ]
102
- }
103
- ],
104
- "metadata": {
105
- "kernelspec": {
106
- "display_name": "Python 3",
107
- "language": "python",
108
- "name": "python3"
109
- },
110
- "language_info": {
111
- "name": "python",
112
- "version": "3.11.0"
113
- }
114
  },
115
- "nbformat": 4,
116
- "nbformat_minor": 5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
  }
 
1
  {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "id": "ffd42526",
6
+ "metadata": {},
7
+ "source": [
8
+ "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/07_batchnorm_solution.ipynb)\n\n",
9
+ "# 🟡 Solution: Implement BatchNorm\n",
10
+ "\n",
11
+ "Reference solution for Batch Normalization with both **training** and **inference** behavior, including running mean/variance updates."
12
+ ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  },
14
+ {
15
+ "cell_type": "code",
16
+ "metadata": {},
17
+ "source": [
18
+ "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
19
+ "try:\n",
20
+ " import google.colab\n",
21
+ " get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
22
+ "except ImportError:\n",
23
+ " pass\n"
24
+ ],
25
+ "outputs": [],
26
+ "execution_count": null
27
+ },
28
+ {
29
+ "cell_type": "code",
30
+ "execution_count": null,
31
+ "metadata": {},
32
+ "outputs": [],
33
+ "source": [
34
+ "import torch"
35
+ ]
36
+ },
37
+ {
38
+ "cell_type": "code",
39
+ "execution_count": null,
40
+ "id": "70488b9f",
41
+ "metadata": {},
42
+ "outputs": [],
43
+ "source": [
44
+ "# ✅ SOLUTION\n",
45
+ "\n",
46
+ "import torch\n",
47
+ "\n",
48
+ "def my_batch_norm(\n",
49
+ " x,\n",
50
+ " gamma,\n",
51
+ " beta,\n",
52
+ " running_mean,\n",
53
+ " running_var,\n",
54
+ " eps=1e-5,\n",
55
+ " momentum=0.1,\n",
56
+ " training=True,\n",
57
+ "):\n",
58
+ " \"\"\"BatchNorm with train/eval behavior and running stats.\n",
59
+ "\n",
60
+ " - Training: use batch stats, update running_mean / running_var in-place.\n",
61
+ " - Inference: use running_mean / running_var as-is.\n",
62
+ " \"\"\"\n",
63
+ " if training:\n",
64
+ " batch_mean = x.mean(dim=0)\n",
65
+ " batch_var = x.var(dim=0, unbiased=False)\n",
66
+ "\n",
67
+ " # Update running statistics in-place. Detach to avoid tracking gradients.\n",
68
+ " running_mean.mul_(1 - momentum).add_(momentum * batch_mean.detach())\n",
69
+ " running_var.mul_(1 - momentum).add_(momentum * batch_var.detach())\n",
70
+ "\n",
71
+ " mean = batch_mean\n",
72
+ " var = batch_var\n",
73
+ " else:\n",
74
+ " mean = running_mean\n",
75
+ " var = running_var\n",
76
+ "\n",
77
+ " x_norm = (x - mean) / torch.sqrt(var + eps)\n",
78
+ " return gamma * x_norm + beta"
79
+ ]
80
+ },
81
+ {
82
+ "cell_type": "code",
83
+ "execution_count": null,
84
+ "id": "dbd7bb4e",
85
+ "metadata": {},
86
+ "outputs": [],
87
+ "source": [
88
+ "# Verify\n",
89
+ "x = torch.randn(8, 4)\n",
90
+ "gamma = torch.ones(4)\n",
91
+ "beta = torch.zeros(4)\n",
92
+ "\n",
93
+ "running_mean = torch.zeros(4)\n",
94
+ "running_var = torch.ones(4)\n",
95
+ "\n",
96
+ "# Training behavior: normalize with batch stats and update running stats\n",
97
+ "out_train = my_batch_norm(x, gamma, beta, running_mean, running_var, training=True)\n",
98
+ "print(\"[Train] Column means:\", out_train.mean(dim=0))\n",
99
+ "print(\"[Train] Column stds: \", out_train.std(dim=0))\n",
100
+ "print(\"Updated running_mean:\", running_mean)\n",
101
+ "print(\"Updated running_var:\", running_var)\n",
102
+ "\n",
103
+ "# Inference behavior: use running_mean / running_var only\n",
104
+ "out_eval = my_batch_norm(x, gamma, beta, running_mean, running_var, training=False)\n",
105
+ "print(\"[Eval] Column means (using running stats):\", out_eval.mean(dim=0))"
106
+ ]
107
+ },
108
+ {
109
+ "cell_type": "code",
110
+ "execution_count": null,
111
+ "metadata": {},
112
+ "outputs": [],
113
+ "source": [
114
+ "from torch_judge import check\n",
115
+ "check('batchnorm')"
116
+ ]
117
+ }
118
+ ],
119
+ "metadata": {
120
+ "kernelspec": {
121
+ "display_name": "Python 3",
122
+ "language": "python",
123
+ "name": "python3"
124
+ },
125
+ "language_info": {
126
+ "name": "python",
127
+ "version": "3.11.0"
128
+ }
129
+ },
130
+ "nbformat": 4,
131
+ "nbformat_minor": 5
132
  }
solutions/08_rmsnorm_solution.ipynb CHANGED
@@ -17,12 +17,27 @@
17
  "cell_type": "markdown",
18
  "metadata": {},
19
  "source": [
 
20
  "# 🟡 Solution: Implement RMSNorm\n",
21
  "\n",
22
  "Reference solution for Root Mean Square Normalization."
23
  ],
24
  "outputs": []
25
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  {
27
  "cell_type": "code",
28
  "metadata": {},
@@ -67,4 +82,4 @@
67
  "execution_count": null
68
  }
69
  ]
70
- }
 
17
  "cell_type": "markdown",
18
  "metadata": {},
19
  "source": [
20
+ "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/08_rmsnorm_solution.ipynb)\n\n",
21
  "# 🟡 Solution: Implement RMSNorm\n",
22
  "\n",
23
  "Reference solution for Root Mean Square Normalization."
24
  ],
25
  "outputs": []
26
  },
27
+ {
28
+ "cell_type": "code",
29
+ "metadata": {},
30
+ "source": [
31
+ "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
32
+ "try:\n",
33
+ " import google.colab\n",
34
+ " get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
35
+ "except ImportError:\n",
36
+ " pass\n"
37
+ ],
38
+ "outputs": [],
39
+ "execution_count": null
40
+ },
41
  {
42
  "cell_type": "code",
43
  "metadata": {},
 
82
  "execution_count": null
83
  }
84
  ]
85
+ }
solutions/09_causal_attention_solution.ipynb CHANGED
@@ -17,12 +17,27 @@
17
  "cell_type": "markdown",
18
  "metadata": {},
19
  "source": [
 
20
  "# 🔴 Solution: Causal Self-Attention\n",
21
  "\n",
22
  "Reference solution — softmax attention with an upper-triangular mask."
23
  ],
24
  "outputs": []
25
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  {
27
  "cell_type": "code",
28
  "metadata": {},
@@ -77,4 +92,4 @@
77
  "execution_count": null
78
  }
79
  ]
80
- }
 
17
  "cell_type": "markdown",
18
  "metadata": {},
19
  "source": [
20
+ "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/09_causal_attention_solution.ipynb)\n\n",
21
  "# 🔴 Solution: Causal Self-Attention\n",
22
  "\n",
23
  "Reference solution — softmax attention with an upper-triangular mask."
24
  ],
25
  "outputs": []
26
  },
27
+ {
28
+ "cell_type": "code",
29
+ "metadata": {},
30
+ "source": [
31
+ "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
32
+ "try:\n",
33
+ " import google.colab\n",
34
+ " get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
35
+ "except ImportError:\n",
36
+ " pass\n"
37
+ ],
38
+ "outputs": [],
39
+ "execution_count": null
40
+ },
41
  {
42
  "cell_type": "code",
43
  "metadata": {},
 
92
  "execution_count": null
93
  }
94
  ]
95
+ }
solutions/10_gqa_solution.ipynb CHANGED
@@ -17,12 +17,27 @@
17
  "cell_type": "markdown",
18
  "metadata": {},
19
  "source": [
 
20
  "# 🔴 Solution: Grouped Query Attention\n",
21
  "\n",
22
  "Reference solution for GQA — MHA with shared KV heads."
23
  ],
24
  "outputs": []
25
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  {
27
  "cell_type": "code",
28
  "metadata": {},
@@ -88,4 +103,4 @@
88
  "execution_count": null
89
  }
90
  ]
91
- }
 
17
  "cell_type": "markdown",
18
  "metadata": {},
19
  "source": [
20
+ "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/10_gqa_solution.ipynb)\n\n",
21
  "# 🔴 Solution: Grouped Query Attention\n",
22
  "\n",
23
  "Reference solution for GQA — MHA with shared KV heads."
24
  ],
25
  "outputs": []
26
  },
27
+ {
28
+ "cell_type": "code",
29
+ "metadata": {},
30
+ "source": [
31
+ "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
32
+ "try:\n",
33
+ " import google.colab\n",
34
+ " get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
35
+ "except ImportError:\n",
36
+ " pass\n"
37
+ ],
38
+ "outputs": [],
39
+ "execution_count": null
40
+ },
41
  {
42
  "cell_type": "code",
43
  "metadata": {},
 
103
  "execution_count": null
104
  }
105
  ]
106
+ }
solutions/11_sliding_window_solution.ipynb CHANGED
@@ -17,12 +17,27 @@
17
  "cell_type": "markdown",
18
  "metadata": {},
19
  "source": [
 
20
  "# 🔴 Solution: Sliding Window Attention\n",
21
  "\n",
22
  "Reference solution — softmax attention with a band mask."
23
  ],
24
  "outputs": []
25
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  {
27
  "cell_type": "code",
28
  "metadata": {},
@@ -73,4 +88,4 @@
73
  "execution_count": null
74
  }
75
  ]
76
- }
 
17
  "cell_type": "markdown",
18
  "metadata": {},
19
  "source": [
20
+ "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/11_sliding_window_solution.ipynb)\n\n",
21
  "# 🔴 Solution: Sliding Window Attention\n",
22
  "\n",
23
  "Reference solution — softmax attention with a band mask."
24
  ],
25
  "outputs": []
26
  },
27
+ {
28
+ "cell_type": "code",
29
+ "metadata": {},
30
+ "source": [
31
+ "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
32
+ "try:\n",
33
+ " import google.colab\n",
34
+ " get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
35
+ "except ImportError:\n",
36
+ " pass\n"
37
+ ],
38
+ "outputs": [],
39
+ "execution_count": null
40
+ },
41
  {
42
  "cell_type": "code",
43
  "metadata": {},
 
88
  "execution_count": null
89
  }
90
  ]
91
+ }
solutions/12_linear_attention_solution.ipynb CHANGED
@@ -17,12 +17,27 @@
17
  "cell_type": "markdown",
18
  "metadata": {},
19
  "source": [
 
20
  "# 🔴 Solution: Linear Self-Attention\n",
21
  "\n",
22
  "Reference solution — kernel-based attention with elu+1 feature map."
23
  ],
24
  "outputs": []
25
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  {
27
  "cell_type": "code",
28
  "metadata": {},
@@ -72,4 +87,4 @@
72
  "execution_count": null
73
  }
74
  ]
75
- }
 
17
  "cell_type": "markdown",
18
  "metadata": {},
19
  "source": [
20
+ "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/12_linear_attention_solution.ipynb)\n\n",
21
  "# 🔴 Solution: Linear Self-Attention\n",
22
  "\n",
23
  "Reference solution — kernel-based attention with elu+1 feature map."
24
  ],
25
  "outputs": []
26
  },
27
+ {
28
+ "cell_type": "code",
29
+ "metadata": {},
30
+ "source": [
31
+ "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
32
+ "try:\n",
33
+ " import google.colab\n",
34
+ " get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
35
+ "except ImportError:\n",
36
+ " pass\n"
37
+ ],
38
+ "outputs": [],
39
+ "execution_count": null
40
+ },
41
  {
42
  "cell_type": "code",
43
  "metadata": {},
 
87
  "execution_count": null
88
  }
89
  ]
90
+ }
solutions/13_gpt2_block_solution.ipynb CHANGED
@@ -17,12 +17,27 @@
17
  "cell_type": "markdown",
18
  "metadata": {},
19
  "source": [
 
20
  "# 🔴 Solution: GPT-2 Transformer Block\n",
21
  "\n",
22
  "Reference solution — pre-norm, causal self-attention, 4x MLP with GELU."
23
  ],
24
  "outputs": []
25
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  {
27
  "cell_type": "code",
28
  "metadata": {},
@@ -102,4 +117,4 @@
102
  "execution_count": null
103
  }
104
  ]
105
- }
 
17
  "cell_type": "markdown",
18
  "metadata": {},
19
  "source": [
20
+ "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/13_gpt2_block_solution.ipynb)\n\n",
21
  "# 🔴 Solution: GPT-2 Transformer Block\n",
22
  "\n",
23
  "Reference solution — pre-norm, causal self-attention, 4x MLP with GELU."
24
  ],
25
  "outputs": []
26
  },
27
+ {
28
+ "cell_type": "code",
29
+ "metadata": {},
30
+ "source": [
31
+ "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
32
+ "try:\n",
33
+ " import google.colab\n",
34
+ " get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
35
+ "except ImportError:\n",
36
+ " pass\n"
37
+ ],
38
+ "outputs": [],
39
+ "execution_count": null
40
+ },
41
  {
42
  "cell_type": "code",
43
  "metadata": {},
 
117
  "execution_count": null
118
  }
119
  ]
120
+ }
solutions/14_kv_cache_solution.ipynb CHANGED
@@ -4,12 +4,27 @@
4
  "cell_type": "markdown",
5
  "metadata": {},
6
  "source": [
7
- "# \ud83d\udd34 Solution: KV Cache Attention\n",
 
8
  "\n",
9
- "Reference solution \u2014 multi-head attention with KV caching for autoregressive inference."
10
  ],
11
  "outputs": []
12
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  {
14
  "cell_type": "code",
15
  "metadata": {},
@@ -26,7 +41,7 @@
26
  "metadata": {},
27
  "outputs": [],
28
  "source": [
29
- "# \u2705 SOLUTION\n",
30
  "\n",
31
  "class KVCacheAttention(nn.Module):\n",
32
  " def __init__(self, d_model, num_heads):\n",
 
4
  "cell_type": "markdown",
5
  "metadata": {},
6
  "source": [
7
+ "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/14_kv_cache_solution.ipynb)\n\n",
8
+ "# 🔴 Solution: KV Cache Attention\n",
9
  "\n",
10
+ "Reference solution multi-head attention with KV caching for autoregressive inference."
11
  ],
12
  "outputs": []
13
  },
14
+ {
15
+ "cell_type": "code",
16
+ "metadata": {},
17
+ "source": [
18
+ "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
19
+ "try:\n",
20
+ " import google.colab\n",
21
+ " get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
22
+ "except ImportError:\n",
23
+ " pass\n"
24
+ ],
25
+ "outputs": [],
26
+ "execution_count": null
27
+ },
28
  {
29
  "cell_type": "code",
30
  "metadata": {},
 
41
  "metadata": {},
42
  "outputs": [],
43
  "source": [
44
+ "# SOLUTION\n",
45
  "\n",
46
  "class KVCacheAttention(nn.Module):\n",
47
  " def __init__(self, d_model, num_heads):\n",
solutions/15_mlp_solution.ipynb CHANGED
@@ -4,12 +4,27 @@
4
  "cell_type": "markdown",
5
  "metadata": {},
6
  "source": [
7
- "# \ud83d\udfe0 Solution: SwiGLU MLP\n",
 
8
  "\n",
9
- "Reference solution \u2014 gated feed-forward network used in LLaMA, Mistral, and PaLM."
10
  ],
11
  "outputs": []
12
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  {
14
  "cell_type": "code",
15
  "metadata": {},
@@ -26,7 +41,7 @@
26
  "metadata": {},
27
  "outputs": [],
28
  "source": [
29
- "# \u2705 SOLUTION\n",
30
  "\n",
31
  "class SwiGLUMLP(nn.Module):\n",
32
  " def __init__(self, d_model, d_ff):\n",
 
4
  "cell_type": "markdown",
5
  "metadata": {},
6
  "source": [
7
+ "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/15_mlp_solution.ipynb)\n\n",
8
+ "# 🟠 Solution: SwiGLU MLP\n",
9
  "\n",
10
+ "Reference solution gated feed-forward network used in LLaMA, Mistral, and PaLM."
11
  ],
12
  "outputs": []
13
  },
14
+ {
15
+ "cell_type": "code",
16
+ "metadata": {},
17
+ "source": [
18
+ "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
19
+ "try:\n",
20
+ " import google.colab\n",
21
+ " get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
22
+ "except ImportError:\n",
23
+ " pass\n"
24
+ ],
25
+ "outputs": [],
26
+ "execution_count": null
27
+ },
28
  {
29
  "cell_type": "code",
30
  "metadata": {},
 
41
  "metadata": {},
42
  "outputs": [],
43
  "source": [
44
+ "# SOLUTION\n",
45
  "\n",
46
  "class SwiGLUMLP(nn.Module):\n",
47
  " def __init__(self, d_model, d_ff):\n",
solutions/16_cross_entropy_solution.ipynb CHANGED
@@ -4,12 +4,27 @@
4
  "cell_type": "markdown",
5
  "metadata": {},
6
  "source": [
 
7
  "# Solution: Cross-Entropy Loss\n",
8
  "\n",
9
  "Reference solution."
10
  ],
11
  "outputs": []
12
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  {
14
  "cell_type": "code",
15
  "metadata": {},
@@ -69,4 +84,4 @@
69
  },
70
  "nbformat": 4,
71
  "nbformat_minor": 4
72
- }
 
4
  "cell_type": "markdown",
5
  "metadata": {},
6
  "source": [
7
+ "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/16_cross_entropy_solution.ipynb)\n\n",
8
  "# Solution: Cross-Entropy Loss\n",
9
  "\n",
10
  "Reference solution."
11
  ],
12
  "outputs": []
13
  },
14
+ {
15
+ "cell_type": "code",
16
+ "metadata": {},
17
+ "source": [
18
+ "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
19
+ "try:\n",
20
+ " import google.colab\n",
21
+ " get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
22
+ "except ImportError:\n",
23
+ " pass\n"
24
+ ],
25
+ "outputs": [],
26
+ "execution_count": null
27
+ },
28
  {
29
  "cell_type": "code",
30
  "metadata": {},
 
84
  },
85
  "nbformat": 4,
86
  "nbformat_minor": 4
87
+ }
solutions/17_dropout_solution.ipynb CHANGED
@@ -4,12 +4,27 @@
4
  "cell_type": "markdown",
5
  "metadata": {},
6
  "source": [
 
7
  "# Solution: Implement Dropout\n",
8
  "\n",
9
  "Reference solution."
10
  ],
11
  "outputs": []
12
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  {
14
  "cell_type": "code",
15
  "metadata": {},
@@ -79,4 +94,4 @@
79
  },
80
  "nbformat": 4,
81
  "nbformat_minor": 4
82
- }
 
4
  "cell_type": "markdown",
5
  "metadata": {},
6
  "source": [
7
+ "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/17_dropout_solution.ipynb)\n\n",
8
  "# Solution: Implement Dropout\n",
9
  "\n",
10
  "Reference solution."
11
  ],
12
  "outputs": []
13
  },
14
+ {
15
+ "cell_type": "code",
16
+ "metadata": {},
17
+ "source": [
18
+ "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
19
+ "try:\n",
20
+ " import google.colab\n",
21
+ " get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
22
+ "except ImportError:\n",
23
+ " pass\n"
24
+ ],
25
+ "outputs": [],
26
+ "execution_count": null
27
+ },
28
  {
29
  "cell_type": "code",
30
  "metadata": {},
 
94
  },
95
  "nbformat": 4,
96
  "nbformat_minor": 4
97
+ }
solutions/18_embedding_solution.ipynb CHANGED
@@ -4,12 +4,27 @@
4
  "cell_type": "markdown",
5
  "metadata": {},
6
  "source": [
 
7
  "# Solution: Embedding Layer\n",
8
  "\n",
9
  "Reference solution."
10
  ],
11
  "outputs": []
12
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  {
14
  "cell_type": "code",
15
  "metadata": {},
@@ -74,4 +89,4 @@
74
  },
75
  "nbformat": 4,
76
  "nbformat_minor": 4
77
- }
 
4
  "cell_type": "markdown",
5
  "metadata": {},
6
  "source": [
7
+ "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/18_embedding_solution.ipynb)\n\n",
8
  "# Solution: Embedding Layer\n",
9
  "\n",
10
  "Reference solution."
11
  ],
12
  "outputs": []
13
  },
14
+ {
15
+ "cell_type": "code",
16
+ "metadata": {},
17
+ "source": [
18
+ "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
19
+ "try:\n",
20
+ " import google.colab\n",
21
+ " get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
22
+ "except ImportError:\n",
23
+ " pass\n"
24
+ ],
25
+ "outputs": [],
26
+ "execution_count": null
27
+ },
28
  {
29
  "cell_type": "code",
30
  "metadata": {},
 
89
  },
90
  "nbformat": 4,
91
  "nbformat_minor": 4
92
+ }
solutions/19_gelu_solution.ipynb CHANGED
@@ -4,12 +4,27 @@
4
  "cell_type": "markdown",
5
  "metadata": {},
6
  "source": [
 
7
  "# Solution: GELU Activation\n",
8
  "\n",
9
  "Reference solution."
10
  ],
11
  "outputs": []
12
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  {
14
  "cell_type": "code",
15
  "metadata": {},
@@ -68,4 +83,4 @@
68
  },
69
  "nbformat": 4,
70
  "nbformat_minor": 4
71
- }
 
4
  "cell_type": "markdown",
5
  "metadata": {},
6
  "source": [
7
+ "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/19_gelu_solution.ipynb)\n\n",
8
  "# Solution: GELU Activation\n",
9
  "\n",
10
  "Reference solution."
11
  ],
12
  "outputs": []
13
  },
14
+ {
15
+ "cell_type": "code",
16
+ "metadata": {},
17
+ "source": [
18
+ "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
19
+ "try:\n",
20
+ " import google.colab\n",
21
+ " get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
22
+ "except ImportError:\n",
23
+ " pass\n"
24
+ ],
25
+ "outputs": [],
26
+ "execution_count": null
27
+ },
28
  {
29
  "cell_type": "code",
30
  "metadata": {},
 
83
  },
84
  "nbformat": 4,
85
  "nbformat_minor": 4
86
+ }
solutions/20_weight_init_solution.ipynb CHANGED
@@ -4,12 +4,27 @@
4
  "cell_type": "markdown",
5
  "metadata": {},
6
  "source": [
 
7
  "# Solution: Kaiming Initialization\n",
8
  "\n",
9
  "Reference solution."
10
  ],
11
  "outputs": []
12
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  {
14
  "cell_type": "code",
15
  "metadata": {},
@@ -74,4 +89,4 @@
74
  },
75
  "nbformat": 4,
76
  "nbformat_minor": 4
77
- }
 
4
  "cell_type": "markdown",
5
  "metadata": {},
6
  "source": [
7
+ "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/20_weight_init_solution.ipynb)\n\n",
8
  "# Solution: Kaiming Initialization\n",
9
  "\n",
10
  "Reference solution."
11
  ],
12
  "outputs": []
13
  },
14
+ {
15
+ "cell_type": "code",
16
+ "metadata": {},
17
+ "source": [
18
+ "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
19
+ "try:\n",
20
+ " import google.colab\n",
21
+ " get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
22
+ "except ImportError:\n",
23
+ " pass\n"
24
+ ],
25
+ "outputs": [],
26
+ "execution_count": null
27
+ },
28
  {
29
  "cell_type": "code",
30
  "metadata": {},
 
89
  },
90
  "nbformat": 4,
91
  "nbformat_minor": 4
92
+ }
solutions/21_gradient_clipping_solution.ipynb CHANGED
@@ -4,12 +4,27 @@
4
  "cell_type": "markdown",
5
  "metadata": {},
6
  "source": [
 
7
  "# Solution: Gradient Norm Clipping\n",
8
  "\n",
9
  "Reference solution."
10
  ],
11
  "outputs": []
12
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  {
14
  "cell_type": "code",
15
  "metadata": {},
@@ -76,4 +91,4 @@
76
  },
77
  "nbformat": 4,
78
  "nbformat_minor": 4
79
- }
 
4
  "cell_type": "markdown",
5
  "metadata": {},
6
  "source": [
7
+ "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/21_gradient_clipping_solution.ipynb)\n\n",
8
  "# Solution: Gradient Norm Clipping\n",
9
  "\n",
10
  "Reference solution."
11
  ],
12
  "outputs": []
13
  },
14
+ {
15
+ "cell_type": "code",
16
+ "metadata": {},
17
+ "source": [
18
+ "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
19
+ "try:\n",
20
+ " import google.colab\n",
21
+ " get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
22
+ "except ImportError:\n",
23
+ " pass\n"
24
+ ],
25
+ "outputs": [],
26
+ "execution_count": null
27
+ },
28
  {
29
  "cell_type": "code",
30
  "metadata": {},
 
91
  },
92
  "nbformat": 4,
93
  "nbformat_minor": 4
94
+ }
solutions/22_conv2d_solution.ipynb CHANGED
@@ -4,12 +4,27 @@
4
  "cell_type": "markdown",
5
  "metadata": {},
6
  "source": [
 
7
  "# Solution: 2D Convolution\n",
8
  "\n",
9
  "Reference solution."
10
  ],
11
  "outputs": []
12
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  {
14
  "cell_type": "code",
15
  "metadata": {},
@@ -79,4 +94,4 @@
79
  },
80
  "nbformat": 4,
81
  "nbformat_minor": 4
82
- }
 
4
  "cell_type": "markdown",
5
  "metadata": {},
6
  "source": [
7
+ "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/22_conv2d_solution.ipynb)\n\n",
8
  "# Solution: 2D Convolution\n",
9
  "\n",
10
  "Reference solution."
11
  ],
12
  "outputs": []
13
  },
14
+ {
15
+ "cell_type": "code",
16
+ "metadata": {},
17
+ "source": [
18
+ "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
19
+ "try:\n",
20
+ " import google.colab\n",
21
+ " get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
22
+ "except ImportError:\n",
23
+ " pass\n"
24
+ ],
25
+ "outputs": [],
26
+ "execution_count": null
27
+ },
28
  {
29
  "cell_type": "code",
30
  "metadata": {},
 
94
  },
95
  "nbformat": 4,
96
  "nbformat_minor": 4
97
+ }
solutions/23_cross_attention_solution.ipynb CHANGED
@@ -4,12 +4,27 @@
4
  "cell_type": "markdown",
5
  "metadata": {},
6
  "source": [
 
7
  "# Solution: Multi-Head Cross-Attention\n",
8
  "\n",
9
  "Reference solution."
10
  ],
11
  "outputs": []
12
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  {
14
  "cell_type": "code",
15
  "metadata": {},
@@ -88,4 +103,4 @@
88
  },
89
  "nbformat": 4,
90
  "nbformat_minor": 4
91
- }
 
4
  "cell_type": "markdown",
5
  "metadata": {},
6
  "source": [
7
+ "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/23_cross_attention_solution.ipynb)\n\n",
8
  "# Solution: Multi-Head Cross-Attention\n",
9
  "\n",
10
  "Reference solution."
11
  ],
12
  "outputs": []
13
  },
14
+ {
15
+ "cell_type": "code",
16
+ "metadata": {},
17
+ "source": [
18
+ "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
19
+ "try:\n",
20
+ " import google.colab\n",
21
+ " get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
22
+ "except ImportError:\n",
23
+ " pass\n"
24
+ ],
25
+ "outputs": [],
26
+ "execution_count": null
27
+ },
28
  {
29
  "cell_type": "code",
30
  "metadata": {},
 
103
  },
104
  "nbformat": 4,
105
  "nbformat_minor": 4
106
+ }
solutions/24_rope_solution.ipynb CHANGED
@@ -4,12 +4,27 @@
4
  "cell_type": "markdown",
5
  "metadata": {},
6
  "source": [
 
7
  "# Solution: Rotary Position Embedding (RoPE)\n",
8
  "\n",
9
  "Reference solution."
10
  ],
11
  "outputs": []
12
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  {
14
  "cell_type": "code",
15
  "metadata": {},
@@ -83,4 +98,4 @@
83
  },
84
  "nbformat": 4,
85
  "nbformat_minor": 4
86
- }
 
4
  "cell_type": "markdown",
5
  "metadata": {},
6
  "source": [
7
+ "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/24_rope_solution.ipynb)\n\n",
8
  "# Solution: Rotary Position Embedding (RoPE)\n",
9
  "\n",
10
  "Reference solution."
11
  ],
12
  "outputs": []
13
  },
14
+ {
15
+ "cell_type": "code",
16
+ "metadata": {},
17
+ "source": [
18
+ "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
19
+ "try:\n",
20
+ " import google.colab\n",
21
+ " get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
22
+ "except ImportError:\n",
23
+ " pass\n"
24
+ ],
25
+ "outputs": [],
26
+ "execution_count": null
27
+ },
28
  {
29
  "cell_type": "code",
30
  "metadata": {},
 
98
  },
99
  "nbformat": 4,
100
  "nbformat_minor": 4
101
+ }
solutions/25_flash_attention_solution.ipynb CHANGED
@@ -4,12 +4,27 @@
4
  "cell_type": "markdown",
5
  "metadata": {},
6
  "source": [
 
7
  "# Solution: Flash Attention (Tiled)\n",
8
  "\n",
9
  "Reference solution."
10
  ],
11
  "outputs": []
12
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  {
14
  "cell_type": "code",
15
  "metadata": {},
@@ -92,4 +107,4 @@
92
  },
93
  "nbformat": 4,
94
  "nbformat_minor": 4
95
- }
 
4
  "cell_type": "markdown",
5
  "metadata": {},
6
  "source": [
7
+ "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/25_flash_attention_solution.ipynb)\n\n",
8
  "# Solution: Flash Attention (Tiled)\n",
9
  "\n",
10
  "Reference solution."
11
  ],
12
  "outputs": []
13
  },
14
+ {
15
+ "cell_type": "code",
16
+ "metadata": {},
17
+ "source": [
18
+ "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
19
+ "try:\n",
20
+ " import google.colab\n",
21
+ " get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
22
+ "except ImportError:\n",
23
+ " pass\n"
24
+ ],
25
+ "outputs": [],
26
+ "execution_count": null
27
+ },
28
  {
29
  "cell_type": "code",
30
  "metadata": {},
 
107
  },
108
  "nbformat": 4,
109
  "nbformat_minor": 4
110
+ }
solutions/26_lora_solution.ipynb CHANGED
@@ -4,12 +4,27 @@
4
  "cell_type": "markdown",
5
  "metadata": {},
6
  "source": [
 
7
  "# Solution: LoRA (Low-Rank Adaptation)\n",
8
  "\n",
9
  "Reference solution."
10
  ],
11
  "outputs": []
12
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  {
14
  "cell_type": "code",
15
  "metadata": {},
@@ -81,4 +96,4 @@
81
  },
82
  "nbformat": 4,
83
  "nbformat_minor": 4
84
- }
 
4
  "cell_type": "markdown",
5
  "metadata": {},
6
  "source": [
7
+ "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/26_lora_solution.ipynb)\n\n",
8
  "# Solution: LoRA (Low-Rank Adaptation)\n",
9
  "\n",
10
  "Reference solution."
11
  ],
12
  "outputs": []
13
  },
14
+ {
15
+ "cell_type": "code",
16
+ "metadata": {},
17
+ "source": [
18
+ "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
19
+ "try:\n",
20
+ " import google.colab\n",
21
+ " get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
22
+ "except ImportError:\n",
23
+ " pass\n"
24
+ ],
25
+ "outputs": [],
26
+ "execution_count": null
27
+ },
28
  {
29
  "cell_type": "code",
30
  "metadata": {},
 
96
  },
97
  "nbformat": 4,
98
  "nbformat_minor": 4
99
+ }
solutions/27_vit_patch_solution.ipynb CHANGED
@@ -4,12 +4,27 @@
4
  "cell_type": "markdown",
5
  "metadata": {},
6
  "source": [
 
7
  "# Solution: ViT Patch Embedding\n",
8
  "\n",
9
  "Reference solution."
10
  ],
11
  "outputs": []
12
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  {
14
  "cell_type": "code",
15
  "metadata": {},
@@ -81,4 +96,4 @@
81
  },
82
  "nbformat": 4,
83
  "nbformat_minor": 4
84
- }
 
4
  "cell_type": "markdown",
5
  "metadata": {},
6
  "source": [
7
+ "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/27_vit_patch_solution.ipynb)\n\n",
8
  "# Solution: ViT Patch Embedding\n",
9
  "\n",
10
  "Reference solution."
11
  ],
12
  "outputs": []
13
  },
14
+ {
15
+ "cell_type": "code",
16
+ "metadata": {},
17
+ "source": [
18
+ "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
19
+ "try:\n",
20
+ " import google.colab\n",
21
+ " get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
22
+ "except ImportError:\n",
23
+ " pass\n"
24
+ ],
25
+ "outputs": [],
26
+ "execution_count": null
27
+ },
28
  {
29
  "cell_type": "code",
30
  "metadata": {},
 
96
  },
97
  "nbformat": 4,
98
  "nbformat_minor": 4
99
+ }
solutions/28_moe_solution.ipynb CHANGED
@@ -4,12 +4,27 @@
4
  "cell_type": "markdown",
5
  "metadata": {},
6
  "source": [
 
7
  "# Solution: Mixture of Experts (MoE)\n",
8
  "\n",
9
  "Reference solution."
10
  ],
11
  "outputs": []
12
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  {
14
  "cell_type": "code",
15
  "metadata": {},
@@ -94,4 +109,4 @@
94
  },
95
  "nbformat": 4,
96
  "nbformat_minor": 4
97
- }
 
4
  "cell_type": "markdown",
5
  "metadata": {},
6
  "source": [
7
+ "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/28_moe_solution.ipynb)\n\n",
8
  "# Solution: Mixture of Experts (MoE)\n",
9
  "\n",
10
  "Reference solution."
11
  ],
12
  "outputs": []
13
  },
14
+ {
15
+ "cell_type": "code",
16
+ "metadata": {},
17
+ "source": [
18
+ "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
19
+ "try:\n",
20
+ " import google.colab\n",
21
+ " get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
22
+ "except ImportError:\n",
23
+ " pass\n"
24
+ ],
25
+ "outputs": [],
26
+ "execution_count": null
27
+ },
28
  {
29
  "cell_type": "code",
30
  "metadata": {},
 
109
  },
110
  "nbformat": 4,
111
  "nbformat_minor": 4
112
+ }
solutions/29_adam_solution.ipynb CHANGED
@@ -4,12 +4,27 @@
4
  "cell_type": "markdown",
5
  "metadata": {},
6
  "source": [
 
7
  "# Solution: Adam Optimizer\n",
8
  "\n",
9
  "Reference solution."
10
  ],
11
  "outputs": []
12
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  {
14
  "cell_type": "code",
15
  "metadata": {},
@@ -97,4 +112,4 @@
97
  },
98
  "nbformat": 4,
99
  "nbformat_minor": 4
100
- }
 
4
  "cell_type": "markdown",
5
  "metadata": {},
6
  "source": [
7
+ "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/29_adam_solution.ipynb)\n\n",
8
  "# Solution: Adam Optimizer\n",
9
  "\n",
10
  "Reference solution."
11
  ],
12
  "outputs": []
13
  },
14
+ {
15
+ "cell_type": "code",
16
+ "metadata": {},
17
+ "source": [
18
+ "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
19
+ "try:\n",
20
+ " import google.colab\n",
21
+ " get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
22
+ "except ImportError:\n",
23
+ " pass\n"
24
+ ],
25
+ "outputs": [],
26
+ "execution_count": null
27
+ },
28
  {
29
  "cell_type": "code",
30
  "metadata": {},
 
112
  },
113
  "nbformat": 4,
114
  "nbformat_minor": 4
115
+ }
solutions/30_cosine_lr_solution.ipynb CHANGED
@@ -4,12 +4,27 @@
4
  "cell_type": "markdown",
5
  "metadata": {},
6
  "source": [
 
7
  "# Solution: Cosine LR Scheduler with Warmup\n",
8
  "\n",
9
  "Reference solution."
10
  ],
11
  "outputs": []
12
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  {
14
  "cell_type": "code",
15
  "metadata": {},
@@ -71,4 +86,4 @@
71
  },
72
  "nbformat": 4,
73
  "nbformat_minor": 4
74
- }
 
4
  "cell_type": "markdown",
5
  "metadata": {},
6
  "source": [
7
+ "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/30_cosine_lr_solution.ipynb)\n\n",
8
  "# Solution: Cosine LR Scheduler with Warmup\n",
9
  "\n",
10
  "Reference solution."
11
  ],
12
  "outputs": []
13
  },
14
+ {
15
+ "cell_type": "code",
16
+ "metadata": {},
17
+ "source": [
18
+ "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
19
+ "try:\n",
20
+ " import google.colab\n",
21
+ " get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
22
+ "except ImportError:\n",
23
+ " pass\n"
24
+ ],
25
+ "outputs": [],
26
+ "execution_count": null
27
+ },
28
  {
29
  "cell_type": "code",
30
  "metadata": {},
 
86
  },
87
  "nbformat": 4,
88
  "nbformat_minor": 4
89
+ }
solutions/31_gradient_accumulation_solution.ipynb CHANGED
@@ -4,12 +4,27 @@
4
  "cell_type": "markdown",
5
  "metadata": {},
6
  "source": [
 
7
  "# Solution: Gradient Accumulation\n",
8
  "\n",
9
  "Reference solution."
10
  ],
11
  "outputs": []
12
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  {
14
  "cell_type": "code",
15
  "metadata": {},
@@ -78,4 +93,4 @@
78
  },
79
  "nbformat": 4,
80
  "nbformat_minor": 4
81
- }
 
4
  "cell_type": "markdown",
5
  "metadata": {},
6
  "source": [
7
+ "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/31_gradient_accumulation_solution.ipynb)\n\n",
8
  "# Solution: Gradient Accumulation\n",
9
  "\n",
10
  "Reference solution."
11
  ],
12
  "outputs": []
13
  },
14
+ {
15
+ "cell_type": "code",
16
+ "metadata": {},
17
+ "source": [
18
+ "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
19
+ "try:\n",
20
+ " import google.colab\n",
21
+ " get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
22
+ "except ImportError:\n",
23
+ " pass\n"
24
+ ],
25
+ "outputs": [],
26
+ "execution_count": null
27
+ },
28
  {
29
  "cell_type": "code",
30
  "metadata": {},
 
93
  },
94
  "nbformat": 4,
95
  "nbformat_minor": 4
96
+ }
solutions/32_topk_sampling_solution.ipynb CHANGED
@@ -4,12 +4,27 @@
4
  "cell_type": "markdown",
5
  "metadata": {},
6
  "source": [
 
7
  "# Solution: Top-k / Top-p Sampling\n",
8
  "\n",
9
  "Reference solution."
10
  ],
11
  "outputs": []
12
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  {
14
  "cell_type": "code",
15
  "metadata": {},
@@ -79,4 +94,4 @@
79
  },
80
  "nbformat": 4,
81
  "nbformat_minor": 4
82
- }
 
4
  "cell_type": "markdown",
5
  "metadata": {},
6
  "source": [
7
+ "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/32_topk_sampling_solution.ipynb)\n\n",
8
  "# Solution: Top-k / Top-p Sampling\n",
9
  "\n",
10
  "Reference solution."
11
  ],
12
  "outputs": []
13
  },
14
+ {
15
+ "cell_type": "code",
16
+ "metadata": {},
17
+ "source": [
18
+ "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
19
+ "try:\n",
20
+ " import google.colab\n",
21
+ " get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
22
+ "except ImportError:\n",
23
+ " pass\n"
24
+ ],
25
+ "outputs": [],
26
+ "execution_count": null
27
+ },
28
  {
29
  "cell_type": "code",
30
  "metadata": {},
 
94
  },
95
  "nbformat": 4,
96
  "nbformat_minor": 4
97
+ }
solutions/33_beam_search_solution.ipynb CHANGED
@@ -4,12 +4,27 @@
4
  "cell_type": "markdown",
5
  "metadata": {},
6
  "source": [
 
7
  "# Solution: Beam Search Decoding\n",
8
  "\n",
9
  "Reference solution."
10
  ],
11
  "outputs": []
12
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  {
14
  "cell_type": "code",
15
  "metadata": {},
@@ -88,4 +103,4 @@
88
  },
89
  "nbformat": 4,
90
  "nbformat_minor": 4
91
- }
 
4
  "cell_type": "markdown",
5
  "metadata": {},
6
  "source": [
7
+ "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/33_beam_search_solution.ipynb)\n\n",
8
  "# Solution: Beam Search Decoding\n",
9
  "\n",
10
  "Reference solution."
11
  ],
12
  "outputs": []
13
  },
14
+ {
15
+ "cell_type": "code",
16
+ "metadata": {},
17
+ "source": [
18
+ "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
19
+ "try:\n",
20
+ " import google.colab\n",
21
+ " get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
22
+ "except ImportError:\n",
23
+ " pass\n"
24
+ ],
25
+ "outputs": [],
26
+ "execution_count": null
27
+ },
28
  {
29
  "cell_type": "code",
30
  "metadata": {},
 
103
  },
104
  "nbformat": 4,
105
  "nbformat_minor": 4
106
+ }
solutions/34_speculative_decoding_solution.ipynb CHANGED
@@ -4,12 +4,27 @@
4
  "cell_type": "markdown",
5
  "metadata": {},
6
  "source": [
 
7
  "# Solution: Speculative Decoding\n",
8
  "\n",
9
  "Reference solution."
10
  ],
11
  "outputs": []
12
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  {
14
  "cell_type": "code",
15
  "metadata": {},
@@ -84,4 +99,4 @@
84
  },
85
  "nbformat": 4,
86
  "nbformat_minor": 4
87
- }
 
4
  "cell_type": "markdown",
5
  "metadata": {},
6
  "source": [
7
+ "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/34_speculative_decoding_solution.ipynb)\n\n",
8
  "# Solution: Speculative Decoding\n",
9
  "\n",
10
  "Reference solution."
11
  ],
12
  "outputs": []
13
  },
14
+ {
15
+ "cell_type": "code",
16
+ "metadata": {},
17
+ "source": [
18
+ "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
19
+ "try:\n",
20
+ " import google.colab\n",
21
+ " get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
22
+ "except ImportError:\n",
23
+ " pass\n"
24
+ ],
25
+ "outputs": [],
26
+ "execution_count": null
27
+ },
28
  {
29
  "cell_type": "code",
30
  "metadata": {},
 
99
  },
100
  "nbformat": 4,
101
  "nbformat_minor": 4
102
+ }
solutions/35_bpe_solution.ipynb CHANGED
@@ -4,12 +4,27 @@
4
  "cell_type": "markdown",
5
  "metadata": {},
6
  "source": [
 
7
  "# Solution: Byte-Pair Encoding (BPE)\n",
8
  "\n",
9
  "Reference solution."
10
  ],
11
  "outputs": []
12
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  {
14
  "cell_type": "code",
15
  "metadata": {},
@@ -113,4 +128,4 @@
113
  },
114
  "nbformat": 4,
115
  "nbformat_minor": 4
116
- }
 
4
  "cell_type": "markdown",
5
  "metadata": {},
6
  "source": [
7
+ "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/35_bpe_solution.ipynb)\n\n",
8
  "# Solution: Byte-Pair Encoding (BPE)\n",
9
  "\n",
10
  "Reference solution."
11
  ],
12
  "outputs": []
13
  },
14
+ {
15
+ "cell_type": "code",
16
+ "metadata": {},
17
+ "source": [
18
+ "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
19
+ "try:\n",
20
+ " import google.colab\n",
21
+ " get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
22
+ "except ImportError:\n",
23
+ " pass\n"
24
+ ],
25
+ "outputs": [],
26
+ "execution_count": null
27
+ },
28
  {
29
  "cell_type": "code",
30
  "metadata": {},
 
128
  },
129
  "nbformat": 4,
130
  "nbformat_minor": 4
131
+ }
solutions/36_int8_quantization_solution.ipynb CHANGED
@@ -4,12 +4,27 @@
4
  "cell_type": "markdown",
5
  "metadata": {},
6
  "source": [
 
7
  "# Solution: INT8 Quantized Linear\n",
8
  "\n",
9
  "Reference solution."
10
  ],
11
  "outputs": []
12
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  {
14
  "cell_type": "code",
15
  "metadata": {},
@@ -83,4 +98,4 @@
83
  },
84
  "nbformat": 4,
85
  "nbformat_minor": 4
86
- }
 
4
  "cell_type": "markdown",
5
  "metadata": {},
6
  "source": [
7
+ "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/36_int8_quantization_solution.ipynb)\n\n",
8
  "# Solution: INT8 Quantized Linear\n",
9
  "\n",
10
  "Reference solution."
11
  ],
12
  "outputs": []
13
  },
14
+ {
15
+ "cell_type": "code",
16
+ "metadata": {},
17
+ "source": [
18
+ "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
19
+ "try:\n",
20
+ " import google.colab\n",
21
+ " get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
22
+ "except ImportError:\n",
23
+ " pass\n"
24
+ ],
25
+ "outputs": [],
26
+ "execution_count": null
27
+ },
28
  {
29
  "cell_type": "code",
30
  "metadata": {},
 
98
  },
99
  "nbformat": 4,
100
  "nbformat_minor": 4
101
+ }
solutions/37_dpo_loss_solution.ipynb CHANGED
@@ -4,12 +4,27 @@
4
  "cell_type": "markdown",
5
  "metadata": {},
6
  "source": [
 
7
  "# Solution: DPO (Direct Preference Optimization) Loss\n",
8
  "\n",
9
  "Reference solution."
10
  ],
11
  "outputs": []
12
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  {
14
  "cell_type": "code",
15
  "metadata": {},
@@ -73,4 +88,4 @@
73
  },
74
  "nbformat": 4,
75
  "nbformat_minor": 4
76
- }
 
4
  "cell_type": "markdown",
5
  "metadata": {},
6
  "source": [
7
+ "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/37_dpo_loss_solution.ipynb)\n\n",
8
  "# Solution: DPO (Direct Preference Optimization) Loss\n",
9
  "\n",
10
  "Reference solution."
11
  ],
12
  "outputs": []
13
  },
14
+ {
15
+ "cell_type": "code",
16
+ "metadata": {},
17
+ "source": [
18
+ "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
19
+ "try:\n",
20
+ " import google.colab\n",
21
+ " get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
22
+ "except ImportError:\n",
23
+ " pass\n"
24
+ ],
25
+ "outputs": [],
26
+ "execution_count": null
27
+ },
28
  {
29
  "cell_type": "code",
30
  "metadata": {},
 
88
  },
89
  "nbformat": 4,
90
  "nbformat_minor": 4
91
+ }
solutions/38_grpo_loss_solution.ipynb CHANGED
@@ -4,11 +4,26 @@
4
  "cell_type": "markdown",
5
  "metadata": {},
6
  "source": [
 
7
  "# Solution: GRPO (Group Relative Policy Optimization) Loss\n",
8
  "\n",
9
  "Reference solution."
10
  ]
11
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  {
13
  "cell_type": "code",
14
  "execution_count": null,
 
4
  "cell_type": "markdown",
5
  "metadata": {},
6
  "source": [
7
+ "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/38_grpo_loss_solution.ipynb)\n\n",
8
  "# Solution: GRPO (Group Relative Policy Optimization) Loss\n",
9
  "\n",
10
  "Reference solution."
11
  ]
12
  },
13
+ {
14
+ "cell_type": "code",
15
+ "metadata": {},
16
+ "source": [
17
+ "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
18
+ "try:\n",
19
+ " import google.colab\n",
20
+ " get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
21
+ "except ImportError:\n",
22
+ " pass\n"
23
+ ],
24
+ "outputs": [],
25
+ "execution_count": null
26
+ },
27
  {
28
  "cell_type": "code",
29
  "execution_count": null,
solutions/39_ppo_loss_solution.ipynb CHANGED
@@ -4,11 +4,26 @@
4
  "cell_type": "markdown",
5
  "metadata": {},
6
  "source": [
 
7
  "# Solution: PPO Clipped Loss\n",
8
  "\n",
9
  "Reference solution for the PPO clipped surrogate loss task.\n"
10
  ]
11
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  {
13
  "cell_type": "code",
14
  "execution_count": null,
@@ -89,4 +104,3 @@
89
  "nbformat": 4,
90
  "nbformat_minor": 5
91
  }
92
-
 
4
  "cell_type": "markdown",
5
  "metadata": {},
6
  "source": [
7
+ "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/39_ppo_loss_solution.ipynb)\n\n",
8
  "# Solution: PPO Clipped Loss\n",
9
  "\n",
10
  "Reference solution for the PPO clipped surrogate loss task.\n"
11
  ]
12
  },
13
+ {
14
+ "cell_type": "code",
15
+ "metadata": {},
16
+ "source": [
17
+ "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
18
+ "try:\n",
19
+ " import google.colab\n",
20
+ " get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
21
+ "except ImportError:\n",
22
+ " pass\n"
23
+ ],
24
+ "outputs": [],
25
+ "execution_count": null
26
+ },
27
  {
28
  "cell_type": "code",
29
  "execution_count": null,
 
104
  "nbformat": 4,
105
  "nbformat_minor": 5
106
  }
 
solutions/40_linear_regression_solution.ipynb CHANGED
@@ -1,125 +1,140 @@
1
  {
2
- "cells": [
3
- {
4
- "cell_type": "markdown",
5
- "metadata": {},
6
- "source": [
7
- "# 🟡 Solution: Linear Regression\n",
8
- "\n",
9
- "Reference solution demonstrating closed-form, gradient descent, and nn.Linear approaches."
10
- ]
11
- },
12
- {
13
- "cell_type": "code",
14
- "metadata": {},
15
- "outputs": [],
16
- "source": [
17
- "import torch\n",
18
- "import torch.nn as nn"
19
- ],
20
- "execution_count": null
21
- },
22
- {
23
- "cell_type": "code",
24
- "metadata": {},
25
- "outputs": [],
26
- "source": [
27
- "# ✅ SOLUTION\n",
28
- "\n",
29
- "class LinearRegression:\n",
30
- " def closed_form(self, X: torch.Tensor, y: torch.Tensor):\n",
31
- " \"\"\"Normal equation via augmented matrix.\"\"\"\n",
32
- " N, D = X.shape\n",
33
- " # Augment X with ones column for bias\n",
34
- " X_aug = torch.cat([X, torch.ones(N, 1)], dim=1) # (N, D+1)\n",
35
- " # Solve (X^T X) theta = X^T y\n",
36
- " theta = torch.linalg.lstsq(X_aug, y).solution # (D+1,)\n",
37
- " w = theta[:D]\n",
38
- " b = theta[D]\n",
39
- " return w.detach(), b.detach()\n",
40
- "\n",
41
- " def gradient_descent(self, X: torch.Tensor, y: torch.Tensor,\n",
42
- " lr: float = 0.01, steps: int = 1000):\n",
43
- " \"\"\"Manual gradient computation — no autograd.\"\"\"\n",
44
- " N, D = X.shape\n",
45
- " w = torch.zeros(D)\n",
46
- " b = torch.tensor(0.0)\n",
47
- "\n",
48
- " for _ in range(steps):\n",
49
- " pred = X @ w + b # (N,)\n",
50
- " error = pred - y # (N,)\n",
51
- " grad_w = (2.0 / N) * (X.T @ error) # (D,)\n",
52
- " grad_b = (2.0 / N) * error.sum() # scalar\n",
53
- " w = w - lr * grad_w\n",
54
- " b = b - lr * grad_b\n",
55
- "\n",
56
- " return w, b\n",
57
- "\n",
58
- " def nn_linear(self, X: torch.Tensor, y: torch.Tensor,\n",
59
- " lr: float = 0.01, steps: int = 1000):\n",
60
- " \"\"\"PyTorch nn.Linear with autograd training loop.\"\"\"\n",
61
- " N, D = X.shape\n",
62
- " layer = nn.Linear(D, 1)\n",
63
- " optimizer = torch.optim.SGD(layer.parameters(), lr=lr)\n",
64
- " loss_fn = nn.MSELoss()\n",
65
- "\n",
66
- " for _ in range(steps):\n",
67
- " optimizer.zero_grad()\n",
68
- " pred = layer(X).squeeze(-1) # (N,)\n",
69
- " loss = loss_fn(pred, y)\n",
70
- " loss.backward()\n",
71
- " optimizer.step()\n",
72
- "\n",
73
- " w = layer.weight.data.squeeze(0) # (D,)\n",
74
- " b = layer.bias.data.squeeze(0) # scalar ()\n",
75
- " return w, b"
76
- ],
77
- "execution_count": null
78
- },
79
- {
80
- "cell_type": "code",
81
- "metadata": {},
82
- "outputs": [],
83
- "source": [
84
- "# Verify\n",
85
- "torch.manual_seed(42)\n",
86
- "X = torch.randn(100, 3)\n",
87
- "true_w = torch.tensor([2.0, -1.0, 0.5])\n",
88
- "y = X @ true_w + 3.0\n",
89
- "\n",
90
- "model = LinearRegression()\n",
91
- "for name, method in [(\"Closed-form\", model.closed_form),\n",
92
- " (\"Grad Descent\", lambda X, y: model.gradient_descent(X, y, lr=0.05, steps=2000)),\n",
93
- " (\"nn.Linear\", lambda X, y: model.nn_linear(X, y, lr=0.05, steps=2000))]:\n",
94
- " w, b = method(X, y)\n",
95
- " print(f\"{name:13s} w={w.tolist()} b={b.item():.4f}\")\n",
96
- "print(f\"{'True':13s} w={true_w.tolist()} b=3.0000\")"
97
- ],
98
- "execution_count": null
99
- },
100
- {
101
- "cell_type": "code",
102
- "metadata": {},
103
- "outputs": [],
104
- "source": [
105
- "# ✅ SUBMIT\n",
106
- "from torch_judge import check\n",
107
- "check(\"linear_regression\")"
108
- ],
109
- "execution_count": null
110
- }
111
- ],
112
- "metadata": {
113
- "kernelspec": {
114
- "display_name": "Python 3",
115
- "language": "python",
116
- "name": "python3"
117
- },
118
- "language_info": {
119
- "name": "python",
120
- "version": "3.11.0"
121
- }
122
  },
123
- "nbformat": 4,
124
- "nbformat_minor": 4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
125
  }
 
1
  {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {},
6
+ "source": [
7
+ "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/40_linear_regression_solution.ipynb)\n\n",
8
+ "# 🟡 Solution: Linear Regression\n",
9
+ "\n",
10
+ "Reference solution demonstrating closed-form, gradient descent, and nn.Linear approaches."
11
+ ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  },
13
+ {
14
+ "cell_type": "code",
15
+ "metadata": {},
16
+ "source": [
17
+ "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
18
+ "try:\n",
19
+ " import google.colab\n",
20
+ " get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
21
+ "except ImportError:\n",
22
+ " pass\n"
23
+ ],
24
+ "outputs": [],
25
+ "execution_count": null
26
+ },
27
+ {
28
+ "cell_type": "code",
29
+ "metadata": {},
30
+ "outputs": [],
31
+ "source": [
32
+ "import torch\n",
33
+ "import torch.nn as nn"
34
+ ],
35
+ "execution_count": null
36
+ },
37
+ {
38
+ "cell_type": "code",
39
+ "metadata": {},
40
+ "outputs": [],
41
+ "source": [
42
+ "# ✅ SOLUTION\n",
43
+ "\n",
44
+ "class LinearRegression:\n",
45
+ " def closed_form(self, X: torch.Tensor, y: torch.Tensor):\n",
46
+ " \"\"\"Normal equation via augmented matrix.\"\"\"\n",
47
+ " N, D = X.shape\n",
48
+ " # Augment X with ones column for bias\n",
49
+ " X_aug = torch.cat([X, torch.ones(N, 1)], dim=1) # (N, D+1)\n",
50
+ " # Solve (X^T X) theta = X^T y\n",
51
+ " theta = torch.linalg.lstsq(X_aug, y).solution # (D+1,)\n",
52
+ " w = theta[:D]\n",
53
+ " b = theta[D]\n",
54
+ " return w.detach(), b.detach()\n",
55
+ "\n",
56
+ " def gradient_descent(self, X: torch.Tensor, y: torch.Tensor,\n",
57
+ " lr: float = 0.01, steps: int = 1000):\n",
58
+ " \"\"\"Manual gradient computation — no autograd.\"\"\"\n",
59
+ " N, D = X.shape\n",
60
+ " w = torch.zeros(D)\n",
61
+ " b = torch.tensor(0.0)\n",
62
+ "\n",
63
+ " for _ in range(steps):\n",
64
+ " pred = X @ w + b # (N,)\n",
65
+ " error = pred - y # (N,)\n",
66
+ " grad_w = (2.0 / N) * (X.T @ error) # (D,)\n",
67
+ " grad_b = (2.0 / N) * error.sum() # scalar\n",
68
+ " w = w - lr * grad_w\n",
69
+ " b = b - lr * grad_b\n",
70
+ "\n",
71
+ " return w, b\n",
72
+ "\n",
73
+ " def nn_linear(self, X: torch.Tensor, y: torch.Tensor,\n",
74
+ " lr: float = 0.01, steps: int = 1000):\n",
75
+ " \"\"\"PyTorch nn.Linear with autograd training loop.\"\"\"\n",
76
+ " N, D = X.shape\n",
77
+ " layer = nn.Linear(D, 1)\n",
78
+ " optimizer = torch.optim.SGD(layer.parameters(), lr=lr)\n",
79
+ " loss_fn = nn.MSELoss()\n",
80
+ "\n",
81
+ " for _ in range(steps):\n",
82
+ " optimizer.zero_grad()\n",
83
+ " pred = layer(X).squeeze(-1) # (N,)\n",
84
+ " loss = loss_fn(pred, y)\n",
85
+ " loss.backward()\n",
86
+ " optimizer.step()\n",
87
+ "\n",
88
+ " w = layer.weight.data.squeeze(0) # (D,)\n",
89
+ " b = layer.bias.data.squeeze(0) # scalar ()\n",
90
+ " return w, b"
91
+ ],
92
+ "execution_count": null
93
+ },
94
+ {
95
+ "cell_type": "code",
96
+ "metadata": {},
97
+ "outputs": [],
98
+ "source": [
99
+ "# Verify\n",
100
+ "torch.manual_seed(42)\n",
101
+ "X = torch.randn(100, 3)\n",
102
+ "true_w = torch.tensor([2.0, -1.0, 0.5])\n",
103
+ "y = X @ true_w + 3.0\n",
104
+ "\n",
105
+ "model = LinearRegression()\n",
106
+ "for name, method in [(\"Closed-form\", model.closed_form),\n",
107
+ " (\"Grad Descent\", lambda X, y: model.gradient_descent(X, y, lr=0.05, steps=2000)),\n",
108
+ " (\"nn.Linear\", lambda X, y: model.nn_linear(X, y, lr=0.05, steps=2000))]:\n",
109
+ " w, b = method(X, y)\n",
110
+ " print(f\"{name:13s} w={w.tolist()} b={b.item():.4f}\")\n",
111
+ "print(f\"{'True':13s} w={true_w.tolist()} b=3.0000\")"
112
+ ],
113
+ "execution_count": null
114
+ },
115
+ {
116
+ "cell_type": "code",
117
+ "metadata": {},
118
+ "outputs": [],
119
+ "source": [
120
+ "# ✅ SUBMIT\n",
121
+ "from torch_judge import check\n",
122
+ "check(\"linear_regression\")"
123
+ ],
124
+ "execution_count": null
125
+ }
126
+ ],
127
+ "metadata": {
128
+ "kernelspec": {
129
+ "display_name": "Python 3",
130
+ "language": "python",
131
+ "name": "python3"
132
+ },
133
+ "language_info": {
134
+ "name": "python",
135
+ "version": "3.11.0"
136
+ }
137
+ },
138
+ "nbformat": 4,
139
+ "nbformat_minor": 4
140
  }
templates/00_welcome.ipynb CHANGED
@@ -31,24 +31,120 @@
31
  "\n",
32
  "> 💡 Every notebook also has a **Colab** toolbar button and an **Open in Colab** badge — use them to run problems in Google Colab with zero setup.\n",
33
  "\n",
34
- "## Quick Start"
 
 
35
  ]
36
  },
37
  {
38
  "cell_type": "code",
 
39
  "metadata": {},
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  "source": [
41
  "from torch_judge import status\n",
42
  "status()"
43
- ],
44
- "execution_count": null,
45
- "outputs": []
46
  },
47
  {
48
  "cell_type": "markdown",
49
  "metadata": {},
50
  "source": [
51
- "## Problem List (40 problems)\n\n### 🧱 Fundamentals — \"Implement X from scratch\"\n\n| # | Problem | Difficulty | Template | Solution |\n|:---:|---------|:----------:|:--------:|:--------:|\n| 1 | ReLU | 🟢 Easy | [Open](01_relu.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/01_relu.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/01_relu.ipynb\" target=\"_blank\">Colab</a> | [Open](01_relu_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/01_relu_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/01_relu_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 2 | Softmax | 🟢 Easy | [Open](02_softmax.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/02_softmax.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/02_softmax.ipynb\" target=\"_blank\">Colab</a> | [Open](02_softmax_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/02_softmax_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/02_softmax_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 16 | Cross-Entropy Loss | 🟢 Easy | [Open](16_cross_entropy.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/16_cross_entropy.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/16_cross_entropy.ipynb\" target=\"_blank\">Colab</a> | [Open](16_cross_entropy_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/16_cross_entropy_solution.ipynb\" target=\"_blank\">GitHub</a> · <a 
href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/16_cross_entropy_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 17 | Dropout | 🟢 Easy | [Open](17_dropout.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/17_dropout.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/17_dropout.ipynb\" target=\"_blank\">Colab</a> | [Open](17_dropout_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/17_dropout_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/17_dropout_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 18 | Embedding | 🟢 Easy | [Open](18_embedding.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/18_embedding.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/18_embedding.ipynb\" target=\"_blank\">Colab</a> | [Open](18_embedding_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/18_embedding_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/18_embedding_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 19 | GELU | 🟢 Easy | [Open](19_gelu.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/19_gelu.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/19_gelu.ipynb\" target=\"_blank\">Colab</a> | [Open](19_gelu_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/19_gelu_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/19_gelu_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 
20 | Kaiming Init | 🟢 Easy | [Open](20_weight_init.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/20_weight_init.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/20_weight_init.ipynb\" target=\"_blank\">Colab</a> | [Open](20_weight_init_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/20_weight_init_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/20_weight_init_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 21 | Gradient Clipping | 🟢 Easy | [Open](21_gradient_clipping.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/21_gradient_clipping.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/21_gradient_clipping.ipynb\" target=\"_blank\">Colab</a> | [Open](21_gradient_clipping_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/21_gradient_clipping_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/21_gradient_clipping_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 31 | Gradient Accumulation | 🟢 Easy | [Open](31_gradient_accumulation.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/31_gradient_accumulation.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/31_gradient_accumulation.ipynb\" target=\"_blank\">Colab</a> | [Open](31_gradient_accumulation_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/31_gradient_accumulation_solution.ipynb\" target=\"_blank\">GitHub</a> · <a 
href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/31_gradient_accumulation_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 3 | Linear Layer | 🟡 Medium | [Open](03_linear.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/03_linear.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/03_linear.ipynb\" target=\"_blank\">Colab</a> | [Open](03_linear_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/03_linear_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/03_linear_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 4 | LayerNorm | 🟡 Medium | [Open](04_layernorm.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/04_layernorm.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/04_layernorm.ipynb\" target=\"_blank\">Colab</a> | [Open](04_layernorm_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/04_layernorm_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/04_layernorm_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 7 | BatchNorm | 🟡 Medium | [Open](07_batchnorm.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/07_batchnorm.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/07_batchnorm.ipynb\" target=\"_blank\">Colab</a> | [Open](07_batchnorm_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/07_batchnorm_solution.ipynb\" target=\"_blank\">GitHub</a> · <a 
href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/07_batchnorm_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 8 | RMSNorm | 🟡 Medium | [Open](08_rmsnorm.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/08_rmsnorm.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/08_rmsnorm.ipynb\" target=\"_blank\">Colab</a> | [Open](08_rmsnorm_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/08_rmsnorm_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/08_rmsnorm_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 15 | SwiGLU MLP | 🟡 Medium | [Open](15_mlp.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/15_mlp.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/15_mlp.ipynb\" target=\"_blank\">Colab</a> | [Open](15_mlp_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/15_mlp_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/15_mlp_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 22 | Conv2d | 🟡 Medium | [Open](22_conv2d.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/22_conv2d.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/22_conv2d.ipynb\" target=\"_blank\">Colab</a> | [Open](22_conv2d_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/22_conv2d_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/22_conv2d_solution.ipynb\" target=\"_blank\">Colab</a> |\n\n### 🧠 Attention 
Mechanisms\n\n| # | Problem | Difficulty | Template | Solution |\n|:---:|---------|:----------:|:--------:|:--------:|\n| 23 | Cross-Attention | 🟡 Medium | [Open](23_cross_attention.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/23_cross_attention.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/23_cross_attention.ipynb\" target=\"_blank\">Colab</a> | [Open](23_cross_attention_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/23_cross_attention_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/23_cross_attention_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 5 | Scaled Dot-Product Attention | 🔴 Hard | [Open](05_attention.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/05_attention.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/05_attention.ipynb\" target=\"_blank\">Colab</a> | [Open](05_attention_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/05_attention_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/05_attention_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 6 | Multi-Head Attention | 🔴 Hard | [Open](06_multihead_attention.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/06_multihead_attention.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/06_multihead_attention.ipynb\" target=\"_blank\">Colab</a> | [Open](06_multihead_attention_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/06_multihead_attention_solution.ipynb\" target=\"_blank\">GitHub</a> · <a 
href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/06_multihead_attention_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 9 | Causal Self-Attention | 🔴 Hard | [Open](09_causal_attention.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/09_causal_attention.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/09_causal_attention.ipynb\" target=\"_blank\">Colab</a> | [Open](09_causal_attention_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/09_causal_attention_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/09_causal_attention_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 10 | Grouped Query Attention | 🔴 Hard | [Open](10_gqa.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/10_gqa.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/10_gqa.ipynb\" target=\"_blank\">Colab</a> | [Open](10_gqa_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/10_gqa_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/10_gqa_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 11 | Sliding Window Attention | 🔴 Hard | [Open](11_sliding_window.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/11_sliding_window.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/11_sliding_window.ipynb\" target=\"_blank\">Colab</a> | [Open](11_sliding_window_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/11_sliding_window_solution.ipynb\" target=\"_blank\">GitHub</a> · <a 
href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/11_sliding_window_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 12 | Linear Attention | 🔴 Hard | [Open](12_linear_attention.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/12_linear_attention.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/12_linear_attention.ipynb\" target=\"_blank\">Colab</a> | [Open](12_linear_attention_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/12_linear_attention_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/12_linear_attention_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 14 | KV Cache Attention | 🔴 Hard | [Open](14_kv_cache.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/14_kv_cache.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/14_kv_cache.ipynb\" target=\"_blank\">Colab</a> | [Open](14_kv_cache_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/14_kv_cache_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/14_kv_cache_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 24 | RoPE | 🔴 Hard | [Open](24_rope.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/24_rope.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/24_rope.ipynb\" target=\"_blank\">Colab</a> | [Open](24_rope_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/24_rope_solution.ipynb\" target=\"_blank\">GitHub</a> · <a 
href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/24_rope_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 25 | Flash Attention | 🔴 Hard | [Open](25_flash_attention.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/25_flash_attention.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/25_flash_attention.ipynb\" target=\"_blank\">Colab</a> | [Open](25_flash_attention_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/25_flash_attention_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/25_flash_attention_solution.ipynb\" target=\"_blank\">Colab</a> |\n\n### 🏗️ Architecture & Adaptation\n\n| # | Problem | Difficulty | Template | Solution |\n|:---:|---------|:----------:|:--------:|:--------:|\n| 26 | LoRA | 🟡 Medium | [Open](26_lora.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/26_lora.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/26_lora.ipynb\" target=\"_blank\">Colab</a> | [Open](26_lora_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/26_lora_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/26_lora_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 27 | ViT Patch Embedding | 🟡 Medium | [Open](27_vit_patch.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/27_vit_patch.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/27_vit_patch.ipynb\" target=\"_blank\">Colab</a> | [Open](27_vit_patch_solution.ipynb) · <a 
href=\"https://github.com/duoan/TorchCode/blob/master/solutions/27_vit_patch_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/27_vit_patch_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 13 | GPT-2 Block | 🔴 Hard | [Open](13_gpt2_block.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/13_gpt2_block.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/13_gpt2_block.ipynb\" target=\"_blank\">Colab</a> | [Open](13_gpt2_block_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/13_gpt2_block_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/13_gpt2_block_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 28 | Mixture of Experts | 🔴 Hard | [Open](28_moe.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/28_moe.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/28_moe.ipynb\" target=\"_blank\">Colab</a> | [Open](28_moe_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/28_moe_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/28_moe_solution.ipynb\" target=\"_blank\">Colab</a> |\n\n### ⚙️ Training & Optimization\n\n| # | Problem | Difficulty | Template | Solution |\n|:---:|---------|:----------:|:--------:|:--------:|\n| 29 | Adam Optimizer | 🟡 Medium | [Open](29_adam.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/29_adam.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/29_adam.ipynb\" target=\"_blank\">Colab</a> | 
[Open](29_adam_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/29_adam_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/29_adam_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 30 | Cosine LR Scheduler | 🟡 Medium | [Open](30_cosine_lr.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/30_cosine_lr.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/30_cosine_lr.ipynb\" target=\"_blank\">Colab</a> | [Open](30_cosine_lr_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/30_cosine_lr_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/30_cosine_lr_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 40 | Linear Regression | 🟡 Medium | [Open](40_linear_regression.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/40_linear_regression.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/40_linear_regression.ipynb\" target=\"_blank\">Colab</a> | [Open](40_linear_regression_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/40_linear_regression_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/40_linear_regression_solution.ipynb\" target=\"_blank\">Colab</a> |\n\n### 🎯 Inference & Decoding\n\n| # | Problem | Difficulty | Template | Solution |\n|:---:|---------|:----------:|:--------:|:--------:|\n| 32 | Top-k / Top-p Sampling | 🟡 Medium | [Open](32_topk_sampling.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/32_topk_sampling.ipynb\" target=\"_blank\">GitHub</a> · <a 
href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/32_topk_sampling.ipynb\" target=\"_blank\">Colab</a> | [Open](32_topk_sampling_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/32_topk_sampling_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/32_topk_sampling_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 33 | Beam Search | 🟡 Medium | [Open](33_beam_search.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/33_beam_search.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/33_beam_search.ipynb\" target=\"_blank\">Colab</a> | [Open](33_beam_search_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/33_beam_search_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/33_beam_search_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 34 | Speculative Decoding | 🔴 Hard | [Open](34_speculative_decoding.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/34_speculative_decoding.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/34_speculative_decoding.ipynb\" target=\"_blank\">Colab</a> | [Open](34_speculative_decoding_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/34_speculative_decoding_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/34_speculative_decoding_solution.ipynb\" target=\"_blank\">Colab</a> |\n\n### 🔬 Advanced\n\n| # | Problem | Difficulty | Template | Solution |\n|:---:|---------|:----------:|:--------:|:--------:|\n| 35 | BPE Tokenizer | 🔴 Hard | 
[Open](35_bpe.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/35_bpe.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/35_bpe.ipynb\" target=\"_blank\">Colab</a> | [Open](35_bpe_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/35_bpe_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/35_bpe_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 36 | INT8 Quantization | 🔴 Hard | [Open](36_int8_quantization.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/36_int8_quantization.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/36_int8_quantization.ipynb\" target=\"_blank\">Colab</a> | [Open](36_int8_quantization_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/36_int8_quantization_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/36_int8_quantization_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 37 | DPO Loss | 🔴 Hard | [Open](37_dpo_loss.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/37_dpo_loss.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/37_dpo_loss.ipynb\" target=\"_blank\">Colab</a> | [Open](37_dpo_loss_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/37_dpo_loss_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/37_dpo_loss_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 38 | GRPO Loss | 🔴 Hard | [Open](38_grpo_loss.ipynb) · <a 
href=\"https://github.com/duoan/TorchCode/blob/master/templates/38_grpo_loss.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/38_grpo_loss.ipynb\" target=\"_blank\">Colab</a> | [Open](38_grpo_loss_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/38_grpo_loss_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/38_grpo_loss_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 39 | PPO Loss | 🔴 Hard | [Open](39_ppo_loss.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/39_ppo_loss.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/39_ppo_loss.ipynb\" target=\"_blank\">Colab</a> | [Open](39_ppo_loss_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/39_ppo_loss_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/39_ppo_loss_solution.ipynb\" target=\"_blank\">Colab</a> |\n\n## Useful Commands\n\n```python\nfrom torch_judge import check, hint, status\n\nstatus() # Progress dashboard\ncheck(\"relu\") # Judge your implementation\nhint(\"causal_attention\") # Get a hint\n```"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  ]
53
  }
54
  ],
@@ -65,4 +161,4 @@
65
  },
66
  "nbformat": 4,
67
  "nbformat_minor": 4
68
- }
 
31
  "\n",
32
  "> 💡 Every notebook also has a **Colab** toolbar button and an **Open in Colab** badge — use them to run problems in Google Colab with zero setup.\n",
33
  "\n",
34
+ "## Quick Start\n",
35
+ "\n",
36
+ "📖 **Reference solutions in Colab**: [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/01_relu_solution.ipynb) — Start with ReLU. Or use the **Colab** links in the table below for each solution."
37
  ]
38
  },
39
  {
40
  "cell_type": "code",
41
+ "execution_count": null,
42
  "metadata": {},
43
+ "outputs": [],
44
+ "source": [
45
+ "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
46
+ "try:\n",
47
+ " import google.colab\n",
48
+ " get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
49
+ "except ImportError:\n",
50
+ " pass\n"
51
+ ]
52
+ },
53
+ {
54
+ "cell_type": "code",
55
+ "execution_count": null,
56
+ "metadata": {},
57
+ "outputs": [],
58
  "source": [
59
  "from torch_judge import status\n",
60
  "status()"
61
+ ]
 
 
62
  },
63
  {
64
  "cell_type": "markdown",
65
  "metadata": {},
66
  "source": [
67
+ "## Problem List (40 problems)\n",
68
+ "\n",
69
+ "### 🧱 Fundamentals — \"Implement X from scratch\"\n",
70
+ "\n",
71
+ "| # | Problem | Difficulty | Template | Solution |\n",
72
+ "|:---:|---------|:----------:|:--------:|:--------:|\n",
73
+ "| 1 | ReLU | 🟢 Easy | [Open](01_relu.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/01_relu.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/01_relu.ipynb\" target=\"_blank\">Colab</a> | [Open](01_relu_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/01_relu_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/01_relu_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
74
+ "| 2 | Softmax | 🟢 Easy | [Open](02_softmax.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/02_softmax.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/02_softmax.ipynb\" target=\"_blank\">Colab</a> | [Open](02_softmax_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/02_softmax_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/02_softmax_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
75
+ "| 16 | Cross-Entropy Loss | 🟢 Easy | [Open](16_cross_entropy.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/16_cross_entropy.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/16_cross_entropy.ipynb\" target=\"_blank\">Colab</a> | [Open](16_cross_entropy_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/16_cross_entropy_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/16_cross_entropy_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
76
+ "| 17 | Dropout | 🟢 Easy | [Open](17_dropout.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/17_dropout.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/17_dropout.ipynb\" target=\"_blank\">Colab</a> | [Open](17_dropout_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/17_dropout_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/17_dropout_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
77
+ "| 18 | Embedding | 🟢 Easy | [Open](18_embedding.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/18_embedding.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/18_embedding.ipynb\" target=\"_blank\">Colab</a> | [Open](18_embedding_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/18_embedding_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/18_embedding_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
78
+ "| 19 | GELU | 🟢 Easy | [Open](19_gelu.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/19_gelu.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/19_gelu.ipynb\" target=\"_blank\">Colab</a> | [Open](19_gelu_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/19_gelu_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/19_gelu_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
79
+ "| 20 | Kaiming Init | 🟢 Easy | [Open](20_weight_init.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/20_weight_init.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/20_weight_init.ipynb\" target=\"_blank\">Colab</a> | [Open](20_weight_init_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/20_weight_init_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/20_weight_init_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
80
+ "| 21 | Gradient Clipping | 🟢 Easy | [Open](21_gradient_clipping.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/21_gradient_clipping.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/21_gradient_clipping.ipynb\" target=\"_blank\">Colab</a> | [Open](21_gradient_clipping_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/21_gradient_clipping_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/21_gradient_clipping_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
81
+ "| 31 | Gradient Accumulation | 🟢 Easy | [Open](31_gradient_accumulation.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/31_gradient_accumulation.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/31_gradient_accumulation.ipynb\" target=\"_blank\">Colab</a> | [Open](31_gradient_accumulation_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/31_gradient_accumulation_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/31_gradient_accumulation_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
82
+ "| 3 | Linear Layer | 🟡 Medium | [Open](03_linear.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/03_linear.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/03_linear.ipynb\" target=\"_blank\">Colab</a> | [Open](03_linear_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/03_linear_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/03_linear_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
83
+ "| 4 | LayerNorm | 🟡 Medium | [Open](04_layernorm.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/04_layernorm.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/04_layernorm.ipynb\" target=\"_blank\">Colab</a> | [Open](04_layernorm_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/04_layernorm_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/04_layernorm_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
84
+ "| 7 | BatchNorm | 🟡 Medium | [Open](07_batchnorm.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/07_batchnorm.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/07_batchnorm.ipynb\" target=\"_blank\">Colab</a> | [Open](07_batchnorm_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/07_batchnorm_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/07_batchnorm_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
85
+ "| 8 | RMSNorm | 🟡 Medium | [Open](08_rmsnorm.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/08_rmsnorm.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/08_rmsnorm.ipynb\" target=\"_blank\">Colab</a> | [Open](08_rmsnorm_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/08_rmsnorm_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/08_rmsnorm_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
86
+ "| 15 | SwiGLU MLP | 🟡 Medium | [Open](15_mlp.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/15_mlp.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/15_mlp.ipynb\" target=\"_blank\">Colab</a> | [Open](15_mlp_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/15_mlp_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/15_mlp_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
87
+ "| 22 | Conv2d | 🟡 Medium | [Open](22_conv2d.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/22_conv2d.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/22_conv2d.ipynb\" target=\"_blank\">Colab</a> | [Open](22_conv2d_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/22_conv2d_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/22_conv2d_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
88
+ "\n",
89
+ "### 🧠 Attention Mechanisms\n",
90
+ "\n",
91
+ "| # | Problem | Difficulty | Template | Solution |\n",
92
+ "|:---:|---------|:----------:|:--------:|:--------:|\n",
93
+ "| 23 | Cross-Attention | 🟡 Medium | [Open](23_cross_attention.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/23_cross_attention.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/23_cross_attention.ipynb\" target=\"_blank\">Colab</a> | [Open](23_cross_attention_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/23_cross_attention_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/23_cross_attention_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
94
+ "| 5 | Scaled Dot-Product Attention | 🔴 Hard | [Open](05_attention.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/05_attention.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/05_attention.ipynb\" target=\"_blank\">Colab</a> | [Open](05_attention_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/05_attention_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/05_attention_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
95
+ "| 6 | Multi-Head Attention | 🔴 Hard | [Open](06_multihead_attention.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/06_multihead_attention.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/06_multihead_attention.ipynb\" target=\"_blank\">Colab</a> | [Open](06_multihead_attention_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/06_multihead_attention_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/06_multihead_attention_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
96
+ "| 9 | Causal Self-Attention | 🔴 Hard | [Open](09_causal_attention.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/09_causal_attention.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/09_causal_attention.ipynb\" target=\"_blank\">Colab</a> | [Open](09_causal_attention_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/09_causal_attention_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/09_causal_attention_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
97
+ "| 10 | Grouped Query Attention | 🔴 Hard | [Open](10_gqa.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/10_gqa.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/10_gqa.ipynb\" target=\"_blank\">Colab</a> | [Open](10_gqa_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/10_gqa_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/10_gqa_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
98
+ "| 11 | Sliding Window Attention | 🔴 Hard | [Open](11_sliding_window.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/11_sliding_window.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/11_sliding_window.ipynb\" target=\"_blank\">Colab</a> | [Open](11_sliding_window_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/11_sliding_window_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/11_sliding_window_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
99
+ "| 12 | Linear Attention | 🔴 Hard | [Open](12_linear_attention.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/12_linear_attention.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/12_linear_attention.ipynb\" target=\"_blank\">Colab</a> | [Open](12_linear_attention_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/12_linear_attention_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/12_linear_attention_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
100
+ "| 14 | KV Cache Attention | 🔴 Hard | [Open](14_kv_cache.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/14_kv_cache.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/14_kv_cache.ipynb\" target=\"_blank\">Colab</a> | [Open](14_kv_cache_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/14_kv_cache_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/14_kv_cache_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
101
+ "| 24 | RoPE | 🔴 Hard | [Open](24_rope.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/24_rope.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/24_rope.ipynb\" target=\"_blank\">Colab</a> | [Open](24_rope_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/24_rope_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/24_rope_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
102
+ "| 25 | Flash Attention | 🔴 Hard | [Open](25_flash_attention.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/25_flash_attention.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/25_flash_attention.ipynb\" target=\"_blank\">Colab</a> | [Open](25_flash_attention_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/25_flash_attention_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/25_flash_attention_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
103
+ "\n",
104
+ "### 🏗️ Architecture & Adaptation\n",
105
+ "\n",
106
+ "| # | Problem | Difficulty | Template | Solution |\n",
107
+ "|:---:|---------|:----------:|:--------:|:--------:|\n",
108
+ "| 26 | LoRA | 🟡 Medium | [Open](26_lora.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/26_lora.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/26_lora.ipynb\" target=\"_blank\">Colab</a> | [Open](26_lora_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/26_lora_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/26_lora_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
109
+ "| 27 | ViT Patch Embedding | 🟡 Medium | [Open](27_vit_patch.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/27_vit_patch.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/27_vit_patch.ipynb\" target=\"_blank\">Colab</a> | [Open](27_vit_patch_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/27_vit_patch_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/27_vit_patch_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
110
+ "| 13 | GPT-2 Block | 🔴 Hard | [Open](13_gpt2_block.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/13_gpt2_block.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/13_gpt2_block.ipynb\" target=\"_blank\">Colab</a> | [Open](13_gpt2_block_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/13_gpt2_block_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/13_gpt2_block_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
111
+ "| 28 | Mixture of Experts | 🔴 Hard | [Open](28_moe.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/28_moe.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/28_moe.ipynb\" target=\"_blank\">Colab</a> | [Open](28_moe_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/28_moe_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/28_moe_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
112
+ "\n",
113
+ "### ⚙️ Training & Optimization\n",
114
+ "\n",
115
+ "| # | Problem | Difficulty | Template | Solution |\n",
116
+ "|:---:|---------|:----------:|:--------:|:--------:|\n",
117
+ "| 29 | Adam Optimizer | 🟡 Medium | [Open](29_adam.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/29_adam.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/29_adam.ipynb\" target=\"_blank\">Colab</a> | [Open](29_adam_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/29_adam_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/29_adam_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
118
+ "| 30 | Cosine LR Scheduler | 🟡 Medium | [Open](30_cosine_lr.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/30_cosine_lr.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/30_cosine_lr.ipynb\" target=\"_blank\">Colab</a> | [Open](30_cosine_lr_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/30_cosine_lr_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/30_cosine_lr_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
119
+ "| 40 | Linear Regression | 🟡 Medium | [Open](40_linear_regression.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/40_linear_regression.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/40_linear_regression.ipynb\" target=\"_blank\">Colab</a> | [Open](40_linear_regression_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/40_linear_regression_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/40_linear_regression_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
120
+ "\n",
121
+ "### 🎯 Inference & Decoding\n",
122
+ "\n",
123
+ "| # | Problem | Difficulty | Template | Solution |\n",
124
+ "|:---:|---------|:----------:|:--------:|:--------:|\n",
125
+ "| 32 | Top-k / Top-p Sampling | 🟡 Medium | [Open](32_topk_sampling.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/32_topk_sampling.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/32_topk_sampling.ipynb\" target=\"_blank\">Colab</a> | [Open](32_topk_sampling_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/32_topk_sampling_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/32_topk_sampling_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
126
+ "| 33 | Beam Search | 🟡 Medium | [Open](33_beam_search.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/33_beam_search.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/33_beam_search.ipynb\" target=\"_blank\">Colab</a> | [Open](33_beam_search_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/33_beam_search_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/33_beam_search_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
127
+ "| 34 | Speculative Decoding | 🔴 Hard | [Open](34_speculative_decoding.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/34_speculative_decoding.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/34_speculative_decoding.ipynb\" target=\"_blank\">Colab</a> | [Open](34_speculative_decoding_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/34_speculative_decoding_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/34_speculative_decoding_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
128
+ "\n",
129
+ "### 🔬 Advanced\n",
130
+ "\n",
131
+ "| # | Problem | Difficulty | Template | Solution |\n",
132
+ "|:---:|---------|:----------:|:--------:|:--------:|\n",
133
+ "| 35 | BPE Tokenizer | 🔴 Hard | [Open](35_bpe.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/35_bpe.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/35_bpe.ipynb\" target=\"_blank\">Colab</a> | [Open](35_bpe_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/35_bpe_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/35_bpe_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
134
+ "| 36 | INT8 Quantization | 🔴 Hard | [Open](36_int8_quantization.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/36_int8_quantization.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/36_int8_quantization.ipynb\" target=\"_blank\">Colab</a> | [Open](36_int8_quantization_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/36_int8_quantization_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/36_int8_quantization_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
135
+ "| 37 | DPO Loss | 🔴 Hard | [Open](37_dpo_loss.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/37_dpo_loss.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/37_dpo_loss.ipynb\" target=\"_blank\">Colab</a> | [Open](37_dpo_loss_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/37_dpo_loss_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/37_dpo_loss_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
136
+ "| 38 | GRPO Loss | 🔴 Hard | [Open](38_grpo_loss.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/38_grpo_loss.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/38_grpo_loss.ipynb\" target=\"_blank\">Colab</a> | [Open](38_grpo_loss_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/38_grpo_loss_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/38_grpo_loss_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
137
+ "| 39 | PPO Loss | 🔴 Hard | [Open](39_ppo_loss.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/39_ppo_loss.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/39_ppo_loss.ipynb\" target=\"_blank\">Colab</a> | [Open](39_ppo_loss_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/39_ppo_loss_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/39_ppo_loss_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
138
+ "\n",
139
+ "## Useful Commands\n",
140
+ "\n",
141
+ "```python\n",
142
+ "from torch_judge import check, hint, status\n",
143
+ "\n",
144
+ "status() # Progress dashboard\n",
145
+ "check(\"relu\") # Judge your implementation\n",
146
+ "hint(\"causal_attention\") # Get a hint\n",
147
+ "```"
148
  ]
149
  }
150
  ],
 
161
  },
162
  "nbformat": 4,
163
  "nbformat_minor": 4
164
+ }
templates/01_relu.ipynb CHANGED
@@ -30,6 +30,20 @@
30
  ],
31
  "outputs": []
32
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  {
34
  "cell_type": "code",
35
  "metadata": {},
 
30
  ],
31
  "outputs": []
32
  },
33
+ {
34
+ "cell_type": "code",
35
+ "metadata": {},
36
+ "source": [
37
+ "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
38
+ "try:\n",
39
+ " import google.colab\n",
40
+ " get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
41
+ "except ImportError:\n",
42
+ " pass\n"
43
+ ],
44
+ "outputs": [],
45
+ "execution_count": null
46
+ },
47
  {
48
  "cell_type": "code",
49
  "metadata": {},
templates/02_softmax.ipynb CHANGED
@@ -30,6 +30,20 @@
30
  ],
31
  "outputs": []
32
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  {
34
  "cell_type": "code",
35
  "metadata": {},
 
30
  ],
31
  "outputs": []
32
  },
33
+ {
34
+ "cell_type": "code",
35
+ "metadata": {},
36
+ "source": [
37
+ "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
38
+ "try:\n",
39
+ " import google.colab\n",
40
+ " get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
41
+ "except ImportError:\n",
42
+ " pass\n"
43
+ ],
44
+ "outputs": [],
45
+ "execution_count": null
46
+ },
47
  {
48
  "cell_type": "code",
49
  "metadata": {},
templates/03_linear.ipynb CHANGED
@@ -26,6 +26,20 @@
26
  ],
27
  "outputs": []
28
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  {
30
  "cell_type": "code",
31
  "metadata": {},
 
26
  ],
27
  "outputs": []
28
  },
29
+ {
30
+ "cell_type": "code",
31
+ "metadata": {},
32
+ "source": [
33
+ "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
34
+ "try:\n",
35
+ " import google.colab\n",
36
+ " get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
37
+ "except ImportError:\n",
38
+ " pass\n"
39
+ ],
40
+ "outputs": [],
41
+ "execution_count": null
42
+ },
43
  {
44
  "cell_type": "code",
45
  "metadata": {},
templates/04_layernorm.ipynb CHANGED
@@ -32,6 +32,20 @@
32
  ],
33
  "outputs": []
34
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  {
36
  "cell_type": "code",
37
  "metadata": {},
 
32
  ],
33
  "outputs": []
34
  },
35
+ {
36
+ "cell_type": "code",
37
+ "metadata": {},
38
+ "source": [
39
+ "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
40
+ "try:\n",
41
+ " import google.colab\n",
42
+ " get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
43
+ "except ImportError:\n",
44
+ " pass\n"
45
+ ],
46
+ "outputs": [],
47
+ "execution_count": null
48
+ },
49
  {
50
  "cell_type": "code",
51
  "metadata": {},
templates/05_attention.ipynb CHANGED
@@ -29,6 +29,20 @@
29
  "- Must handle cross-attention (seq_q ≠ seq_k)"
30
  ]
31
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  {
33
  "cell_type": "code",
34
  "execution_count": null,
 
29
  "- Must handle cross-attention (seq_q ≠ seq_k)"
30
  ]
31
  },
32
+ {
33
+ "cell_type": "code",
34
+ "metadata": {},
35
+ "source": [
36
+ "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
37
+ "try:\n",
38
+ " import google.colab\n",
39
+ " get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
40
+ "except ImportError:\n",
41
+ " pass\n"
42
+ ],
43
+ "outputs": [],
44
+ "execution_count": null
45
+ },
46
  {
47
  "cell_type": "code",
48
  "execution_count": null,
templates/06_multihead_attention.ipynb CHANGED
@@ -37,6 +37,21 @@
37
  "5. Output projection: `self.W_o(concat)`"
38
  ]
39
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  {
41
  "cell_type": "code",
42
  "execution_count": null,
 
37
  "5. Output projection: `self.W_o(concat)`"
38
  ]
39
  },
40
+ {
41
+ "cell_type": "code",
42
+ "execution_count": null,
43
+ "id": "02a059c4",
44
+ "metadata": {},
45
+ "outputs": [],
46
+ "source": [
47
+ "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
48
+ "try:\n",
49
+ " import google.colab\n",
50
+ " get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
51
+ "except ImportError:\n",
52
+ " pass\n"
53
+ ]
54
+ },
55
  {
56
  "cell_type": "code",
57
  "execution_count": null,
templates/07_batchnorm.ipynb CHANGED
@@ -1,131 +1,145 @@
1
  {
2
- "cells": [
3
- {
4
- "cell_type": "markdown",
5
- "id": "89fd15cb",
6
- "metadata": {},
7
- "source": [
8
- "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/07_batchnorm.ipynb)\n",
9
- "\n",
10
- "# 🟡 Medium: Implement BatchNorm\n",
11
- "\n",
12
- "Implement **Batch Normalization** with both **training** and **inference** behavior.\n",
13
- "\n",
14
- "In training mode, use **batch statistics** and update running estimates:\n",
15
- "\n",
16
- "$$\\text{BN}(x) = \\gamma \\cdot \\frac{x - \\mu_B}{\\sqrt{\\sigma_B^2 + \\epsilon}} + \\beta$$\n",
17
- "\n",
18
- "where $\\mu_B$ and $\\sigma_B^2$ are the mean and variance computed **across the batch** (dim=0).\n",
19
- "\n",
20
- "In inference mode, use the provided **running mean/var** instead of current batch stats.\n",
21
- "\n",
22
- "### Signature\n",
23
- "```python\n",
24
- "def my_batch_norm(\n",
25
- " x: torch.Tensor,\n",
26
- " gamma: torch.Tensor,\n",
27
- " beta: torch.Tensor,\n",
28
- " running_mean: torch.Tensor,\n",
29
- " running_var: torch.Tensor,\n",
30
- " eps: float = 1e-5,\n",
31
- " momentum: float = 0.1,\n",
32
- " training: bool = True,\n",
33
- ") -> torch.Tensor:\n",
34
- " # x: (N, D) — normalize each feature across all samples in the batch\n",
35
- " # running_mean, running_var: updated in-place during training; used as-is during inference\n",
36
- "```\n",
37
- "\n",
38
- "### Rules\n",
39
- "- Do **NOT** use `F.batch_norm`, `nn.BatchNorm1d`, etc.\n",
40
- "- Compute batch mean and variance over `dim=0` with `unbiased=False`\n",
41
- "- Update running stats like PyTorch: `running = (1 - momentum) * running + momentum * batch_stat`\n",
42
- "- Use `running_mean` / `running_var` for inference when `training=False`\n",
43
- "- Must support autograd w.r.t. `x`, `gamma`, `beta`(running statistics 应视作 buffer,而不是需要梯度的参数)"
44
- ]
45
- },
46
- {
47
- "cell_type": "code",
48
- "execution_count": null,
49
- "metadata": {},
50
- "outputs": [],
51
- "source": [
52
- "import torch"
53
- ]
54
- },
55
- {
56
- "cell_type": "code",
57
- "execution_count": null,
58
- "id": "d946ca79",
59
- "metadata": {},
60
- "outputs": [],
61
- "source": [
62
- "# ✏️ YOUR IMPLEMENTATION HERE\n",
63
- "\n",
64
- "def my_batch_norm(\n",
65
- " x,\n",
66
- " gamma,\n",
67
- " beta,\n",
68
- " running_mean,\n",
69
- " running_var,\n",
70
- " eps=1e-5,\n",
71
- " momentum=0.1,\n",
72
- " training=True,\n",
73
- "):\n",
74
- " pass # Replace this"
75
- ]
76
- },
77
- {
78
- "cell_type": "code",
79
- "execution_count": null,
80
- "id": "26b93e71",
81
- "metadata": {},
82
- "outputs": [],
83
- "source": [
84
- "# 🧪 Debug\n",
85
- "x = torch.randn(8, 4)\n",
86
- "gamma = torch.ones(4)\n",
87
- "beta = torch.zeros(4)\n",
88
- "\n",
89
- "# Running stats typically live on the same device and shape as features\n",
90
- "running_mean = torch.zeros(4)\n",
91
- "running_var = torch.ones(4)\n",
92
- "\n",
93
- "# Training mode: uses batch stats and updates running_mean / running_var\n",
94
- "out_train = my_batch_norm(x, gamma, beta, running_mean, running_var, training=True)\n",
95
- "print(\"[Train] Output shape:\", out_train.shape)\n",
96
- "print(\"[Train] Column means:\", out_train.mean(dim=0)) # should be ~0\n",
97
- "print(\"[Train] Column stds: \", out_train.std(dim=0)) # should be ~1\n",
98
- "print(\"Updated running_mean:\", running_mean)\n",
99
- "print(\"Updated running_var:\", running_var)\n",
100
- "\n",
101
- "# Inference mode: uses running_mean / running_var only\n",
102
- "out_eval = my_batch_norm(x, gamma, beta, running_mean, running_var, training=False)\n",
103
- "print(\"[Eval] Output shape:\", out_eval.shape)"
104
- ]
105
- },
106
- {
107
- "cell_type": "code",
108
- "execution_count": null,
109
- "metadata": {},
110
- "outputs": [],
111
- "source": [
112
- "# ✅ SUBMIT\n",
113
- "from torch_judge import check\n",
114
- "check(\"batchnorm\")"
115
- ]
116
- }
117
- ],
118
- "metadata": {
119
- "kernelspec": {
120
- "display_name": "Python 3",
121
- "language": "python",
122
- "name": "python3"
123
- },
124
- "language_info": {
125
- "name": "python",
126
- "version": "3.11.0"
127
- }
128
  },
129
- "nbformat": 4,
130
- "nbformat_minor": 5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
131
  }
 
1
  {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "id": "89fd15cb",
6
+ "metadata": {},
7
+ "source": [
8
+ "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/07_batchnorm.ipynb)\n",
9
+ "\n",
10
+ "# 🟡 Medium: Implement BatchNorm\n",
11
+ "\n",
12
+ "Implement **Batch Normalization** with both **training** and **inference** behavior.\n",
13
+ "\n",
14
+ "In training mode, use **batch statistics** and update running estimates:\n",
15
+ "\n",
16
+ "$$\\text{BN}(x) = \\gamma \\cdot \\frac{x - \\mu_B}{\\sqrt{\\sigma_B^2 + \\epsilon}} + \\beta$$\n",
17
+ "\n",
18
+ "where $\\mu_B$ and $\\sigma_B^2$ are the mean and variance computed **across the batch** (dim=0).\n",
19
+ "\n",
20
+ "In inference mode, use the provided **running mean/var** instead of current batch stats.\n",
21
+ "\n",
22
+ "### Signature\n",
23
+ "```python\n",
24
+ "def my_batch_norm(\n",
25
+ " x: torch.Tensor,\n",
26
+ " gamma: torch.Tensor,\n",
27
+ " beta: torch.Tensor,\n",
28
+ " running_mean: torch.Tensor,\n",
29
+ " running_var: torch.Tensor,\n",
30
+ " eps: float = 1e-5,\n",
31
+ " momentum: float = 0.1,\n",
32
+ " training: bool = True,\n",
33
+ ") -> torch.Tensor:\n",
34
+ " # x: (N, D) — normalize each feature across all samples in the batch\n",
35
+ " # running_mean, running_var: updated in-place during training; used as-is during inference\n",
36
+ "```\n",
37
+ "\n",
38
+ "### Rules\n",
39
+ "- Do **NOT** use `F.batch_norm`, `nn.BatchNorm1d`, etc.\n",
40
+ "- Compute batch mean and variance over `dim=0` with `unbiased=False`\n",
41
+ "- Update running stats like PyTorch: `running = (1 - momentum) * running + momentum * batch_stat`\n",
42
+ "- Use `running_mean` / `running_var` for inference when `training=False`\n",
43
+ "- Must support autograd w.r.t. `x`, `gamma`, `beta`(running statistics 应视作 buffer,而不是需要梯度的参数)"
44
+ ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  },
46
+ {
47
+ "cell_type": "code",
48
+ "metadata": {},
49
+ "source": [
50
+ "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
51
+ "try:\n",
52
+ " import google.colab\n",
53
+ " get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
54
+ "except ImportError:\n",
55
+ " pass\n"
56
+ ],
57
+ "outputs": [],
58
+ "execution_count": null
59
+ },
60
+ {
61
+ "cell_type": "code",
62
+ "execution_count": null,
63
+ "metadata": {},
64
+ "outputs": [],
65
+ "source": [
66
+ "import torch"
67
+ ]
68
+ },
69
+ {
70
+ "cell_type": "code",
71
+ "execution_count": null,
72
+ "id": "d946ca79",
73
+ "metadata": {},
74
+ "outputs": [],
75
+ "source": [
76
+ "# ✏️ YOUR IMPLEMENTATION HERE\n",
77
+ "\n",
78
+ "def my_batch_norm(\n",
79
+ " x,\n",
80
+ " gamma,\n",
81
+ " beta,\n",
82
+ " running_mean,\n",
83
+ " running_var,\n",
84
+ " eps=1e-5,\n",
85
+ " momentum=0.1,\n",
86
+ " training=True,\n",
87
+ "):\n",
88
+ " pass # Replace this"
89
+ ]
90
+ },
91
+ {
92
+ "cell_type": "code",
93
+ "execution_count": null,
94
+ "id": "26b93e71",
95
+ "metadata": {},
96
+ "outputs": [],
97
+ "source": [
98
+ "# 🧪 Debug\n",
99
+ "x = torch.randn(8, 4)\n",
100
+ "gamma = torch.ones(4)\n",
101
+ "beta = torch.zeros(4)\n",
102
+ "\n",
103
+ "# Running stats typically live on the same device and shape as features\n",
104
+ "running_mean = torch.zeros(4)\n",
105
+ "running_var = torch.ones(4)\n",
106
+ "\n",
107
+ "# Training mode: uses batch stats and updates running_mean / running_var\n",
108
+ "out_train = my_batch_norm(x, gamma, beta, running_mean, running_var, training=True)\n",
109
+ "print(\"[Train] Output shape:\", out_train.shape)\n",
110
+ "print(\"[Train] Column means:\", out_train.mean(dim=0)) # should be ~0\n",
111
+ "print(\"[Train] Column stds: \", out_train.std(dim=0)) # should be ~1\n",
112
+ "print(\"Updated running_mean:\", running_mean)\n",
113
+ "print(\"Updated running_var:\", running_var)\n",
114
+ "\n",
115
+ "# Inference mode: uses running_mean / running_var only\n",
116
+ "out_eval = my_batch_norm(x, gamma, beta, running_mean, running_var, training=False)\n",
117
+ "print(\"[Eval] Output shape:\", out_eval.shape)"
118
+ ]
119
+ },
120
+ {
121
+ "cell_type": "code",
122
+ "execution_count": null,
123
+ "metadata": {},
124
+ "outputs": [],
125
+ "source": [
126
+ "# ✅ SUBMIT\n",
127
+ "from torch_judge import check\n",
128
+ "check(\"batchnorm\")"
129
+ ]
130
+ }
131
+ ],
132
+ "metadata": {
133
+ "kernelspec": {
134
+ "display_name": "Python 3",
135
+ "language": "python",
136
+ "name": "python3"
137
+ },
138
+ "language_info": {
139
+ "name": "python",
140
+ "version": "3.11.0"
141
+ }
142
+ },
143
+ "nbformat": 4,
144
+ "nbformat_minor": 5
145
  }