| <!DOCTYPE html> |
| <html lang="en"> |
| <head> |
| <meta charset="UTF-8"> |
| <meta name="viewport" content="width=device-width, initial-scale=1.0"> |
| <title>The MLOps Engineer's Interactive Architecture Builder</title> |
| <link rel="preconnect" href="https://fonts.googleapis.com"> |
| <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin> |
| <link href="https://fonts.googleapis.com/css2?family=Roboto:wght@300;400;500;700&display=swap" rel="stylesheet"> |
| <link href="https://fonts.googleapis.com/icon?family=Material+Icons" rel="stylesheet"> |
| <style> |
| /* Design tokens: colours, surfaces and shadows referenced through var() by the rules below. */ |
| :root { |
| --primary-color: #1E88E5; |
| --primary-dark: #1565C0; |
| --secondary-color: #004d40; |
| --genai-color: #6A1B9A; /* accent for the Generative AI chip, diagram layers and arrows */ |
| --background-color: #f4f6f8; |
| --card-bg-color: #ffffff; |
| --text-color: #333; |
| --heading-color: #212121; |
| --subtle-text-color: #555; |
| --border-color: #e0e0e0; |
| --code-bg-color: #282c34; /* background of <pre> code samples */ |
| --code-text-color: #abb2bf; |
| --shadow: 0 4px 12px rgba(0,0,0,0.1); |
| --tile-hover-shadow: 0 6px 16px rgba(0,0,0,0.15); /* also applied to the active tile */ |
| } |
| /* Page baseline: Roboto text on the page background colour, default body margin removed. */ |
| body { |
| font-family: 'Roboto', sans-serif; |
| background-color: var(--background-color); |
| color: var(--text-color); |
| margin: 0; |
| padding: 0; |
| line-height: 1.6; |
| } |
| |
| /* Page frame and intro header: content is centred and capped at 1200px. */ |
| .container { max-width: 1200px; margin: 0 auto; padding: 2rem; } |
| header { text-align: center; margin-bottom: 2rem; } |
| header h1 { color: var(--heading-color); font-weight: 700; font-size: 2.8rem; margin-bottom: 0.5rem; } |
| header p { font-size: 1.1rem; color: var(--subtle-text-color); max-width: 800px; margin: 0 auto; } |
| /* Section heading: flex row so the leading Material icon and the text are vertically centred. */ |
| .main-section-title { |
| font-size: 2.2rem; color: var(--heading-color); border-bottom: 3px solid var(--primary-color); |
| padding-bottom: 0.75rem; margin-top: 3rem; margin-bottom: 2rem; display: flex; align-items: center; |
| } |
| .main-section-title .material-icons { font-size: 2.8rem; margin-right: 1rem; } |
| |
| /* Architecture builder card and the Classic ML / Generative AI type switch. */ |
| #architecture-builder { background-color: var(--card-bg-color); padding: 2rem; border-radius: 8px; box-shadow: var(--shadow); } |
| .arch-type-selector { display: flex; gap: 1rem; margin-bottom: 2rem; border-bottom: 1px solid var(--border-color); padding-bottom: 1.5rem; } |
| .arch-type-chip { padding: 0.8rem 1.5rem; border-radius: 8px; cursor: pointer; font-weight: 500; font-size: 1.1rem; border: 2px solid transparent; transition: all 0.2s ease; } |
| .arch-type-chip.active.classic { background-color: #e3f2fd; border-color: var(--primary-color); color: var(--primary-dark); } |
| .arch-type-chip.active.gen-ai { background-color: #f3e5f5; border-color: var(--genai-color); color: var(--genai-color); } |
| /* Option sets are hidden by default; the script puts .active on the set matching the selected type. */ |
| .builder-fields { display: none; } |
| .builder-fields.active { display: block; } |
| /* Option groups and chips. A .disabled group is dimmed and ignores pointer input; a .disabled chip is only restyled here (the click handler skips it). */ |
| .selection-group { margin-bottom: 1.5rem; transition: opacity 0.3s ease; } |
| .selection-group.disabled { opacity: 0.5; pointer-events: none; } |
| .selection-group h4 { margin-top: 0; margin-bottom: 1rem; font-size: 1.2rem; color: var(--secondary-color); } |
| .selection-chips { display: flex; flex-wrap: wrap; gap: 0.75rem; } |
| .chip { |
| padding: 0.6rem 1.2rem; border: 2px solid var(--border-color); border-radius: 20px; |
| cursor: pointer; transition: all 0.2s ease; font-weight: 500; background-color: #f9f9f9; |
| } |
| .chip:not(.disabled):hover { border-color: var(--primary-dark); background-color: #e3f2fd; } |
| .chip.active { background-color: var(--primary-color); color: white; border-color: var(--primary-color); } |
| .chip.disabled { opacity: 0.6; cursor: not-allowed; background-color: #f0f0f0; border-color: var(--border-color); color: #999; } |
| /* Full-width primary action at the bottom of the builder card. */ |
| #generate-btn { |
| background-color: var(--secondary-color); color: white; border: none; padding: 0.8rem 2rem; font-size: 1.1rem; |
| font-weight: 500; border-radius: 6px; cursor: pointer; transition: background-color 0.2s; |
| display: block; margin-top: 2rem; width: 100%; |
| } |
| #generate-btn:hover { background-color: #00695C; } |
| |
| /* Generated diagram: hidden until the Generate handler fills it and sets display to block. */ |
| #architecture-diagram-output { |
| display: none; margin-top: 2rem; background-color: #fdfdfd; border: 1px solid var(--border-color); |
| padding: 2rem; border-radius: 8px; text-align: center; |
| } |
| .diagram-title { font-size: 1.5rem; font-weight: 500; margin-bottom: 2rem; } |
| .diagram-stack { display: flex; flex-direction: column; align-items: center; gap: 0.5rem; } |
| .diagram-layer { |
| background-color: var(--card-bg-color); border: 2px solid var(--primary-color); border-radius: 8px; |
| padding: 1.5rem 2.5rem; width: 80%; max-width: 500px; box-shadow: 0 2px 4px rgba(0,0,0,0.05); text-align: center; |
| } |
| .diagram-layer.gen-ai-layer { border-color: var(--genai-color); } |
| .diagram-layer.gen-ai-layer h5 { color: var(--genai-color); } /* two-class selector, so it outranks the generic h5 rule on the next line */ |
| .diagram-layer h5 { margin: 0 0 0.5rem 0; color: var(--primary-dark); font-size: 1.2rem; font-weight: 700; } |
| .diagram-layer p { margin: 0; font-size: 1rem; color: var(--subtle-text-color); } |
| .diagram-arrow { font-family: 'Material Icons'; font-size: 2.5rem; color: var(--primary-color); line-height: 1; } /* the script writes the icon name "south" as the element text */ |
| .diagram-arrow.gen-ai-arrow { color: var(--genai-color); } |
| .icon-img-placeholder { |
| height: 32px; |
| max-width: 120px; |
| width: auto; |
| margin-top: 10px; |
| } |
| |
| /* Reference guides: responsive tile grid, the content panels the tiles reveal, and code samples. */ |
| .tile-container { display: grid; grid-template-columns: repeat(auto-fit, minmax(180px, 1fr)); gap: 1.5rem; margin-bottom: 2.5rem; } |
| .tile { background-color: var(--card-bg-color); border: 2px solid var(--border-color); border-radius: 8px; padding: 1.5rem; text-align: center; cursor: pointer; transition: all 0.2s ease; display: flex; flex-direction: column; align-items: center; justify-content: center; min-height: 150px; } |
| .tile:hover { transform: translateY(-5px); box-shadow: var(--tile-hover-shadow); border-color: var(--primary-color); } |
| .tile.active { border-color: var(--primary-color); box-shadow: var(--tile-hover-shadow); background-color: #f0f7ff; } |
| .tile-icon-img { |
| height: 48px; |
| width: auto; |
| max-width: 100%; |
| margin-bottom: 1rem; |
| } |
| .tile h4 { margin: 0; font-size: 1.2rem; color: var(--heading-color); } |
| .content-panel { display: none; background-color: var(--card-bg-color); border-radius: 8px; box-shadow: var(--shadow); padding: 2.5rem; margin-top: 1rem; } /* hidden unless .active */ |
| .content-panel.active { display: block; } |
| .stack-layer { margin-bottom: 2.5rem; padding-bottom: 1.5rem; border-bottom: 1px solid var(--border-color); } |
| .stack-layer:last-child { border-bottom: none; margin-bottom: 0; } |
| .stack-layer h3 { font-size: 1.6rem; color: var(--secondary-color); margin-top: 0; display: flex; align-items: center; } |
| .stack-layer h3 .material-icons { margin-right: 12px; font-size: 2rem; } |
| details { border: 1px solid var(--border-color); border-radius: 6px; margin-bottom: 1rem; background-color: #f9fafb; } |
| summary { cursor: pointer; padding: 1rem; font-weight: 500; font-size: 1.1rem; list-style: none; display: flex; align-items: center; justify-content: space-between; } |
| pre { background-color: var(--code-bg-color); color: var(--code-text-color); padding: 1.5rem 1rem 1rem 1rem; border-radius: 6px; overflow-x: auto; font-size: 0.9em; position: relative; } /* position: relative anchors the absolutely positioned .copy-btn */ |
| code { font-family: 'Courier New', Courier, monospace; } |
| .copy-btn { position: absolute; top: 10px; right: 10px; background-color: #4a505c; color: #fff; border: none; padding: 6px 10px; border-radius: 4px; cursor: pointer; opacity: 0.7; } |
| pre:hover .copy-btn { opacity: 1; } |
| .copy-btn.copied { background-color: var(--primary-dark); } |
| .code-block-header { font-weight: bold; color: var(--subtle-text-color); margin-bottom: -0.5rem; margin-top: 1rem; } |
| </style> |
| </head> |
| <body> |
|
|
| <div class="container"> |
| <header> |
| <h1>MLOps Architecture Builder &amp; Cheatsheet</h1> |
| <p>Design your custom model serving stack using the builder below, or explore detailed deployment guides for common frameworks.</p> |
| </header> |
|
|
| <main> |
| <!-- Architecture builder. The script reads data-type on the type chips, data-group on each .selection-group and data-id on each .chip; a chip's data-id is also used as the icon file name ("<data-id>.png") in the generated diagram. --> |
| <h2 class="main-section-title"><i class="material-icons">architecture</i>My Architecture</h2> |
| <div id="architecture-builder"> |
| <div class="arch-type-selector"> |
| <div class="arch-type-chip active classic" data-type="classic">Classic ML</div> |
| <div class="arch-type-chip gen-ai" data-type="gen-ai">Generative AI</div> |
| </div> |
|
|
| <!-- Classic ML option groups (shown by default). TorchServe / TF Serving are enabled or disabled by the script depending on the selected framework. --> |
| <div id="classic-builder-fields" class="builder-fields active"> |
| <div class="selection-group" data-group="framework"> |
| <h4>1. ML Framework</h4> |
| <div class="selection-chips"> |
| <div class="chip" data-id="scikit-learn">Scikit-learn</div> |
| <div class="chip" data-id="xgboost">XGBoost</div> |
| <div class="chip" data-id="pytorch">PyTorch</div> |
| <div class="chip" data-id="tensorflow">TensorFlow</div> |
| <div class="chip" data-id="jax">JAX</div> |
| <div class="chip" data-id="keras">Keras</div> |
| </div> |
| </div> |
| <div class="selection-group" data-group="serving"> |
| <h4>2. Serving Container</h4> |
| <div class="selection-chips"> |
| <div class="chip" data-id="kserve">Kubeflow KServe</div> |
| <div class="chip" data-id="ray-serve">Ray Serve</div> |
| <div class="chip" data-id="torchserve">TorchServe</div> |
| <div class="chip" data-id="tf-serving">TF Serving</div> |
| <div class="chip" data-id="triton">NVIDIA Triton</div> |
| <div class="chip" data-id="custom">Custom Container (FastAPI)</div> |
| </div> |
| </div> |
| <div class="selection-group" data-group="orchestration"> |
| <h4>3. Orchestration / Platform</h4> |
| <div class="selection-chips"> |
| <div class="chip active" data-id="kubernetes">Kubernetes</div> |
| <div class="chip" data-id="vertex-ai">Managed: Vertex AI</div> |
| <div class="chip" data-id="sagemaker">Managed: SageMaker</div> |
| </div> |
| </div> |
| <div class="selection-group" data-group="hardware"> |
| <h4>4. Hardware</h4> |
| <div class="selection-chips"> |
| <div class="chip" data-id="vm">VMs (CPU)</div> |
| <div class="chip" data-id="gpu">GPU</div> |
| <div class="chip" data-id="tpu">TPU</div> |
| </div> |
| </div> |
| </div> |
|
|
| <!-- Generative AI option groups (hidden until the "Generative AI" type is selected). vLLM / SGLang are disabled by the script when the Diffusion model type is active. --> |
| <div id="genai-builder-fields" class="builder-fields"> |
| <div class="selection-group" data-group="model-type"> |
| <h4>0. Model Type</h4> |
| <div class="selection-chips"> |
| <div class="chip" data-id="llm">LLM</div> |
| <div class="chip" data-id="vlm">Multimodal LLM (VLM)</div> |
| <div class="chip" data-id="diffusion">Diffusion</div> |
| </div> |
| </div> |
| <div class="selection-group" data-group="framework"> |
| <h4>1. ML Framework</h4> |
| <div class="selection-chips"> |
| <div class="chip" data-id="pytorch">PyTorch</div> |
| <div class="chip" data-id="tensorflow">TensorFlow</div> |
| <div class="chip" data-id="jax">JAX</div> |
| <div class="chip" data-id="keras">Keras</div> |
| </div> |
| </div> |
| <div class="selection-group" data-group="serving"> |
| <h4>2. Serving Container</h4> |
| <div class="selection-chips"> |
| <div class="chip" data-id="vllm">vLLM</div> |
| <div class="chip" data-id="sglang">SGLang</div> |
| <div class="chip" data-id="triton-trt-llm">NVIDIA Triton (TensorRT-LLM)</div> |
| <div class="chip" data-id="custom">Custom Container (Diffusers, etc.)</div> |
| </div> |
| </div> |
| <div class="selection-group" data-group="orchestration"> |
| <h4>3. Orchestration / Platform</h4> |
| <div class="selection-chips"> |
| <div class="chip active" data-id="k8s-ray-kf">Kubernetes (KubeRay/Kubeflow)</div> |
| <div class="chip" data-id="vertex-ai">Managed: Vertex AI</div> |
| <div class="chip" data-id="sagemaker">Managed: SageMaker</div> |
| </div> |
| </div> |
| <div class="selection-group" data-group="hardware"> |
| <h4>4. Hardware</h4> |
| <div class="selection-chips"> |
| <div class="chip" data-id="gpu">GPU</div> |
| <div class="chip" data-id="tpu">TPU</div> |
| </div> |
| </div> |
| </div> |
| <!-- Validates that every required group has a selection, then renders the diagram. --> |
| <button id="generate-btn">Generate Architecture Diagram</button> |
| </div> |
| <!-- Diagram target: empty and hidden (display: none in the stylesheet) until the Generate handler fills it. --> |
| <div id="architecture-diagram-output"></div> |
|
|
| <h2 class="main-section-title"><i class="material-icons">menu_book</i>Reference Guides</h2> |
| |
| <h3 class="main-section-title" style="font-size: 1.8rem; border-color: var(--primary-color);"><i class="material-icons" style="color: var(--primary-color);">model_training</i>Classic ML</h3> |
| <!-- Classic ML guide tiles. data-target is the id of the .content-panel the script reveals. Icons are decorative (empty alt) because the adjacent heading already names the framework. --> |
| <div class="tile-container"> |
| <div class="tile" data-target="classic-pytorch"><img src="pytorch.png" class="tile-icon-img" alt=""><h4>PyTorch</h4></div> |
| <div class="tile" data-target="classic-tensorflow"><img src="tensorflow.png" class="tile-icon-img" alt=""><h4>TensorFlow</h4></div> |
| <div class="tile" data-target="classic-sklearn"><img src="scikit-learn.png" class="tile-icon-img" alt=""><h4>Scikit-learn</h4></div> |
| <div class="tile" data-target="classic-xgboost"><img src="xgboost.png" class="tile-icon-img" alt=""><h4>XGBoost</h4></div> |
| <div class="tile" data-target="classic-jax"><img src="jax.png" class="tile-icon-img" alt=""><h4>JAX</h4></div> |
| </div> |
|
|
| <h3 class="main-section-title" style="font-size: 1.8rem; border-color: var(--genai-color);"><i class="material-icons" style="color: var(--genai-color);">auto_awesome</i>Generative AI</h3> |
| <!-- Generative AI guide tiles. data-target is the id of the .content-panel the script reveals. Icons are decorative (empty alt) because the adjacent heading already names the topic. --> |
| <div class="tile-container"> |
| <div class="tile" data-target="genai-llm"><img src="llm.png" class="tile-icon-img" alt=""><h4>LLMs</h4></div> |
| <div class="tile" data-target="genai-vlm"><img src="vlm.png" class="tile-icon-img" alt=""><h4>Multimodal (VLMs)</h4></div> |
| <div class="tile" data-target="genai-diffusion"><img src="diffusion.png" class="tile-icon-img" alt=""><h4>Diffusion Models</h4></div> |
| </div> |
|
|
| <div class="content-container"> |
| |
| <div id="classic-pytorch" class="content-panel"> |
| <div class="stack-layer"><h3><i class="material-icons">psychology</i>Model Layer</h3> |
| <p>A simple feed-forward network defined in PyTorch. The model's <code>state_dict</code> is saved for deployment.</p> |
| <p class="code-block-header">model_setup.py</p> |
| <pre><code>import torch |
| import torch.nn as nn |
| class SimpleNet(nn.Module): |
|     def __init__(self): |
|         super(SimpleNet, self).__init__() |
|         self.linear = nn.Linear(10, 1) |
|     def forward(self, x): return self.linear(x) |
| model = SimpleNet() |
| torch.save(model.state_dict(), "pytorch_model.pth")</code></pre> |
| </div> |
| <div class="stack-layer"><h3><i class="material-icons">layers</i>Serving Stack Layer</h3> |
| <p>Use a high-performance framework like FastAPI for a custom server. For dedicated solutions, TorchServe is the native choice, while Kubeflow KServe, Ray Serve, and NVIDIA Triton offer powerful, managed abstractions.</p> |
| </div> |
| <div class="stack-layer"><h3><i class="material-icons">cloud_queue</i>Orchestration Layer</h3> |
| <p>Package the application with a multi-stage Dockerfile and define its runtime with Kubernetes Deployment, Service, and HPA objects. Managed platforms like Vertex AI abstract this away.</p> |
| </div> |
| <div class="stack-layer"><h3><i class="material-icons">memory</i>Hardware Layer</h3> |
| <p><strong>CPUs:</strong> Suitable for small networks. <strong>GPUs:</strong> Essential for deep learning models. <strong>TPUs:</strong> Best for massive-scale inference on GCP.</p> |
| </div> |
| </div> |
| <div id="classic-tensorflow" class="content-panel"> |
| <div class="stack-layer"><h3><i class="material-icons">psychology</i>Model Layer</h3> |
| <p>A simple Keras model saved in TensorFlow's <code>SavedModel</code> format, which bundles the architecture and weights.</p> |
| <p class="code-block-header">model_setup.py</p> |
| <pre><code>import tensorflow as tf |
| model = tf.keras.Sequential([ |
| tf.keras.layers.Dense(10, activation='relu', input_shape=(10,)), |
| tf.keras.layers.Dense(1) |
| ]) |
| model.save("tf_saved_model")</code></pre> |
| </div> |
| <div class="stack-layer"><h3><i class="material-icons">layers</i>Serving Stack Layer</h3> |
| <p>TF Serving and Kubeflow KServe offer native, high-performance support for the <code>SavedModel</code> format. NVIDIA Triton is also highly optimized for TF models. A custom FastAPI server is another flexible option.</p> |
| </div> |
| <div class="stack-layer"><h3><i class="material-icons">cloud_queue</i>Orchestration Layer</h3> |
| <p>The Kubernetes configuration is very similar to other frameworks. Ensure your Dockerfile copies the entire <code>tf_saved_model</code> directory.</p> |
| </div> |
| <div class="stack-layer"><h3><i class="material-icons">memory</i>Hardware Layer</h3> |
| <p><strong>CPUs:</strong> Good for smaller Keras models. <strong>GPUs:</strong> Highly recommended for deep learning models. <strong>TPUs:</strong> The premier choice for running TensorFlow models at scale on GCP.</p> |
| </div> |
| </div> |
| <div id="classic-sklearn" class="content-panel"> |
| <div class="stack-layer"><h3><i class="material-icons">psychology</i>Model Layer</h3> |
| <p>A classic logistic regression model. Serialization is typically done with <code>joblib</code> for efficiency with NumPy structures.</p> |
| <p class="code-block-header">model_setup.py</p> |
| <pre><code>import joblib |
| from sklearn.linear_model import LogisticRegression |
| from sklearn.datasets import make_classification |
| X, y = make_classification(n_features=4) |
| model = LogisticRegression().fit(X, y) |
| joblib.dump(model, "sklearn_model.joblib")</code></pre> |
| </div> |
| <div class="stack-layer"><h3><i class="material-icons">layers</i>Serving Stack Layer</h3> |
| <p>FastAPI provides a simple and fast web server. Kubeflow KServe and Ray Serve also have native support for scikit-learn models. NVIDIA Triton is an option for CPU-optimized execution using its FIL backend.</p> |
| </div> |
| <div class="stack-layer"><h3><i class="material-icons">cloud_queue</i>Orchestration Layer</h3> |
| <p>Standard Kubernetes setup. The Docker container will be lightweight as it only needs <code>scikit-learn</code>, <code>joblib</code>, and <code>fastapi</code> for a custom server.</p> |
| </div> |
| <div class="stack-layer"><h3><i class="material-icons">memory</i>Hardware Layer</h3> |
| <p><strong>CPUs:</strong> Almost always sufficient. There is no GPU acceleration for standard scikit-learn algorithms.</p> |
| </div> |
| </div> |
| <div id="classic-xgboost" class="content-panel"> |
| <div class="stack-layer"><h3><i class="material-icons">psychology</i>Model Layer</h3><p>An XGBoost model saved in its native JSON or UBJ format, which is portable and efficient.</p></div> |
| <div class="stack-layer"><h3><i class="material-icons">layers</i>Serving Stack Layer</h3><p>Kubeflow KServe, Ray Serve, NVIDIA Triton (with FIL backend), and custom FastAPI servers are all excellent choices.</p></div> |
| <div class="stack-layer"><h3><i class="material-icons">cloud_queue</i>Orchestration Layer</h3><p>Standard Kubernetes setup. The Dockerfile should include the <code>xgboost</code> library.</p></div> |
| <div class="stack-layer"><h3><i class="material-icons">memory</i>Hardware Layer</h3><p><strong>CPUs:</strong> Excellent performance. <strong>GPUs:</strong> XGBoost has optional GPU acceleration which can provide a significant speedup.</p></div> |
| </div> |
| <div id="classic-jax" class="content-panel"> |
| <div class="stack-layer"><h3><i class="material-icons">psychology</i>Model Layer</h3><p>JAX models are often defined as pure functions with parameters handled separately. We save the parameters using a standard serialization library like Flax's <code>msgpack</code>.</p></div> |
| <div class="stack-layer"><h3><i class="material-icons">layers</i>Serving Stack Layer</h3><p>Ray Serve is an excellent fit for JAX's functional paradigm. A custom FastAPI server is also straightforward. Kubeflow KServe and NVIDIA Triton require a custom container approach wrapping the JAX logic.</p></div> |
| <div class="stack-layer"><h3><i class="material-icons">cloud_queue</i>Orchestration Layer</h3><p>The Dockerfile needs to install <code>jax</code> and <code>jaxlib</code> corresponding to the target hardware (CPU, GPU, or TPU).</p></div> |
| <div class="stack-layer"><h3><i class="material-icons">memory</i>Hardware Layer</h3><p><strong>CPUs/GPUs/TPUs:</strong> JAX was designed for accelerators and excels on all of them due to its XLA-based compilation.</p></div> |
| </div> |
| |
| |
| <div id="genai-llm" class="content-panel"> |
| <div class="stack-layer"><h3><i class="material-icons">psychology</i>Model Layer</h3><p>Large Language Models (e.g., Llama, Mistral) are based on the Transformer architecture. The key inference challenge is managing the <strong>KV Cache</strong>.</p></div> |
| <div class="stack-layer"><h3><i class="material-icons">layers</i>Serving Stack Layer</h3><p>Specialized serving toolkits like <strong>vLLM</strong>, <strong>SGLang</strong>, or <strong>NVIDIA Triton</strong> with its TensorRT-LLM backend are required for efficient inference, handling complexities like continuous batching and paged attention.</p></div> |
| <div class="stack-layer"><h3><i class="material-icons">cloud_queue</i>Orchestration Layer</h3><p>Kubernetes (often with KubeRay) is used to manage GPU resources and schedule serving pods. Managed services like Vertex AI and SageMaker also provide optimized runtimes for popular LLMs.</p></div> |
| <div class="stack-layer"><h3><i class="material-icons">memory</i>Hardware Layer</h3><p><strong>GPUs:</strong> Essential. High-VRAM GPUs like NVIDIA A100 or H100 are required to fit the model weights and KV cache. <strong>TPUs:</strong> Viable for specific models, especially on GCP.</p></div> |
| </div> |
| <div id="genai-vlm" class="content-panel"> |
| <div class="stack-layer"><h3><i class="material-icons">psychology</i>Model Layer</h3><p>Vision-Language Models (VLMs, e.g., LLaVA, IDEFICS) combine a vision encoder (like ViT) with an LLM to process images and text.</p></div> |
| <div class="stack-layer"><h3><i class="material-icons">layers</i>Serving Stack Layer</h3><p>The stack must handle multi-modal inputs. Frameworks like <strong>vLLM</strong> and <strong>SGLang</strong> are adding native support for VLMs. A custom container is often needed to handle the specific image preprocessing logic.</p></div> |
| <div class="stack-layer"><h3><i class="material-icons">cloud_queue</i>Orchestration Layer</h3><p>Similar to LLMs, requires robust orchestration to manage high-resource GPU pods and potentially large input payloads.</p></div> |
| <div class="stack-layer"><h3><i class="material-icons">memory</i>Hardware Layer</h3><p><strong>GPUs:</strong> High-VRAM GPUs are mandatory due to the combined size of the vision encoder, LLM, and KV cache.</p></div> |
| </div> |
| <div id="genai-diffusion" class="content-panel"> |
| <div class="stack-layer"><h3><i class="material-icons">psychology</i>Model Layer</h3><p>Diffusion models (e.g., Stable Diffusion) generate images through an iterative denoising process, making latency a key challenge.</p></div> |
| <div class="stack-layer"><h3><i class="material-icons">layers</i>Serving Stack Layer</h3><p>Optimizations focus on reducing latency. Key tools include model compilers like <strong>TensorRT</strong> (often used with NVIDIA Triton), techniques like <strong>Latent Consistency Models (LCMs)</strong>, and libraries like <strong>Diffusers</strong>, typically wrapped in a custom FastAPI container.</p></div> |
| <div class="stack-layer"><h3><i class="material-icons">cloud_queue</i>Orchestration Layer</h3><p>Kubernetes or managed platforms are used to serve the GPU-intensive workload. Autoscaling is critical to handle bursty traffic patterns.</p></div> |
| <div class="stack-layer"><h3><i class="material-icons">memory</i>Hardware Layer</h3><p><strong>GPUs:</strong> High-end consumer or datacenter GPUs are needed for acceptable generation speeds. VRAM is the most critical resource, dictating max resolution and batch size.</p></div> |
| </div> |
| </div> |
| </main> |
| </div> |
|
|
| <script> |
| document.addEventListener('DOMContentLoaded', function() { |
| const builder = document.getElementById('architecture-builder'); /* root for delegated chip clicks */ |
| const generateBtn = document.getElementById('generate-btn'); |
| const diagramOutput = document.getElementById('architecture-diagram-output'); /* receives the generated diagram HTML */ |
| /* Type switch plus the two option sets; exactly one set carries .active at a time. */ |
| const archTypeSelector = builder.querySelector('.arch-type-selector'); |
| const classicFields = document.getElementById('classic-builder-fields'); |
| const genaiFields = document.getElementById('genai-builder-fields'); |
| |
| function updateChipStates() { |
| const activeArchType = archTypeSelector.querySelector('.active').dataset.type; |
| const activeBuilderFields = (activeArchType === 'classic') ? classicFields : genaiFields; |
| |
| if (activeArchType === 'classic') { |
| const activeFramework = activeBuilderFields.querySelector('.selection-group[data-group="framework"] .chip.active'); |
| const torchserveChip = activeBuilderFields.querySelector('.chip[data-id="torchserve"]'); |
| const tfservingChip = activeBuilderFields.querySelector('.chip[data-id="tf-serving"]'); |
| |
| [torchserveChip, tfservingChip].forEach(c => c.classList.remove('disabled')); |
| |
| if (activeFramework) { |
| const frameworkId = activeFramework.dataset.id; |
| const nonTfTsFrameworks = ['scikit-learn', 'xgboost', 'jax']; |
| if (frameworkId === 'pytorch') { |
| tfservingChip.classList.add('disabled'); |
| if(tfservingChip.classList.contains('active')) tfservingChip.classList.remove('active'); |
| } else if (frameworkId === 'tensorflow') { |
| torchserveChip.classList.add('disabled'); |
| if(torchserveChip.classList.contains('active')) torchserveChip.classList.remove('active'); |
| } else if (nonTfTsFrameworks.includes(frameworkId)) { |
| [torchserveChip, tfservingChip].forEach(c => { |
| c.classList.add('disabled'); |
| if(c.classList.contains('active')) c.classList.remove('active'); |
| }); |
| } |
| } |
| } else { |
| const activeModelType = activeBuilderFields.querySelector('.selection-group[data-group="model-type"] .chip.active'); |
| const vllmChip = activeBuilderFields.querySelector('.chip[data-id="vllm"]'); |
| const sglangChip = activeBuilderFields.querySelector('.chip[data-id="sglang"]'); |
| |
| [vllmChip, sglangChip].forEach(c => c.classList.remove('disabled')); |
| |
| if (activeModelType && activeModelType.dataset.id === 'diffusion') { |
| [vllmChip, sglangChip].forEach(c => { |
| c.classList.add('disabled'); |
| if(c.classList.contains('active')) c.classList.remove('active'); |
| }); |
| } |
| } |
| |
| const activeOrchestration = activeBuilderFields.querySelector('.selection-group[data-group="orchestration"] .chip.active'); |
| const servingGroup = activeBuilderFields.querySelector('.selection-group[data-group="serving"]'); |
| |
| if (activeOrchestration && (activeOrchestration.dataset.id === 'vertex-ai' || activeOrchestration.dataset.id === 'sagemaker')) { |
| servingGroup.classList.add('disabled'); |
| servingGroup.querySelector('.chip.active')?.classList.remove('active'); |
| } else { |
| servingGroup.classList.remove('disabled'); |
| } |
| } |
| |
| archTypeSelector.addEventListener('click', function(e){ |
| if (!e.target.classList.contains('arch-type-chip')) return; |
| archTypeSelector.querySelectorAll('.arch-type-chip').forEach(c => c.classList.remove('active')); |
| e.target.classList.add('active'); |
| const type = e.target.dataset.type; |
| classicFields.classList.toggle('active', type === 'classic'); |
| genaiFields.classList.toggle('active', type === 'gen-ai'); |
| diagramOutput.style.display = 'none'; |
| updateChipStates(); |
| }); |
| |
| builder.addEventListener('click', function(e) { |
| if (!e.target.classList.contains('chip') || e.target.classList.contains('disabled')) return; |
| const chip = e.target; |
| const group = chip.closest('.selection-group'); |
| if (group.classList.contains('disabled')) return; |
| group.querySelectorAll('.chip').forEach(c => c.classList.remove('active')); |
| chip.classList.add('active'); |
| updateChipStates(); |
| }); |
| |
| generateBtn.addEventListener('click', function() { |
| const activeArchType = archTypeSelector.querySelector('.active').dataset.type; |
| const activeBuilderFields = document.querySelector('.builder-fields.active'); |
| const selections = {}; |
| let allSelected = true; |
| |
| const isManaged = activeBuilderFields.querySelector('.selection-group[data-group="orchestration"] .chip.active')?.dataset.id.includes('vertex') || |
| activeBuilderFields.querySelector('.selection-group[data-group="orchestration"] .chip.active')?.dataset.id.includes('sagemaker'); |
| |
| activeBuilderFields.querySelectorAll('.selection-group').forEach(group => { |
| const groupKey = group.dataset.group; |
| if (isManaged && groupKey === 'serving') return; |
| |
| const activeChip = group.querySelector('.chip.active'); |
| if (activeChip) { |
| selections[groupKey] = { name: activeChip.innerText, id: activeChip.dataset.id }; |
| } else { |
| allSelected = false; |
| } |
| }); |
| |
| if (!allSelected) { |
| alert('Please make a selection for each required layer.'); |
| return; |
| } |
| |
| let diagramHtml = `<h3 class="diagram-title">Your Custom ${activeArchType === 'gen-ai' ? 'Generative AI' : 'Classic ML'} Architecture</h3><div class="diagram-stack">`; |
| const arrowClass = activeArchType === 'gen-ai' ? 'gen-ai-arrow' : ''; |
| const layerClass = activeArchType === 'gen-ai' ? 'gen-ai-layer' : ''; |
| |
| function createImageTag(selection) { |
| return `<img src="${selection.id}.png" alt="${selection.name} Icon" class="icon-img-placeholder">`; |
| } |
| |
| if (activeArchType === 'gen-ai') { |
| diagramHtml += `<div class="diagram-layer ${layerClass}"><h5>${selections['model-type'].name}</h5><p>Model Type</p>${createImageTag(selections['model-type'])}</div><div class="diagram-arrow ${arrowClass}">south</div>`; |
| } |
| |
| diagramHtml += `<div class="diagram-layer ${layerClass}"><h5>${selections.framework.name}</h5><p>ML Framework</p>${createImageTag(selections.framework)}</div><div class="diagram-arrow ${arrowClass}">south</div>`; |
| |
| if (isManaged) { |
| diagramHtml += `<div class="diagram-layer ${layerClass}"><h5>${selections.orchestration.name}</h5><p>Managed Platform</p>${createImageTag(selections.orchestration)}</div><div class="diagram-arrow ${arrowClass}">south</div>`; |
| } else { |
| diagramHtml += `<div class="diagram-layer ${layerClass}"><h5>${selections.serving.name}</h5><p>Serving Container</p>${createImageTag(selections.serving)}</div><div class="diagram-arrow ${arrowClass}">south</div>`; |
| diagramHtml += `<div class="diagram-layer ${layerClass}"><h5>${selections.orchestration.name}</h5><p>Orchestration</p>${createImageTag(selections.orchestration)}</div><div class="diagram-arrow ${arrowClass}">south</div>`; |
| } |
| |
| diagramHtml += `<div class="diagram-layer ${layerClass}"><h5>${selections.hardware.name}</h5><p>Hardware</p>${createImageTag(selections.hardware)}</div>`; |
| diagramHtml += `</div>`; |
| |
| diagramOutput.innerHTML = diagramHtml; |
| diagramOutput.style.display = 'block'; |
| diagramOutput.scrollIntoView({ behavior: 'smooth', block: 'center' }); |
| }); |
| |
| const tiles = document.querySelectorAll('.tile'); |
| const contentPanels = document.querySelectorAll('.content-panel'); |
| tiles.forEach(tile => tile.addEventListener('click', (e) => { |
| const targetId = e.currentTarget.dataset.target; |
| tiles.forEach(t => t.classList.remove('active')); |
| e.currentTarget.classList.add('active'); |
| contentPanels.forEach(p => p.classList.remove('active')); |
| const panel = document.getElementById(targetId); |
| if (panel) { |
| panel.classList.add('active'); |
| panel.scrollIntoView({ behavior: 'smooth', block: 'start' }); |
| } |
| })); |
| |
| document.querySelectorAll('pre code').forEach(codeBlock => { |
| const pre = codeBlock.parentElement; |
| if (!pre.querySelector('.copy-btn')) { |
| const copyButton = document.createElement('button'); |
| copyButton.innerText = 'Copy'; |
| copyButton.className = 'copy-btn'; |
| pre.appendChild(copyButton); |
| copyButton.addEventListener('click', (e) => { |
| e.stopPropagation(); |
| navigator.clipboard.writeText(codeBlock.innerText).then(() => { |
| copyButton.innerText = 'Copied!'; |
| copyButton.classList.add('copied'); |
| setTimeout(() => { copyButton.innerText = 'Copy'; copyButton.classList.remove('copied'); }, 2000); |
| }); |
| }); |
| } |
| }); |
| updateChipStates(); |
| }); |
| </script> |
| </body> |
| </html> |