#!/usr/bin/env python3
"""
#301: On-device readiness checker — a Gradio Space that evaluates whether a
given model will run on a mobile device.
Paste a HuggingFace model ID or upload a config, get a "will it run on a phone?" report:
- Parameter count vs memory budget
- Architecture compatibility
- Quantization recommendations
- Estimated phone farm performance
- Recommended dispatchAI model alternatives
"""
import gradio as gr
import json
import requests
from huggingface_hub import hf_hub_download, HfApi
import os
# Hugging Face access token read from the environment. An unset or empty
# HF_TOKEN is normalised to None so that downstream huggingface_hub calls are
# told "no explicit token" rather than being handed an empty string.
# NOTE(review): huggingface_hub appears to forward any str token verbatim as a
# Bearer credential — confirm against the library's authentication docs.
token = os.environ.get("HF_TOKEN") or None
# Phone farm specs
# Maps the device label (also used as the dropdown choice in the UI) to its
# hardware profile. `max_model_size_gb` is the budget that check_readiness()
# compares estimated model sizes against.
PHONE_SPECS = {
    "Samsung S20 FE (Snapdragon 865, 8GB)": dict(
        chipset="Snapdragon 865",
        ram_gb=8,
        usable_ram_gb=6,  # After OS overhead
        cpu_cores=8,
        max_model_size_gb=4,  # Safe limit for 8GB phone
    ),
    "Samsung S23 (Snapdragon 8 Gen 2, 8GB)": dict(
        chipset="Snapdragon 8 Gen 2",
        ram_gb=8,
        usable_ram_gb=6,
        cpu_cores=8,
        max_model_size_gb=4,
    ),
    "iPhone 15 Pro (A17 Pro, 8GB)": dict(
        chipset="Apple A17 Pro",
        ram_gb=8,
        usable_ram_gb=6,
        cpu_cores=6,
        max_model_size_gb=4,
    ),
    "Budget Android (4GB RAM)": dict(
        chipset="Mid-range",
        ram_gb=4,
        usable_ram_gb=3,
        cpu_cores=8,
        max_model_size_gb=2,
    ),
}
# dispatchAI model catalog for recommendations
# Each entry carries the repo id, parameter count in millions, size in MB and
# a task tag; check_readiness() filters this list by `params_m`.
DISPATCHAI_MODELS = [
    {"id": repo_id, "params_m": params_m, "size_mb": size_mb, "task": task}
    for repo_id, params_m, size_mb, task in (
        ("dispatchAI/SmolLM2-135M-Instruct-mobile", 135, 270, "chat"),
        ("dispatchAI/SmolLM2-360M-Instruct-mobile", 360, 720, "chat"),
        ("dispatchAI/Qwen2.5-0.5B-Instruct-mobile-int4", 500, 350, "chat"),
        ("dispatchAI/Qwen2.5-0.5B-Coder-mobile", 500, 350, "code"),
        ("dispatchAI/Llama-3.2-1B-Instruct-mobile", 1000, 2000, "chat"),
        ("dispatchAI/TinyLlama-1.1B-Chat-Q5-mobile", 1100, 450, "chat"),
        ("dispatchAI/Qwen2.5-1.5B-Instruct-Q5-mobile", 1500, 900, "chat"),
        ("dispatchAI/Gemma-2-2B-IT-Q5-mobile", 2000, 1300, "chat"),
        ("dispatchAI/Phi-3.5-mini-instruct-Q5-mobile", 2000, 1300, "chat"),
        ("dispatchAI/Gemma-2B-Arabic-mobile", 2000, 1300, "arabic"),
    )
]
def fetch_model_info(model_id):
    """Fetch a model's config.json and the total size of its weight files.

    Args:
        model_id: Hugging Face Hub repository id, e.g. "org/name".

    Returns:
        A 2-tuple:
        - on success: ``(config, size_mb)`` where ``config`` is the parsed
          config.json and ``size_mb`` is the summed size (bytes / 1e6) of every
          ``.safetensors``, ``.bin`` and ``.gguf`` file in the repo, or 0 when
          no sizes could be obtained;
        - on failure: ``(None, message)`` where ``message`` is ``str(exc)``.

    Never raises: any exception is converted into the failure tuple so the UI
    callback can show it to the user.
    """
    weight_suffixes = (".safetensors", ".bin", ".gguf")
    # An empty token string is treated as "no token".
    hf_token = token or None
    try:
        config_path = hf_hub_download(model_id, "config.json", token=hf_token)
        with open(config_path, "r", encoding="utf-8") as config_file:
            config = json.load(config_file)

        api = HfApi(token=hf_token)
        weight_files = [
            name
            for name in api.list_repo_files(model_id, token=hf_token)
            if name.endswith(weight_suffixes)
        ]

        size_mb = 0
        if weight_files:
            # Size lookup is best-effort: one batched request for all weight
            # files instead of one request per file. If it fails the size
            # simply stays 0 and the config is still returned.
            try:
                entries = api.get_paths_info(
                    model_id, weight_files, repo_type="model", token=hf_token
                )
            except Exception:
                entries = []
            # NOTE: formats are summed together, so a repo that ships the same
            # weights in several formats is counted more than once.
            size_mb = sum((getattr(entry, "size", None) or 0) for entry in entries) / 1e6
        return config, size_mb
    except Exception as e:
        # Top-level boundary for the UI: report the failure as text.
        return None, str(e)
def _first_number(config, keys, default=0):
    """Return the first numeric (non-bool) value stored in config under keys, else default."""
    for key in keys:
        value = config.get(key)
        if isinstance(value, (int, float)) and not isinstance(value, bool):
            return value
    return default


def estimate_params(config):
    """Estimate a transformer's parameter count, in millions, from its config.

    The estimate is deliberately rough and counts only:
    - attention: 4 * hidden^2 per layer (Q, K, V, O projections),
    - MLP: 2 * hidden * intermediate per layer,
    - embeddings: vocab * hidden.

    Args:
        config: Mapping parsed from a model's config.json. Missing, ``null`` or
            non-numeric entries are treated as absent; ``intermediate_size``
            then defaults to ``4 * hidden``.

    Returns:
        The estimated parameter count divided by 1e6. Returns 0.0 when
        ``config`` is not a mapping or lacks the size fields.
    """
    if not callable(getattr(config, "get", None)):
        return 0.0

    # Primary key first, then alternate spellings.
    # NOTE(review): the n_embd/n_layer/n_inner and d_model/d_ff aliases are
    # presumed to be the GPT-2 / T5 style names — verify against those configs.
    hidden = _first_number(config, ("hidden_size", "n_embd", "d_model"))
    layers = _first_number(config, ("num_hidden_layers", "num_layers", "n_layer"))
    vocab = _first_number(config, ("vocab_size",))
    intermediate = _first_number(
        config, ("intermediate_size", "n_inner", "d_ff"), default=hidden * 4
    )

    attention_params = 4 * hidden * hidden * layers
    mlp_params = 2 * hidden * intermediate * layers
    embed_params = vocab * hidden
    total = attention_params + mlp_params + embed_params
    return total / 1e6  # in millions
def check_readiness(model_id, target_device):
    """Build a Markdown report on whether a Hub model will run on a phone.

    Args:
        model_id: Hugging Face model id typed by the user. ``None``, empty or
            whitespace-only input yields a prompt to enter an id.
        target_device: A key of ``PHONE_SPECS``. Any other value (including
            ``None``) falls back to the Samsung S20 FE profile, and the report
            is labelled with that profile.

    Returns:
        A Markdown string: the readiness report on success, or a short message
        when the input is empty, the config cannot be fetched, or no parameter
        estimate can be derived from the config.
    """
    default_device = "Samsung S20 FE (Snapdragon 865, 8GB)"

    model_id = (model_id or "").strip()
    if not model_id:
        return "Please enter a HuggingFace model ID."

    # fetch_model_info returns (config, size_mb) on success and
    # (None, error_message) on failure.
    config, fetch_detail = fetch_model_info(model_id)
    if not config:
        if isinstance(fetch_detail, str):
            return (
                f"❌ **Error fetching model info**: {fetch_detail}"
                "\n\nCheck the model ID and try again."
            )
        return f"❌ Could not fetch config for `{model_id}`"

    if target_device not in PHONE_SPECS:
        target_device = default_device
    specs = PHONE_SPECS[target_device]
    device_short = target_device.split("(")[0].strip()

    # Estimate parameters
    params_m = estimate_params(config)
    model_type = config.get("model_type", "unknown")
    hidden_size = config.get("hidden_size", 0)
    # Same fallback key as estimate_params so the table agrees with the estimate.
    num_layers = config.get("num_hidden_layers", config.get("num_layers", 0))

    if params_m <= 0:
        # Without an estimate every size would be 0 MB and the model would be
        # rated "Excellent"; say so explicitly instead.
        return (
            f"⚠️ **Could not estimate the size of `{model_id}`** "
            f"(architecture: {model_type}). Its `config.json` does not expose "
            "the size fields this checker reads, so no readiness report can be produced."
        )

    # Estimated size per quantization: params (millions) * bytes per param.
    quant_formats = (
        ("FP16", 2),
        ("Q8", 1),
        ("Q5_K_M", 0.625),
        ("Q4_K_M", 0.5),
    )
    budget_mb = specs["max_model_size_gb"] * 1024
    size_rows = []
    for label, bytes_per_param in quant_formats:
        est_size_mb = params_m * bytes_per_param
        mark = "✅" if est_size_mb < budget_mb else "❌"
        size_rows.append(f"| {label} | {est_size_mb:.0f}MB | {mark} |")

    # Phone farm performance estimate (based on real benchmarks)
    # S20 FE: ~18 t/s for 135M, ~10 t/s for 500M, ~6 t/s for 1B, ~3 t/s for 2B
    # Each tier: (exclusive upper bound in millions of params, speed, rating, advice).
    ready_advice = (
        "✅ **This model is ready for mobile deployment.** "
        "Use Q4_K_M or Q5_K_M GGUF for best size/quality balance."
    )
    tiers = (
        (200, "15-20 t/s", "🟢 Excellent", ready_advice),
        (600, "8-12 t/s", "🟢 Good", ready_advice),
        (
            1200,
            "5-7 t/s",
            "🟡 Usable",
            "⚠️ **This model is usable but may be slow.** "
            "Consider Q4_K_M quantization and test on target hardware.",
        ),
        (
            2500,
            "2-4 t/s",
            "🟠 Slow",
            "⚠️ **This model will be slow on mobile.** "
            "Consider a smaller alternative from dispatchAI.",
        ),
        (
            float("inf"),
            "< 2 t/s",
            "🔴 Too large",
            "❌ **This model is too large for mobile deployment.** "
            "Use a dispatchAI alternative above.",
        ),
    )
    _, est_tps, rating, advice = next(
        (tier for tier in tiers if params_m < tier[0]), tiers[-1]
    )

    # Recommended alternatives: catalog models between 0.5x and 1.2x the
    # estimated size, else the three largest models that are still smaller.
    recommendations = [
        m for m in DISPATCHAI_MODELS
        if params_m * 0.5 <= m["params_m"] <= params_m * 1.2
    ]
    if not recommendations:
        smaller = [m for m in DISPATCHAI_MODELS if m["params_m"] < params_m]
        recommendations = sorted(smaller, key=lambda m: m["params_m"], reverse=True)[:3]
    rec_text = "\n".join(
        f"- [`{m['id']}`](https://huggingface.co/{m['id']}) — {m['params_m']}M params, {m['size_mb']}MB"
        for m in recommendations[:5]
    )

    report_lines = [
        "## 📱 On-Device Readiness Report",
        f"### Model: `{model_id}`",
        "| Property | Value |",
        "|----------|-------|",
        f"| Architecture | {model_type} |",
        f"| Hidden size | {hidden_size} |",
        f"| Layers | {num_layers} |",
        f"| Estimated params | ~{params_m:.0f}M |",
        "### Size estimates by quantization",
        f"| Format | Size | Fits {device_short}? |",
        "|--------|------|------|",
        *size_rows,
        "### Performance estimate (Snapdragon 865)",
        "| Metric | Value |",
        "|--------|-------|",
        f"| Estimated speed | {est_tps} |",
        f"| Readiness | {rating} |",
        f"### Target device: {target_device}",
        "| Property | Value |",
        "|----------|-------|",
        f"| Chipset | {specs['chipset']} |",
        f"| RAM | {specs['ram_gb']}GB |",
        f"| Max model size | {specs['max_model_size_gb']}GB |",
        "### Recommended dispatchAI alternatives",
        rec_text if rec_text else "No close matches found.",
        "### Recommendation",
    ]
    return "\n".join(report_lines) + "\n" + advice
# Custom CSS
# Stylesheet handed to gr.Blocks: container background/text colours, heading
# colour, and a gradient for .gr-button.
custom_css = "\n".join(
    (
        "",
        ".gradio-container { background: #0A0F1A !important; color: #F5F7FA !important; }",
        "h1, h2, h3 { color: #1FE0E6 !important; }",
        ".gr-button { background: linear-gradient(135deg, #2E6BFF, #1FE0E6) !important; color: #0A0F1A !important; }",
        "",
    )
)
# Gradio UI: a model-id textbox, a device dropdown and a button that renders
# the Markdown returned by check_readiness().
with gr.Blocks(css=custom_css, title="On-Device Readiness Checker") as demo:
    gr.Markdown("""
# 📱 On-Device Readiness Checker
**Will your model run on a phone?** Paste a HuggingFace model ID and find out.
Powered by [dispatchAI](https://huggingface.co/dispatchAI) — mobile AI that runs.
""")
    # NOTE(review): all three inputs carry a `scale`, so they are assumed to
    # share this row — confirm against the original layout.
    with gr.Row():
        model_input = gr.Textbox(
            label="HuggingFace Model ID",
            placeholder="e.g., Qwen/Qwen2.5-0.5B-Instruct",
            scale=3
        )
        device_input = gr.Dropdown(
            choices=list(PHONE_SPECS.keys()),
            value="Samsung S20 FE (Snapdragon 865, 8GB)",
            label="Target Device",
            scale=2
        )
        check_btn = gr.Button("Check Readiness", variant="primary", scale=1)
    report_output = gr.Markdown(label="Readiness Report")
    check_btn.click(
        fn=check_readiness,
        inputs=[model_input, device_input],
        outputs=report_output
    )
    gr.Markdown("""
---
### How it works
1. Fetches the model's `config.json` from HuggingFace
2. Estimates parameter count and size for each quantization level
3. Compares against the target device's memory budget
4. Estimates inference speed based on real phone farm benchmarks
5. Recommends dispatchAI mobile-optimized alternatives
### Try these models
- `Qwen/Qwen2.5-0.5B-Instruct` — small model, should pass
- `Qwen/Qwen2.5-7B-Instruct` — large model, should fail
- `meta-llama/Llama-3.2-1B-Instruct` — borderline
- `HuggingFaceTB/SmolLM2-135M-Instruct` — tiny, excellent
---
*Dispatch AI (FZE), Sharjah SRTI Free Zone, License No. 10818.*
""")
# Launch the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()