#!/usr/bin/env python3
"""
#301: On-device readiness checker — a Gradio Space that evaluates
whether a given model will run on a mobile device.

Paste a HuggingFace model ID, get a "will it run on a phone?" report:
- Parameter count vs memory budget
- Architecture compatibility
- Quantization recommendations
- Estimated phone farm performance
- Recommended dispatchAI model alternatives
"""

import json
import os

import gradio as gr
import requests
from huggingface_hub import hf_hub_download, HfApi

# Treat an unset or empty HF_TOKEN as "no token" so huggingface_hub is not
# handed an empty string as a credential.
token = os.environ.get("HF_TOKEN") or None

# Device used when the caller passes an unknown or missing target device.
DEFAULT_DEVICE = "Samsung S20 FE (Snapdragon 865, 8GB)"

# Phone farm specs
PHONE_SPECS = {
    "Samsung S20 FE (Snapdragon 865, 8GB)": {
        "chipset": "Snapdragon 865",
        "ram_gb": 8,
        "usable_ram_gb": 6,  # After OS overhead
        "cpu_cores": 8,
        "max_model_size_gb": 4,  # Safe limit for 8GB phone
    },
    "Samsung S23 (Snapdragon 8 Gen 2, 8GB)": {
        "chipset": "Snapdragon 8 Gen 2",
        "ram_gb": 8,
        "usable_ram_gb": 6,
        "cpu_cores": 8,
        "max_model_size_gb": 4,
    },
    "iPhone 15 Pro (A17 Pro, 8GB)": {
        "chipset": "Apple A17 Pro",
        "ram_gb": 8,
        "usable_ram_gb": 6,
        "cpu_cores": 6,
        "max_model_size_gb": 4,
    },
    "Budget Android (4GB RAM)": {
        "chipset": "Mid-range",
        "ram_gb": 4,
        "usable_ram_gb": 3,
        "cpu_cores": 8,
        "max_model_size_gb": 2,
    },
}

# dispatchAI model catalog for recommendations
DISPATCHAI_MODELS = [
    {"id": "dispatchAI/SmolLM2-135M-Instruct-mobile", "params_m": 135, "size_mb": 270, "task": "chat"},
    {"id": "dispatchAI/SmolLM2-360M-Instruct-mobile", "params_m": 360, "size_mb": 720, "task": "chat"},
    {"id": "dispatchAI/Qwen2.5-0.5B-Instruct-mobile-int4", "params_m": 500, "size_mb": 350, "task": "chat"},
    {"id": "dispatchAI/Qwen2.5-0.5B-Coder-mobile", "params_m": 500, "size_mb": 350, "task": "code"},
    {"id": "dispatchAI/Llama-3.2-1B-Instruct-mobile", "params_m": 1000, "size_mb": 2000, "task": "chat"},
    {"id": "dispatchAI/TinyLlama-1.1B-Chat-Q5-mobile", "params_m": 1100, "size_mb": 450, "task": "chat"},
    {"id": "dispatchAI/Qwen2.5-1.5B-Instruct-Q5-mobile", "params_m": 1500, "size_mb": 900, "task": "chat"},
    {"id": "dispatchAI/Gemma-2-2B-IT-Q5-mobile", "params_m": 2000, "size_mb": 1300, "task": "chat"},
    # NOTE(review): params_m/size_mb are identical to the Gemma entries;
    # confirm these figures for the Phi-3.5-mini build.
    {"id": "dispatchAI/Phi-3.5-mini-instruct-Q5-mobile", "params_m": 2000, "size_mb": 1300, "task": "chat"},
    {"id": "dispatchAI/Gemma-2B-Arabic-mobile", "params_m": 2000, "size_mb": 1300, "task": "arabic"},
]

# File suffixes counted as model weights when sizing a repository.
_WEIGHT_SUFFIXES = (".safetensors", ".bin", ".gguf")

# config.json key aliases, tried in order.
_HIDDEN_KEYS = ("hidden_size", "n_embd", "d_model")
_LAYER_KEYS = ("num_hidden_layers", "num_layers", "n_layer")
_INTERMEDIATE_KEYS = ("intermediate_size", "n_inner", "d_ff")

# (format label, bytes per parameter) for each row of the size table.
_QUANT_BYTES_PER_PARAM = (
    ("FP16", 2),
    ("Q8", 1),
    ("Q5_K_M", 0.625),
    ("Q4_K_M", 0.5),
)

# Phone farm performance estimate (based on real benchmarks)
# S20 FE: ~18 t/s for 135M, ~10 t/s for 500M, ~6 t/s for 1B, ~3 t/s for 2B
# Each entry: (exclusive upper bound in millions of params, speed, rating).
_SPEED_TIERS = (
    (200, "15-20 t/s", "🟢 Excellent"),
    (600, "8-12 t/s", "🟢 Good"),
    (1200, "5-7 t/s", "🟡 Usable"),
    (2500, "2-4 t/s", "🟠 Slow"),
)


def _repo_weights_size_mb(model_id):
    """Return the summed size (MB) of weight files in the repo, or 0.

    This is best-effort: any failure while listing or sizing files yields 0
    instead of an error, so a loaded config is never discarded because of it.
    """
    try:
        api = HfApi(token=token)
        weight_files = [
            path
            for path in api.list_repo_files(model_id, token=token)
            if path.endswith(_WEIGHT_SUFFIXES)
        ]
        if not weight_files:
            return 0
        # One batched request instead of one request per file.
        entries = api.get_paths_info(
            model_id, weight_files, repo_type="model", token=token
        )
    except Exception:
        # Size is informational only; fall back to "unknown".
        return 0
    return sum((getattr(entry, "size", None) or 0) for entry in entries) / 1e6


def fetch_model_info(model_id):
    """Fetch config.json from HuggingFace.

    Returns:
        ``(config, size_mb)`` on success, where ``config`` is the parsed
        config.json and ``size_mb`` is the total size of weight files in the
        repo (0 when it could not be determined).
        ``(None, error_message)`` when the config could not be downloaded
        or parsed.
    """
    try:
        config_path = hf_hub_download(model_id, "config.json", token=token)
        with open(config_path, "r", encoding="utf-8") as config_file:
            config = json.load(config_file)
    except Exception as exc:
        # UI boundary: report the failure as text rather than raising.
        return None, str(exc)
    return config, _repo_weights_size_mb(model_id)


def _config_number(config, keys, default=0):
    """Return the first numeric value found under ``keys``, else ``default``.

    Missing keys, ``null`` values and non-numeric values are skipped.
    """
    for key in keys:
        value = config.get(key)
        if isinstance(value, (int, float)) and not isinstance(value, bool):
            return value
    return default


def estimate_params(config):
    """Estimate parameter count from config.

    Args:
        config: Parsed config.json mapping.

    Returns:
        Estimated parameter count in millions; 0 when ``config`` is not a
        mapping or carries none of the expected size fields.
    """
    if not isinstance(config, dict):
        return 0

    hidden = _config_number(config, _HIDDEN_KEYS)
    layers = _config_number(config, _LAYER_KEYS)
    vocab = _config_number(config, ("vocab_size",))
    intermediate = _config_number(config, _INTERMEDIATE_KEYS, default=hidden * 4)

    # Rough estimate: transformers params
    # Attention: 4 * hidden^2 per layer (Q, K, V, O)
    # MLP: 2 * hidden * intermediate per layer
    # Embeddings: vocab * hidden
    attention_params = 4 * hidden * hidden * layers
    mlp_params = 2 * hidden * intermediate * layers
    embed_params = vocab * hidden

    return (attention_params + mlp_params + embed_params) / 1e6  # in millions


def _speed_tier(params_m):
    """Map a parameter count (millions) to ``(estimated speed, rating)``."""
    if params_m <= 0:
        # No usable estimate: do not rate a model we could not size.
        return "unknown", "⚪ Unknown"
    for upper_bound, est_tps, rating in _SPEED_TIERS:
        if params_m < upper_bound:
            return est_tps, rating
    return "< 2 t/s", "🔴 Too large"


def _recommend_models(params_m):
    """Pick catalog models near ``params_m``, else the largest smaller ones."""
    close = [
        m for m in DISPATCHAI_MODELS
        if params_m * 0.5 <= m["params_m"] <= params_m * 1.2
    ]
    if close:
        return close[:5]
    smaller = [m for m in DISPATCHAI_MODELS if m["params_m"] < params_m]
    return sorted(smaller, key=lambda m: m["params_m"], reverse=True)[:3]


def check_readiness(model_id, target_device):
    """Check if a model will run on the target device.

    Args:
        model_id: HuggingFace model ID typed by the user.
        target_device: A key of ``PHONE_SPECS``; unknown or missing values
            fall back to ``DEFAULT_DEVICE``.

    Returns:
        A Markdown report, or a short Markdown error message when the model
        ID is empty or its config cannot be fetched.
    """
    repo_id = (model_id or "").strip()
    if not repo_id:
        return "Please enter a HuggingFace model ID."

    config, detail = fetch_model_info(repo_id)
    if not config and isinstance(detail, str):
        return f"❌ **Error fetching model info**: {detail}\n\nCheck the model ID and try again."
    if not config or not isinstance(config, dict):
        return f"❌ Could not fetch config for `{repo_id}`"
    size_mb = detail

    device_name = target_device if target_device in PHONE_SPECS else DEFAULT_DEVICE
    specs = PHONE_SPECS[device_name]
    device_short = device_name.split("(")[0].strip()

    # Estimate parameters
    params_m = estimate_params(config)
    model_type = config.get("model_type", "unknown")
    # Same key aliases as estimate_params so the table matches the estimate.
    hidden_size = _config_number(config, _HIDDEN_KEYS)
    num_layers = _config_number(config, _LAYER_KEYS)

    est_tps, rating = _speed_tier(params_m)

    # Memory check: one row per quantization format.
    budget_mb = specs["max_model_size_gb"] * 1024
    quant_rows = []
    for label, bytes_per_param in _QUANT_BYTES_PER_PARAM:
        quant_size_mb = params_m * bytes_per_param
        verdict = "✅" if quant_size_mb < budget_mb else "❌"
        quant_rows.append(f"| {label} | {quant_size_mb:.0f}MB | {verdict} |")

    # Find recommended dispatchAI alternatives
    rec_text = "\n".join(
        f"- [`{m['id']}`](https://huggingface.co/{m['id']}) — {m['params_m']}M params, {m['size_mb']}MB"
        for m in _recommend_models(params_m)
    )

    lines = [
        "## 📱 On-Device Readiness Report",
        "",
        f"### Model: `{repo_id}`",
        "",
        "| Property | Value |",
        "|----------|-------|",
        f"| Architecture | {model_type} |",
        f"| Hidden size | {hidden_size} |",
        f"| Layers | {num_layers} |",
        f"| Estimated params | ~{params_m:.0f}M |",
    ]
    if isinstance(size_mb, (int, float)) and size_mb > 0:
        # Sum over every weight file found, so repos shipping several
        # formats report the combined size.
        lines.append(f"| Weight files in repo (all formats) | {size_mb:.0f}MB |")
    lines += [
        "",
        "### Size estimates by quantization",
        "",
        f"| Format | Size | Fits {device_short}? |",
        "|--------|------|------|",
        *quant_rows,
        "",
        "### Performance estimate (Snapdragon 865)",
        "",
        "| Metric | Value |",
        "|--------|-------|",
        f"| Estimated speed | {est_tps} |",
        f"| Readiness | {rating} |",
        "",
        f"### Target device: {device_name}",
        "",
        "| Property | Value |",
        "|----------|-------|",
        f"| Chipset | {specs['chipset']} |",
        f"| RAM | {specs['ram_gb']}GB |",
        f"| Max model size | {specs['max_model_size_gb']}GB |",
        "",
        "### Recommended dispatchAI alternatives",
        "",
        rec_text if rec_text else "No close matches found.",
        "",
        "### Recommendation",
        "",
    ]
    report = "\n".join(lines)

    if "⚪" in rating:
        report += "⚠️ **Could not estimate this model's size from its config.** The figures above are not reliable; check the architecture manually."
    elif "🟢" in rating:
        report += "✅ **This model is ready for mobile deployment.** Use Q4_K_M or Q5_K_M GGUF for best size/quality balance."
    elif "🟡" in rating:
        report += "⚠️ **This model is usable but may be slow.** Consider Q4_K_M quantization and test on target hardware."
    elif "🟠" in rating:
        report += "⚠️ **This model will be slow on mobile.** Consider a smaller alternative from dispatchAI."
    else:
        report += "❌ **This model is too large for mobile deployment.** Use a dispatchAI alternative above."

    return report


# Custom CSS
custom_css = """
.gradio-container { background: #0A0F1A !important; color: #F5F7FA !important; }
h1, h2, h3 { color: #1FE0E6 !important; }
.gr-button { background: linear-gradient(135deg, #2E6BFF, #1FE0E6) !important; color: #0A0F1A !important; }
"""

# Markdown shown above the input row.
_HEADER_MD = """
# 📱 On-Device Readiness Checker

**Will your model run on a phone?** Paste a HuggingFace model ID and find out.

Powered by [dispatchAI](https://huggingface.co/dispatchAI) — mobile AI that runs.
"""

# Markdown shown below the report.
_FOOTER_MD = """
---
### How it works
1. Fetches the model's `config.json` from HuggingFace
2. Estimates parameter count and size for each quantization level
3. Compares against the target device's memory budget
4. Estimates inference speed based on real phone farm benchmarks
5. Recommends dispatchAI mobile-optimized alternatives

### Try these models
- `Qwen/Qwen2.5-0.5B-Instruct` — small model, should pass
- `Qwen/Qwen2.5-7B-Instruct` — large model, should fail
- `meta-llama/Llama-3.2-1B-Instruct` — borderline
- `HuggingFaceTB/SmolLM2-135M-Instruct` — tiny, excellent

---
*Dispatch AI (FZE), Sharjah SRTI Free Zone, License No. 10818.*
"""

with gr.Blocks(css=custom_css, title="On-Device Readiness Checker") as demo:
    gr.Markdown(_HEADER_MD)

    with gr.Row():
        model_input = gr.Textbox(
            label="HuggingFace Model ID",
            placeholder="e.g., Qwen/Qwen2.5-0.5B-Instruct",
            scale=3,
        )
        device_input = gr.Dropdown(
            choices=list(PHONE_SPECS.keys()),
            value=DEFAULT_DEVICE,
            label="Target Device",
            scale=2,
        )
        check_btn = gr.Button("Check Readiness", variant="primary", scale=1)

    report_output = gr.Markdown(label="Readiness Report")

    check_btn.click(
        fn=check_readiness,
        inputs=[model_input, device_input],
        outputs=report_output,
    )

    gr.Markdown(_FOOTER_MD)

if __name__ == "__main__":
    demo.launch()