File size: 10,605 Bytes
153626c
 
 
 
 
 
 
 
 
 
 
 
48a542a
153626c
 
 
 
48a542a
153626c
48a542a
153626c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48a542a
 
153626c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48a542a
153626c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48a542a
153626c
48a542a
153626c
 
 
 
 
48a542a
153626c
 
 
48a542a
153626c
 
 
 
 
48a542a
153626c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48a542a
153626c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48a542a
153626c
48a542a
153626c
 
 
48a542a
 
 
153626c
 
 
 
 
 
 
 
 
 
 
 
48a542a
153626c
48a542a
153626c
 
 
 
 
48a542a
 
 
153626c
 
 
 
 
 
 
48a542a
153626c
 
 
 
 
 
 
 
 
48a542a
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
#!/usr/bin/env python3
"""
#301: On-device readiness checker — a Gradio Space that evaluates whether a
given model will run on a mobile device.

Paste a HuggingFace model ID or upload a config, get a "will it run on a phone?" report:
- Parameter count vs memory budget
- Architecture compatibility
- Quantization recommendations
- Estimated phone farm performance
- Recommended dispatchAI model alternatives
"""
import gradio as gr
import json
import requests
from huggingface_hub import hf_hub_download, HfApi
import os

# Hub access token taken from the HF_TOKEN environment variable once, at
# import time; falls back to an empty string when the variable is not set.
token = os.getenv("HF_TOKEN", "")

# Hardware profiles of the devices a model can be checked against.
# The keys double as the labels offered in the "Target Device" dropdown, so
# their order here is the order shown to the user.
#   usable_ram_gb     - RAM left once OS overhead is subtracted
#   max_model_size_gb - conservative ceiling for the model file on that device
PHONE_SPECS = {
    "Samsung S20 FE (Snapdragon 865, 8GB)": dict(
        chipset="Snapdragon 865",
        ram_gb=8,
        usable_ram_gb=6,
        cpu_cores=8,
        max_model_size_gb=4,
    ),
    "Samsung S23 (Snapdragon 8 Gen 2, 8GB)": dict(
        chipset="Snapdragon 8 Gen 2",
        ram_gb=8,
        usable_ram_gb=6,
        cpu_cores=8,
        max_model_size_gb=4,
    ),
    "iPhone 15 Pro (A17 Pro, 8GB)": dict(
        chipset="Apple A17 Pro",
        ram_gb=8,
        usable_ram_gb=6,
        cpu_cores=6,
        max_model_size_gb=4,
    ),
    "Budget Android (4GB RAM)": dict(
        chipset="Mid-range",
        ram_gb=4,
        usable_ram_gb=3,
        cpu_cores=8,
        max_model_size_gb=2,
    ),
}

# Catalog of dispatchAI mobile models that can be suggested as alternatives.
# Each row is (repo id, parameters in millions, file size in MB, task) and is
# expanded into a dict with the keys "id", "params_m", "size_mb" and "task".
DISPATCHAI_MODELS = [
    dict(zip(("id", "params_m", "size_mb", "task"), row))
    for row in (
        ("dispatchAI/SmolLM2-135M-Instruct-mobile", 135, 270, "chat"),
        ("dispatchAI/SmolLM2-360M-Instruct-mobile", 360, 720, "chat"),
        ("dispatchAI/Qwen2.5-0.5B-Instruct-mobile-int4", 500, 350, "chat"),
        ("dispatchAI/Qwen2.5-0.5B-Coder-mobile", 500, 350, "code"),
        ("dispatchAI/Llama-3.2-1B-Instruct-mobile", 1000, 2000, "chat"),
        ("dispatchAI/TinyLlama-1.1B-Chat-Q5-mobile", 1100, 450, "chat"),
        ("dispatchAI/Qwen2.5-1.5B-Instruct-Q5-mobile", 1500, 900, "chat"),
        ("dispatchAI/Gemma-2-2B-IT-Q5-mobile", 2000, 1300, "chat"),
        ("dispatchAI/Phi-3.5-mini-instruct-Q5-mobile", 2000, 1300, "chat"),
        ("dispatchAI/Gemma-2B-Arabic-mobile", 2000, 1300, "arabic"),
    )
]

def fetch_model_info(model_id):
    """Download a model's ``config.json`` and total up its weight-file sizes.

    Args:
        model_id: HuggingFace Hub repo ID, e.g. ``"Qwen/Qwen2.5-0.5B-Instruct"``.

    Returns:
        ``(config, size_mb)`` on success: ``config`` is the parsed
        ``config.json`` and ``size_mb`` is the combined size (bytes / 1e6) of
        every ``.safetensors``, ``.bin`` and ``.gguf`` file in the repo, or 0
        when no sizes could be obtained.
        ``(None, error_message)`` when the config download, its parsing, or
        the file listing fails.
    """
    try:
        config_path = hf_hub_download(model_id, "config.json", token=token)
        with open(config_path, "r", encoding="utf-8") as config_file:
            config = json.load(config_file)

        api = HfApi(token=token)
        weight_files = [
            name
            for name in api.list_repo_files(model_id, token=token)
            if name.endswith((".safetensors", ".bin", ".gguf"))
        ]

        # NOTE: a repo that ships the same weights in several formats
        # (e.g. both .safetensors and .bin) has all of them added together.
        size_mb = 0
        if weight_files:
            try:
                # One request for all files instead of one request per file.
                infos = api.get_paths_info(
                    model_id, weight_files, repo_type="model", token=token
                )
            except Exception:
                # Size lookup is best-effort: the config alone is enough for
                # the caller, so a failure here must not fail the whole fetch.
                infos = []
            size_mb = sum((getattr(info, "size", None) or 0) for info in infos) / 1e6

        return config, size_mb
    except Exception as e:
        return None, str(e)

def estimate_params(config):
    """Roughly estimate a transformer's parameter count from its config.

    The estimate is the sum of:
      - attention: ``4 * hidden_size**2`` per layer (Q, K, V, O projections)
      - MLP: ``2 * hidden_size * intermediate_size`` per layer
      - embeddings: ``vocab_size * hidden_size``

    Args:
        config: Parsed ``config.json`` mapping. Missing or ``null`` fields
            count as 0, except that ``intermediate_size`` falls back to
            ``4 * hidden_size`` and ``num_hidden_layers`` falls back to
            ``num_layers``.

    Returns:
        The estimated parameter count in millions, or 0 when ``config`` is
        not a mapping or contains non-numeric values.
    """
    try:
        hidden = config.get("hidden_size") or 0
        vocab = config.get("vocab_size") or 0

        layers = config.get("num_hidden_layers")
        if layers is None:
            layers = config.get("num_layers") or 0

        # An explicit null must behave like an absent key; otherwise the
        # multiplication below fails and the whole estimate collapses to 0.
        intermediate = config.get("intermediate_size")
        if intermediate is None:
            intermediate = hidden * 4

        attention_params = 4 * hidden * hidden * layers
        mlp_params = 2 * hidden * intermediate * layers
        embed_params = vocab * hidden

        return (attention_params + mlp_params + embed_params) / 1e6  # in millions
    except (AttributeError, TypeError, OverflowError):
        # Not a mapping, or a field holds something that is not a number.
        return 0

def check_readiness(model_id, target_device):
    """Build a Markdown on-device readiness report for a HuggingFace model.

    Args:
        model_id: HuggingFace Hub repo ID typed by the user. ``None`` or a
            blank string yields a prompt to enter one.
        target_device: A key of ``PHONE_SPECS``. Any other value (including
            ``None``) falls back to the Samsung S20 FE profile.

    Returns:
        A Markdown string: either the full report (model properties, size per
        quantization format versus the device budget, speed estimate, device
        specs, suggested dispatchAI models and a final recommendation) or a
        one-paragraph error message.
    """
    if not model_id or not model_id.strip():
        return "Please enter a HuggingFace model ID."
    model_id = model_id.strip()

    # fetch_model_info returns (config, size_mb) on success and
    # (None, "<error text>") on failure.
    config, detail = fetch_model_info(model_id)
    if not config:
        if isinstance(detail, str):
            return f"❌ **Error fetching model info**: {detail}\n\nCheck the model ID and try again."
        return f"❌ Could not fetch config for `{model_id}`"

    # Resolve the device up front so the label printed in the report always
    # matches the specs that were actually used.
    if target_device not in PHONE_SPECS:
        target_device = "Samsung S20 FE (Snapdragon 865, 8GB)"
    specs = PHONE_SPECS[target_device]
    device_label = target_device.split("(")[0].strip()

    params_m = estimate_params(config)
    if params_m <= 0:
        # Without a size estimate every check below would trivially pass and
        # the model would be rated "Excellent", so stop here instead.
        return (
            f"⚠️ **Could not estimate the size of `{model_id}`.** "
            "Its `config.json` does not provide usable `hidden_size`, "
            "`num_hidden_layers` and `vocab_size` values."
        )

    model_type = config.get("model_type", "unknown")
    hidden_size = config.get("hidden_size", 0)
    # Same fallback as estimate_params, so the table agrees with the estimate.
    num_layers = config.get("num_hidden_layers", config.get("num_layers", 0))

    # params_m is in millions, so multiplying by bytes-per-parameter gives MB.
    # Rows are generated from this table so that no format can be listed in
    # the report without its fit check being computed.
    quant_formats = (
        ("FP16", 2),
        ("Q8", 1),
        ("Q5_K_M", 0.625),
        ("Q4_K_M", 0.5),
    )
    budget_mb = specs["max_model_size_gb"] * 1024
    size_rows = []
    for label, bytes_per_param in quant_formats:
        format_size_mb = params_m * bytes_per_param
        verdict = "✅" if format_size_mb < budget_mb else "❌"
        size_rows.append(f"| {label} | {format_size_mb:.0f}MB | {verdict} |")
    size_table = "\n".join(size_rows)

    # Phone farm performance estimate (based on real benchmarks)
    # S20 FE: ~18 t/s for 135M, ~10 t/s for 500M, ~6 t/s for 1B, ~3 t/s for 2B
    ready = "✅ **This model is ready for mobile deployment.** Use Q4_K_M or Q5_K_M GGUF for best size/quality balance."
    usable = "⚠️ **This model is usable but may be slow.** Consider Q4_K_M quantization and test on target hardware."
    slow = "⚠️ **This model will be slow on mobile.** Consider a smaller alternative from dispatchAI."
    too_large = "❌ **This model is too large for mobile deployment.** Use a dispatchAI alternative above."
    # (exclusive upper bound in millions of params, speed, rating, advice)
    tiers = (
        (200, "15-20 t/s", "🟢 Excellent", ready),
        (600, "8-12 t/s", "🟢 Good", ready),
        (1200, "5-7 t/s", "🟡 Usable", usable),
        (2500, "2-4 t/s", "🟠 Slow", slow),
    )
    est_tps, rating, advice = "< 2 t/s", "🔴 Too large", too_large
    for upper_bound, tier_tps, tier_rating, tier_advice in tiers:
        if params_m < upper_bound:
            est_tps, rating, advice = tier_tps, tier_rating, tier_advice
            break

    # Suggest catalog models between 50% and 120% of the estimated size;
    # failing that, the three largest catalog models that are still smaller.
    recommendations = [
        m for m in DISPATCHAI_MODELS
        if params_m * 0.5 <= m["params_m"] <= params_m * 1.2
    ]
    if not recommendations:
        smaller = [m for m in DISPATCHAI_MODELS if m["params_m"] < params_m]
        recommendations = sorted(smaller, key=lambda m: m["params_m"], reverse=True)[:3]

    rec_text = "\n".join(
        f"- [`{m['id']}`](https://huggingface.co/{m['id']}) — {m['params_m']}M params, {m['size_mb']}MB"
        for m in recommendations[:5]
    )
    if not rec_text:
        rec_text = "No close matches found."

    return f"""## 📱 On-Device Readiness Report

### Model: `{model_id}`

| Property | Value |
|----------|-------|
| Architecture | {model_type} |
| Hidden size | {hidden_size} |
| Layers | {num_layers} |
| Estimated params | ~{params_m:.0f}M |

### Size estimates by quantization

| Format | Size | Fits {device_label}? |
|--------|------|------|
{size_table}

### Performance estimate (Snapdragon 865)

| Metric | Value |
|--------|-------|
| Estimated speed | {est_tps} |
| Readiness | {rating} |

### Target device: {target_device}

| Property | Value |
|----------|-------|
| Chipset | {specs['chipset']} |
| RAM | {specs['ram_gb']}GB |
| Max model size | {specs['max_model_size_gb']}GB |

### Recommended dispatchAI alternatives

{rec_text}

### Recommendation

{advice}"""

# Stylesheet handed to gr.Blocks: dark page background with light text,
# cyan headings, and a blue-to-cyan gradient on buttons.
custom_css = (
    "\n"
    ".gradio-container { background: #0A0F1A !important; color: #F5F7FA !important; }\n"
    "h1, h2, h3 { color: #1FE0E6 !important; }\n"
    ".gr-button { background: linear-gradient(135deg, #2E6BFF, #1FE0E6) !important; color: #0A0F1A !important; }\n"
)

# Page layout: intro text, one input row (model ID, device, button), the
# report area, and a static "how it works" footer.
with gr.Blocks(css=custom_css, title="On-Device Readiness Checker") as demo:
    gr.Markdown("""
    # 📱 On-Device Readiness Checker

    **Will your model run on a phone?** Paste a HuggingFace model ID and find out.

    Powered by [dispatchAI](https://huggingface.co/dispatchAI) — mobile AI that runs.
    """)

    with gr.Row():
        model_input = gr.Textbox(
            label="HuggingFace Model ID",
            placeholder="e.g., Qwen/Qwen2.5-0.5B-Instruct",
            scale=3
        )
        # Choices come straight from PHONE_SPECS so the selected label is
        # always a valid key for check_readiness.
        device_input = gr.Dropdown(
            choices=list(PHONE_SPECS.keys()),
            value="Samsung S20 FE (Snapdragon 865, 8GB)",
            label="Target Device",
            scale=2
        )
        check_btn = gr.Button("Check Readiness", variant="primary", scale=1)

    report_output = gr.Markdown(label="Readiness Report")

    # The button renders check_readiness's Markdown into the report area.
    check_btn.click(
        fn=check_readiness,
        inputs=[model_input, device_input],
        outputs=report_output
    )

    gr.Markdown("""
    ---
    ### How it works

    1. Fetches the model's `config.json` from HuggingFace
    2. Estimates parameter count and size for each quantization level
    3. Compares against the target device's memory budget
    4. Estimates inference speed based on real phone farm benchmarks
    5. Recommends dispatchAI mobile-optimized alternatives

    ### Try these models

    - `Qwen/Qwen2.5-0.5B-Instruct` — small model, should pass
    - `Qwen/Qwen2.5-7B-Instruct` — large model, should fail
    - `meta-llama/Llama-3.2-1B-Instruct` — borderline
    - `HuggingFaceTB/SmolLM2-135M-Instruct` — tiny, excellent

    ---
    *Dispatch AI (FZE), Sharjah SRTI Free Zone, License No. 10818.*
    """)

# Start the web server only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()