File size: 2,892 Bytes
96129e2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
# Version of the OpenAPI Specification this document is written against.
openapi: '3.0.0'

# Human-facing metadata describing the API.
info:
  title: 'dispatchAI Inference API'
  # Version of this API description; quoted so it always loads as a string.
  version: '1.0.0'
  contact:
    name: 'dispatchAI'
    url: 'https://huggingface.co/dispatchAI'
  # Literal block scalar: line breaks are kept exactly as written.
  description: |
    Mobile-optimized LLM inference on real Snapdragon hardware.
    10x cheaper than OpenAI. 46 tokens/sec on phone.
    UAE-built. Small. Mobile. Free.
# Base URL(s) against which the API paths are resolved.
servers:
  - description: 'Production'
    url: 'https://api.dispatchai.ai/v1'
paths:
  # POST /chat/completions: create a chat completion from a message list.
  /chat/completions:
    post:
      summary: Chat Completion
      description: Create a chat completion (OpenAI-compatible)
      # Guarded by the apiKeyAuth scheme; the empty list means no scopes.
      security:
        - apiKeyAuth: []
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              # A completion cannot be produced without a conversation, so
              # `messages` is mandatory. `model`, `max_tokens` and
              # `temperature` stay optional: each one declares a default.
              required:
                - messages
              properties:
                # Model identifier; restricted to the values listed in `enum`.
                model:
                  type: string
                  default: dispatchAI/SmolLM2-135M-Instruct-mobile
                  enum:
                    - dispatchAI/SmolLM2-135M-Instruct-mobile
                    - dispatchAI/Qwen2.5-0.5B-Instruct-mobile-int4
                    - dispatchAI/Llama-3.2-1B-Instruct-Q4-mobile
                # The conversation, as a list of role/content pairs.
                messages:
                  type: array
                  items:
                    type: object
                    # Every message must state both its role and its text.
                    required:
                      - role
                      - content
                    properties:
                      role:
                        type: string
                        enum: [user, assistant, system]
                      content:
                        type: string
                max_tokens:
                  type: integer
                  default: 100
                temperature:
                  type: number
                  default: 0.7
      responses:
        '200':
          description: Successful response
          content:
            application/json:
              schema:
                type: object
                properties:
                  id:
                    type: string
                  # Each choice carries one generated message.
                  choices:
                    type: array
                    items:
                      type: object
                      properties:
                        message:
                          type: object
                          properties:
                            role:
                              type: string
                            content:
                              type: string
                  # Token counts reported for the request.
                  usage:
                    type: object
                    properties:
                      prompt_tokens:
                        type: integer
                      completion_tokens:
                        type: integer
                      total_tokens:
                        type: integer
  # GET /models: list the models the service exposes.
  /models:
    get:
      # Guarded by the apiKeyAuth scheme; the empty list means no scopes.
      security:
        - apiKeyAuth: []
      summary: 'List Models'
      description: 'List all available models'
      responses:
        # Only a description is declared; no response body schema is given.
        "200":
          description: 'List of models'
# Reusable definitions that operations reference by name.
components:
  securitySchemes:
    # API key sent in the `Authorization` request header.
    # NOTE(review): if the server expects a `Bearer <key>` value, an `http`
    # scheme with `scheme: bearer` would describe that more precisely; confirm.
    apiKeyAuth:
      type: 'apiKey'
      name: 'Authorization'
      in: 'header'