---
# Version of the OpenAPI specification this document is written against.
openapi: "3.0.0"

# Human-facing metadata about the API.
info:
  title: dispatchAI Inference API
  # Version of this API description (quoted so it always stays a string).
  version: "1.0.0"
  # Literal block scalar: line breaks in the text below are preserved.
  description: |
    Mobile-optimized LLM inference on real Snapdragon hardware.
    10x cheaper than OpenAI. 46 tokens/sec on phone.
    UAE-built. Small. Mobile. Free.
  contact:
    name: dispatchAI
    url: https://huggingface.co/dispatchAI
# Base URLs against which the operation paths in this document are resolved.
servers:
  - description: Production
    url: https://api.dispatchai.ai/v1
paths:
  # POST /chat/completions: create a chat completion from a list of messages.
  /chat/completions:
    post:
      summary: Chat Completion
      description: Create a chat completion (OpenAI-compatible)
      # Requests must satisfy the apiKeyAuth security scheme.
      security:
        - apiKeyAuth: []
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              # `model` carries a default and the sampling settings are
              # optional, but without `messages` there is nothing to complete.
              # Previously an empty object `{}` validated as a legal request.
              # NOTE(review): assumed from the OpenAI-compatible contract;
              # confirm the server rejects bodies that omit `messages`.
              required:
                - messages
              properties:
                # Identifier of the model to run; restricted to the list below.
                model:
                  type: string
                  default: dispatchAI/SmolLM2-135M-Instruct-mobile
                  enum:
                    - dispatchAI/SmolLM2-135M-Instruct-mobile
                    - dispatchAI/Qwen2.5-0.5B-Instruct-mobile-int4
                    - dispatchAI/Llama-3.2-1B-Instruct-Q4-mobile
                # Conversation so far, as a list of role/content pairs.
                messages:
                  type: array
                  items:
                    type: object
                    # A message lacking either field cannot be interpreted.
                    # NOTE(review): assumed mandatory; confirm with the server.
                    required:
                      - role
                      - content
                    properties:
                      role:
                        type: string
                        enum: [user, assistant, system]
                      content:
                        type: string
                # NOTE(review): presumably an upper bound on generated tokens;
                # no minimum/maximum is declared here.
                max_tokens:
                  type: integer
                  default: 100
                # NOTE(review): presumably the sampling temperature; the
                # accepted range is not declared here.
                temperature:
                  type: number
                  default: 0.7
      responses:
        '200':
          description: Successful response
          content:
            application/json:
              schema:
                type: object
                properties:
                  id:
                    type: string
                  # Each choice wraps one generated message.
                  choices:
                    type: array
                    items:
                      type: object
                      properties:
                        message:
                          type: object
                          properties:
                            role:
                              type: string
                            content:
                              type: string
                  # Token accounting for the request.
                  usage:
                    type: object
                    properties:
                      prompt_tokens:
                        type: integer
                      completion_tokens:
                        type: integer
                      total_tokens:
                        type: integer
  # GET /models: list the models offered by this API.
  /models:
    get:
      description: List all available models
      summary: List Models
      # Requests must satisfy the apiKeyAuth security scheme.
      security:
        - apiKeyAuth: []
      responses:
        "200":
          # NOTE(review): no `content` is declared for this response, so the
          # payload shape is undocumented in this spec.
          description: List of models
# Reusable definitions referenced by name from the operations.
components:
  securitySchemes:
    # API key supplied in the `Authorization` request header.
    # NOTE(review): if the server expects `Authorization: Bearer <key>`, an
    # HTTP bearer scheme would describe this more precisely; confirm.
    apiKeyAuth:
      name: Authorization
      in: header
      type: apiKey