radinplaid commited on
Commit
b6b0c93
·
1 Parent(s): e732115

Initial commit

Browse files
Dockerfile ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM nvidia/cuda:12.6.3-cudnn-runtime-ubuntu24.04

# Install system dependencies
RUN apt-get update && apt-get install -y \
    build-essential \
    curl \
    python3-pip \
    python3 \
    && rm -rf /var/lib/apt/lists/*

# Create a non-root user for security
RUN useradd -m -u 1001 user

ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

# Copy the source tree ONCE, owned by the non-root user.
# (Previously the tree was copied twice -- to /app and to $HOME/app --
# which doubled the image layers without changing runtime behavior.)
WORKDIR $HOME/app
COPY --chown=user . $HOME/app

# Install the package and dependencies (runs as root, before USER is set).
# This also installs the quickmt cli scripts.
RUN pip install --break-system-packages --no-cache-dir $HOME/app/

# Expose the default FastAPI port
EXPOSE 7860

USER user

# Hf Spaces expect the app on port 7860 usually
# We override the port via env var or CLI arg
CMD ["uvicorn", "quickmt.rest_server:app", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -1,11 +1,65 @@
1
- ---
2
- title: Quickmt Gui
3
- emoji: 🐨
4
- colorFrom: green
5
- colorTo: red
6
- sdk: docker
7
- pinned: false
8
- short_description: 'QuickMT Web Application '
9
- ---
10
-
11
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # `quickmt` Neural Machine Translation Inference Library
2
+
3
+ ## REST Server Features
4
+
5
+ - **Dynamic Batching**: Multiple concurrent HTTP requests are pooled together to maximize GPU utilization.
6
+ - **Multi-Model Support**: Requests are routed to specific models based on `src_lang` and `tgt_lang`.
7
+ - **LRU Cache**: Automatically loads and unloads models based on usage to manage memory.
8
+
9
+
10
+ ## Installation
11
+
12
+ ```bash
13
+ pip install .
14
+ ```
15
+
16
+ ## Running the Web Application
17
+
18
+ ```bash
19
+ export MAX_LOADED_MODELS=3
20
+ export MAX_BATCH_SIZE=32
21
+ export DEVICE=cuda # or cpu
22
+ export COMPUTE_TYPE=int8 # default, auto, int8, float16, etc.
23
+ quickmt-gui
24
+ ```
25
+
26
+
27
+
28
+ ## Running the REST Server
29
+
30
+ ```bash
31
+ export MAX_LOADED_MODELS=3
32
+ export MAX_BATCH_SIZE=32
33
+ export DEVICE=cuda # or cpu
34
+ export COMPUTE_TYPE=int8 # default, auto, int8, float16, etc.
35
+ quickmt-serve
36
+ ```
37
+
38
+
39
+ ## API Usage
40
+
41
+ ### Translate
42
+ ```bash
43
+ curl -X POST http://localhost:8000/translate \
44
+ -H "Content-Type: application/json" \
45
+ -d '{"src":"Hello world","src_lang":null,"tgt_lang":"fr","beam_size":2,"patience":1,"length_penalty":1,"coverage_penalty":0,"repetition_penalty":1}'
46
+ ```
47
+
48
+ Returns:
49
+ ```json
50
+ {
51
+ "translation":"Bonjour tout le monde !",
52
+ "src_lang":"en",
53
+ "src_lang_score":0.16532786190509796,
54
+ "tgt_lang":"fr",
55
+ "processing_time":2.2334513664245605,
56
+ "model_used":"quickmt/quickmt-en-fr"
57
+ }
58
+ ```
59
+
60
+ ## Load Testing with Locust
61
+ To simulate a multi-user load:
62
+ ```bash
63
+ locust -f locustfile.py --host http://localhost:8000
64
+ ```
65
+ Then open http://localhost:8089 in your browser.
locustfile.py ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import random
2
+ from locust import FastHttpUser, task, between
3
+
4
+
5
class TranslationUser(FastHttpUser):
    """Simulated user that load-tests the quickmt translation REST API.

    Exercises /translate (single string, list, and auto-detect modes),
    /identify-language and /health. Models are discovered once per user
    via /models in :meth:`on_start`.
    """

    # No think time between tasks: generate maximum request pressure.
    wait_time = between(0, 0)

    # Sample sentences for translation and identification
    sample_texts = [
        "The quick brown fox jumps over the lazy dog.",
        "Can we translate this correctly and quickly?",
        "هذا نص تجريبي باللغة العربية.",  # Arabic
        "الذكاء الاصطناعي هو المستقبل.",  # Arabic (AI is the future)
        "أحب تعلم لغات جديدة.",  # Arabic (I love learning new languages)
        "这是一段中文测试文本。",  # Chinese
        "人工智能正在改变世界。",  # Chinese (AI is changing the world)
        "今天天气真好,去公园散步。",  # Chinese (Weather is nice, let's walk)
        "Bonjour, comment allez-vous ?",  # French
        "L'intelligence artificielle transforme notre vie quotidienne.",  # French (AI transforms daily life)
        "Ceci est un exemple de phrase en français.",  # French
    ]

    def on_start(self):
        """Discover available models on startup."""
        try:
            response = self.client.get("/models")
            if response.status_code == 200:
                self.available_models = response.json().get("models", [])
                if not self.available_models:
                    print("No models found. Load test might fail.")
            else:
                self.available_models = []
        except Exception as e:
            print(f"Error discovering models: {e}")
            self.available_models = []

    def get_random_model(self):
        """
        Return a model, favoring the first 3 (hot set) 99.99% of the time,
        and others (cold set) 0.01% of the time to trigger LRU eviction.

        NOTE: the hot/cold split is 0.9999 / 0.0001 (the previous docstring
        said 99% / 1%, which did not match the implemented probabilities).
        """
        if not self.available_models:
            return None

        # If we have 4 or more models, we can simulate eviction cycles
        if len(self.available_models) >= 4:
            # 99.99% chance to pick from the first 3
            if random.random() < 0.9999:
                return random.choice(self.available_models[:3])
            else:
                # 0.01% chance to pick from the rest
                return random.choice(self.available_models[3:])

        return random.choice(self.available_models)

    @task(1)
    def translate_single(self):
        """Translate one sentence with an explicit source language."""
        model = self.get_random_model()
        if not model:
            return

        self.client.post(
            "/translate",
            json={
                # Random suffix defeats any server-side response caching.
                "src": random.choice(self.sample_texts) + str(random.random()),
                "src_lang": model["src_lang"],
                "tgt_lang": model["tgt_lang"],
                "beam_size": 2,
            },
            name="/translate [single, manual]",
        )

    @task(1)
    def translate_auto_detect(self):
        """Translate without specifying src_lang to trigger LangID."""
        ret = self.client.post(
            "/translate",
            json={
                "src": random.choice(self.sample_texts) + str(random.random()),
                "tgt_lang": "en",
                "beam_size": 2,
            },
            name="/translate [single, auto-detect]",
        )
        ret_json = ret.json()
        assert "src_lang" in ret_json
        assert "tgt_lang" in ret_json
        assert "translation" in ret_json
        assert "src_lang_score" in ret_json
        assert "model_used" in ret_json
        assert ret_json["tgt_lang"] == "en"

    @task(1)
    def translate_list(self):
        """Translate a small batch of sentences in one request."""
        model = self.get_random_model()
        if not model:
            return

        num_sentences = random.randint(2, 5)
        texts = random.sample(self.sample_texts, num_sentences)
        texts = [i + str(random.random()) for i in texts]
        ret = self.client.post(
            "/translate",
            json={
                "src": texts,
                "src_lang": model["src_lang"],
                "tgt_lang": model["tgt_lang"],
                "beam_size": 2,
            },
            name="/translate [list, manual]",
        )
        ret_json = ret.json()
        for i in ret_json["src_lang"]:
            assert i == model["src_lang"]
        assert ret_json["tgt_lang"] == model["tgt_lang"]
        assert len(ret_json["translation"]) == num_sentences

    @task(1)
    def identify_language(self):
        """Directly benchmark the identification endpoint."""
        num_sentences = random.randint(1, 4)
        texts = random.sample(self.sample_texts, num_sentences)
        src = texts[0] if num_sentences == 1 else texts

        self.client.post(
            "/identify-language", json={"src": src}, name="/identify-language"
        )

    @task(1)
    def health_check(self):
        """Lightweight liveness probe."""
        self.client.get("/health", name="/health")
pyproject.toml ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "quickmt"
7
+ version = "0.1.0"
8
+ description = "A fast, multi-model translation API based on CTranslate2 and FastAPI"
9
+ readme = "README.md"
10
+ requires-python = ">=3.9"
11
+ license = {text = "MIT"}
12
+ authors = [
13
+ {name = "QuickMT Team", email = "hello@quickmt.ai"},
14
+ ]
15
+ dependencies = [
16
+ "blingfire",
17
+ "cachetools",
18
+ "fastapi",
19
+ "uvicorn[standard]",
20
+ "ctranslate2>=3.20.0",
21
+ "sentencepiece",
22
+ "huggingface-hub",
23
+ "fasttext-wheel",
24
+ "orjson",
25
+ "uvloop",
26
+ "httptools",
27
+ "pydantic",
28
+ "pydantic-settings"
29
+ ]
30
+
31
+ [project.optional-dependencies]
32
+ dev = [
33
+ "pytest",
34
+ "pytest-asyncio",
35
+ "httpx",
36
+ "sacrebleu",
37
+ "locust"
38
+ ]
39
+
40
+ [project.scripts]
41
+ quickmt-serve = "quickmt.rest_server:start"
42
+ quickmt-gui = "quickmt.rest_server:start_gui"
43
+
44
+ [tool.hatch.build.targets.wheel]
45
+ packages = ["quickmt"]
pytest.ini ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ [pytest]
2
+ asyncio_mode = auto
3
+ asyncio_default_fixture_loop_scope = function
quickmt/__init__.py ADDED
File without changes
quickmt/gui/static/app.js ADDED
@@ -0,0 +1,408 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
// QuickMT GUI controller: wires up theme/sidebar/settings persistence,
// language discovery, debounced line-by-line translation, model listing,
// and navigation. Runs once the DOM is ready.
document.addEventListener('DOMContentLoaded', () => {
    // Elements
    const srcText = document.getElementById('src-text');
    const tgtText = document.getElementById('tgt-text');
    const srcLangSelect = document.getElementById('src-lang-select');
    const tgtLangSelect = document.getElementById('tgt-lang-select');
    const charCount = document.getElementById('char-count');
    const timingInfo = document.getElementById('timing-info');
    const loader = document.getElementById('translation-loader');
    const detectedBadge = document.getElementById('detected-badge');
    const navLinks = document.querySelectorAll('.nav-links a');
    const views = document.querySelectorAll('.view');
    const healthIndicator = document.getElementById('health-indicator');
    const modelsList = document.getElementById('models-list');
    const copyBtn = document.getElementById('copy-btn');
    const themeToggle = document.getElementById('theme-toggle');
    const sidebarToggle = document.getElementById('sidebar-toggle');
    const sidebar = document.querySelector('.sidebar');

    let debounceTimer;
    let languages = {};
    let activeController = null;

    let settings = {
        beam_size: 2,
        patience: 1,
        length_penalty: 1.0,
        coverage_penalty: 0.0,
        repetition_penalty: 1.0
    };

    // 0. Theme Logic
    function initTheme() {
        const savedTheme = localStorage.getItem('theme') || 'dark';
        if (savedTheme === 'light') {
            document.body.classList.add('light-mode');
            updateThemeUI(true);
        }
    }

    function updateThemeUI(isLight) {
        const text = themeToggle.querySelector('.mode-text');
        text.textContent = isLight ? 'Light Mode' : 'Dark Mode';
    }

    themeToggle.addEventListener('click', () => {
        const isLight = document.body.classList.toggle('light-mode');
        localStorage.setItem('theme', isLight ? 'light' : 'dark');
        updateThemeUI(isLight);
    });

    // 0.1 Sidebar Logic
    function initSidebar() {
        const isCollapsed = localStorage.getItem('sidebar-collapsed') === 'true';
        if (isCollapsed) sidebar.classList.add('collapsed');
    }

    sidebarToggle.addEventListener('click', () => {
        const isCollapsed = sidebar.classList.toggle('collapsed');
        localStorage.setItem('sidebar-collapsed', isCollapsed);
    });

    // 0.2 Inference Settings Logic
    function initSettings() {
        const saved = localStorage.getItem('inference-settings');
        if (saved) {
            try {
                const parsed = JSON.parse(saved);
                settings = { ...settings, ...parsed };
            } catch (e) { console.error("Failed to parse settings", e); }
        }
        updateSettingsUI();
    }

    function updateSettingsUI() {
        // Sync values to inputs
        Object.keys(settings).forEach(key => {
            const input = document.getElementById(`setting-${key.replace('_', '-')}`);
            if (input) {
                input.value = settings[key];
                const valDisplay = input.nextElementSibling;
                if (valDisplay && valDisplay.classList.contains('setting-val')) {
                    valDisplay.textContent = settings[key];
                }
            }
        });
    }

    function saveSettings() {
        localStorage.setItem('inference-settings', JSON.stringify(settings));
    }

    // Add listeners to all settings inputs
    const settingsInputs = [
        'setting-beam-size', 'setting-patience', 'setting-length-penalty',
        'setting-coverage-penalty', 'setting-repetition-penalty'
    ];

    settingsInputs.forEach(id => {
        const input = document.getElementById(id);
        const key = id.replace('setting-', '').replace(/-/g, '_');

        input.addEventListener('input', () => {
            let val = parseFloat(input.value);
            if (id === 'setting-beam-size' || id === 'setting-patience') val = parseInt(input.value);

            settings[key] = val;

            // Enforcement: patience <= beam_size
            if (id === 'setting-beam-size') {
                if (settings.patience > settings.beam_size) {
                    settings.patience = settings.beam_size;
                    const patienceInput = document.getElementById('setting-patience');
                    patienceInput.value = settings.patience;
                    patienceInput.nextElementSibling.textContent = settings.patience;
                }
                // Update patience max slider to match beam_size for better UX?
                // User said "maximum 10", so let's stick to that but cap the value.
            } else if (id === 'setting-patience') {
                if (val > settings.beam_size) {
                    val = settings.beam_size;
                    input.value = val;
                    settings.patience = val;
                }
            }

            const valDisplay = input.nextElementSibling;
            if (valDisplay && valDisplay.classList.contains('setting-val')) {
                valDisplay.textContent = val;
            }
            saveSettings();
        });
    });

    document.getElementById('reset-settings').addEventListener('click', () => {
        settings = {
            beam_size: 2,
            patience: 1,
            length_penalty: 1.0,
            coverage_penalty: 0.0,
            repetition_penalty: 1.0
        };
        updateSettingsUI();
        saveSettings();
    });

    // 1. Fetch available languages and populate selects
    async function initLanguages() {
        try {
            const res = await fetch('/api/languages');
            if (res.ok) {
                languages = await res.json();
                populateSelects();
                updateHealth(true);
            }
        } catch (e) {
            console.error("Failed to load languages", e);
            updateHealth(false);
        }
    }

    function populateSelects() {
        const currentSrc = srcLangSelect.value;
        // Keep only the first "Auto-detect" option
        srcLangSelect.innerHTML = '<option value="">Auto-detect</option>';

        const sources = Object.keys(languages);

        // Populate Source Languages
        sources.forEach(lang => {
            const opt = document.createElement('option');
            opt.value = lang;
            opt.textContent = lang.toUpperCase();
            srcLangSelect.appendChild(opt);
        });

        // Restore selection if it still exists
        if (currentSrc && languages[currentSrc]) {
            srcLangSelect.value = currentSrc;
        }

        // Trigger target population for default selection
        updateTargetOptions();
    }

    function updateTargetOptions() {
        const src = srcLangSelect.value;
        const currentTgt = tgtLangSelect.value;

        // Clear targets
        tgtLangSelect.innerHTML = '';

        let availableTgts = [];
        if (src) {
            availableTgts = languages[src] || [];
        } else {
            // If auto-detect, union of all targets
            const allTgts = new Set();
            Object.values(languages).forEach(list => list.forEach(l => allTgts.add(l)));
            availableTgts = Array.from(allTgts).sort();
        }

        availableTgts.forEach(lang => {
            const opt = document.createElement('option');
            opt.value = lang;
            opt.textContent = lang.toUpperCase();
            if (lang === currentTgt || (availableTgts.length === 1)) opt.selected = true;
            tgtLangSelect.appendChild(opt);
        });
    }

    // 2. Translation Logic
    async function performTranslation() {
        const fullText = srcText.value;
        if (!fullText.trim()) {
            tgtText.value = '';
            timingInfo.textContent = 'Ready';
            detectedBadge.classList.remove('visible');
            return;
        }

        // Abort previous requests
        if (activeController) activeController.abort();
        activeController = new AbortController();
        const { signal } = activeController;

        const lines = fullText.split('\n');
        const translatedLines = new Array(lines.length).fill('');
        let srcLang = srcLangSelect.value || null;
        const tgtLang = tgtLangSelect.value;

        loader.classList.remove('hidden');
        let completedLines = 0;
        let totalToTranslate = lines.filter(l => l.trim()).length;

        try {
            // Step 1: If auto-detect mode, detect language for entire input first
            if (!srcLang && fullText.trim()) {
                const detectResponse = await fetch('/api/identify-language', {
                    method: 'POST',
                    headers: { 'Content-Type': 'application/json' },
                    body: JSON.stringify({
                        src: fullText,
                        k: 1,
                        threshold: 0.0
                    }),
                    signal
                });

                if (detectResponse.ok) {
                    const detectData = await detectResponse.json();
                    // Get the detected language from the response
                    if (detectData.results && detectData.results.length > 0) {
                        srcLang = detectData.results[0].lang;
                        detectedBadge.textContent = `Detected: ${srcLang.toUpperCase()}`;
                        detectedBadge.classList.add('visible');
                    }
                }
            }

            // Step 2: Translate all lines with known source language
            const updateTgtUI = () => {
                tgtText.value = translatedLines.join('\n');
            };

            const translateParagraph = async (line, index) => {
                if (!line.trim()) {
                    translatedLines[index] = line;
                    updateTgtUI();
                    return;
                }

                try {
                    const response = await fetch('/api/translate', {
                        method: 'POST',
                        headers: { 'Content-Type': 'application/json' },
                        body: JSON.stringify({
                            src: line,
                            src_lang: srcLang, // Now we always have a source language
                            tgt_lang: tgtLang,
                            ...settings
                        }),
                        signal
                    });

                    if (response.ok) {
                        const data = await response.json();
                        translatedLines[index] = data.translation;
                    } else {
                        // BUGFIX: a non-OK response used to leave the line blank
                        // and never count it, so the loader spun forever.
                        translatedLines[index] = `[[Error: ${line}]]`;
                    }

                    completedLines++;
                    updateTgtUI();
                    timingInfo.textContent = `Translating: ${Math.round((completedLines / totalToTranslate) * 100)}%`;
                } catch (e) {
                    if (e.name !== 'AbortError') {
                        console.error("Line translation error", e);
                        translatedLines[index] = `[[Error: ${line}]]`;
                        // BUGFIX: count failed lines too, otherwise
                        // completedLines never reaches totalToTranslate and the
                        // loader is never hidden. Aborted requests belong to a
                        // superseded run and are deliberately NOT counted.
                        completedLines++;
                        updateTgtUI();
                    }
                } finally {
                    if (completedLines === totalToTranslate) {
                        loader.classList.add('hidden');
                        timingInfo.textContent = 'Done';
                    }
                }
            };

            // Fire all translation requests in parallel
            lines.forEach((line, i) => translateParagraph(line, i));

        } catch (e) {
            if (e.name !== 'AbortError') {
                console.error("Translation error", e);
                loader.classList.add('hidden');
                timingInfo.textContent = 'Error';
            }
        }
    }

    // 3. Models View
    async function fetchModels() {
        try {
            const res = await fetch('/api/models');
            const data = await res.json();

            modelsList.innerHTML = '';

            // Use DocumentFragment for better performance
            const fragment = document.createDocumentFragment();

            data.models.forEach(m => {
                const card = document.createElement('div');
                card.className = 'model-card';
                card.innerHTML = `
                    <div class="model-lang-pair">
                        <span>${m.src_lang.toUpperCase()}</span>
                        <span>→</span>
                        <span>${m.tgt_lang.toUpperCase()}</span>
                    </div>
                    <div class="model-id">${m.model_id}</div>
                    ${m.loaded ? '<span class="loaded-badge">Currently Loaded</span>' : ''}
                `;
                fragment.appendChild(card);
            });

            // Single DOM update instead of multiple
            modelsList.appendChild(fragment);
        } catch (e) {
            modelsList.innerHTML = '<p>Error loading models</p>';
        }
    }

    // 4. UI Helpers
    function updateHealth(isOnline) {
        if (isOnline) {
            healthIndicator.className = 'status-pill status-online';
            healthIndicator.querySelector('.status-text').textContent = 'Online';
        } else {
            healthIndicator.className = 'status-pill status-loading';
            healthIndicator.querySelector('.status-text').textContent = 'Offline';
        }
    }

    // Event Listeners
    srcText.addEventListener('input', () => {
        charCount.textContent = `${srcText.value.length} characters`;
        clearTimeout(debounceTimer);
        debounceTimer = setTimeout(performTranslation, 250);
    });

    srcLangSelect.addEventListener('change', () => {
        updateTargetOptions();
        performTranslation();
    });

    tgtLangSelect.addEventListener('change', performTranslation);

    copyBtn.addEventListener('click', () => {
        navigator.clipboard.writeText(tgtText.value);
        const originalText = copyBtn.textContent;
        copyBtn.textContent = 'Copied!';
        setTimeout(() => copyBtn.textContent = originalText, 2000);
    });

    // Navigation
    navLinks.forEach(link => {
        link.addEventListener('click', (e) => {
            e.preventDefault();
            const targetId = link.getAttribute('href').substring(1);

            navLinks.forEach(l => l.parentElement.classList.remove('active'));
            link.parentElement.classList.add('active');

            views.forEach(v => {
                v.classList.remove('active');
                if (v.id === `${targetId}-view`) v.classList.add('active');
            });

            if (targetId === 'models') fetchModels();
        });
    });

    // Start
    initTheme();
    initSidebar();
    initSettings();
    initLanguages();
    setInterval(initLanguages, 10000); // Pulse health check
});
quickmt/gui/static/index.html ADDED
@@ -0,0 +1,212 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+
4
+ <head>
5
+ <meta charset="UTF-8">
6
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
7
+ <title>QuickMT Machine Translation</title>
8
+ <link rel="preconnect" href="https://fonts.googleapis.com">
9
+ <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
10
+ <link
11
+ href="https://fonts.googleapis.com/css2?family=Outfit:wght@300;400;500;600;700&family=Inter:wght@300;400;500;600&display=swap"
12
+ rel="stylesheet">
13
+ <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.5.1/css/all.min.css">
14
+ <link rel="stylesheet" href="style.css">
15
+ </head>
16
+
17
+ <body>
18
+ <div class="bg-blur"></div>
19
+
20
+ <div class="top-nav-links">
21
+ <a href="https://huggingface.co/quickmt" target="_blank" title="Hugging Face Models" class="glass-btn">
22
+ <span class="btn-icon">🤗</span>
23
+ </a>
24
+ <a href="https://github.com/quickmt/quickmt" target="_blank" title="GitHub Repository"
25
+ class="glass-btn icon-only">
26
+ <span class="btn-icon"><i class="fa-brands fa-github"></i></span>
27
+ </a>
28
+ </div>
29
+
30
+ <main class="app-container">
31
+ <!-- Sidebar Navigation -->
32
+ <nav class="sidebar glass">
33
+ <div class="logo">
34
+ <div class="logo-icon">Q</div>
35
+ <span>QuickMT</span>
36
+ </div>
37
+ <ul class="nav-links">
38
+ <li class="active">
39
+ <a href="#translate">
40
+ <span class="nav-icon">🔁</span>
41
+ <span class="nav-text">Translate</span>
42
+ </a>
43
+ </li>
44
+ <li>
45
+ <a href="#models">
46
+ <span class="nav-icon">🧩</span>
47
+ <span class="nav-text">Models</span>
48
+ </a>
49
+ </li>
50
+ <li>
51
+ <a href="#settings">
52
+ <span class="nav-icon">⚙️</span>
53
+ <span class="nav-text">Settings</span>
54
+ </a>
55
+ </li>
56
+ </ul>
57
+ <div class="sidebar-footer">
58
+ <div id="health-indicator" class="status-pill status-loading">
59
+ <span class="dot"></span>
60
+ <span class="status-text">Connecting...</span>
61
+ </div>
62
+ <div class="theme-toggle-container">
63
+ <button id="theme-toggle" title="Toggle Light/Dark Mode">
64
+ <span class="mode-icon">◑</span>
65
+ <span class="mode-text">Dark Mode</span>
66
+ </button>
67
+ </div>
68
+ <button id="sidebar-toggle" class="icon-btn collapse-btn" title="Toggle Sidebar">«</button>
69
+ </div>
70
+ </nav>
71
+
72
+ <!-- Main Content -->
73
+ <section class="content">
74
+ <!-- Translate View -->
75
+ <div id="translate-view" class="view active">
76
+ <header class="view-header">
77
+ <h1>QuickMT Neural Machine Translation</h1>
78
+ </header>
79
+
80
+ <div class="translation-grid">
81
+ <!-- Source Column -->
82
+ <div class="card glass translation-card">
83
+ <div class="card-header">
84
+ <div class="lang-group">
85
+ <span class="lang-label">From</span>
86
+ <select id="src-lang-select" class="lang-select">
87
+ <option value="">Auto-detect</option>
88
+ </select>
89
+ </div>
90
+ <div id="detected-badge" class="detected-badge"></div>
91
+ </div>
92
+ <div class="card-body">
93
+ <textarea id="src-text" placeholder="Enter text to translate..." autofocus></textarea>
94
+ </div>
95
+ <div class="card-footer">
96
+ <span id="char-count">0 characters</span>
97
+ </div>
98
+ </div>
99
+
100
+ <!-- Target Column -->
101
+ <div class="card glass translation-card target-card">
102
+ <div class="card-header">
103
+ <div class="lang-group">
104
+ <span class="lang-label">To</span>
105
+ <select id="tgt-lang-select" class="lang-select">
106
+ <option value="en">English</option>
107
+ </select>
108
+ </div>
109
+ </div>
110
+ <div class="card-body">
111
+ <textarea id="tgt-text" readonly placeholder="Translation will appear here..."></textarea>
112
+ <div id="translation-loader" class="loader-overlay hidden">
113
+ <div class="spinner"></div>
114
+ </div>
115
+ </div>
116
+ <div class="card-footer">
117
+ <span id="timing-info">Ready</span>
118
+ <button id="copy-btn" class="action-btn">Copy</button>
119
+ </div>
120
+ </div>
121
+ </div>
122
+ </div>
123
+
124
+ <!-- Models View -->
125
+ <div id="models-view" class="view">
126
+ <header class="view-header">
127
+ <h1>Available Models</h1>
128
+ <p>Browse models from the quickmt Hugging Face collection</p>
129
+ </header>
130
+ <div id="models-list" class="models-grid">
131
+ <!-- Model cards will be injected here -->
132
+ </div>
133
+ </div>
134
+ <!-- Settings View -->
135
+ <div id="settings-view" class="view">
136
+ <header class="view-header">
137
+ <h1>Inference Settings</h1>
138
+ <p>Fine-tune the translation engine for your needs</p>
139
+ </header>
140
+
141
+ <div class="settings-container glass">
142
+ <div class="settings-grid">
143
+ <!-- Beam Size -->
144
+ <div class="setting-item">
145
+ <div class="setting-info">
146
+ <label>Beam Size</label>
147
+ <span class="setting-desc">Number of hypotheses to explore (1-10)</span>
148
+ </div>
149
+ <div class="setting-control">
150
+ <input type="range" id="setting-beam-size" min="1" max="10" step="1" value="2">
151
+ <span class="setting-val">2</span>
152
+ </div>
153
+ </div>
154
+
155
+ <!-- Patience -->
156
+ <div class="setting-item">
157
+ <div class="setting-info">
158
+ <label>Patience</label>
159
+ <span class="setting-desc">Stopping criterion factor (1-10)</span>
160
+ </div>
161
+ <div class="setting-control">
162
+ <input type="range" id="setting-patience" min="1" max="10" step="1" value="1">
163
+ <span class="setting-val">1</span>
164
+ </div>
165
+ </div>
166
+
167
+ <!-- Length Penalty -->
168
+ <div class="setting-item">
169
+ <div class="setting-info">
170
+ <label>Length Penalty</label>
171
+ <span class="setting-desc">Favour longer or shorter sentences (default 1.0)</span>
172
+ </div>
173
+ <div class="setting-control">
174
+ <input type="number" id="setting-length-penalty" step="0.1" value="1.0">
175
+ </div>
176
+ </div>
177
+
178
+ <!-- Coverage Penalty -->
179
+ <div class="setting-item">
180
+ <div class="setting-info">
181
+ <label>Coverage Penalty</label>
182
+ <span class="setting-desc">Ensure all source words are translated (default 0.0)</span>
183
+ </div>
184
+ <div class="setting-control">
185
+ <input type="number" id="setting-coverage-penalty" step="0.1" value="0.0">
186
+ </div>
187
+ </div>
188
+
189
+ <!-- Repetition Penalty -->
190
+ <div class="setting-item">
191
+ <div class="setting-info">
192
+ <label>Repetition Penalty</label>
193
+ <span class="setting-desc">Prevent repeating words (default 1.0)</span>
194
+ </div>
195
+ <div class="setting-control">
196
+ <input type="number" id="setting-repetition-penalty" step="0.1" value="1.0">
197
+ </div>
198
+ </div>
199
+ </div>
200
+
201
+ <div class="settings-actions">
202
+ <button id="reset-settings" class="action-btn secondary">Reset to Defaults</button>
203
+ </div>
204
+ </div>
205
+ </div>
206
+ </section>
207
+ </main>
208
+
209
+ <script src="app.js"></script>
210
+ </body>
211
+
212
+ </html>
quickmt/gui/static/style.css ADDED
@@ -0,0 +1,668 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ :root {
2
+ --primary: #6366f1;
3
+ --primary-glow: rgba(99, 102, 241, 0.5);
4
+ --bg-gradient: linear-gradient(135deg, #0f172a 0%, #1e1b4b 100%);
5
+ --glass-bg: rgba(255, 255, 255, 0.03);
6
+ --glass-border: rgba(255, 255, 255, 0.1);
7
+ --text-main: #f8fafc;
8
+ --text-muted: #94a3b8;
9
+ --card-shadow: 0 8px 32px 0 rgba(0, 0, 0, 0.37);
10
+ --transition: none;
11
+ --sidebar-active-bg: rgba(255, 255, 255, 0.05);
12
+ --input-bg: rgba(255, 255, 255, 0.05);
13
+ --btn-hover-bg: rgba(255, 255, 255, 0.1);
14
+ }
15
+
16
+ body.light-mode {
17
+ --bg-gradient: linear-gradient(135deg, #f8fafc 0%, #e2e8f0 100%);
18
+ --glass-bg: rgba(255, 255, 255, 0.7);
19
+ --glass-border: rgba(99, 102, 241, 0.1);
20
+ --text-main: #1e293b;
21
+ --text-muted: #64748b;
22
+ --card-shadow: 0 8px 32px 0 rgba(31, 38, 135, 0.07);
23
+ --sidebar-active-bg: rgba(99, 102, 241, 0.05);
24
+ --input-bg: rgba(0, 0, 0, 0.02);
25
+ --btn-hover-bg: rgba(99, 102, 241, 0.1);
26
+ }
27
+
28
+ * {
29
+ margin: 0;
30
+ padding: 0;
31
+ box-sizing: border-box;
32
+ font-family: 'Inter', sans-serif;
33
+ }
34
+
35
+ h1,
36
+ h2,
37
+ h3,
38
+ .logo {
39
+ font-family: 'Outfit', sans-serif;
40
+ }
41
+
42
+ body {
43
+ background: var(--bg-gradient);
44
+ color: var(--text-main);
45
+ min-height: 100vh;
46
+ overflow: hidden;
47
+ }
48
+
49
+ .bg-blur {
50
+ position: fixed;
51
+ top: 0;
52
+ left: 0;
53
+ width: 100%;
54
+ height: 100%;
55
+ z-index: -1;
56
+ background: radial-gradient(circle at 20% 30%, rgba(99, 102, 241, 0.15) 0%, transparent 40%),
57
+ radial-gradient(circle at 80% 70%, rgba(168, 85, 247, 0.15) 0%, transparent 40%);
58
+ }
59
+
60
+ .top-nav-links {
61
+ position: fixed;
62
+ top: 1.5rem;
63
+ right: 2.5rem;
64
+ display: flex;
65
+ gap: 0.75rem;
66
+ z-index: 100;
67
+ }
68
+
69
+ .glass-btn {
70
+ display: flex;
71
+ align-items: center;
72
+ justify-content: center;
73
+ padding: 0.6rem 1rem;
74
+ min-width: 44px;
75
+ height: 44px;
76
+ background: var(--glass-bg);
77
+ backdrop-filter: blur(12px);
78
+ -webkit-backdrop-filter: blur(12px);
79
+ border: 1px solid var(--glass-border);
80
+ border-radius: 0.75rem;
81
+ color: var(--text-main);
82
+ text-decoration: none;
83
+ font-size: 0.85rem;
84
+ font-weight: 600;
85
+ transition: none;
86
+ }
87
+
88
+ .glass-btn.icon-only {
89
+ padding: 0;
90
+ width: 44px;
91
+ }
92
+
93
+ .glass-btn .btn-icon i {
94
+ font-size: 1.25rem;
95
+ }
96
+
97
+ .glass-btn:hover {
98
+ background: var(--btn-hover-bg);
99
+ }
100
+
101
+ .glass-btn .btn-icon {
102
+ display: flex;
103
+ align-items: center;
104
+ gap: 0.5rem;
105
+ }
106
+
107
+ .app-container {
108
+ display: flex;
109
+ height: 100vh;
110
+ padding: 1.5rem;
111
+ gap: 1.5rem;
112
+ }
113
+
114
+ /* Glass Effect */
115
+ .glass {
116
+ background: var(--glass-bg);
117
+ backdrop-filter: blur(12px);
118
+ -webkit-backdrop-filter: blur(12px);
119
+ border: 1px solid var(--glass-border);
120
+ border-radius: 1.25rem;
121
+ }
122
+
123
+ /* Sidebar */
124
+ .sidebar {
125
+ width: 280px;
126
+ display: flex;
127
+ flex-direction: column;
128
+ padding: 2rem;
129
+ transition: none;
130
+ overflow: hidden;
131
+ }
132
+
133
+ .sidebar.collapsed {
134
+ width: 90px;
135
+ padding: 2rem 1.25rem;
136
+ }
137
+
138
+ .logo {
139
+ display: flex;
140
+ align-items: center;
141
+ gap: 1rem;
142
+ font-size: 1.5rem;
143
+ font-weight: 700;
144
+ margin-bottom: 3rem;
145
+ position: relative;
146
+ }
147
+
148
+ .logo span {
149
+ transition: none;
150
+ white-space: nowrap;
151
+ }
152
+
153
+ .sidebar.collapsed .logo span {
154
+ opacity: 0;
155
+ pointer-events: none;
156
+ }
157
+
158
+ .logo-icon {
159
+ width: 40px;
160
+ height: 40px;
161
+ min-width: 40px;
162
+ background: var(--primary);
163
+ border-radius: 10px;
164
+ display: flex;
165
+ align-items: center;
166
+ justify-content: center;
167
+ color: white;
168
+ box-shadow: 0 0 20px var(--primary-glow);
169
+ }
170
+
171
+ .nav-links {
172
+ list-style: none;
173
+ flex: 1;
174
+ }
175
+
176
+ .nav-links li {
177
+ margin-bottom: 0.5rem;
178
+ }
179
+
180
+ .nav-links a {
181
+ display: flex;
182
+ align-items: center;
183
+ padding: 0.75rem 1rem;
184
+ color: var(--text-muted);
185
+ text-decoration: none;
186
+ border-radius: 0.75rem;
187
+ transition: none;
188
+ }
189
+
190
+ .nav-links .nav-text {
191
+ transition: none;
192
+ }
193
+
194
+ .sidebar.collapsed .nav-links .nav-text {
195
+ opacity: 0;
196
+ pointer-events: none;
197
+ width: 0;
198
+ }
199
+
200
+ .nav-icon {
201
+ font-size: 1.25rem;
202
+ min-width: 24px;
203
+ display: flex;
204
+ align-items: center;
205
+ justify-content: center;
206
+ margin-right: 0.75rem;
207
+ transition: none;
208
+ }
209
+
210
+ .sidebar.collapsed .nav-icon {
211
+ margin-right: 0;
212
+ width: 100%;
213
+ }
214
+
215
+ .sidebar.collapsed .nav-links a {
216
+ justify-content: center;
217
+ padding: 0.75rem 0;
218
+ }
219
+
220
+ .nav-links li.active a,
221
+ .nav-links a:hover {
222
+ background: var(--sidebar-active-bg);
223
+ color: var(--text-main);
224
+ }
225
+
226
+ .nav-links li.active a {
227
+ border-left: 3px solid var(--primary);
228
+ }
229
+
230
+ .sidebar.collapsed .nav-links li.active a {
231
+ border-left: none;
232
+ background: var(--sidebar-active-bg);
233
+ box-shadow: inset 0 0 10px rgba(99, 102, 241, 0.2);
234
+ }
235
+
236
+ /* Content Area */
237
+ .content {
238
+ flex: 1;
239
+ overflow-y: auto;
240
+ padding-right: 0.5rem;
241
+ }
242
+
243
+ .view {
244
+ display: none;
245
+ }
246
+
247
+ .view.active {
248
+ display: flex;
249
+ flex-direction: column;
250
+ height: 100%;
251
+ }
252
+
253
+
254
+
255
+ .view-header {
256
+ margin-bottom: 2rem;
257
+ }
258
+
259
+ .view-header h1 {
260
+ font-size: 2.25rem;
261
+ margin-bottom: 0.5rem;
262
+ }
263
+
264
+ .view-header p {
265
+ color: var(--text-muted);
266
+ }
267
+
268
+ /* Prediction / Translation Grid */
269
+ .translation-grid {
270
+ display: grid;
271
+ grid-template-columns: 1fr 1fr;
272
+ gap: 1.5rem;
273
+ flex: 1;
274
+ min-height: 0;
275
+ }
276
+
277
+ .card {
278
+ display: flex;
279
+ flex-direction: column;
280
+ box-shadow: var(--card-shadow);
281
+ }
282
+
283
+ .card-header {
284
+ padding: 1rem 1.5rem;
285
+ border-bottom: 1px solid var(--glass-border);
286
+ display: flex;
287
+ align-items: center;
288
+ justify-content: space-between;
289
+ }
290
+
291
+ .card-body {
292
+ flex: 1;
293
+ position: relative;
294
+ }
295
+
296
+ .card-footer {
297
+ padding: 1rem 1.5rem;
298
+ border-top: 1px solid var(--glass-border);
299
+ font-size: 0.85rem;
300
+ color: var(--text-muted);
301
+ display: flex;
302
+ align-items: center;
303
+ justify-content: space-between;
304
+ }
305
+
306
+ textarea {
307
+ width: 100%;
308
+ height: 100%;
309
+ background: transparent;
310
+ border: none;
311
+ resize: none;
312
+ padding: 1.5rem;
313
+ color: var(--text-main);
314
+ font-size: 1.1rem;
315
+ line-height: 1.6;
316
+ outline: none;
317
+ }
318
+
319
+ .lang-group {
320
+ display: flex;
321
+ align-items: center;
322
+ gap: 0.75rem;
323
+ }
324
+
325
+ .lang-label {
326
+ font-size: 0.7rem;
327
+ font-weight: 700;
328
+ text-transform: uppercase;
329
+ letter-spacing: 0.05em;
330
+ color: var(--text-muted);
331
+ }
332
+
333
+ .lang-select {
334
+ background: var(--input-bg);
335
+ border: 1px solid var(--glass-border);
336
+ color: var(--text-main);
337
+ padding: 0.5rem 1rem;
338
+ border-radius: 0.5rem;
339
+ outline: none;
340
+ cursor: pointer;
341
+ }
342
+
343
+ .detected-badge {
344
+ font-size: 0.75rem;
345
+ background: var(--primary);
346
+ padding: 0.2rem 0.6rem;
347
+ border-radius: 1rem;
348
+ color: white;
349
+ opacity: 0;
350
+ transition: none;
351
+ }
352
+
353
+ .detected-badge.visible {
354
+ opacity: 1;
355
+ transition: none;
356
+ }
357
+
358
+ /* Loader */
359
+ .loader-overlay {
360
+ position: absolute;
361
+ top: 0;
362
+ left: 0;
363
+ width: 100%;
364
+ height: 100%;
365
+ background: rgba(15, 23, 42, 0.5);
366
+ display: flex;
367
+ align-items: center;
368
+ justify-content: center;
369
+ border-radius: 0 0 1.25rem 1.25rem;
370
+ }
371
+
372
+ .hidden {
373
+ display: none !important;
374
+ }
375
+
376
.spinner {
    width: 30px;
    height: 30px;
    border: 3px solid rgba(255, 255, 255, 0.1);
    border-top-color: var(--primary);
    border-radius: 50%;
    /* BUGFIX: was `animation: none`, which left the loading spinner as a
       static arc while the `spin` keyframes below went unused. */
    animation: spin 0.8s linear infinite;
}

@keyframes spin {
    to {
        transform: rotate(360deg);
    }
}
390
+
391
+ /* Success Pills */
392
+ .status-pill {
393
+ display: flex;
394
+ align-items: center;
395
+ gap: 0.5rem;
396
+ padding: 0.5rem 1rem;
397
+ border-radius: 2rem;
398
+ background: rgba(0, 0, 0, 0.2);
399
+ font-size: 0.85rem;
400
+ }
401
+
402
+ .dot {
403
+ width: 8px;
404
+ height: 8px;
405
+ border-radius: 50%;
406
+ }
407
+
408
+ .status-online .dot {
409
+ background: #10b981;
410
+ box-shadow: 0 0 10px #10b981;
411
+ }
412
+
413
.status-loading .dot {
    background: #f59e0b;
    /* BUGFIX: was `animation: none`, leaving the "loading" indicator static
       while the `pulse` keyframes below went unused. */
    animation: pulse 1.5s ease-in-out infinite;
}

@keyframes pulse {
    0% {
        transform: scale(1);
        opacity: 1;
    }

    50% {
        transform: scale(1.2);
        opacity: 0.5;
    }

    100% {
        transform: scale(1);
        opacity: 1;
    }
}
434
+
435
+ /* Models Grid */
436
+ .models-grid {
437
+ display: grid;
438
+ grid-template-columns: repeat(auto-fill, minmax(300px, 1fr));
439
+ gap: 1.5rem;
440
+ }
441
+
442
+ .model-card {
443
+ padding: 1.5rem;
444
+ transition: none;
445
+ /* Performance optimizations */
446
+ contain: layout style paint;
447
+ will-change: background;
448
+ /* Remove expensive backdrop-filter for better scrolling */
449
+ background: var(--glass-bg);
450
+ border: 1px solid var(--glass-border);
451
+ border-radius: 1.25rem;
452
+ box-shadow: var(--card-shadow);
453
+ }
454
+
455
+ .model-card:hover {
456
+ background: var(--sidebar-active-bg);
457
+ }
458
+
459
+ .model-lang-pair {
460
+ display: flex;
461
+ align-items: center;
462
+ gap: 0.5rem;
463
+ font-weight: 600;
464
+ margin-bottom: 0.5rem;
465
+ }
466
+
467
+ .model-id {
468
+ font-size: 0.8rem;
469
+ color: var(--text-muted);
470
+ word-break: break-all;
471
+ }
472
+
473
+ .loaded-badge {
474
+ display: inline-block;
475
+ padding: 0.2rem 0.5rem;
476
+ background: rgba(16, 185, 129, 0.1);
477
+ color: #10b981;
478
+ border: 1px solid rgba(16, 185, 129, 0.2);
479
+ border-radius: 0.4rem;
480
+ font-size: 0.7rem;
481
+ margin-top: 1rem;
482
+ }
483
+
484
+ /* Buttons */
485
+ .action-btn {
486
+ background: var(--primary);
487
+ color: white;
488
+ border: none;
489
+ padding: 0.5rem 1rem;
490
+ border-radius: 0.5rem;
491
+ cursor: pointer;
492
+ font-weight: 500;
493
+ transition: none;
494
+ }
495
+
496
+ .action-btn:hover {
497
+ background: #4f46e5;
498
+ }
499
+
500
+ .icon-btn {
501
+ background: transparent;
502
+ border: none;
503
+ color: var(--text-muted);
504
+ font-size: 1.25rem;
505
+ cursor: pointer;
506
+ transition: none;
507
+ }
508
+
509
+ .icon-btn:hover {
510
+ color: var(--text-main);
511
+ }
512
+
513
+ /* Settings View */
514
+ .settings-container {
515
+ max-width: 800px;
516
+ padding: 2rem;
517
+ margin-top: 1rem;
518
+ }
519
+
520
+ .settings-grid {
521
+ display: flex;
522
+ flex-direction: column;
523
+ gap: 2rem;
524
+ }
525
+
526
+ .setting-item {
527
+ display: grid;
528
+ grid-template-columns: 1fr 200px;
529
+ align-items: center;
530
+ gap: 2rem;
531
+ padding-bottom: 2rem;
532
+ border-bottom: 1px solid var(--glass-border);
533
+ }
534
+
535
+ .setting-item:last-child {
536
+ border-bottom: none;
537
+ }
538
+
539
+ .setting-info {
540
+ display: flex;
541
+ flex-direction: column;
542
+ gap: 0.25rem;
543
+ }
544
+
545
+ .setting-info label {
546
+ font-weight: 600;
547
+ font-size: 1.1rem;
548
+ }
549
+
550
+ .setting-desc {
551
+ color: var(--text-muted);
552
+ font-size: 0.85rem;
553
+ }
554
+
555
+ .setting-control {
556
+ display: flex;
557
+ align-items: center;
558
+ gap: 1rem;
559
+ }
560
+
561
+ .setting-val {
562
+ min-width: 30px;
563
+ font-family: monospace;
564
+ font-weight: 600;
565
+ color: var(--primary);
566
+ }
567
+
568
+ input[type="range"] {
569
+ flex: 1;
570
+ cursor: pointer;
571
+ accent-color: var(--primary);
572
+ }
573
+
574
+ input[type="number"] {
575
+ width: 100%;
576
+ background: var(--input-bg);
577
+ border: 1px solid var(--glass-border);
578
+ color: var(--text-main);
579
+ padding: 0.5rem;
580
+ border-radius: 0.5rem;
581
+ outline: none;
582
+ font-family: inherit;
583
+ }
584
+
585
+ .settings-actions {
586
+ margin-top: 3rem;
587
+ display: flex;
588
+ justify-content: flex-end;
589
+ }
590
+
591
+ .action-btn.secondary {
592
+ background: rgba(255, 255, 255, 0.05);
593
+ border: 1px solid var(--glass-border);
594
+ }
595
+
596
+ .action-btn.secondary:hover {
597
+ background: rgba(255, 255, 255, 0.1);
598
+ }
599
+
600
+ /* Theme Toggle Button */
601
+ .theme-toggle-container {
602
+ margin-top: 1rem;
603
+ }
604
+
605
+ #theme-toggle {
606
+ width: 100%;
607
+ display: flex;
608
+ align-items: center;
609
+ gap: 0.75rem;
610
+ background: var(--input-bg);
611
+ border: 1px solid var(--glass-border);
612
+ color: var(--text-main);
613
+ padding: 0.75rem 1rem;
614
+ border-radius: 0.75rem;
615
+ cursor: pointer;
616
+ font-size: 0.9rem;
617
+ transition: none;
618
+ }
619
+
620
+ #theme-toggle:hover {
621
+ background: var(--sidebar-active-bg);
622
+ }
623
+
624
+ .mode-icon {
625
+ font-size: 1.1rem;
626
+ }
627
+
628
+ .sidebar.collapsed .mode-text {
629
+ display: none;
630
+ }
631
+
632
+ .sidebar.collapsed .theme-toggle-container {
633
+ display: flex;
634
+ justify-content: center;
635
+ }
636
+
637
+ #sidebar-toggle {
638
+ margin-top: 1rem;
639
+ width: 40px;
640
+ height: 40px;
641
+ display: flex;
642
+ align-items: center;
643
+ justify-content: center;
644
+ background: var(--input-bg);
645
+ border-radius: 50%;
646
+ margin-left: auto;
647
+ font-size: 1.25rem;
648
+ transition: none;
649
+ z-index: 10;
650
+ }
651
+
652
+ #sidebar-toggle:hover {
653
+ background: var(--sidebar-active-bg);
654
+ }
655
+
656
+ .sidebar.collapsed #sidebar-toggle {
657
+ transform: rotate(180deg);
658
+ margin-left: 0.5rem;
659
+ }
660
+
661
+ .sidebar.collapsed .sidebar-footer .status-text {
662
+ display: none;
663
+ }
664
+
665
+ .sidebar.collapsed .status-pill {
666
+ padding: 0.5rem;
667
+ justify-content: center;
668
+ }
quickmt/langid.py ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List, Tuple, Union, Optional
2
+ from pathlib import Path
3
+ import os
4
+ import urllib.request
5
+ import fasttext
6
+
7
+ # Suppress fasttext's warning about being loaded in a way that doesn't
8
+ # allow querying its version (common in some environments)
9
+ fasttext.FastText.eprint = lambda x: None
10
+
11
+
12
class LanguageIdentification:
    """Detect language using a pre-trained FastText langid model.

    Thin wrapper around the ``fasttext`` library supporting both
    single-string and batched language identification.
    """

    def __init__(self, model_path: Optional[Union[str, Path]] = None):
        """Load the FastText model, downloading it first if necessary.

        Args:
            model_path: Location of the FastText model file on disk.
                When None, a per-user cache directory is used
                (``$XDG_CACHE_HOME/fasttext_language_id/lid.176.bin``).
        """
        if model_path is None:
            cache_root = Path(os.getenv("XDG_CACHE_HOME", Path.home() / ".cache"))
            model_path = cache_root / "fasttext_language_id" / "lid.176.bin"
        else:
            model_path = Path(model_path)

        if not model_path.exists():
            model_path.parent.mkdir(parents=True, exist_ok=True)
            url = "https://dl.fbaipublicfiles.com/fasttext/supervised-models/lid.176.bin"
            print(f"Downloading FastText model from {url} to {model_path}...")
            urllib.request.urlretrieve(url, str(model_path))
            print("Download complete.")

        self.ft = fasttext.load_model(str(model_path))

    def predict(
        self,
        text: Union[str, List[str]],
        k: int = 1,
        threshold: float = 0.0
    ) -> Union[List[Tuple[str, float]], List[List[Tuple[str, float]]]]:
        """Predict the language(s) for a string or a batch of strings.

        Args:
            text: A single string or a list of strings to identify.
            k: Number of most likely languages to return per input.
            threshold: Minimum score for a language to be included.

        Returns:
            For a single string: a list of ``(lang, score)`` tuples.
            For a list of strings: one such list per input, in order.
        """
        single = isinstance(text, str)
        raw_items = [text] if single else text

        # FastText raises on embedded newlines, so flatten them to spaces.
        cleaned = [s.replace("\n", " ") for s in raw_items]

        # Batch call: fasttext accepts a list natively and returns
        # parallel lists of label rows and score rows.
        labels, scores = self.ft.predict(cleaned, k=k, threshold=threshold)

        batched = [
            [
                (lbl.replace("__label__", ""), float(p))
                for lbl, p in zip(row_labels, row_scores)
            ]
            for row_labels, row_scores in zip(labels, scores)
        ]

        return batched[0] if single else batched

    def predict_best(
        self,
        text: Union[str, List[str]],
        threshold: float = 0.0
    ) -> Union[Optional[str], List[Optional[str]]]:
        """Return only the top-scoring language label for each input.

        Convenience wrapper around :meth:`predict` with ``k=1``.

        Args:
            text: A single string or a list of strings to identify.
            threshold: Minimum score for a language to be selected.

        Returns:
            For a single string: the language code (e.g. ``'en'``) or None
            when nothing clears the threshold. For a list: one code (or
            None) per input.
        """
        predictions = self.predict(text, k=1, threshold=threshold)

        if isinstance(text, str):
            # predictions is a flat List[Tuple[str, float]]
            return predictions[0][0] if predictions else None

        # predictions is a List[List[Tuple[str, float]]]
        return [row[0][0] if row else None for row in predictions]
109
+
110
+
111
def ensure_model_exists(model_path: Optional[Union[str, Path]] = None):
    """Ensure the FastText model exists on disk, downloading if necessary.

    Intended to be called from the main process before starting worker
    pools, so workers never race each other to download the same file.

    Args:
        model_path: Target location of the model file. When None, the
            default per-user cache location is used.
    """
    if model_path is None:
        cache_root = Path(os.getenv("XDG_CACHE_HOME", Path.home() / ".cache"))
        model_path = cache_root / "fasttext_language_id" / "lid.176.bin"
    else:
        model_path = Path(model_path)

    if model_path.exists():
        return

    model_path.parent.mkdir(parents=True, exist_ok=True)
    url = "https://dl.fbaipublicfiles.com/fasttext/supervised-models/lid.176.bin"
    print(f"Downloading FastText model from {url} to {model_path}...")
    urllib.request.urlretrieve(url, str(model_path))
    print("Download complete.")
128
+
129
+
130
# Process-local singleton used by process-pool workers.
_detector: Optional[LanguageIdentification] = None


def init_worker(model_path: Optional[Union[str, Path]] = None):
    """Create the process-local detector (run once per worker process).

    Assumes ``ensure_model_exists`` was already called in the main
    process, so no download should be needed here.
    """
    global _detector
    _detector = LanguageIdentification(model_path)


def predict_worker(
    text: Union[str, List[str]],
    k: int = 1,
    threshold: float = 0.0
) -> Union[List[Tuple[str, float]], List[List[Tuple[str, float]]]]:
    """Run a language prediction inside a worker process.

    Lazily initializes the detector if the pool initializer did not run
    (or failed), then delegates to :meth:`LanguageIdentification.predict`.
    """
    if _detector is None:
        # Recover from a missing/failed init_worker call.
        init_worker()
    return _detector.predict(text, k=k, threshold=threshold)
quickmt/manager.py ADDED
@@ -0,0 +1,367 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import logging
3
+ import time
4
+ from pathlib import Path
5
+ from typing import Dict, List, Optional
6
+ from collections import OrderedDict
7
+ from functools import lru_cache
8
+
9
+ from fastapi import HTTPException
10
+ from huggingface_hub import HfApi, snapshot_download
11
+ from cachetools import TTLCache, cached, LRUCache
12
+
13
+ from quickmt.translator import Translator
14
+ from quickmt.settings import settings
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
class BatchTranslator:
    """Wraps a single Translator behind an asyncio queue that dynamically
    batches concurrent translation requests sharing identical parameters.

    Completed translations are memoized in an LRU cache keyed by
    (src, src_lang, tgt_lang, kwargs).
    """

    def __init__(
        self,
        model_id: str,
        model_path: str,
        device: str = "cpu",
        compute_type: str = "default",
        inter_threads: int = 1,
        intra_threads: int = 0,
    ):
        self.model_id = model_id
        self.model_path = model_path
        self.device = device
        self.compute_type = compute_type
        self.inter_threads = inter_threads
        self.intra_threads = intra_threads
        self.translator: Optional[Translator] = None
        self.queue: asyncio.Queue = asyncio.Queue()
        self.worker_task: Optional[asyncio.Task] = None
        # LRU cache for completed translations
        self.translation_cache: LRUCache = LRUCache(
            maxsize=settings.translation_cache_size
        )

    async def start_worker(self):
        """Load the model and start the batching worker task (idempotent)."""
        if self.worker_task:
            return

        # Load model in main process (or worker thread if needed)
        # For now, Translator handles its own loading
        self.translator = Translator(
            Path(self.model_path),
            device=self.device,
            compute_type=self.compute_type,
            inter_threads=self.inter_threads,
            intra_threads=self.intra_threads,
        )
        self.worker_task = asyncio.create_task(self._worker())
        logger.info(f"Started translation worker for model: {self.model_id}")

    async def stop_worker(self):
        """Stop the worker (via a queue sentinel) and unload the model."""
        if not self.worker_task:
            return

        # Send sentinel to stop worker
        await self.queue.put(None)
        await self.worker_task
        self.worker_task = None
        if self.translator:
            self.translator.unload()
            self.translator = None
        logger.info(f"Stopped translation worker for model: {self.model_id}")

    async def _collect_batch(self, src, src_lang, tgt_lang, kwargs, future):
        """Collect up to settings.max_batch_size compatible queued requests.

        Only requests with identical (src_lang, tgt_lang, kwargs) are batched
        together; an incompatible item (or the shutdown sentinel) is re-queued
        for a later cycle. Returns (batch_texts, futures), which always
        include the initial request.
        """
        batch_texts = [src]
        futures = [future]
        start_time = time.time()
        while len(batch_texts) < settings.max_batch_size:
            wait_time = (settings.batch_timeout_ms / 1000.0) - (
                time.time() - start_time
            )
            if wait_time <= 0:
                break
            try:
                next_item = await asyncio.wait_for(self.queue.get(), timeout=wait_time)
            except asyncio.TimeoutError:
                break
            if next_item is None:
                # Re-add sentinel so the main loop can shut down later
                await self.queue.put(None)
                break
            n_src, n_sl, n_tl, n_kw, n_fut = next_item
            if n_sl == src_lang and n_tl == tgt_lang and n_kw == kwargs:
                batch_texts.append(n_src)
                futures.append(n_fut)
            else:
                # Re-queue incompatible item for a later batch/worker cycle
                await self.queue.put(next_item)
                break
        return batch_texts, futures

    async def _worker(self):
        """Queue consumer: batch compatible requests, run inference off-loop,
        and resolve (or fail) every future in the batch."""
        while True:
            item = await self.queue.get()
            if item is None:
                # Shutdown sentinel
                self.queue.task_done()
                break

            src, src_lang, tgt_lang, kwargs, future = item
            # Initialize up front so the except/finally blocks always see
            # a consistent view of what was dequeued.
            batch_texts = [src]
            futures = [future]
            try:
                batch_texts, futures = await self._collect_batch(
                    src, src_lang, tgt_lang, kwargs, future
                )

                # Run in executor to avoid blocking the asyncio loop during inference
                loop = asyncio.get_running_loop()
                results = await loop.run_in_executor(
                    None,
                    lambda: self.translator(
                        batch_texts, src_lang=src_lang, tgt_lang=tgt_lang, **kwargs
                    ),
                )

                # result can be string or list
                if isinstance(results, str):
                    results = [results]

                for res, fut in zip(results, futures):
                    if not fut.done():
                        fut.set_result(res)

                # Defensive: if the translator returned fewer results than
                # inputs, fail the leftovers instead of leaving them pending.
                for fut in futures:
                    if not fut.done():
                        fut.set_exception(
                            RuntimeError(
                                "translator returned fewer results than inputs"
                            )
                        )
            except Exception as e:
                logger.error(f"Error in translation worker for {self.model_id}: {e}")
                # BUGFIX: fail every request in the batch, not just the first
                # one — otherwise batched callers would await forever.
                for fut in futures:
                    if not fut.done():
                        fut.set_exception(e)
            finally:
                # One task_done per dequeued request that joined the batch,
                # on both the success and the error path.
                for _ in range(len(batch_texts)):
                    self.queue.task_done()

    async def translate(
        self,
        src: str,
        src_lang: Optional[str] = None,
        tgt_lang: Optional[str] = None,
        **kwargs,
    ) -> str:
        """Translate a single string, transparently batching and caching.

        Args:
            src: Source text.
            src_lang: Source language code (model-dependent; may be None).
            tgt_lang: Target language code.
            **kwargs: Extra decoding options forwarded to the translator.

        Returns:
            The translated text.

        Raises:
            Whatever exception the worker raised for this request.
        """
        if not self.worker_task:
            await self.start_worker()

        # kwargs as a sorted tuple so the cache key is hashable and stable
        kwargs_tuple = tuple(sorted(kwargs.items()))
        cache_key = (src, src_lang, tgt_lang, kwargs_tuple)

        # Check cache first
        if cache_key in self.translation_cache:
            return self.translation_cache[cache_key]

        # Cache miss - enqueue and wait for the worker to resolve the future
        future = asyncio.get_running_loop().create_future()
        await self.queue.put((src, src_lang, tgt_lang, kwargs, future))
        result = await future

        # Store in cache
        self.translation_cache[cache_key] = result
        return result
166
+
167
+
168
class ModelManager:
    """LRU manager for BatchTranslator instances keyed by ``"src-tgt"``.

    Models are discovered from the Hugging Face quickmt collection, loaded
    on demand, and the least-recently-used model is evicted once
    ``max_loaded`` is reached.
    """

    # How long (seconds) the Hugging Face collection listing stays fresh.
    _HF_MODELS_TTL = 3600

    def __init__(
        self,
        max_loaded: int,
        device: str,
        compute_type: str = "default",
        inter_threads: int = 1,
        intra_threads: int = 0,
    ):
        self.max_loaded = max_loaded
        self.device = device
        self.compute_type = compute_type
        self.inter_threads = inter_threads
        self.intra_threads = intra_threads
        # cache key: src-tgt string; OrderedDict front == least recently used
        self.models: OrderedDict[str, BatchTranslator] = OrderedDict()
        self.pending_loads: Dict[str, asyncio.Event] = {}
        self.lock = asyncio.Lock()
        self.hf_collection_models: List[Dict] = []
        # monotonic timestamp of the last successful collection fetch
        self._hf_models_fetched_at: float = 0.0
        self.api = HfApi()

    async def fetch_hf_models(self):
        """Fetch available models from the quickmt collection on Hugging Face.

        The listing is cached for ``_HF_MODELS_TTL`` seconds via a simple
        timestamp check. BUGFIX: the previous ``@cached(TTLCache(...))``
        decorator does not work on a coroutine function — it caches the
        coroutine object itself, and a coroutine can only be awaited once,
        so every cache hit raised RuntimeError.
        """
        now = time.monotonic()
        if (
            self.hf_collection_models
            and now - self._hf_models_fetched_at < self._HF_MODELS_TTL
        ):
            return
        try:
            loop = asyncio.get_running_loop()
            collection = await loop.run_in_executor(
                None, lambda: self.api.get_collection("quickmt/quickmt-models")
            )

            hf_models = []
            for item in collection.items:
                if item.item_type == "model":
                    model_id = item.item_id
                    # Expecting format: quickmt/quickmt-en-fr
                    parts = model_id.split("/")[-1].replace("quickmt-", "").split("-")
                    if len(parts) == 2:
                        src, tgt = parts
                        hf_models.append(
                            {"model_id": model_id, "src_lang": src, "tgt_lang": tgt}
                        )
            self.hf_collection_models = hf_models
            self._hf_models_fetched_at = now
            logger.info(
                f"Discovered {len(hf_models)} models from Hugging Face collection"
            )
        except Exception as e:
            logger.error(f"Failed to fetch models from Hugging Face: {e}")

    async def get_model(self, src_lang: str, tgt_lang: str) -> BatchTranslator:
        """Return the (possibly freshly loaded) model for a language pair.

        Raises:
            HTTPException(404): if the pair is not in the HF collection.
            HTTPException(500): if the load task failed.
        """
        model_name = f"{src_lang}-{tgt_lang}"

        async with self.lock:
            # 1. Check if loaded
            if model_name in self.models:
                self.models.move_to_end(model_name)
                return self.models[model_name]

            # 2. Check if currently loading
            if model_name in self.pending_loads:
                event = self.pending_loads[model_name]
            else:
                # Pre-check existence before starting a task so a missing
                # pair produces a clean 404 instead of a failed load.
                hf_model = next(
                    (
                        m
                        for m in self.hf_collection_models
                        if m["src_lang"] == src_lang and m["tgt_lang"] == tgt_lang
                    ),
                    None,
                )
                if not hf_model:
                    raise HTTPException(
                        status_code=404,
                        detail=f"Model for {src_lang}->{tgt_lang} not found in Hugging Face collection",
                    )

                event = asyncio.Event()
                self.pending_loads[model_name] = event
                # This task will do the actual loading
                asyncio.create_task(self._load_model_task(src_lang, tgt_lang, event))

        # 3. Wait for load
        await event.wait()

        # 4. Return from cache. BUGFIX: if the load task failed the model is
        # absent — previously this raised a bare KeyError to the caller.
        async with self.lock:
            model = self.models.get(model_name)
        if model is None:
            raise HTTPException(
                status_code=500,
                detail=f"Failed to load model for {src_lang}->{tgt_lang}",
            )
        return model

    async def _load_model_task(
        self, src_lang: str, tgt_lang: str, new_event: asyncio.Event
    ):
        """Download (if needed) and start a model, evicting the LRU entry.

        Always sets ``new_event`` on exit so waiters in get_model unblock,
        even on failure (get_model then reports the missing model).
        """
        model_name = f"{src_lang}-{tgt_lang}"
        try:
            try:
                # Find matching model from HF collection (already checked in get_model)
                hf_model = next(
                    m
                    for m in self.hf_collection_models
                    if m["src_lang"] == src_lang and m["tgt_lang"] == tgt_lang
                )

                logger.info(f"Accessing Hugging Face model: {hf_model['model_id']}")
                loop = asyncio.get_running_loop()
                # snapshot_download returns the local path in the HF cache.
                # Try local only first to speed up loading
                try:
                    cached_path = await loop.run_in_executor(
                        None,
                        lambda: snapshot_download(
                            repo_id=hf_model["model_id"],
                            ignore_patterns=["eole-model/*", "eole_model/*"],
                            local_files_only=True,
                        ),
                    )
                except Exception:
                    # Fallback to checking online
                    logger.info(
                        f"Model {hf_model['model_id']} not fully cached, checking online..."
                    )
                    cached_path = await loop.run_in_executor(
                        None,
                        lambda: snapshot_download(
                            repo_id=hf_model["model_id"],
                            ignore_patterns=["eole-model/*", "eole_model/*"],
                        ),
                    )
                model_path = Path(cached_path)

                # Evict the least-recently-used model if the cache is full.
                evicted_model = None
                async with self.lock:
                    if len(self.models) >= self.max_loaded:
                        oldest_name, evicted_model = self.models.popitem(last=False)
                        logger.info(f"Evicting model: {oldest_name}")

                # Stop the evicted worker outside the lock (it may be slow).
                if evicted_model:
                    await evicted_model.stop_worker()

                # Load new model (SLOW, outside lock)
                logger.info(
                    f"Loading model: {hf_model['model_id']} (device: {self.device}, compute: {self.compute_type})"
                )
                new_model = BatchTranslator(
                    model_id=hf_model["model_id"],
                    model_path=str(model_path),
                    device=self.device,
                    compute_type=self.compute_type,
                    inter_threads=self.inter_threads,
                    intra_threads=self.intra_threads,
                )
                await new_model.start_worker()

                # Add to cache
                async with self.lock:
                    self.models[model_name] = new_model

            except Exception as e:
                logger.error(f"Error loading model {model_name}: {e}")
                # Re-raise so the outer finally still runs; waiters detect
                # the failure via the model's absence in get_model.
                raise e
        finally:
            async with self.lock:
                if model_name in self.pending_loads:
                    del self.pending_loads[model_name]
            new_event.set()

    def list_available_models(self) -> List[Dict]:
        """List all models discovered from Hugging Face, with load status."""
        available = []
        for m in self.hf_collection_models:
            lang_pair = f"{m['src_lang']}-{m['tgt_lang']}"
            available.append(
                {
                    "model_id": m["model_id"],
                    "src_lang": m["src_lang"],
                    "tgt_lang": m["tgt_lang"],
                    "loaded": lang_pair in self.models,
                }
            )
        return available

    def get_language_pairs(self) -> Dict[str, List[str]]:
        """Return a mapping of source language -> sorted target languages.

        BUGFIX: previously decorated with ``@lru_cache(maxsize=1)``, which
        (a) keeps the instance alive for the cache's lifetime and (b) froze
        the first result forever — an empty mapping if called before
        fetch_hf_models completed. The computation is cheap; just redo it.
        """
        pairs: Dict[str, set] = {}
        for m in self.hf_collection_models:
            pairs.setdefault(m["src_lang"], set()).add(m["tgt_lang"])

        # Convert sets to sorted lists
        return {src: sorted(tgts) for src, tgts in sorted(pairs.items())}

    async def shutdown(self):
        """Stop all workers and drop every loaded model."""
        for model in self.models.values():
            await model.stop_worker()
        self.models.clear()
quickmt/rest_server.py ADDED
@@ -0,0 +1,358 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import logging
3
+ import os
4
+ import time
5
+ from contextlib import asynccontextmanager
6
+ from typing import List, Optional, Union, Dict
7
+ from concurrent.futures import ProcessPoolExecutor
8
+
9
+ from fastapi import FastAPI, HTTPException, APIRouter
10
+ from fastapi.responses import ORJSONResponse
11
+ from fastapi.staticfiles import StaticFiles
12
+ from pydantic import BaseModel, model_validator
13
+
14
+ from quickmt.langid import init_worker, predict_worker, ensure_model_exists
15
+ from quickmt.manager import ModelManager
16
+ from quickmt.settings import settings
17
+
18
+ logger = logging.getLogger("uvicorn.error")
19
+
20
+
21
class TranslationRequest(BaseModel):
    """Request body for /api/translate.

    ``src`` may be a single string or a batch; ``src_lang`` is optional
    (language identification is used when it is omitted) and may be a
    per-item list when ``src`` is a list.  The remaining fields are
    CTranslate2 decoding parameters.
    """

    src: Union[str, List[str]]
    src_lang: Optional[Union[str, List[str]]] = None
    tgt_lang: str = "en"
    beam_size: int = 5
    patience: int = 1
    length_penalty: float = 1.0
    coverage_penalty: float = 0.0
    repetition_penalty: float = 1.0
    max_decoding_length: int = 256

    @model_validator(mode="after")
    def validate_patience(self):
        """Reject configurations where patience exceeds the beam width."""
        if self.beam_size < self.patience:
            raise ValueError("patience cannot be greater than beam_size")
        return self
37
+
38
+
39
class TranslationResponse(BaseModel):
    """Response body for /api/translate.

    Fields mirror the request shape: scalars when ``src`` was a single
    string, lists aligned with the input when ``src`` was a list.
    """

    translation: Union[str, List[str]]
    src_lang: Union[str, List[str]]  # detected or caller-supplied source language(s)
    src_lang_score: Union[float, List[float]]  # langid confidence; 1.0 when src_lang was supplied
    tgt_lang: str
    processing_time: float  # wall-clock seconds for the whole request
    model_used: Union[str, List[str]]  # HF model id(s); "identity" when src == tgt, "none" for empty input
46
+
47
+
48
class DetectionRequest(BaseModel):
    """Request body for /api/identify-language."""

    src: Union[str, List[str]]
    k: int = 1  # number of top predictions to return per input
    threshold: float = 0.0  # minimum score for a prediction to be returned
52
+
53
+
54
class DetectionResult(BaseModel):
    """One language prediction: a language label and its confidence score."""

    lang: str
    score: float
57
+
58
+
59
class DetectionResponse(BaseModel):
    """Response body for /api/identify-language.

    ``results`` is a flat list for a single input string, or a list of
    lists (one inner list per input) for batched input.
    """

    results: Union[List[DetectionResult], List[List[DetectionResult]]]
    processing_time: float  # wall-clock seconds for the whole request
62
+
63
+
64
class BatchItem:
    """A single queued translation job: its inputs, decoding knobs, and the
    future that the batching worker resolves with the result."""

    def __init__(
        self,
        src: List[str],
        src_lang: str,
        tgt_lang: str,
        beam_size: int,
        max_decoding_length: int,
        future: asyncio.Future,
    ):
        # Plain attribute capture; assignments are independent of each other.
        self.future = future
        self.max_decoding_length = max_decoding_length
        self.beam_size = beam_size
        self.tgt_lang = tgt_lang
        self.src_lang = src_lang
        self.src = src
80
+
81
+
82
# Global singletons created during application startup (see lifespan below).
model_manager: Optional[ModelManager] = None
langid_executor: Optional[ProcessPoolExecutor] = None


@asynccontextmanager
async def lifespan(app: FastAPI):
    """FastAPI lifespan hook: build shared resources on startup, release on exit."""
    global model_manager, langid_executor

    model_manager = ModelManager(
        max_loaded=settings.max_loaded_models,
        device=settings.device,
        compute_type=settings.compute_type,
        inter_threads=settings.inter_threads,
        intra_threads=settings.intra_threads,
    )

    # 1. Fetch available models from Hugging Face
    await model_manager.fetch_hf_models()

    # 2. Ensure langid model is downloaded in main process before starting
    #    workers, so each pool worker only reads the already-cached file.
    loop = asyncio.get_running_loop()
    await loop.run_in_executor(None, ensure_model_exists, settings.langid_model_path)

    # Initialize langid process pool; init_worker loads the model once per worker.
    langid_executor = ProcessPoolExecutor(
        max_workers=settings.langid_workers,
        initializer=init_worker,
        initargs=(settings.langid_model_path,),
    )

    yield

    # Teardown: release the process pool first, then unload every translator.
    if langid_executor:
        langid_executor.shutdown()
    await model_manager.shutdown()
118
+
119
+
120
# Application object; ORJSONResponse serializes responses with orjson.
app = FastAPI(
    title="quickmt Multi-Model API",
    lifespan=lifespan,
    default_response_class=ORJSONResponse,
)
# All JSON endpoints live under the /api prefix; the static GUI is mounted at /.
api_router = APIRouter(prefix="/api")
126
+
127
+
128
@api_router.post("/translate", response_model=TranslationResponse)
async def translate_endpoint(request: TranslationRequest):
    """Translate one string or a batch of strings.

    Pipeline: detect source language(s) when not supplied, group inputs by
    source language, translate each group with the matching model (groups
    run concurrently), then reassemble results in the original input order.
    """
    if not model_manager:
        raise HTTPException(status_code=503, detail="Model manager not initialized")

    start_time = time.time()
    src_list = [request.src] if isinstance(request.src, str) else request.src
    if not src_list:
        # Empty input: return an empty payload of the matching shape.
        return TranslationResponse(
            translation="" if isinstance(request.src, str) else [],
            src_lang="" if isinstance(request.src, str) else [],
            src_lang_score=0.0 if isinstance(request.src, str) else [],
            tgt_lang=request.tgt_lang,
            processing_time=time.time() - start_time,
            model_used="none",
        )

    try:
        loop = asyncio.get_running_loop()

        # 1. Determine source languages and confidence scores.
        #    Caller-supplied languages get a fixed confidence of 1.0.
        if request.src_lang:
            if isinstance(request.src_lang, list):
                if not isinstance(src_list, list) or len(request.src_lang) != len(
                    src_list
                ):
                    raise HTTPException(
                        status_code=422,
                        detail="src_lang list length must match src list length",
                    )
                src_langs = request.src_lang
                src_lang_scores = [1.0] * len(src_list)
            else:
                src_langs = [request.src_lang] * len(src_list)
                src_lang_scores = [1.0] * len(src_list)
        else:
            if not langid_executor:
                raise HTTPException(
                    status_code=503, detail="Language identification not initialized"
                )
            # Batch detect languages in the process pool
            raw_langid_results = await loop.run_in_executor(
                langid_executor,
                predict_worker,
                src_list,
                1,  # k=1 (best guess)
                0.0,  # threshold
            )
            # results are List[List[Tuple[str, float]]], extract labels and scores
            src_langs = [r[0][0] if r else "unknown" for r in raw_langid_results]
            src_lang_scores = [float(r[0][1]) if r else 0.0 for r in raw_langid_results]

        # 2. Group indices by source language
        # groups: { "fr": [0, 2, ...], "es": [1, ...] }
        groups: Dict[str, List[int]] = {}
        for idx, lang in enumerate(src_langs):
            if lang not in groups:
                groups[lang] = []
            groups[lang].append(idx)

        # 3. Process each group; results are written back by original index.
        final_translations = [""] * len(src_list)
        final_models = [""] * len(src_list)
        tasks = []

        # We need a way to track which lang pairs were actually used for the 'model_used' string
        used_pairs = set()

        for lang, indices in groups.items():
            group_src = [src_list[i] for i in indices]

            # Optimization: If src == tgt, skip translation
            if lang == request.tgt_lang:
                for src_idx, idx in enumerate(indices):
                    final_translations[idx] = group_src[src_idx]
                    final_models[idx] = "identity"
                continue

            # Load model and translate for this group.
            # NOTE: default arguments bind the loop variables at definition
            # time, avoiding the classic late-binding closure bug.
            async def process_group_task(l=lang, i_list=indices, g_src=group_src):
                try:
                    translator = await model_manager.get_model(l, request.tgt_lang)
                    used_pairs.add(translator.model_id)
                    # Call translate for each sentence; BatchTranslator will handle opportunistic batching
                    translation_tasks = [
                        translator.translate(
                            s,
                            src_lang=l,
                            tgt_lang=request.tgt_lang,
                            beam_size=request.beam_size,
                            patience=request.patience,
                            length_penalty=request.length_penalty,
                            coverage_penalty=request.coverage_penalty,
                            repetition_penalty=request.repetition_penalty,
                            max_decoding_length=request.max_decoding_length,
                        )
                        for s in g_src
                    ]
                    results = await asyncio.gather(*translation_tasks)
                    for result_idx, original_idx in enumerate(i_list):
                        final_translations[original_idx] = results[result_idx]
                        final_models[original_idx] = translator.model_id
                except HTTPException as e:
                    # If a specific model is missing, we could either fail the whole batch
                    # or keep original text. Here we fail for consistency with previous behavior.
                    raise e
                except Exception as e:
                    logger.error(f"Error translating {l} to {request.tgt_lang}: {e}")
                    raise e

            tasks.append(process_group_task())

        if tasks:
            await asyncio.gather(*tasks)

        # 4. Prepare response: collapse to scalars when the input was a string.
        if isinstance(request.src, str):
            result = final_translations[0]
            src_lang_res = src_langs[0]
            src_lang_score_res = src_lang_scores[0]
            model_used_res = final_models[0]
        else:
            result = final_translations
            src_lang_res = src_langs
            src_lang_score_res = src_lang_scores
            model_used_res = final_models

        return TranslationResponse(
            translation=result,
            src_lang=src_lang_res,
            src_lang_score=src_lang_score_res,
            tgt_lang=request.tgt_lang,
            processing_time=time.time() - start_time,
            model_used=model_used_res,
        )

    except HTTPException:
        raise
    except Exception as e:
        logger.exception("Unexpected error in translate_endpoint")
        raise HTTPException(status_code=500, detail=str(e))
269
+
270
+
271
@api_router.post("/identify-language", response_model=DetectionResponse)
async def identify_language_endpoint(request: DetectionRequest):
    """Detect the language(s) of one string or a batch of strings."""
    if not langid_executor:
        raise HTTPException(
            status_code=503, detail="Language identification not initialized"
        )

    start_time = time.time()
    try:
        loop = asyncio.get_running_loop()
        # Offload detection to process pool to avoid GIL issues
        raw_results = await loop.run_in_executor(
            langid_executor, predict_worker, request.src, request.k, request.threshold
        )

        # Convert raw tuples to Pydantic models; nested lists for batched input
        if isinstance(request.src, str):
            results = [
                DetectionResult(lang=lang, score=score) for lang, score in raw_results
            ]
        else:
            results = [
                [
                    DetectionResult(lang=lang, score=score)
                    for lang, score in item_results
                ]
                for item_results in raw_results
            ]

        return DetectionResponse(
            results=results, processing_time=time.time() - start_time
        )
    except Exception as e:
        # NOTE(review): maps every failure (including bad input) to 500 without
        # logging — consider narrowing the exception types.
        raise HTTPException(status_code=500, detail=str(e))
305
+
306
+
307
@api_router.get("/models")
async def get_models():
    """List every discovered model together with its load status."""
    if not model_manager:
        raise HTTPException(status_code=503, detail="Model manager not initialized")
    return {"models": model_manager.list_available_models()}
312
+
313
+
314
@api_router.get("/languages")
async def get_languages():
    """Map each supported source language to its list of target languages."""
    if not model_manager:
        raise HTTPException(status_code=503, detail="Model manager not initialized")
    return model_manager.get_language_pairs()
319
+
320
+
321
@api_router.get("/health")
async def health_check():
    """Liveness probe: report resident models and the configured cache limit."""
    if model_manager:
        loaded = list(model_manager.models)
    else:
        loaded = []
    return {
        "status": "ok",
        "loaded_models": loaded,
        "max_models": settings.max_loaded_models,
    }
329
+
330
+
331
app.include_router(api_router)

# Serve static files for the GUI.
# html=True makes StaticFiles answer "/" with index.html.
static_dir = os.path.join(os.path.dirname(__file__), "gui", "static")
if os.path.exists(static_dir):
    app.mount("/", StaticFiles(directory=static_dir, html=True), name="static")
337
+
338
+
339
def start():
    """Entry point for the quickmt-serve CLI."""
    # Imported lazily so importing this module does not require uvicorn.
    import uvicorn

    uvicorn.run("quickmt.rest_server:app", host="0.0.0.0", port=8000, reload=False)
344
+
345
+
346
def start_gui():
    """Entry point for the quickmt-gui CLI."""
    # Lazy imports keep module import light; uvicorn is only needed here.
    import threading
    import time
    import webbrowser

    import uvicorn

    def open_browser():
        # Give uvicorn a moment to bind before pointing the browser at it.
        time.sleep(1.5)
        webbrowser.open("http://127.0.0.1:8000")

    threading.Thread(target=open_browser, daemon=True).start()
    uvicorn.run("quickmt.rest_server:app", host="0.0.0.0", port=8000, reload=False)
quickmt/settings.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Centralized configuration management using pydantic-settings.
2
+
3
+ This module provides a type-safe, centralized way to manage all configuration
4
+ settings for the quickmt library. Settings can be configured via:
5
+ - Environment variables (e.g., MAX_LOADED_MODELS=10)
6
+ - .env file in the project root
7
+ - Runtime modification of the global settings object
8
+
9
+ All environment variables are case-insensitive.
10
+ """
11
+
12
+ from typing import Optional
13
+ from pydantic_settings import BaseSettings, SettingsConfigDict
14
+
15
+
16
class Settings(BaseSettings):
    """Application settings with environment variable support.

    All settings can be overridden via environment variables.
    For example, to set max_loaded_models, use MAX_LOADED_MODELS=10
    """

    # Model Manager Settings
    max_loaded_models: int = 5
    """Maximum number of translation models to keep loaded in memory"""

    device: str = "cpu"
    """Device to use for inference: 'cpu', 'cuda', or 'auto'"""

    compute_type: str = "default"
    """CTranslate2 compute type: 'default', 'int8', 'int8_float16', 'int16', 'float16', 'float32'"""

    inter_threads: int = 1
    """Number of threads to use for inter-op parallelism (simultaneous translations)"""

    intra_threads: int = 4
    """Number of threads to use for intra-op parallelism (within each translation)"""

    # Batch Processing Settings
    max_batch_size: int = 32
    """Maximum batch size for translation requests"""

    batch_timeout_ms: int = 5
    """Timeout in milliseconds to wait for batching additional requests"""

    # Language Identification Settings
    langid_model_path: Optional[str] = None
    """Path to FastText language identification model. If None, uses default cache location"""

    langid_workers: int = 2
    """Number of worker processes for language identification"""

    # Translation Cache Settings
    translation_cache_size: int = 10000
    """Maximum number of translations to cache (LRU eviction)"""

    # pydantic-settings configuration: read from the environment and an
    # optional .env file; unknown environment variables are ignored.
    model_config = SettingsConfigDict(
        env_prefix="",
        case_sensitive=False,
        env_file=".env",
        env_file_encoding="utf-8",
        extra="ignore",
    )


# Global settings instance
# This can be imported and used throughout the application
# Settings can be modified at runtime: settings.max_loaded_models = 10
settings = Settings()
quickmt/translator.py ADDED
@@ -0,0 +1,390 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from abc import ABC, abstractmethod
2
+ from pathlib import Path
3
+ from time import time
4
+ from typing import List, Optional, Union
5
+
6
+ import ctranslate2
7
+ import sentencepiece
8
+ from blingfire import text_to_sentences
9
+ from pydantic import DirectoryPath, validate_call
10
+
11
+
12
class TranslatorABC(ABC):
    """Abstract base for quickmt translators: wraps a CTranslate2 Translator
    and handles sentence splitting/joining; subclasses provide tokenization."""

    def __init__(self, model_path: DirectoryPath, **kwargs):
        """Create quickmt translation object

        Args:
            model_path (DirectoryPath): Path to quickmt model folder
            **kwargs: CTranslate2 Translator arguments - see https://opennmt.net/CTranslate2/python/ctranslate2.Translator.html
        """
        self.model_path = Path(model_path)
        self.translator = ctranslate2.Translator(str(model_path), **kwargs)

    @staticmethod
    @validate_call
    def _sentence_split(src: List[str]):
        """Split sentences with Blingfire

        Args:
            src (List[str]): Input list of strings to split by sentences

        Returns:
            List[int], List[int], List[str]: List of input ids, list of paragraph ids and sentences
        """
        input_ids = []
        paragraph_ids = []
        sentences = []
        for idx, i in enumerate(src):
            for paragraph, j in enumerate(i.splitlines(keepends=True)):
                sents = text_to_sentences(j).splitlines()
                for sent in sents:
                    stripped_sent = sent.strip()
                    if len(stripped_sent) > 0:
                        # Very short fragments (<5 chars) are glued onto the
                        # previous sentence of the same input/paragraph to
                        # avoid degenerate translation inputs.
                        if (
                            len(stripped_sent) < 5
                            and len(paragraph_ids) > 0
                            and paragraph == paragraph_ids[-1]
                            and len(input_ids) > 0
                            and input_ids[-1] == idx
                        ):
                            sentences[-1] += " " + stripped_sent
                        else:
                            input_ids.append(idx)
                            paragraph_ids.append(paragraph)
                            sentences.append(stripped_sent)

        return input_ids, paragraph_ids, sentences

    @staticmethod
    @validate_call
    def _sentence_join(
        input_ids: List[int],
        paragraph_ids: List[int],
        sentences: List[str],
        paragraph_join_str: str = "\n",
        sent_join_str: str = " ",
        length: Optional[int] = None,
    ):
        """Sentence joiner

        Args:
            input_ids (List[int]): List of input IDs
            paragraph_ids (List[int]): List of paragraph IDs
            sentences (List[str]): List of sentences to join up by input and paragraph ids
            paragraph_join_str (str, optional): str to use to join paragraphs. Defaults to "\n".
            sent_join_str (str, optional): str to join up sentences. Defaults to " ".
            length (Optional[int], optional): Number of output slots; defaults to max(input_ids) + 1.

        Returns:
            List[str]: Joined up sentences
        """
        if not input_ids:
            return [""] * (length or 0)

        target_len = length if length is not None else (max(input_ids) + 1)
        ret = [""] * target_len
        last_paragraph = 0
        for idx, paragraph, text in zip(input_ids, paragraph_ids, sentences):
            if len(ret[idx]) > 0:
                # Same paragraph continues with a space; a new paragraph id
                # re-inserts the paragraph separator.
                if paragraph == last_paragraph:
                    ret[idx] += sent_join_str + text
                else:
                    ret[idx] += paragraph_join_str + text
                last_paragraph = paragraph
            else:
                ret[idx] = text
                last_paragraph = paragraph
        return ret

    @abstractmethod
    def tokenize(
        self,
        sentences: List[str],
        src_lang: Optional[str] = None,
        tgt_lang: Optional[str] = None,
    ): ...

    @abstractmethod
    def detokenize(
        self,
        sentences: List[List[str]],
        src_lang: Optional[str] = None,
        tgt_lang: Optional[str] = None,
    ): ...

    @abstractmethod
    def translate_batch(
        self,
        sentences: List[List[str]],
        src_lang: Optional[str] = None,
        tgt_lang: Optional[str] = None,
    ): ...

    @abstractmethod
    def unload(self): ...

    @validate_call
    def __call__(
        self,
        src: Union[str, List[str]],
        max_batch_size: int = 32,
        max_decoding_length: int = 256,
        beam_size: int = 2,
        patience: int = 1,
        length_penalty: float = 1.0,
        coverage_penalty: float = 0.0,
        repetition_penalty: float = 1.0,
        verbose: bool = False,
        src_lang: Union[None, str] = None,
        tgt_lang: Union[None, str] = None,
        **kwargs,
    ) -> Union[str, List[str]]:
        """Translate a list of strings with quickmt model

        Args:
            src (List[str]): Input list of strings to translate
            max_batch_size (int, optional): Maximum batch size, to constrain RAM utilization. Defaults to 32.
            beam_size (int, optional): CTranslate2 Beam size. Defaults to 2.
            patience (int, optional): CTranslate2 Patience. Defaults to 1.
            max_decoding_length (int, optional): Maximum length of translation
            **kwargs: Other CTranslate2 translate_batch args, see https://opennmt.net/CTranslate2/python/ctranslate2.Translator.html#ctranslate2.Translator.translate_batch

        Returns:
            Union[str, List[str]]: Translation of the input
        """
        # A scalar input produces a scalar output.
        if isinstance(src, str):
            return_string = True
            src = [src]
        else:
            return_string = False

        indices, paragraphs, sentences = self._sentence_split(src)

        if not sentences:
            return "" if return_string else [""] * len(src)

        if verbose:
            print(f"Split sentences: {sentences}")

        input_text = self.tokenize(sentences, src_lang=src_lang, tgt_lang=tgt_lang)
        if verbose:
            print(f"Tokenized input: {input_text}")

        t1 = time()
        results = self.translate_batch(
            input_text,
            beam_size=beam_size,
            patience=patience,
            length_penalty=length_penalty,
            coverage_penalty=coverage_penalty,
            repetition_penalty=repetition_penalty,
            max_decoding_length=max_decoding_length,
            max_batch_size=max_batch_size,
            src_lang=src_lang,
            tgt_lang=tgt_lang,
            **kwargs,
        )
        t2 = time()
        if verbose:
            print(f"Translation time: {t2 - t1}")

        # Keep only the best hypothesis per sentence.
        output_tokens = [i.hypotheses[0] for i in results]

        if verbose:
            print(f"Tokenized output: {output_tokens}")

        translated_sents = self.detokenize(
            output_tokens, src_lang=src_lang, tgt_lang=tgt_lang
        )

        # Reassemble sentences back into the original inputs/paragraphs.
        ret = self._sentence_join(
            indices, paragraphs, translated_sents, length=len(src)
        )

        if return_string:
            return ret[0]
        else:
            return ret

    @validate_call
    def translate_file(self, input_file: str, output_file: str, **kwargs) -> None:
        """Translate a file with a quickmt model

        Args:
            input_file (str): Path to plain-text file to translate
            output_file (str): Path to write the line-aligned translation to
        """
        with open(input_file, "rt") as myfile:
            src = myfile.readlines()

        # Remove newlines
        src = [i.strip() for i in src]

        # Translate
        mt = self(src, **kwargs)

        # Replace newlines to ensure output is the same number of lines
        mt = [i.replace("\n", "\t") for i in mt]

        with open(output_file, "wt") as myfile:
            myfile.write("".join([i + "\n" for i in mt]))

    @validate_call
    def translate_stream(
        self,
        src: Union[str, List[str]],
        max_batch_size: int = 32,
        max_decoding_length: int = 256,
        beam_size: int = 5,
        patience: int = 1,
        src_lang: Union[None, str] = None,
        tgt_lang: Union[None, str] = None,
        **kwargs,
    ):
        """Translate a list of strings with quickmt model, yielding one
        result dict per sentence as translations become available.

        Args:
            src (List[str]): Input list of strings to translate
            max_batch_size (int, optional): Maximum batch size, to constrain RAM utilization. Defaults to 32.
            beam_size (int, optional): CTranslate2 Beam size. Defaults to 5.
            patience (int, optional): CTranslate2 Patience. Defaults to 1.
            max_decoding_length (int, optional): Maximum length of translation
            **kwargs: Other CTranslate2 translate_batch args, see https://opennmt.net/CTranslate2/python/ctranslate2.Translator.html#ctranslate2.Translator.translate_batch
        """
        if isinstance(src, str):
            src = [src]

        indices, paragraphs, sentences = self._sentence_split(src)

        input_text = self.tokenize(sentences, src_lang=src_lang, tgt_lang=tgt_lang)

        translations_iterator = self.translator.translate_iterable(
            input_text,
            beam_size=beam_size,
            patience=patience,
            max_decoding_length=max_decoding_length,
            max_batch_size=max_batch_size,
            **kwargs,
        )

        for idx, para, sent, output in zip(
            indices, paragraphs, sentences, translations_iterator
        ):
            yield {
                "input_idx": idx,
                "sentence_idx": para,
                "input_text": sent,
                "translation": self.detokenize([output.hypotheses[0]])[0],
            }
+
278
+
279
class Translator(TranslatorABC):
    """Concrete quickmt translator backed by SentencePiece tokenizers."""

    def __init__(
        self,
        model_path: DirectoryPath,
        inter_threads: int = 1,
        intra_threads: int = 0,
        **kwargs,
    ):
        """Create quickmt translation object

        Args:
            model_path (DirectoryPath): Path to quickmt model folder
            inter_threads (int): Number of simultaneous translations
            intra_threads (int): Number of threads for each translation
            **kwargs: CTranslate2 Translator arguments - see https://opennmt.net/CTranslate2/python/ctranslate2.Translator.html
        """
        super().__init__(
            model_path,
            inter_threads=inter_threads,
            intra_threads=intra_threads,
            **kwargs,
        )
        joint_tokenizer_path = self.model_path / "joint.spm.model"
        if joint_tokenizer_path.exists():
            # Joint vocabulary: parse the SentencePiece model file once and
            # share the processor for both directions (encode/decode calls do
            # not mutate it), instead of loading the same file twice.
            shared_tokenizer = sentencepiece.SentencePieceProcessor(
                model_file=str(joint_tokenizer_path)
            )
            self.source_tokenizer = shared_tokenizer
            self.target_tokenizer = shared_tokenizer
        else:
            self.source_tokenizer = sentencepiece.SentencePieceProcessor(
                model_file=str(self.model_path / "src.spm.model")
            )
            self.target_tokenizer = sentencepiece.SentencePieceProcessor(
                model_file=str(self.model_path / "tgt.spm.model")
            )

    def __del__(self):
        # Best-effort release of native resources at GC time; unload() guards
        # against partially-initialized instances.
        self.unload()

    def tokenize(
        self,
        sentences: List[str],
        src_lang: Optional[str] = None,
        tgt_lang: Optional[str] = None,
    ):
        """Encode sentences to SentencePiece tokens, appending </s>.

        Language tags are accepted for interface compatibility but ignored
        unless explicitly handled by a subclass.
        """
        return [
            tokens + ["</s>"]
            for tokens in self.source_tokenizer.encode(sentences, out_type=str)
        ]

    def detokenize(
        self,
        sentences: List[List[str]],
        src_lang: Optional[str] = None,
        tgt_lang: Optional[str] = None,
    ):
        """Decode SentencePiece token sequences back into plain strings."""
        return self.target_tokenizer.decode(sentences)

    def unload(self):
        """Explicitly release CTranslate2 translator resources"""
        if hasattr(self, "translator"):
            del self.translator

    def translate_batch(
        self,
        input_text: List[List[str]],
        beam_size: int = 5,
        patience: int = 1,
        max_decoding_length: int = 256,
        max_batch_size: int = 32,
        disable_unk: bool = True,
        replace_unknowns: bool = False,
        length_penalty: float = 1.0,
        coverage_penalty: float = 0.0,
        repetition_penalty: float = 1.0,
        src_lang: Optional[str] = None,
        tgt_lang: Optional[str] = None,
        **kwargs,
    ):
        """Translate a list of strings

        Args:
            input_text (List[List[str]]): Input text to be translated
            beam_size (int, optional): Beam size for beam search. Defaults to 5.
            patience (int, optional): Stop beam search when `patience` beams finish. Defaults to 1.
            max_decoding_length (int, optional): Max decoding length for model. Defaults to 256.
            max_batch_size (int, optional): Max batch size. Reduce to limit RAM usage. Increase for faster speed. Defaults to 32.
            disable_unk (bool, optional): Disable generating unk token. Defaults to True.
            replace_unknowns (bool, optional): Replace unk tokens with src token that has the highest attention value. Defaults to False.
            length_penalty (float, optional): Length penalty. Defaults to 1.0.
            coverage_penalty (float, optional): Coverage penalty. Defaults to 0.0.
            repetition_penalty (float, optional): Repetition penalty. Defaults to 1.0.
            src_lang (str, optional): Source language. Only needed for multilingual models. Defaults to None.
            tgt_lang (str, optional): Target language. Only needed for multilingual models. Defaults to None.

        Returns:
            List[str]: Translated text
        """
        # src_lang/tgt_lang are accepted for interface parity but not
        # forwarded: this bilingual model needs no language tags.
        return self.translator.translate_batch(
            input_text,
            beam_size=beam_size,
            patience=patience,
            max_decoding_length=max_decoding_length,
            max_batch_size=max_batch_size,
            disable_unk=disable_unk,
            replace_unknowns=replace_unknowns,
            length_penalty=length_penalty,
            coverage_penalty=coverage_penalty,
            repetition_penalty=repetition_penalty,
            **kwargs,
        )
requirements-dev.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ pytest
2
+ pytest-asyncio
3
+ httpx
4
+ locust
5
+ sacrebleu
requirements.txt ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ blingfire
2
+ cachetools
3
+ fastapi
4
+ uvicorn[standard]
5
+ ctranslate2
6
+ sentencepiece
7
+ huggingface_hub
8
+ fasttext-wheel
9
+ orjson
10
+ uvloop
11
+ httptools
12
+ pydantic
13
+ pydantic-settings
14
+
tests/__init__.py ADDED
File without changes
tests/conftest.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pytest
2
+ import os
3
+ from typing import AsyncGenerator
4
+ from quickmt.rest_server import app
5
+ from httpx import AsyncClient
6
+
7
+
8
@pytest.fixture(scope="session")
def base_url() -> str:
    """Base URL of the server under test; override with TEST_BASE_URL."""
    return os.getenv("TEST_BASE_URL", "http://127.0.0.1:8000")
11
+
12
+
13
@pytest.fixture
async def client(base_url: str) -> AsyncGenerator[AsyncClient, None]:
    """Async HTTP client against the running server (60s timeout for model loads)."""
    async with AsyncClient(base_url=base_url, timeout=60.0) as client:
        yield client
tests/test_api.py ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pytest
2
+ import asyncio
3
+ from httpx import AsyncClient
4
+
5
+
6
@pytest.mark.asyncio
async def test_health_check(client: AsyncClient):
    """The health endpoint reports OK status and lists loaded models."""
    response = await client.get("/api/health")
    assert response.status_code == 200
    data = response.json()
    assert data["status"] == "ok"
    assert "loaded_models" in data
13
+
14
+
15
@pytest.mark.asyncio
async def test_get_models(client: AsyncClient):
    """/api/models returns a JSON object carrying a list of models."""
    response = await client.get("/api/models")
    assert response.status_code == 200
    data = response.json()
    assert "models" in data
    assert isinstance(data["models"], list)
22
+
23
+
24
@pytest.mark.asyncio
async def test_get_languages(client: AsyncClient):
    """/api/languages maps each source language to a list of targets."""
    response = await client.get("/api/languages")
    assert response.status_code == 200
    data = response.json()
    assert isinstance(data, dict)
    # Check structure if models exist
    if data:
        src = list(data.keys())[0]
        assert isinstance(data[src], list)
34
+
35
+
36
@pytest.mark.asyncio
async def test_translate_single(client: AsyncClient):
    """Translating a single string returns scalar metadata fields."""
    # First, find an available model
    models_res = await client.get("/api/models")
    models = models_res.json()["models"]
    if not models:
        pytest.skip("No models available in MODELS_DIR")

    model = models[0]
    payload = {
        "src": "Hello world",
        "src_lang": model["src_lang"],
        "tgt_lang": model["tgt_lang"],
    }

    response = await client.post("/api/translate", json=payload)
    assert response.status_code == 200
    data = response.json()
    assert "translation" in data
    assert "processing_time" in data
    assert data["src_lang"] == model["src_lang"]
    # Explicit src_lang skips detection, so the score is exactly 1.0.
    assert data["src_lang_score"] == 1.0
    assert data["tgt_lang"] == model["tgt_lang"]
    assert data["model_used"] == model["model_id"]
60
+
61
+
62
@pytest.mark.asyncio
async def test_translate_list(client: AsyncClient):
    """Translating a list returns lists aligned with the input order."""
    models_res = await client.get("/api/models")
    models = models_res.json()["models"]
    if not models:
        pytest.skip("No models available")

    model = models[0]
    payload = {
        "src": ["Hello", "World"],
        "src_lang": model["src_lang"],
        "tgt_lang": model["tgt_lang"],
    }

    response = await client.post("/api/translate", json=payload)
    assert response.status_code == 200
    data = response.json()
    assert isinstance(data["translation"], list)
    assert len(data["translation"]) == 2
    assert data["src_lang"] == [model["src_lang"], model["src_lang"]]
    assert data["src_lang_score"] == [1.0, 1.0]
    assert data["tgt_lang"] == model["tgt_lang"]
    assert data["model_used"] == [model["model_id"], model["model_id"]]
85
+
86
+
87
@pytest.mark.asyncio
async def test_dynamic_batching(client: AsyncClient):
    """Verify that multiple concurrent requests work correctly (triggering batching logic)."""
    models_res = await client.get("/api/models")
    models = models_res.json()["models"]
    if not models:
        pytest.skip("No models available")

    model = models[0]
    src, tgt = model["src_lang"], model["tgt_lang"]

    texts = [f"Sentence number {i}" for i in range(5)]
    tasks = []

    # Fire all requests without awaiting so they overlap server-side.
    for text in texts:
        payload = {"src": text, "src_lang": src, "tgt_lang": tgt}
        tasks.append(client.post("/api/translate", json=payload))

    responses = await asyncio.gather(*tasks)

    for response in responses:
        assert response.status_code == 200
        assert "translation" in response.json()
tests/test_auto_translate.py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import pytest
from httpx import AsyncClient


def _has_pair(models: list, src: str, tgt: str) -> bool:
    """Return True if the catalog lists a model for the src->tgt pair."""
    return any(m["src_lang"] == src and m["tgt_lang"] == tgt for m in models)


@pytest.mark.asyncio
async def test_auto_detect_src_lang(client: AsyncClient):
    """Verify that src_lang is auto-detected if missing."""
    # Ensure some models are available
    models_res = await client.get("/api/models")
    available_models = models_res.json()["models"]
    if not _has_pair(available_models, "fr", "en"):
        pytest.skip("fr-en model needed for this test")

    payload = {"src": "Bonjour tout le monde", "tgt_lang": "en"}
    response = await client.post("/api/translate", json=payload)
    assert response.status_code == 200
    data = response.json()
    assert "translation" in data
    assert data["src_lang"] == "fr"
    # Detection confidence is a probability, so it must lie in (0, 1].
    assert 0.0 < data["src_lang_score"] <= 1.0
    assert data["tgt_lang"] == "en"
    assert "quickmt/quickmt-fr-en" in data["model_used"]


@pytest.mark.asyncio
async def test_default_tgt_lang(client: AsyncClient):
    """Verify that tgt_lang defaults to 'en'."""
    models_res = await client.get("/api/models")
    available_models = models_res.json()["models"]
    if not _has_pair(available_models, "fr", "en"):
        pytest.skip("fr-en model needed for this test")

    payload = {"src": "Bonjour", "src_lang": "fr"}
    response = await client.post("/api/translate", json=payload)
    assert response.status_code == 200
    data = response.json()
    assert data["src_lang"] == "fr"
    # An explicitly provided src_lang is reported with full confidence.
    assert data["src_lang_score"] == 1.0
    assert data["tgt_lang"] == "en"
    assert "quickmt/quickmt-fr-en" in data["model_used"]


@pytest.mark.asyncio
async def test_mixed_language_batch(client: AsyncClient):
    """Verify that a batch with mixed languages is handled correctly."""
    models_res = await client.get("/api/models")
    available_models = models_res.json()["models"]

    for src, tgt in [("fr", "en"), ("es", "en")]:
        if not _has_pair(available_models, src, tgt):
            # Fixed: this was an f-string with no placeholders (extraneous f).
            pytest.skip("Mixed batch test needs both fr-en and es-en models")

    payload = {"src": ["Bonjour tout le monde", "Hola amigos"], "tgt_lang": "en"}
    response = await client.post("/api/translate", json=payload)
    assert response.status_code == 200
    data = response.json()
    assert isinstance(data["translation"], list)
    assert len(data["translation"]) == 2
    # Each item is routed to the model for its own detected language.
    assert data["src_lang"] == ["fr", "es"]
    assert len(data["src_lang_score"]) == 2
    assert all(0.0 < s <= 1.0 for s in data["src_lang_score"])
    assert data["tgt_lang"] == "en"
    assert "quickmt/quickmt-fr-en" in data["model_used"]
    assert "quickmt/quickmt-es-en" in data["model_used"]


@pytest.mark.asyncio
async def test_identity_translation(client: AsyncClient):
    """Verify that translation is skipped if src_lang == tgt_lang."""
    payload = {"src": "This is already English", "src_lang": "en", "tgt_lang": "en"}
    response = await client.post("/api/translate", json=payload)
    assert response.status_code == 200
    data = response.json()
    # The text passes through untouched and no translation model is used.
    assert data["translation"] == "This is already English"
    assert data["src_lang"] == "en"
    assert data["src_lang_score"] == 1.0
    assert data["tgt_lang"] == "en"
    assert data["model_used"] == "identity"


@pytest.mark.asyncio
async def test_auto_detect_mixed_identity(client: AsyncClient):
    """Verify mixed batch with some items needing translation and some remaining as-is."""
    models_res = await client.get("/api/models")
    available_models = models_res.json()["models"]
    if not _has_pair(available_models, "fr", "en"):
        pytest.skip("fr-en model needed for this test")

    payload = {"src": ["Bonjour", "Hello world"], "tgt_lang": "en"}
    response = await client.post("/api/translate", json=payload)
    assert response.status_code == 200
    data = response.json()
    assert len(data["translation"]) == 2
    assert data["src_lang"] == ["fr", "en"]
    assert len(data["src_lang_score"]) == 2
    # First should be auto-detected, second should be auto-detected (and high confidence)
    assert all(0.0 < s <= 1.0 for s in data["src_lang_score"])
    assert data["tgt_lang"] == "en"
    assert data["model_used"] == ["quickmt/quickmt-fr-en", "identity"]
tests/test_cache.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Simple test to demonstrate the translation cache functionality.
3
+ Run this to verify cache hits provide instant responses.
4
+ """
5
+
6
+ import asyncio
7
+ import time
8
+ from quickmt.manager import BatchTranslator
9
+ from quickmt.settings import settings
10
+
11
+
12
+ async def test_translation_cache():
13
+ print("=== Translation Cache Test ===\n")
14
+
15
+ # Create a mock BatchTranslator (would normally be created by ModelManager)
16
+ # For this test, we'll just verify the cache mechanism
17
+ print(f"Cache size configured: {settings.translation_cache_size}")
18
+
19
+ # Simulate cache behavior
20
+ from cachetools import LRUCache
21
+
22
+ cache = LRUCache(maxsize=settings.translation_cache_size)
23
+
24
+ # Test data
25
+ test_text = "Hello, world!"
26
+ src_lang = "en"
27
+ tgt_lang = "fr"
28
+ kwargs_tuple = tuple(sorted({"beam_size": 5, "patience": 1}.items()))
29
+
30
+ cache_key = (test_text, src_lang, tgt_lang, kwargs_tuple)
31
+
32
+ # First request - cache miss
33
+ print("\n1. First translation (cache miss):")
34
+ print(f" Key: {cache_key}")
35
+ if cache_key in cache:
36
+ print(" ✓ Cache HIT")
37
+ else:
38
+ print(" ✗ Cache MISS (expected)")
39
+ # Simulate translation and caching
40
+ cache[cache_key] = "Bonjour, monde!"
41
+ print(" → Cached result")
42
+
43
+ # Second request - cache hit
44
+ print("\n2. Repeated translation (cache hit):")
45
+ print(f" Key: {cache_key}")
46
+ if cache_key in cache:
47
+ print(" ✓ Cache HIT (instant!)")
48
+ print(f" → Result: {cache[cache_key]}")
49
+ else:
50
+ print(" ✗ Cache MISS (unexpected)")
51
+
52
+ # Different parameters - cache miss
53
+ different_kwargs = tuple(sorted({"beam_size": 10, "patience": 2}.items()))
54
+ different_key = (test_text, src_lang, tgt_lang, different_kwargs)
55
+
56
+ print("\n3. Same text, different parameters (cache miss):")
57
+ print(f" Key: {different_key}")
58
+ if different_key in cache:
59
+ print(" ✓ Cache HIT")
60
+ else:
61
+ print(" ✗ Cache MISS (expected - different params)")
62
+
63
+ print("\n✅ Cache test complete!")
64
+ print(f"Cache size: {len(cache)}/{settings.translation_cache_size}")
65
+
66
+
67
+ if __name__ == "__main__":
68
+ asyncio.run(test_translation_cache())
tests/test_identify_language.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import pytest
from httpx import AsyncClient


@pytest.mark.asyncio
async def test_identify_language_single(client: AsyncClient):
    """Verify single string language identification."""
    resp = await client.post(
        "/api/identify-language", json={"src": "Hello, how are you?", "k": 1}
    )
    assert resp.status_code == 200
    body = resp.json()
    assert "results" in body
    assert "processing_time" in body
    assert isinstance(body["results"], list)
    # FastText should identify this as English
    assert body["results"][0]["lang"] == "en"


@pytest.mark.asyncio
async def test_identify_language_batch(client: AsyncClient):
    """Verify batch language identification."""
    resp = await client.post(
        "/api/identify-language",
        json={"src": ["Bonjour tout le monde", "Hola amigos"], "k": 1},
    )
    assert resp.status_code == 200
    body = resp.json()
    # One ranked detection list per batch item.
    assert len(body["results"]) == 2
    assert body["results"][0][0]["lang"] == "fr"
    assert body["results"][1][0]["lang"] == "es"


@pytest.mark.asyncio
async def test_identify_language_threshold(client: AsyncClient):
    """Verify threshold filtering in the endpoint."""
    resp = await client.post(
        "/api/identify-language",
        json={"src": "This is definitely English", "k": 5, "threshold": 0.9},
    )
    assert resp.status_code == 200
    body = resp.json()
    # Only 'en' should probably be above 0.9
    assert len(body["results"]) == 1
    assert body["results"][0]["lang"] == "en"
tests/test_langid.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import pytest
from unittest.mock import MagicMock, patch

from quickmt.langid import LanguageIdentification


@pytest.fixture
def mock_fasttext():
    """Patch fasttext.load_model and yield the mocked model instance."""
    with patch("fasttext.load_model") as mock_load:
        mock_model = MagicMock()
        mock_load.return_value = mock_model

        # Mimic fasttext's batched return shape:
        # ([['__label__en', ...], ...], [[0.9, ...], ...])
        def mock_predict(items, k=1, threshold=0.0):
            labels = [["__label__en"] * k for _ in items]
            scores = [[0.9] * k for _ in items]
            return labels, scores

        mock_model.predict.side_effect = mock_predict
        yield mock_model


@pytest.fixture
def langid_model(mock_fasttext, tmp_path):
    """LanguageIdentification wired to the mocked fasttext backend."""
    # Create a dummy model file so the existence check passes
    model_path = tmp_path / "model.bin"
    model_path.write_text("dummy content")
    return LanguageIdentification(model_path)


def test_predict_single(langid_model, mock_fasttext):
    """A single string yields one (lang, score) list of length k."""
    result = langid_model.predict("Hello world")

    assert isinstance(result, list)
    assert len(result) == 1
    assert result[0] == ("en", 0.9)
    mock_fasttext.predict.assert_called_once_with(["Hello world"], k=1, threshold=0.0)


def test_predict_batch(langid_model, mock_fasttext):
    """A list input yields one ranked list of k candidates per item."""
    texts = ["Hello", "Bonjour"]
    results = langid_model.predict(texts, k=2)

    assert isinstance(results, list)
    assert len(results) == 2
    for r in results:
        assert len(r) == 2
        assert r[0] == ("en", 0.9)

    mock_fasttext.predict.assert_called_once_with(texts, k=2, threshold=0.0)


def test_predict_best_single(langid_model):
    """predict_best collapses a single input to its top language code."""
    result = langid_model.predict_best("Hello")
    assert result == "en"


def test_predict_best_batch(langid_model):
    """predict_best maps a list input to a list of top language codes."""
    results = langid_model.predict_best(["Hello", "World"])
    assert results == ["en", "en"]


def test_predict_threshold(langid_model, mock_fasttext):
    """predict_best returns None when no candidate survives the threshold."""
    # Configure mock to return nothing if threshold is high (simulated)
    def mock_predict_low_score(items, k=1, threshold=0.0):
        if threshold > 0.9:
            return [[] for _ in items], [[] for _ in items]
        return [["__label__en"] for _ in items], [[0.9] for _ in items]

    mock_fasttext.predict.side_effect = mock_predict_low_score

    result = langid_model.predict_best("Hello", threshold=0.95)
    assert result is None

    result = langid_model.predict_best("Hello", threshold=0.5)
    assert result == "en"
tests/test_langid_batch.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import pytest
from httpx import AsyncClient


@pytest.mark.asyncio
async def test_langid_batch(client: AsyncClient):
    """Verify that language identification works for a list of strings."""
    request_body = {"src": ["This is English text.", "Ceci est un texte français."]}
    resp = await client.post("/api/identify-language", json=request_body)
    assert resp.status_code == 200

    # Expect a list of lists of DetectionResult
    detections = resp.json()["results"]
    assert isinstance(detections, list)
    assert len(detections) == 2

    # First item: English
    assert len(detections[0]) >= 1
    assert detections[0][0]["lang"] == "en"

    # Second item: French
    assert len(detections[1]) >= 1
    assert detections[1][0]["lang"] == "fr"


@pytest.mark.asyncio
async def test_langid_newline_handling(client: AsyncClient):
    """Verify that inputs with newlines are handled gracefully (no 500 error)."""
    # Single string with newline
    resp = await client.post(
        "/api/identify-language", json={"src": "This text\nhas a newline."}
    )
    assert resp.status_code == 200
    assert resp.json()["results"][0]["lang"] == "en"

    # Batch with newlines
    resp = await client.post(
        "/api/identify-language", json={"src": ["Line 1\nLine 2", "Another\nline"]}
    )
    assert resp.status_code == 200
    assert len(resp.json()["results"]) == 2
tests/test_langid_path.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
from unittest.mock import patch

from quickmt.langid import ensure_model_exists, LanguageIdentification


def test_langid_default_path():
    """Verify that LanguageIdentification uses the XDG cache path by default."""
    # Patch out loading, downloading and filesystem checks so no real I/O
    # happens; we only inspect which path the loader receives.
    with patch("quickmt.langid.fasttext.load_model") as mock_load, \
            patch("quickmt.langid.urllib.request.urlretrieve"), \
            patch("pathlib.Path.exists") as mock_exists, \
            patch("pathlib.Path.mkdir"):

        # Simulate model cached and exists
        mock_exists.return_value = True

        LanguageIdentification(model_path=None)

        # Verify load_model was called with a path in the cache
        args, _ = mock_load.call_args
        loaded_path = str(args[0])

        expected_part = os.path.join(".cache", "fasttext_language_id", "lid.176.bin")
        assert expected_part in loaded_path

        # Old path should not be used
        assert "models/lid.176.ftz" not in loaded_path


def test_ensure_model_exists_path():
    """Verify ensure_model_exists resolves to cache path."""
    with patch("quickmt.langid.urllib.request.urlretrieve") as mock_retrieve, \
            patch("pathlib.Path.exists") as mock_exists, \
            patch("pathlib.Path.mkdir"):

        # Simulate model missing to trigger download logic path check
        mock_exists.return_value = False

        ensure_model_exists(None)

        # Check download target (second positional arg to urlretrieve)
        args, _ = mock_retrieve.call_args
        download_target = str(args[1])

        expected_part = os.path.join(".cache", "fasttext_language_id", "lid.176.bin")
        assert expected_part in download_target
tests/test_lru.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import pytest
from httpx import AsyncClient


@pytest.mark.asyncio
async def test_lru_eviction(client: AsyncClient):
    """
    Test that the server correctly unloads the least recently used model
    when MAX_LOADED_MODELS is exceeded.
    """
    # 1. Get available models
    catalog = (await client.get("/api/models")).json()["models"]

    # 2. Get MAX_LOADED_MODELS from health
    max_models = (await client.get("/api/health")).json()["max_models"]

    if len(catalog) <= max_models:
        pytest.skip(
            f"Not enough models in MODELS_DIR to test eviction (need > {max_models})"
        )

    # 3. Load max_models + 1 models sequentially
    requested_pairs = []
    for entry in catalog[: max_models + 1]:
        await client.post(
            "/api/translate",
            json={
                "src": "test",
                "src_lang": entry["src_lang"],
                "tgt_lang": entry["tgt_lang"],
            },
        )
        requested_pairs.append(f"{entry['src_lang']}-{entry['tgt_lang']}")

    # 4. Check currently loaded models
    loaded_now = (await client.get("/api/health")).json()["loaded_models"]

    # The first (least recently used) model should have been evicted
    assert requested_pairs[0] not in loaded_now
    assert len(loaded_now) == max_models

    # The most recently requested model should be there
    assert requested_pairs[-1] in loaded_now
tests/test_manager.py ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import pytest
from unittest.mock import MagicMock, patch

from quickmt.manager import ModelManager, BatchTranslator


@pytest.fixture
def mock_translator():
    """Patch quickmt.manager.Translator; yield the mocked model instance."""
    with patch("quickmt.manager.Translator") as mock:
        instance = MagicMock()
        mock.return_value = instance
        yield instance


@pytest.fixture
def mock_hf():
    """Patch Hugging Face Hub access (collection listing + snapshot download)."""
    with patch("quickmt.manager.snapshot_download") as mock_dl, \
            patch("quickmt.manager.HfApi") as mock_api:

        # Mock collection fetch with two language-pair model entries.
        coll = MagicMock()
        coll.items = [
            MagicMock(item_id="quickmt/quickmt-en-fr", item_type="model"),
            MagicMock(item_id="quickmt/quickmt-fr-en", item_type="model"),
        ]
        mock_api.return_value.get_collection.return_value = coll
        mock_dl.return_value = "/tmp/mock-model-path"

        yield mock_api, mock_dl


class TestBatchTranslator:
    @pytest.mark.asyncio
    async def test_translate_single(self, mock_translator):
        """translate() starts a worker lazily; stop_worker() tears it down."""
        bt = BatchTranslator("test-id", "/tmp/path")

        # Mock translator call
        mock_translator.return_value = "Hola"

        result = await bt.translate("Hello", src_lang="en", tgt_lang="es")
        assert result == "Hola"
        assert bt.worker_task is not None

        await bt.stop_worker()
        assert bt.worker_task is None


class TestModelManager:
    @pytest.mark.asyncio
    async def test_fetch_hf_models(self, mock_hf):
        """The HF collection is parsed into src/tgt language-pair entries."""
        mm = ModelManager(max_loaded=2, device="cpu")
        await mm.fetch_hf_models()

        assert len(mm.hf_collection_models) == 2
        assert mm.hf_collection_models[0]["src_lang"] == "en"
        assert mm.hf_collection_models[0]["tgt_lang"] == "fr"

    @pytest.mark.asyncio
    async def test_get_model_lazy_load(self, mock_hf, mock_translator):
        """First get_model() call downloads the model and registers it."""
        mm = ModelManager(max_loaded=2, device="cpu")
        await mm.fetch_hf_models()

        # This should trigger download and start worker
        bt = await mm.get_model("en", "fr")
        assert isinstance(bt, BatchTranslator)
        assert "en-fr" in mm.models
        assert bt.model_id == "quickmt/quickmt-en-fr"

    @pytest.mark.asyncio
    async def test_lru_eviction(self, mock_hf, mock_translator):
        """Loading past max_loaded evicts the least recently used model."""
        # Set max_loaded to 1 to trigger eviction immediately
        mm = ModelManager(max_loaded=1, device="cpu")
        await mm.fetch_hf_models()

        # Load first
        await mm.get_model("en", "fr")
        assert len(mm.models) == 1

        # Load second (should evict first)
        await mm.get_model("fr", "en")
        assert len(mm.models) == 1
        assert "fr-en" in mm.models
        assert "en-fr" not in mm.models

    @pytest.mark.asyncio
    async def test_get_model_cache_first(self, mock_hf, mock_translator):
        """get_model() tries the local HF cache before going online."""
        mock_api, mock_dl = mock_hf
        mm = ModelManager(max_loaded=2, device="cpu")
        await mm.fetch_hf_models()

        # Scenario 1: Local cache hit
        # Reset mock to track new calls
        mock_dl.reset_mock()
        mock_dl.return_value = "/tmp/mock-model-path"

        await mm.get_model("en", "fr")

        # Verify it tried local_files_only=True first
        assert mock_dl.call_count == 1
        args, kwargs = mock_dl.call_args
        assert kwargs.get("local_files_only") is True

    @pytest.mark.asyncio
    async def test_get_model_fallback(self, mock_hf, mock_translator):
        """On a local cache miss, get_model() retries with an online download."""
        mock_api, mock_dl = mock_hf
        mm = ModelManager(max_loaded=2, device="cpu")
        await mm.fetch_hf_models()

        # Scenario 2: Local cache miss, fallback to online
        # First call fails, second succeeds
        mock_dl.side_effect = [Exception("Not found locally"), "/tmp/mock-model-path"]

        await mm.get_model("fr", "en")

        assert mock_dl.call_count == 2
        # First call was local only
        args1, kwargs1 = mock_dl.call_args_list[0]
        assert kwargs1.get("local_files_only") is True
        # Second call was online (no local_files_only or False)
        args2, kwargs2 = mock_dl.call_args_list[1]
        assert not kwargs2.get("local_files_only")
tests/test_mixed_src.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import pytest
from httpx import AsyncClient


@pytest.mark.asyncio
async def test_explicit_mixed_languages(client: AsyncClient):
    """Verify explicit src_lang list for a mixed batch."""
    # Ensure needed models are available
    models_res = await client.get("/api/models")
    available_models = models_res.json()["models"]

    needed = [("fr", "en"), ("es", "en")]
    for src, tgt in needed:
        if not any(
            m["src_lang"] == src and m["tgt_lang"] == tgt for m in available_models
        ):
            # Fixed: was an f-string with no placeholders (extraneous f prefix).
            pytest.skip("Mixed batch test needs both fr-en and es-en models")

    # Explicitly specify languages for each input
    payload = {"src": ["Bonjour", "Hola"], "src_lang": ["fr", "es"], "tgt_lang": "en"}

    response = await client.post("/api/translate", json=payload)
    assert response.status_code == 200
    data = response.json()

    # Each input is routed to the model matching its declared language.
    assert data["src_lang"] == ["fr", "es"]
    assert data["model_used"] == ["quickmt/quickmt-fr-en", "quickmt/quickmt-es-en"]
    assert len(data["translation"]) == 2


@pytest.mark.asyncio
async def test_src_lang_length_mismatch(client: AsyncClient):
    """Verify 422 error when src and src_lang lengths differ."""
    payload = {
        "src": ["Hello", "World"],
        "src_lang": ["en"],  # Only 1 language for 2 inputs
        "tgt_lang": "es",
    }

    response = await client.post("/api/translate", json=payload)
    assert response.status_code == 422
    assert (
        "src_lang list length must match src list length" in response.json()["detail"]
    )


@pytest.mark.asyncio
async def test_src_lang_list_with_single_src(client: AsyncClient):
    """Verify single src string with single-item src_lang list is not allowed or handled gracefully."""
    # The Pydantic model allows this, but our logic checks lengths.
    # If src is str, src_list has len 1. If src_lang is list, it must have len 1.

    # Needs a model
    models_res = await client.get("/api/models")
    models = models_res.json()["models"]
    if not models:
        pytest.skip("No models available")
    model = models[0]

    payload = {
        "src": "Hello",
        "src_lang": [model["src_lang"]],
        "tgt_lang": model["tgt_lang"],
    }

    response = await client.post("/api/translate", json=payload)
    assert response.status_code == 200
    data = response.json()
    # A scalar src yields a scalar src_lang in the response.
    assert data["src_lang"] == model["src_lang"]
tests/test_robustness.py ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import pytest
import asyncio
from httpx import AsyncClient


@pytest.mark.asyncio
async def test_model_not_found(client: AsyncClient):
    """Verify that requesting a non-existent model returns 404."""
    payload = {
        "src": "Hello",
        "src_lang": "en",
        "tgt_lang": "zz",  # Non-existent
    }
    response = await client.post("/api/translate", json=payload)
    assert response.status_code == 404
    assert "not found" in response.json()["detail"]


@pytest.mark.asyncio
async def test_empty_input_string(client: AsyncClient):
    """Verify handling of empty string input."""
    models_res = await client.get("/api/models")
    models = models_res.json()["models"]
    if not models:
        # Guard: indexing models[0] on an empty catalog raised IndexError.
        pytest.skip("No translation models available")
    model = models[0]

    payload = {"src": "", "src_lang": model["src_lang"], "tgt_lang": model["tgt_lang"]}
    response = await client.post("/api/translate", json=payload)
    assert response.status_code == 200
    assert response.json()["translation"] == ""


@pytest.mark.asyncio
async def test_empty_input_list(client: AsyncClient):
    """Verify handling of empty list input."""
    models_res = await client.get("/api/models")
    models = models_res.json()["models"]
    if not models:
        pytest.skip("No translation models available")
    model = models[0]

    payload = {"src": [], "src_lang": model["src_lang"], "tgt_lang": model["tgt_lang"]}
    response = await client.post("/api/translate", json=payload)
    assert response.status_code == 200
    assert response.json()["translation"] == []


@pytest.mark.asyncio
async def test_invalid_input_type(client: AsyncClient):
    """Verify that invalid input types are rejected by Pydantic."""
    payload = {
        "src": 123,  # Should be string or list of strings
        "src_lang": "en",
        "tgt_lang": "fr",
    }
    response = await client.post("/api/translate", json=payload)
    assert response.status_code == 422  # Unprocessable Entity (Validation Error)


@pytest.mark.asyncio
async def test_concurrent_model_load(client: AsyncClient):
    """
    Test that concurrent requests for a new model are handled correctly
    (only one load should happen, others wait on the event).
    """
    # Find a model that is definitely NOT loaded
    health_res = await client.get("/api/health")
    loaded = health_res.json()["loaded_models"]

    models_res = await client.get("/api/models")
    available = models_res.json()["models"]

    target_model = None
    for m in available:
        lang_pair = f"{m['src_lang']}-{m['tgt_lang']}"
        if lang_pair not in loaded:
            target_model = m
            break

    if not target_model:
        pytest.skip("No unloaded models available to test concurrent loading")

    # Send multiple concurrent requests for the same new model
    payload = {
        "src": "Concurrent test",
        "src_lang": target_model["src_lang"],
        "tgt_lang": target_model["tgt_lang"],
    }

    tasks = [client.post("/api/translate", json=payload) for _ in range(3)]
    responses = await asyncio.gather(*tasks)

    for resp in responses:
        assert resp.status_code == 200
        assert "translation" in resp.json()


@pytest.mark.asyncio
async def test_parameter_overrides(client: AsyncClient):
    """Verify that request-level parameters are respected."""
    models_res = await client.get("/api/models")
    models = models_res.json()["models"]
    if not models:
        pytest.skip("No translation models available")
    model = models[0]

    payload = {
        "src": "This is a test of parameter overrides.",
        "src_lang": model["src_lang"],
        "tgt_lang": model["tgt_lang"],
        "beam_size": 1,
        "max_decoding_length": 5,
    }

    response = await client.post("/api/translate", json=payload)
    assert response.status_code == 200
    # With max_decoding_length=5, the translation should be very short
    # Note: tokens != words, but usually it translates to 1-3 words
    trans = response.json()["translation"]
    # We can't strictly assert word count but we can check it's non-empty
    assert len(trans) > 0


@pytest.mark.asyncio
async def test_large_batch_processing(client: AsyncClient):
    """Verify processing of a batch larger than MAX_BATCH_SIZE."""
    models_res = await client.get("/api/models")
    models = models_res.json()["models"]
    if not models:
        pytest.skip("No translation models available")
    model = models[0]

    # Send 50 sentences (default MAX_BATCH_SIZE is 32)
    sentences = [f"This is sentence {i}" for i in range(50)]
    payload = {
        "src": sentences,
        "src_lang": model["src_lang"],
        "tgt_lang": model["tgt_lang"],
    }

    response = await client.post("/api/translate", json=payload)
    assert response.status_code == 200
    data = response.json()
    assert len(data["translation"]) == 50
tests/test_threading_config.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import pytest
from unittest.mock import patch
from quickmt.manager import ModelManager


@pytest.mark.asyncio
async def test_threading_config_propagation():
    """Verify that inter_threads and intra_threads are passed to CTranslate2."""

    # Mocking components to prevent actual model loading
    with patch("quickmt.manager.Translator") as translator_cls:
        expected_inter, expected_intra = 2, 4

        manager = ModelManager(
            max_loaded=1,
            device="cpu",
            compute_type="int8",
            inter_threads=expected_inter,
            intra_threads=expected_intra,
        )

        # Inject a dummy model to collection
        manager.hf_collection_models = [
            {"model_id": "test/model", "src_lang": "en", "tgt_lang": "fr"}
        ]

        # Mock snapshot_download and trigger a model load
        with patch("quickmt.manager.snapshot_download", return_value="/tmp/model"):
            await manager.get_model("en", "fr")

        # Verify Translator was instantiated with correct parameters
        _, kwargs = translator_cls.call_args
        assert kwargs["inter_threads"] == expected_inter
        assert kwargs["intra_threads"] == expected_intra
        assert kwargs["device"] == "cpu"
        assert kwargs["compute_type"] == "int8"
tests/test_translation_quality.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import pytest
from httpx import AsyncClient
import sacrebleu

# 10 Diverse English -> French pairs
EN_FR_PAIRS = [
    ("Hello world", "Bonjour le monde"),
    ("The cat sits on the mat.", "Le chat est assis sur le tapis."),
    ("I would like a coffee, please.", "Je voudrais un café, s'il vous plaît."),
    ("Where is the nearest train station?", "Où est la gare la plus proche ?"),
    (
        "Artificial intelligence is fascinating.",
        "L'intelligence artificielle est fascinante.",
    ),
    ("Can you help me translate this?", "Pouvez-vous m'aider à traduire ceci ?"),
    ("It is raining today.", "Il pleut aujourd'hui."),
    ("Programming is fun.", "La programmation est amusante."),
    ("I am learning French.", "J'apprends le français."),
    ("Have a nice day.", "Bonne journée."),
]

# 10 Diverse French -> English pairs
FR_EN_PAIRS = [
    ("Bonjour tout le monde", "Hello everyone"),
    ("La vie est belle", "Life is beautiful"),
    ("Je suis fatigué", "I am tired"),
    ("Quelle heure est-il ?", "What time is it?"),
    ("J'aime manger des croissants", "I like eating croissants"),
    ("Merci beaucoup", "Thank you very much"),
    ("À bientôt", "See you soon"),
    ("Le livre est sur la table", "The book is on the table"),
    ("Je ne comprends pas", "I do not understand"),
    ("C'est magnifique", "It is magnificent"),
]


async def translate_batch(client, texts, src, tgt):
    """POST one batch to the translate endpoint; return [] on any failure."""
    resp = await client.post(
        "/api/translate", json={"src": texts, "src_lang": src, "tgt_lang": tgt}
    )
    return resp.json()["translation"] if resp.status_code == 200 else []


@pytest.mark.asyncio
async def test_quality_en_fr(client: AsyncClient):
    """Assess translation quality for English to French."""
    # Check model availability first
    catalog = (await client.get("/api/models")).json()["models"]
    if not any(m["src_lang"] == "en" and m["tgt_lang"] == "fr" for m in catalog):
        pytest.skip("en-fr model needed for this test")

    sources = [pair[0] for pair in EN_FR_PAIRS]
    # sacrebleu expects list of lists of references
    references = [[pair[1] for pair in EN_FR_PAIRS]]

    hypotheses = await translate_batch(client, sources, "en", "fr")
    assert len(hypotheses) == len(sources)

    # Corpus-level metrics over the whole batch
    bleu = sacrebleu.corpus_bleu(hypotheses, references)
    chrf = sacrebleu.corpus_chrf(hypotheses, references)

    print(f"\nEN->FR Quality: BLEU={bleu.score:.2f}, CHRF={chrf.score:.2f}")

    # Assert minimum quality (adjust baselines based on model capability)
    # Generic models should at least get > 10 BLEU on simple sentences
    assert bleu.score > 40.0
    assert chrf.score > 70.0


@pytest.mark.asyncio
async def test_quality_fr_en(client: AsyncClient):
    """Assess translation quality for French to English."""
    # Check model availability first
    catalog = (await client.get("/api/models")).json()["models"]
    if not any(m["src_lang"] == "fr" and m["tgt_lang"] == "en" for m in catalog):
        pytest.skip("fr-en model needed for this test")

    sources = [pair[0] for pair in FR_EN_PAIRS]
    references = [[pair[1] for pair in FR_EN_PAIRS]]

    hypotheses = await translate_batch(client, sources, "fr", "en")
    assert len(hypotheses) == len(sources)

    bleu = sacrebleu.corpus_bleu(hypotheses, references)
    chrf = sacrebleu.corpus_chrf(hypotheses, references)

    print(f"\nFR->EN Quality: BLEU={bleu.score:.2f}, CHRF={chrf.score:.2f}")

    # Assert minimum quality
    assert bleu.score > 40.0
    assert chrf.score > 70.0
tests/test_translator.py ADDED
@@ -0,0 +1,161 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pytest
2
+ from pathlib import Path
3
+ from unittest.mock import MagicMock, patch
4
+ from quickmt.translator import Translator, TranslatorABC
5
+
6
+
7
+ # Mock objects
8
@pytest.fixture
def mock_ctranslate2():
    """Patch the ctranslate2.Translator class for the duration of a test."""
    with patch("ctranslate2.Translator") as mocked_cls:
        yield mocked_cls
12
+
13
+
14
@pytest.fixture
def mock_sentencepiece():
    """Patch the sentencepiece.SentencePieceProcessor class during a test."""
    with patch("sentencepiece.SentencePieceProcessor") as mocked_cls:
        yield mocked_cls
18
+
19
+
20
@pytest.fixture
def temp_model_dir(tmp_path):
    """Create a dummy model directory with required files."""
    directory = tmp_path / "dummy-model"
    directory.mkdir()
    # Separate source/target SPM files mark this as a non-joint-vocab model.
    for spm_name in ("src.spm.model", "tgt.spm.model"):
        (directory / spm_name).write_text("dummy")
    return directory
28
+
29
+
30
@pytest.fixture
def translator_instance(temp_model_dir, mock_ctranslate2, mock_sentencepiece):
    """Build a Translator whose ct2/SPM backends are fully mocked."""
    return Translator(temp_model_dir)
33
+
34
+
35
class TestTranslatorABC:
    """Unit tests for the sentence split/join helpers on TranslatorABC."""

    def test_sentence_split(self):
        """Splitting flattens inputs to sentences plus index bookkeeping."""
        paragraphs = ["Hello world. This is a test.", "Another paragraph."]
        doc_ids, para_ids, sents = TranslatorABC._sentence_split(paragraphs)

        # Two sentences come from the first input, one from the second.
        assert len(sents) == 3
        assert doc_ids == [0, 0, 1]
        assert para_ids == [0, 0, 0]
        assert sents == ["Hello world.", "This is a test.", "Another paragraph."]

    def test_sentence_join(self):
        """Joining reverses the split, regrouping sentences by input id."""
        joined = TranslatorABC._sentence_join(
            [0, 0, 1],
            [0, 0, 0],
            ["Hello world.", "This is a test.", "Another paragraph."],
        )
        assert joined == ["Hello world. This is a test.", "Another paragraph."]

    def test_sentence_join_empty(self):
        """Joining nothing yields `length` empty strings."""
        result = TranslatorABC._sentence_join([], [], [], length=5)
        assert result == ["", "", "", "", ""]
59
+
60
+
61
class TestTranslator:
    """Tests for Translator behaviour with mocked ctranslate2/sentencepiece."""

    def test_init_joint_tokens(self, tmp_path, mock_ctranslate2, mock_sentencepiece):
        """A single joint SPM file should back both source and target tokenizers."""
        model_dir = tmp_path / "joint-model"
        model_dir.mkdir()
        (model_dir / "joint.spm.model").write_text("dummy")

        # The instance itself is not needed (fixes unused-variable F841);
        # constructing it is what triggers the tokenizer loads we assert on.
        Translator(model_dir)
        assert mock_sentencepiece.call_count == 2
        # Verify it used the joint model for both
        args, kwargs = mock_sentencepiece.call_args_list[0]
        assert "joint.spm.model" in kwargs["model_file"]

    def test_tokenize(self, translator_instance):
        """tokenize() appends the </s> sentinel to each encoded sentence."""
        translator_instance.source_tokenizer.encode.return_value = [
            ["token1", "token2"]
        ]
        result = translator_instance.tokenize(["Hello"])
        assert result == [["token1", "token2", "</s>"]]
        translator_instance.source_tokenizer.encode.assert_called_with(
            ["Hello"], out_type=str
        )

    def test_detokenize(self, translator_instance):
        """detokenize() delegates directly to the target tokenizer's decode."""
        translator_instance.target_tokenizer.decode.return_value = ["Hello"]
        result = translator_instance.detokenize([["token1", "token2"]])
        assert result == ["Hello"]
        translator_instance.target_tokenizer.decode.assert_called_with(
            [["token1", "token2"]]
        )

    def test_unload(self, translator_instance):
        """unload() must tolerate the backend translator already being gone."""
        del translator_instance.translator
        # Should not raise
        translator_instance.unload()

    def test_call_full_pipeline(self, translator_instance):
        """__call__ chains tokenize -> translate_batch -> detokenize."""
        # Mock the steps
        with (
            patch.object(Translator, "tokenize") as mock_tok,
            patch.object(Translator, "translate_batch") as mock_trans,
            patch.object(Translator, "detokenize") as mock_detok,
        ):
            mock_tok.return_value = [["tok"]]
            mock_res = MagicMock()
            mock_res.hypotheses = [["hypo"]]
            mock_trans.return_value = [mock_res]
            mock_detok.return_value = ["Translated sentence."]

            result = translator_instance("Source text.")
            assert result == "Translated sentence."

            mock_tok.assert_called_once()
            mock_trans.assert_called_once()
            mock_detok.assert_called_once()

    def test_translate_stream(self, translator_instance):
        """translate_stream yields one detokenized dict per input sentence."""
        translator_instance.translator.translate_iterable = MagicMock(
            return_value=[
                MagicMock(hypotheses=[["hypo1"]]),
                MagicMock(hypotheses=[["hypo2"]]),
            ]
        )

        with (
            patch.object(Translator, "tokenize") as mock_tok,
            patch.object(Translator, "detokenize") as mock_detok,
        ):
            mock_tok.return_value = [["tok1"], ["tok2"]]
            # Echo the first token so each yield is distinguishable.
            mock_detok.side_effect = lambda x: [f"Detok {x[0][0]}"]

            results = list(translator_instance.translate_stream(["Sent 1.", "Sent 2."]))
            assert len(results) == 2
            assert results[0]["translation"] == "Detok hypo1"
            assert results[1]["translation"] == "Detok hypo2"

    def test_translate_file(self, translator_instance, tmp_path):
        """translate_file writes one translated line per input line."""
        input_file = tmp_path / "input.txt"
        output_file = tmp_path / "output.txt"
        input_file.write_text("Line 1\nLine 2")

        with patch.object(Translator, "__call__") as mock_call:
            mock_call.return_value = ["Trans 1", "Trans 2"]
            translator_instance.translate_file(str(input_file), str(output_file))

        content = output_file.read_text()
        assert content == "Trans 1\nTrans 2\n"

    def test_translate_batch(self, translator_instance):
        """Named decode options and extra kwargs must reach ctranslate2."""
        translator_instance.translate_batch(
            [["tok"]],
            beam_size=10,
            patience=2,
            max_batch_size=16,
            num_hypotheses=5,  # kwargs
        )
        translator_instance.translator.translate_batch.assert_called_once()
        args, kwargs = translator_instance.translator.translate_batch.call_args
        assert kwargs["beam_size"] == 10
        assert kwargs["patience"] == 2
        assert kwargs["max_batch_size"] == 16
        assert kwargs["num_hypotheses"] == 5