Spaces:

wchen22
/

touchdown-compression-classifier

Running

App Files Files Community

wchen22 committed 2 days ago

Commit

63799d5

verified ·

1 Parent(s): b2c269a

Upload folder using huggingface_hub

Browse files

Files changed (5) hide show

Dockerfile +13 -0
README.md +27 -3
app.py +101 -0
deploy.sh +17 -0
requirements.txt +4 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,13 @@

+FROM python:3.11-slim
+# Docker Spaces run the container as UID 1000, so create that user up front
+# and keep every path the app writes to under its home directory.
+RUN useradd --create-home --uid 1000 user
+# HF_HOME alone locates the Hugging Face caches (TRANSFORMERS_CACHE is
+# deprecated). /data only exists when persistent storage is attached to the
+# Space, so the cache lives in the user's home, which is always writable.
+ENV PYTHONUNBUFFERED=1 \
+    HOME=/home/user \
+    HF_HOME=/home/user/.cache/huggingface
+WORKDIR /app
+COPY requirements.txt /app/requirements.txt
+RUN pip install --no-cache-dir -r /app/requirements.txt
+COPY --chown=user app.py /app/app.py
+USER user
+EXPOSE 7860
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]

README.md CHANGED Viewed

@@ -1,10 +1,34 @@
 ---
 title: Touchdown Compression Classifier
-emoji: 🔥
 colorFrom: blue
 colorTo: green
 sdk: docker
-pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
 title: Touchdown Compression Classifier
+emoji: 🚀
 colorFrom: blue
 colorTo: green
 sdk: docker
+app_port: 7860
 ---
+# Touchdown Compression Classifier
+Free CPU Hugging Face Space scaffold for the managed prompt compression API.
+Phase 1 serves deterministic deletion-only compression with receipts. The
+planned classifier backbone is `microsoft/deberta-v3-small`; the API reports
+classifier status honestly until a trained KEEP/DROP head or ONNX export is
+mounted.
+Endpoints:
+- `GET /health`
+- `POST /v1/compress`
+- `POST /v1/classify`
+Deploy:
+```bash
+hf auth login
+./deploy.sh <namespace>/touchdown-compression-classifier
+```
+The machine you deploy from must be logged in to Hugging Face before the Space
+can be hosted. A free CPU Space is enough for this scaffold; production traffic
+should move to paid or owned infrastructure after validation.

app.py ADDED Viewed

	@@ -0,0 +1,101 @@

+from __future__ import annotations
+import re
+import time
+from typing import Any
+from fastapi import FastAPI, HTTPException
+CLASSIFIER_MODEL = "microsoft/deberta-v3-small"
+app = FastAPI(title="Touchdown Compression Classifier", version="0.1.0")
+def _tokens(text: str) -> list[dict[str, Any]]:
+    started = time.perf_counter()
+    try:
+        from transformers import AutoTokenizer
+        tokenizer = AutoTokenizer.from_pretrained(CLASSIFIER_MODEL)
+        encoded = tokenizer(
+            text,
+            add_special_tokens=False,
+            return_offsets_mapping=True,
+        )
+        pieces = tokenizer.convert_ids_to_tokens(encoded["input_ids"])
+        offsets = encoded["offset_mapping"]
+        token_source = "deberta_tokenizer"
+    except Exception:
+        matches = list(re.finditer(r"\S+|\s+", text))
+        pieces = [match.group(0) for match in matches]
+        offsets = [(match.start(), match.end()) for match in matches]
+        token_source = "regex_fallback"
+    elapsed = round((time.perf_counter() - started) * 1000.0, 3)
+    return [
+        {
+            "token": piece,
+            "label": "KEEP",
+            "score": 1.0,
+            "start": int(offsets[i][0]),
+            "end": int(offsets[i][1]),
+            "source": token_source,
+            "classifier_latency_ms": elapsed if i == 0 else 0.0,
+        }
+        for i, piece in enumerate(pieces)
+    ]
+@app.get("/health")
+def health() -> dict[str, Any]:
+    return {
+        "status": "ok",
+        "classifier_model": CLASSIFIER_MODEL,
+        "classifier_status": "tokenizer_loaded_no_trained_keep_drop_head",
+        "phase": "rules_api_first",
+    }
+@app.post("/v1/classify")
+def classify(payload: dict[str, Any]) -> dict[str, Any]:
+    text = payload.get("input")
+    if not isinstance(text, str):
+        raise HTTPException(status_code=400, detail="input must be a string")
+    return {
+        "model": CLASSIFIER_MODEL,
+        "task": "token_classification_keep_drop",
+        "status": "tokenizer_only_until_trained_head",
+        "labels": _tokens(text),
+        "note": (
+            "This free Space scaffold loads the DeBERTa tokenizer and returns "
+            "KEEP-only labels until a trained KEEP/DROP head or ONNX export is "
+            "mounted. Do not treat it as a production compressor."
+        ),
+    }
+@app.post("/v1/compress")
+def compress(payload: dict[str, Any]) -> dict[str, Any]:
+    # The Space can be called immediately, but managed compression logic lives
+    # in the package service. This endpoint is deliberately conservative until
+    # the package is vendored into the Space image.
+    text = payload.get("input")
+    if not isinstance(text, str):
+        raise HTTPException(status_code=400, detail="input must be a string")
+    started = time.perf_counter()
+    return {
+        "output": text,
+        "original_input_tokens": max(1, round(len(text) / 4.0)),
+        "output_tokens": max(1, round(len(text) / 4.0)),
+        "tokens_saved": 0,
+        "compression_percentage": 0.0,
+        "receipt": {
+            "protected_spans_checked": len(payload.get("protected_spans") or []),
+            "protected_spans_missing": 0,
+            "code_blocks_preserved": True,
+            "decision": "no_op_classifier_scaffold",
+            "compressor_latency_ms": round((time.perf_counter() - started) * 1000.0, 3),
+            "classifier_model": CLASSIFIER_MODEL,
+            "classifier_status": "tokenizer_loaded_no_trained_keep_drop_head",
+        },
+    }

deploy.sh ADDED Viewed

	@@ -0,0 +1,17 @@

+#!/usr/bin/env bash
+set -euo pipefail
+SPACE_ID="${1:-}"
+if [[ -z "${SPACE_ID}" ]]; then
+  echo "usage: $0 <namespace/touchdown-compression-classifier>" >&2
+  exit 2
+fi
+if ! command -v hf >/dev/null 2>&1; then
+  echo "hf CLI not found. Install with: curl -LsSf https://hf.co/cli/install.sh | bash -s" >&2
+  exit 2
+fi
+hf auth whoami >/dev/null
+hf repos create "${SPACE_ID}" --type space --space-sdk docker --exist-ok
+hf upload "${SPACE_ID}" "$(dirname "$0")" --type space

requirements.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+fastapi>=0.110
+uvicorn[standard]>=0.29
+transformers>=4.40
+sentencepiece>=0.2