Upload folder using huggingface_hub
Browse files- Dockerfile +13 -0
- README.md +27 -3
- app.py +101 -0
- deploy.sh +17 -0
- requirements.txt +4 -0
Dockerfile
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.11-slim

# HF_HOME is the root of the Hugging Face cache. TRANSFORMERS_CACHE is
# deprecated in recent transformers releases but kept for older ones.
ENV PYTHONUNBUFFERED=1 \
    HF_HOME=/data/.cache/huggingface \
    TRANSFORMERS_CACHE=/data/.cache/huggingface/transformers

# Docker Spaces run the container as UID 1000, and /data is only provided when
# persistent storage is attached. Create the cache directory up front and hand
# it to that user so the tokenizer download can write its cache on a free Space
# instead of failing and silently triggering the regex fallback in app.py.
RUN useradd --create-home --uid 1000 user \
    && mkdir -p /data/.cache/huggingface/transformers \
    && chown -R user:user /data

WORKDIR /app
COPY requirements.txt /app/requirements.txt
RUN pip install --no-cache-dir -r /app/requirements.txt
COPY app.py /app/app.py

# Match the runtime user of the Space when the image is run locally.
USER user

EXPOSE 7860
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
|
README.md
CHANGED
|
@@ -1,10 +1,34 @@
|
|
| 1 |
---
|
| 2 |
title: Touchdown Compression Classifier
|
| 3 |
-
emoji:
|
| 4 |
colorFrom: blue
|
| 5 |
colorTo: green
|
| 6 |
sdk: docker
|
| 7 |
-
|
| 8 |
---
|
| 9 |
|
| 10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
title: Touchdown Compression Classifier
|
| 3 |
+
emoji: 🚀
|
| 4 |
colorFrom: blue
|
| 5 |
colorTo: green
|
| 6 |
sdk: docker
|
| 7 |
+
app_port: 7860
|
| 8 |
---
|
| 9 |
|
| 10 |
+
# Touchdown Compression Classifier
|
| 11 |
+
|
| 12 |
+
Free CPU Hugging Face Space scaffold for the managed prompt compression API.
|
| 13 |
+
|
| 14 |
+
Phase 1 serves deterministic deletion-only compression with receipts. The
|
| 15 |
+
planned classifier backbone is `microsoft/deberta-v3-small`; the API reports
|
| 16 |
+
classifier status honestly until a trained KEEP/DROP head or ONNX export is
|
| 17 |
+
mounted.
|
| 18 |
+
|
| 19 |
+
Endpoints:
|
| 20 |
+
|
| 21 |
+
- `GET /health`
|
| 22 |
+
- `POST /v1/compress`
|
| 23 |
+
- `POST /v1/classify`
|
| 24 |
+
|
| 25 |
+
Deploy:
|
| 26 |
+
|
| 27 |
+
```bash
|
| 28 |
+
hf auth login
|
| 29 |
+
./deploy.sh <namespace>/touchdown-compression-classifier
|
| 30 |
+
```
|
| 31 |
+
|
| 32 |
+
The machine running `deploy.sh` must be logged in to Hugging Face before the Space can be
|
| 33 |
+
hosted. Free CPU Spaces are enough for this scaffold; production traffic should
|
| 34 |
+
move to paid or owned infrastructure after validation.
|
app.py
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import re
|
| 4 |
+
import time
|
| 5 |
+
from typing import Any
|
| 6 |
+
|
| 7 |
+
from fastapi import FastAPI, HTTPException
|
| 8 |
+
|
| 9 |
+
# Hugging Face model id; its tokenizer is loaded by _tokens() and the id is
# echoed back in every endpoint response.
CLASSIFIER_MODEL = "microsoft/deberta-v3-small"
|
| 10 |
+
|
| 11 |
+
# ASGI application object; the Dockerfile's uvicorn command serves it as "app:app".
app = FastAPI(title="Touchdown Compression Classifier", version="0.1.0")
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
# Alternating runs of non-whitespace and whitespace, so the fallback pieces
# cover every character of the input.
_FALLBACK_PIECES = re.compile(r"\S+|\s+")

# Holds the tokenizer under the key "tokenizer" once a load has succeeded.
_TOKENIZER_CACHE: dict[str, Any] = {}


def _load_tokenizer() -> Any:
    """Return the tokenizer for ``CLASSIFIER_MODEL``, reusing a prior successful load.

    Loading on every request repeats the import and the ``from_pretrained``
    call, so the tokenizer is kept after the first success. Failures are not
    cached: the exception propagates and the next call tries again.

    Raises:
        Exception: whatever the ``transformers`` import or
            ``AutoTokenizer.from_pretrained`` raises.
    """
    tokenizer = _TOKENIZER_CACHE.get("tokenizer")
    if tokenizer is None:
        from transformers import AutoTokenizer

        tokenizer = AutoTokenizer.from_pretrained(CLASSIFIER_MODEL)
        _TOKENIZER_CACHE["tokenizer"] = tokenizer
    return tokenizer


def _tokens(text: str) -> list[dict[str, Any]]:
    """Split *text* into pieces and label every piece ``KEEP``.

    The DeBERTa tokenizer is used when it can be loaded and applied; on any
    failure the function logs the reason and falls back to a regex split into
    whitespace and non-whitespace runs.

    Args:
        text: The string to tokenize.

    Returns:
        One dict per piece with the keys ``token``, ``label`` (always
        ``"KEEP"``), ``score`` (always ``1.0``), ``start``/``end`` character
        offsets, ``source`` (``"deberta_tokenizer"`` or ``"regex_fallback"``)
        and ``classifier_latency_ms``. The elapsed time is reported on the
        first entry only; every other entry carries ``0.0``.
    """
    started = time.perf_counter()
    try:
        tokenizer = _load_tokenizer()
        encoded = tokenizer(
            text,
            add_special_tokens=False,
            return_offsets_mapping=True,
        )
        pieces = tokenizer.convert_ids_to_tokens(encoded["input_ids"])
        offsets = encoded["offset_mapping"]
        token_source = "deberta_tokenizer"
    except Exception:
        # Best-effort by design: the endpoint must still answer without the
        # tokenizer, but the reason is logged instead of being swallowed.
        import logging

        logging.getLogger(__name__).warning(
            "DeBERTa tokenizer unavailable; using regex fallback",
            exc_info=True,
        )
        matches = list(_FALLBACK_PIECES.finditer(text))
        pieces = [match.group(0) for match in matches]
        offsets = [(match.start(), match.end()) for match in matches]
        token_source = "regex_fallback"

    elapsed = round((time.perf_counter() - started) * 1000.0, 3)
    return [
        {
            "token": piece,
            "label": "KEEP",
            "score": 1.0,
            "start": int(span[0]),
            "end": int(span[1]),
            "source": token_source,
            "classifier_latency_ms": elapsed if i == 0 else 0.0,
        }
        for i, (piece, span) in enumerate(zip(pieces, offsets))
    ]
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
# Liveness endpoint: returns a fixed status payload naming the configured
# classifier model. It does not inspect the tokenizer; every value below is a
# constant. (Documented with comments so the route's OpenAPI description is
# unchanged.)
@app.get("/health")
def health() -> dict[str, Any]:
    report: dict[str, Any] = dict(
        status="ok",
        classifier_model=CLASSIFIER_MODEL,
        classifier_status="tokenizer_loaded_no_trained_keep_drop_head",
        phase="rules_api_first",
    )
    return report
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
# Token classification endpoint. Expects a JSON object whose "input" value is
# a string; anything else is rejected with HTTP 400. The per-token labels come
# from _tokens(), which marks every token KEEP. (Documented with comments so
# the route's OpenAPI description is unchanged.)
@app.post("/v1/classify")
def classify(payload: dict[str, Any]) -> dict[str, Any]:
    candidate = payload.get("input")
    if isinstance(candidate, str):
        disclaimer = (
            "This free Space scaffold loads the DeBERTa tokenizer and returns "
            "KEEP-only labels until a trained KEEP/DROP head or ONNX export is "
            "mounted. Do not treat it as a production compressor."
        )
        response: dict[str, Any] = {
            "model": CLASSIFIER_MODEL,
            "task": "token_classification_keep_drop",
            "status": "tokenizer_only_until_trained_head",
            "labels": _tokens(candidate),
            "note": disclaimer,
        }
        return response
    raise HTTPException(status_code=400, detail="input must be a string")
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
# No-op compression endpoint. Expects a JSON object with a string "input" and
# an optional "protected_spans" list; returns the input unchanged together
# with a receipt. Raises HTTP 400 when "input" is not a string or when
# "protected_spans" is present, non-empty and not a list.
@app.post("/v1/compress")
def compress(payload: dict[str, Any]) -> dict[str, Any]:
    # The Space can be called immediately, but managed compression logic lives
    # in the package service. This endpoint is deliberately conservative until
    # the package is vendored into the Space image.
    text = payload.get("input")
    if not isinstance(text, str):
        raise HTTPException(status_code=400, detail="input must be a string")

    # Falsy values (missing, null, empty) still mean "no protected spans".
    # Anything else must be a list: len() on a number would raise TypeError
    # (HTTP 500), and on a string or dict it would count characters or keys.
    protected_spans = payload.get("protected_spans") or []
    if not isinstance(protected_spans, list):
        raise HTTPException(
            status_code=400, detail="protected_spans must be a list"
        )

    started = time.perf_counter()
    # Rough estimate of four characters per token, never below one. The output
    # is the input, so both token counts share this value.
    estimated_tokens = max(1, round(len(text) / 4.0))
    return {
        "output": text,
        "original_input_tokens": estimated_tokens,
        "output_tokens": estimated_tokens,
        "tokens_saved": 0,
        "compression_percentage": 0.0,
        "receipt": {
            "protected_spans_checked": len(protected_spans),
            "protected_spans_missing": 0,
            "code_blocks_preserved": True,
            "decision": "no_op_classifier_scaffold",
            "compressor_latency_ms": round((time.perf_counter() - started) * 1000.0, 3),
            "classifier_model": CLASSIFIER_MODEL,
            "classifier_status": "tokenizer_loaded_no_trained_keep_drop_head",
        },
    }
|
deploy.sh
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env bash
# Create the target Docker Space if needed and upload this script's directory.
#
# Usage: ./deploy.sh <namespace>/<space-name>
# Exits 2 when the argument or the hf CLI is missing; any failing hf command
# aborts the script because of `set -e`.
set -euo pipefail

# Print the given message on stderr and stop with exit status 2.
fail() {
  echo "$1" >&2
  exit 2
}

SPACE_ID="${1:-}"
[[ -n "${SPACE_ID}" ]] || fail "usage: $0 <namespace/touchdown-compression-classifier>"

command -v hf >/dev/null 2>&1 \
  || fail "hf CLI not found. Install with: curl -LsSf https://hf.co/cli/install.sh | bash -s"

# Presumably exits non-zero when no login is active, which stops the script
# before anything is created or uploaded.
hf auth whoami >/dev/null
hf repos create "${SPACE_ID}" --type space --space-sdk docker --exist-ok
hf upload "${SPACE_ID}" "$(dirname "$0")" --type space
|
requirements.txt
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fastapi>=0.110
uvicorn[standard]>=0.29
transformers>=4.40
sentencepiece>=0.2
# Needed to convert the SentencePiece-based DeBERTa-v3 tokenizer into a fast
# tokenizer; app.py requests return_offsets_mapping, which only fast
# tokenizers support.
protobuf>=3.20
|