wchen22 committed on
Commit
63799d5
·
verified ·
1 Parent(s): b2c269a

Upload folder using huggingface_hub

Browse files
Files changed (5) hide show
  1. Dockerfile +13 -0
  2. README.md +27 -3
  3. app.py +101 -0
  4. deploy.sh +17 -0
  5. requirements.txt +4 -0
Dockerfile ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.11-slim
2
+
3
+ ENV PYTHONUNBUFFERED=1 \
4
+ HF_HOME=/data/.cache/huggingface \
5
+ TRANSFORMERS_CACHE=/data/.cache/huggingface/transformers
6
+
7
+ WORKDIR /app
8
+ COPY requirements.txt /app/requirements.txt
9
+ RUN pip install --no-cache-dir -r /app/requirements.txt
10
+ COPY app.py /app/app.py
11
+
12
+ EXPOSE 7860
13
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -1,10 +1,34 @@
1
  ---
2
  title: Touchdown Compression Classifier
3
- emoji: 🔥
4
  colorFrom: blue
5
  colorTo: green
6
  sdk: docker
7
- pinned: false
8
  ---
9
 
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  title: Touchdown Compression Classifier
3
+ emoji: 🚀
4
  colorFrom: blue
5
  colorTo: green
6
  sdk: docker
7
+ app_port: 7860
8
  ---
9
 
10
+ # Touchdown Compression Classifier
11
+
12
+ Free CPU Hugging Face Space scaffold for the managed prompt compression API.
13
+
14
+ Phase 1 serves deterministic deletion-only compression with receipts. The
15
+ planned classifier backbone is `microsoft/deberta-v3-small`; the API reports
16
+ classifier status honestly until a trained KEEP/DROP head or ONNX export is
17
+ mounted.
18
+
19
+ Endpoints:
20
+
21
+ - `GET /health`
22
+ - `POST /v1/compress`
23
+ - `POST /v1/classify`
24
+
25
+ Deploy:
26
+
27
+ ```bash
28
+ hf auth login
29
+ ./deploy.sh <namespace>/touchdown-compression-classifier
30
+ ```
31
+
32
+ The machine running the deploy must be logged into Hugging Face before this can be
33
+ hosted. Free CPU Spaces are enough for this scaffold; production traffic should
34
+ move to paid or owned infrastructure after validation.
app.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import re
4
+ import time
5
+ from typing import Any
6
+
7
+ from fastapi import FastAPI, HTTPException
8
+
9
+ CLASSIFIER_MODEL = "microsoft/deberta-v3-small"
10
+
11
+ app = FastAPI(title="Touchdown Compression Classifier", version="0.1.0")
12
+
13
+
14
+ def _tokens(text: str) -> list[dict[str, Any]]:
15
+ started = time.perf_counter()
16
+ try:
17
+ from transformers import AutoTokenizer
18
+
19
+ tokenizer = AutoTokenizer.from_pretrained(CLASSIFIER_MODEL)
20
+ encoded = tokenizer(
21
+ text,
22
+ add_special_tokens=False,
23
+ return_offsets_mapping=True,
24
+ )
25
+ pieces = tokenizer.convert_ids_to_tokens(encoded["input_ids"])
26
+ offsets = encoded["offset_mapping"]
27
+ token_source = "deberta_tokenizer"
28
+ except Exception:
29
+ matches = list(re.finditer(r"\S+|\s+", text))
30
+ pieces = [match.group(0) for match in matches]
31
+ offsets = [(match.start(), match.end()) for match in matches]
32
+ token_source = "regex_fallback"
33
+
34
+ elapsed = round((time.perf_counter() - started) * 1000.0, 3)
35
+ return [
36
+ {
37
+ "token": piece,
38
+ "label": "KEEP",
39
+ "score": 1.0,
40
+ "start": int(offsets[i][0]),
41
+ "end": int(offsets[i][1]),
42
+ "source": token_source,
43
+ "classifier_latency_ms": elapsed if i == 0 else 0.0,
44
+ }
45
+ for i, piece in enumerate(pieces)
46
+ ]
47
+
48
+
49
@app.get("/health")
def health() -> dict[str, Any]:
    """Return a static liveness payload naming the configured classifier.

    The status strings are fixed constants; nothing is probed at call time.
    """
    return dict(
        status="ok",
        classifier_model=CLASSIFIER_MODEL,
        classifier_status="tokenizer_loaded_no_trained_keep_drop_head",
        phase="rules_api_first",
    )
57
+
58
+
59
@app.post("/v1/classify")
def classify(payload: dict[str, Any]) -> dict[str, Any]:
    """Tokenize ``payload["input"]`` and return per-token KEEP labels.

    Args:
        payload: Request body; its ``"input"`` entry must be a string.

    Returns:
        A dict with the model name, task, status, the token labels produced
        by ``_tokens``, and a note describing the scaffold's limitations.

    Raises:
        HTTPException: 400 when ``"input"`` is absent or not a string.
    """
    source_text = payload.get("input")
    if isinstance(source_text, str):
        disclaimer = (
            "This free Space scaffold loads the DeBERTa tokenizer and "
            "returns KEEP-only labels until a trained KEEP/DROP head or "
            "ONNX export is mounted. "
            "Do not treat it as a production compressor."
        )
        response: dict[str, Any] = {
            "model": CLASSIFIER_MODEL,
            "task": "token_classification_keep_drop",
            "status": "tokenizer_only_until_trained_head",
        }
        response["labels"] = _tokens(source_text)
        response["note"] = disclaimer
        return response
    raise HTTPException(status_code=400, detail="input must be a string")
75
+
76
+
77
@app.post("/v1/compress")
def compress(payload: dict[str, Any]) -> dict[str, Any]:
    """Return the input unchanged together with a no-op compression receipt.

    Args:
        payload: Request body. ``"input"`` must be a string.
            ``"protected_spans"`` is optional; when present and non-empty it
            must be a list, and only its length is reported.

    Returns:
        A dict echoing the input as ``output``, a rough token estimate
        (one token per four characters, minimum 1) for both input and
        output, zero savings, and a ``receipt`` describing the no-op.

    Raises:
        HTTPException: 400 when ``"input"`` is not a string or when
            ``"protected_spans"`` is a non-empty value that is not a list.
    """
    # The Space can be called immediately, but managed compression logic lives
    # in the package service. This endpoint is deliberately conservative until
    # the package is vendored into the Space image.
    text = payload.get("input")
    if not isinstance(text, str):
        raise HTTPException(status_code=400, detail="input must be a string")

    # Reject malformed spans with a 400 instead of letting len() raise a
    # TypeError (HTTP 500) or silently counting characters of a string.
    protected_spans = payload.get("protected_spans") or []
    if not isinstance(protected_spans, list):
        raise HTTPException(
            status_code=400, detail="protected_spans must be a list"
        )

    started = time.perf_counter()
    # Output equals input, so one estimate serves both token counts.
    estimated_tokens = max(1, round(len(text) / 4.0))
    return {
        "output": text,
        "original_input_tokens": estimated_tokens,
        "output_tokens": estimated_tokens,
        "tokens_saved": 0,
        "compression_percentage": 0.0,
        "receipt": {
            "protected_spans_checked": len(protected_spans),
            "protected_spans_missing": 0,
            "code_blocks_preserved": True,
            "decision": "no_op_classifier_scaffold",
            "compressor_latency_ms": round(
                (time.perf_counter() - started) * 1000.0, 3
            ),
            "classifier_model": CLASSIFIER_MODEL,
            "classifier_status": "tokenizer_loaded_no_trained_keep_drop_head",
        },
    }
deploy.sh ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+
4
+ SPACE_ID="${1:-}"
5
+ if [[ -z "${SPACE_ID}" ]]; then
6
+ echo "usage: $0 <namespace/touchdown-compression-classifier>" >&2
7
+ exit 2
8
+ fi
9
+
10
+ if ! command -v hf >/dev/null 2>&1; then
11
+ echo "hf CLI not found. Install with: curl -LsSf https://hf.co/cli/install.sh | bash -s" >&2
12
+ exit 2
13
+ fi
14
+
15
+ hf auth whoami >/dev/null
16
+ hf repos create "${SPACE_ID}" --type space --space-sdk docker --exist-ok
17
+ hf upload "${SPACE_ID}" "$(dirname "$0")" --type space
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ fastapi>=0.110
2
+ uvicorn[standard]>=0.29
3
+ transformers>=4.40
4
+ sentencepiece>=0.2