test_model / script.py
eduardosanchez's picture
Upload script.py
1406ddd verified
"""Submission script for the WMT25 MULR linguistic-reasoning competition.
This is what participants put in their submitted Hugging Face MODEL repo. The HF
Competitions backend:
1. downloads the hidden test dataset to /tmp/data,
2. runs `python script.py` in your repo (no internet, no tokens),
3. uploads the `submission.csv` you write here for scoring.
You never see the test data. You only get /tmp/data/test.csv with columns:
id, prompt
and you must write `submission.csv` with columns:
id, prediction
Every row must have a non-empty prediction (empty/NaN is rejected).
Put your model weights in this same repo (downloads are blocked at run time) and load
them below. The placeholder model just returns "UNKNOWN" for every item.
"""
import os
import re
import pandas as pd
# Path of the hidden test CSV (columns: id, prompt). The competition backend
# downloads it to /tmp/data before this script is run.
TEST_PATH = "/tmp/data/test.csv"
# Output file (columns: id, prediction). It must be written to the working
# directory, which is where the backend picks it up for scoring.
SUBMISSION_PATH = "submission.csv"
# Fallback prediction used whenever no real answer is available, because empty
# or NaN predictions are rejected. Pandas treats tokens like "N/A", "NA",
# "null", "None" as NaN when reading a CSV; "UNKNOWN" is not one of them, so it
# is safe to write.
EMPTY_PLACEHOLDER = "UNKNOWN"
def extract_answer(model_output: str) -> str:
    """Extract the final answer from raw model output.

    The prompt asks the model to put its answer in square brackets, so the
    content of the last non-empty ``[...]`` span is returned (innermost span
    when brackets are nested, surrounding whitespace stripped). If the output
    contains no bracket pair at all, the last non-blank line is used instead.

    Args:
        model_output: Decoded text generated by the model.

    Returns:
        The extracted answer, or ``EMPTY_PLACEHOLDER`` when ``model_output``
        is not a string or no non-empty answer can be found.
    """
    if not isinstance(model_output, str):
        return EMPTY_PLACEHOLDER

    # The negated character class already matches newlines, so multi-line
    # answers are captured without needing re.DOTALL.
    bracketed = re.findall(r"\[([^\[\]]*)\]", model_output)
    if bracketed:
        # Walk backwards and take the first span with real content, so that a
        # trailing empty "[]" does not mask an earlier bracketed answer.
        stripped = (span.strip() for span in reversed(bracketed))
        answer = next((span for span in stripped if span), "")
    else:
        # No brackets at all: fall back to the last non-blank line.
        non_blank = [ln.strip() for ln in model_output.splitlines() if ln.strip()]
        answer = non_blank[-1] if non_blank else ""

    return answer or EMPTY_PLACEHOLDER
# --- Load your model ONCE here -------------------------------------------------
# from transformers import AutoModelForCausalLM, AutoTokenizer
# tok = AutoTokenizer.from_pretrained(".") # weights bundled in this repo
# model = AutoModelForCausalLM.from_pretrained(".", device_map="auto")
def solve(prompt: str) -> str:
    """Produce the prediction string for a single puzzle prompt.

    This is a placeholder: it ignores ``prompt`` and always returns
    ``EMPTY_PLACEHOLDER``. Replace the body with real inference; the
    commented-out template below shows one way to do it once the model and
    tokenizer are loaded at module level.

    Args:
        prompt: The text of one test item.

    Returns:
        The answer to submit for this item.
    """
    # Inference template (greedy decoding, decode only the newly generated
    # tokens, then pull the bracketed answer out of the raw text):
    # inputs = tok(prompt, return_tensors="pt").to(model.device)
    # out = model.generate(**inputs, max_new_tokens=64, do_sample=False)
    # raw = tok.decode(out[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True)
    # return extract_answer(raw)
    return EMPTY_PLACEHOLDER
def main() -> None:
    """Run ``solve`` over the hidden test set and write the submission CSV.

    Reads ``TEST_PATH`` with every column as ``str`` and default NA parsing
    disabled (so ids and prompts are kept verbatim), calls ``solve`` once per
    row, and writes ``id, prediction`` rows to ``SUBMISSION_PATH``.

    Raises:
        ValueError: If the test file lacks an ``id`` or ``prompt`` column,
            contains duplicate ids, or a prediction ends up empty.
    """
    test = pd.read_csv(TEST_PATH, dtype=str, keep_default_na=False)

    # Fail with a clear message instead of a KeyError in the middle of the loop.
    missing = [col for col in ("id", "prompt") if col not in test.columns]
    if missing:
        raise ValueError(f"{TEST_PATH} is missing required column(s): {missing}")

    # Check ids before inference so a bad test file does not waste a full run.
    # Explicit raises are used rather than `assert`, which `python -O` strips.
    ids = test["id"].tolist()
    if len(set(ids)) != len(ids):
        raise ValueError("duplicate ids in submission")

    predictions = []
    for prompt in test["prompt"]:
        pred = solve(prompt)
        # str(None) == "None" and str(float("nan")) == "nan" are non-empty, but
        # pandas parses both back as NaN when a CSV is read with default
        # settings, and NaN predictions are rejected. Treat them as missing.
        if pred is None or (isinstance(pred, float) and pred != pred):
            text = ""
        else:
            text = str(pred).strip()
        predictions.append(text or EMPTY_PLACEHOLDER)

    # Only reachable if EMPTY_PLACEHOLDER itself is blank; kept as a last guard.
    if not all(predictions):
        raise ValueError("predictions must be non-empty")

    # One prediction per test row, in test order, so the ids match by construction.
    submission = pd.DataFrame(
        {"id": ids, "prediction": predictions},
        columns=["id", "prediction"],
    )
    submission.to_csv(SUBMISSION_PATH, index=False)
    print(f"Wrote {SUBMISSION_PATH} with {len(submission)} rows")


# The backend runs `python script.py`; do nothing when merely imported.
if __name__ == "__main__":
    main()