caliby / file_utils.py
Justine Yuan
Caliby HuggingFace example
3beba17
"""File path helpers, ZIP operations, and CSV export."""
import re
import tempfile
import zipfile
from pathlib import Path
import pandas as pd
def _get_file_path(f):
    """Resolve an upload-like object to a ``Path``.

    The input is inspected in this order:

    1. a plain string is used as the path directly;
    2. an object exposing a ``path`` attribute contributes that attribute;
    3. a dict containing a ``"path"`` key contributes that entry;
    4. anything else is converted with ``str()``.
    """
    if isinstance(f, str):
        location = f
    elif hasattr(f, "path"):
        location = f.path
    elif isinstance(f, dict) and "path" in f:
        location = f["path"]
    else:
        # Last resort: rely on the object's string form (covers Path objects).
        location = str(f)
    return Path(location)
def _sanitize_download_stem(stem: str) -> str:
    """Return *stem* reduced to characters safe for a download filename.

    Every run of characters other than ASCII letters, digits, ``.``, ``_``
    and ``-`` collapses to a single underscore; leading and trailing
    ``.``, ``_`` and ``-`` are then trimmed. If nothing remains, the
    fallback name ``"caliby"`` is returned.
    """
    collapsed = re.sub(r"[^A-Za-z0-9._-]+", "_", stem)
    trimmed = collapsed.strip("._-")
    if trimmed:
        return trimmed
    return "caliby"
def _make_named_download_path(stem: str, suffix: str) -> str:
    """Build a path for a download file inside a newly created temp directory.

    A fresh directory (prefix ``caliby_download_``) is created on every call.
    The returned path is that directory joined with the sanitized *stem*
    followed by *suffix*. The file itself is not created here.
    """
    # Create the directory first, then compose the filename inside it.
    scratch_dir = tempfile.mkdtemp(prefix="caliby_download_")
    filename = f"{_sanitize_download_stem(stem)}{suffix}"
    return str(Path(scratch_dir, filename))
def _get_results_stem(df: pd.DataFrame) -> str:
    """Derive a download filename stem from the first row's ``Sample`` value.

    A trailing ``_sample<digits>`` marker is removed from the sample name and
    the remainder is sanitized for use in a filename.

    Returns:
        The sanitized stem, or ``"caliby"`` when *df* has no ``Sample``
        column or contains no rows.
    """
    # Guard the row lookup: ``df.iloc[0]`` raises IndexError on a frame that
    # has the column but zero rows, so fall back to the default name instead.
    if "Sample" not in df.columns or df.empty:
        return "caliby"
    sample_name = str(df.iloc[0]["Sample"])
    return _sanitize_download_stem(re.sub(r"_sample\d+$", "", sample_name))
def _copy_uploaded_files(pdb_files: list, tmpdir: Path) -> list[str]:
    """Copy each uploaded file into *tmpdir* and return the copies' paths.

    Every copy keeps only the base name of its source, so uploads that share
    a base name are written to the same destination (the later one wins) and
    that destination appears once per upload in the result.

    Returns:
        Destination paths as strings, in the order of *pdb_files*.
    """
    copied: list[str] = []
    for upload in pdb_files:
        source = _get_file_path(upload)
        destination = tmpdir / source.name
        payload = source.read_bytes()
        destination.write_bytes(payload)
        copied.append(str(destination))
    return copied
def _write_zip_from_paths(paths: list[str], download_stem: str, suffix: str) -> str | None:
    """Bundle the given files into a deflate-compressed ZIP archive.

    Each file is stored under its base name only, so the archive is flat.

    Returns:
        The path of the new archive, or ``None`` when *paths* is empty
        (in which case nothing is created).
    """
    if not paths:
        return None

    archive_path = _make_named_download_path(download_stem, suffix)
    with zipfile.ZipFile(archive_path, mode="w", compression=zipfile.ZIP_DEFLATED) as archive:
        for member in paths:
            archive.write(member, arcname=Path(member).name)
    return archive_path
def _write_zip_from_dir(directory: Path, download_stem: str, suffix: str) -> str:
    """Archive every regular file beneath *directory* into a ZIP file.

    The tree is walked recursively and each file is stored under its path
    relative to *directory*, using deflate compression. Entries that are not
    regular files get no archive entry of their own.

    Returns:
        The path of the new archive.
    """
    archive_path = _make_named_download_path(download_stem, suffix)
    with zipfile.ZipFile(archive_path, mode="w", compression=zipfile.ZIP_DEFLATED) as archive:
        # Lazy filter: each entry is checked right before it is written.
        regular_files = (entry for entry in directory.rglob("*") if entry.is_file())
        for entry in regular_files:
            archive.write(entry, arcname=entry.relative_to(directory))
    return archive_path
def _df_to_csv(df: pd.DataFrame | None) -> str | None:
    """Write *df* to a CSV download file without the index column.

    The filename is the stem derived from the frame's results followed by
    ``_results.csv``.

    Returns:
        The path of the written CSV, or ``None`` when *df* is ``None`` or
        empty (in which case nothing is written).
    """
    has_rows = df is not None and not df.empty
    if has_rows:
        csv_path = _make_named_download_path(_get_results_stem(df), "_results.csv")
        df.to_csv(csv_path, index=False)
        return csv_path
    return None