Image Segmentation
Transformers
English
clipseg
segmentation
construction
drywall
quality-assurance
text-conditioned
binary-mask
Instructions to use youngPhilosopher/drywall-qa-clipseg with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use youngPhilosopher/drywall-qa-clipseg with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("image-segmentation", model="youngPhilosopher/drywall-qa-clipseg")
```

```python
# Load model directly
from transformers import AutoModel

model = AutoModel.from_pretrained("youngPhilosopher/drywall-qa-clipseg", dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
| """Standalone single-image inference for CLIPSeg.""" | |
| import argparse | |
| from pathlib import Path | |
| import numpy as np | |
| import torch | |
| import yaml | |
| from PIL import Image | |
| from src.model.clipseg_wrapper import load_model_and_processor | |
| from src.train import get_device | |
# Base directory for the config, checkpoint and default mask paths used below:
# the parent of the folder that contains this file.
_THIS_FILE = Path(__file__).resolve()
PROJECT_ROOT = _THIS_FILE.parents[1]
def predict(image_path: str, prompt: str, config_path: str | None = None, output_path: str | None = None):
    """Segment one image with a text prompt and save the binary mask as an image.

    The function reads the YAML config, builds the model and processor via
    ``load_model_and_processor``, restores the weights stored at
    ``outputs/checkpoints/best_model.pt`` under ``PROJECT_ROOT``, thresholds
    the sigmoid of the model logits, and writes the mask resized to the input
    image's dimensions.

    Args:
        image_path: Path of the image to segment.
        prompt: Text prompt handed to the processor.
        config_path: YAML config path. Falls back to
            ``configs/train_config.yaml`` under ``PROJECT_ROOT`` when omitted
            or empty. The config must provide ``model.name``,
            ``model.freeze_backbone`` and ``evaluation.threshold``.
        output_path: Destination of the mask image. When ``None`` the mask is
            written to ``outputs/masks/<image stem>__<prompt slug>.png`` under
            ``PROJECT_ROOT``.

    Returns:
        The mask as a mode ``"L"`` PIL image (0 = below threshold,
        255 = above threshold) with the same width and height as the input.
    """
    config_path = config_path or str(PROJECT_ROOT / "configs" / "train_config.yaml")
    # Explicit encoding so the config parses the same way regardless of locale.
    with open(config_path, encoding="utf-8") as f:
        config = yaml.safe_load(f)

    device = get_device()
    model, processor = load_model_and_processor(config["model"]["name"], config["model"]["freeze_backbone"])
    ckpt = PROJECT_ROOT / "outputs" / "checkpoints" / "best_model.pt"
    # Weights are deserialized on CPU first and moved to the target device afterwards.
    model.load_state_dict(torch.load(ckpt, map_location="cpu", weights_only=True))
    model = model.to(device).eval()

    # convert() returns an independent in-memory image, so the file handle can
    # be released as soon as the conversion is done.
    with Image.open(image_path) as raw_image:
        image = raw_image.convert("RGB")
    orig_w, orig_h = image.size

    inputs = processor(text=[prompt], images=[image], return_tensors="pt", padding=True)
    inputs = {k: v.to(device) for k, v in inputs.items()}
    with torch.no_grad():
        logits = model(**inputs).logits

    # Only one (image, prompt) pair is fed in. If the model already returned an
    # unbatched 2-D map, indexing [0] would pick a single row instead of the
    # whole mask, so index only when a leading batch axis is present.
    sample_logits = logits[0] if logits.dim() > 2 else logits
    pred = (torch.sigmoid(sample_logits) > config["evaluation"]["threshold"]).cpu().numpy().astype(np.uint8)
    # NEAREST keeps the resized mask strictly binary (no interpolated grey values).
    mask = Image.fromarray(pred * 255, mode="L").resize((orig_w, orig_h), Image.NEAREST)

    if output_path is None:
        stem = Path(image_path).stem
        # Keep only filename-safe characters: a prompt containing "/" or "\"
        # must not create nested directories or escape outputs/masks.
        slug = "".join(ch if ch.isalnum() or ch in "-_." else "_" for ch in prompt)
        output_path = str(PROJECT_ROOT / "outputs" / "masks" / f"{stem}__{slug}.png")
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
    mask.save(output_path)
    print(f"Saved mask to {output_path}")
    return mask
if __name__ == "__main__":
    # Command-line entry point: parse the arguments and run a single prediction.
    parser = argparse.ArgumentParser(description="Standalone single-image inference for CLIPSeg.")
    parser.add_argument("image", help="Path to input image")
    parser.add_argument("prompt", help="Text prompt, e.g. 'segment crack'")
    parser.add_argument("--output", help="Output mask path")
    # Optional: when omitted, predict() falls back to its default config path.
    parser.add_argument("--config", help="Path to YAML config file")
    args = parser.parse_args()
    predict(args.image, args.prompt, config_path=args.config, output_path=args.output)