""" Download sample CT scans for the Hugging Face Space demo. """ import os import urllib.request import zipfile import shutil import time # Direct samples from MONAI Model Zoo MONAI_SAMPLES = [ { "url": "https://raw.githubusercontent.com/Project-MONAI/model-zoo/dev/models/wholeBody_ct_segmentation/sampledata/imagesTr/s0037.nii.gz", "filename": "sample_ct_s0037.nii.gz", "description": "MONAI Model Zoo Sample s0037" }, { "url": "https://raw.githubusercontent.com/Project-MONAI/model-zoo/dev/models/wholeBody_ct_segmentation/sampledata/imagesTr/s0038.nii.gz", "filename": "sample_ct_s0038.nii.gz", "description": "MONAI Model Zoo Sample s0038" } ] # Fallback: Zenodo Small Subset ZENODO_URL = "https://zenodo.org/records/10047263/files/Totalsegmentator_dataset_small_v201.zip?download=1" def download_file(url, output_path, description): print(f"Downloading {description}...") print(f" Url: {url}") try: # User-Agent needed for some servers opener = urllib.request.build_opener() opener.addheaders = [('User-agent', 'Mozilla/5.0')] urllib.request.install_opener(opener) urllib.request.urlretrieve(url, output_path) # Verify file size (sometimes GitHub returns 404 text file) size = os.path.getsize(output_path) if size < 1000: # < 1KB likely error text with open(output_path, 'r') as f: content = f.read(100) if "404: Not Found" in content or "Not Found" in content: print(f" ✗ Downloaded file appears to be a 404 page.") os.remove(output_path) return False print(f" ✓ Success! Saved to {output_path} ({size/1024/1024:.2f} MB)") return True except Exception as e: print(f" ✗ Failed: {e}") return False def setup_examples(): examples_dir = os.path.join(os.path.dirname(__file__), "examples") os.makedirs(examples_dir, exist_ok=True) success_count = 0 # 1. Try Direct MONAI Samples print("\n--- Attempting to download direct samples from MONAI Model Zoo ---") for sample in MONAI_SAMPLES: dest = os.path.join(examples_dir, sample["filename"]) if not os.path.exists(dest): if download_file(sample["url"], dest, sample["description"]): success_count += 1 else: print(f" ✓ {sample['filename']} already exists") success_count += 1 # 2. If NO samples found/downloaded, try Zenodo Zip # We only do this if we really need data, as it's 3GB if success_count == 0: print("\n--- Direct downloads failed. Downloading Zenodo subset (WARNING: ~3.2GB) ---") zip_path = os.path.join(examples_dir, "temp_zenodo.zip") print(f"Downloading Zenodo zip to {zip_path}...") # Note: This might timeout on some systems, simpler logic here if download_file(ZENODO_URL, zip_path, "Zenodo TotalSegmentator Small Subset"): try: print("Extracting random samples from zip...") with zipfile.ZipFile(zip_path, 'r') as zf: # Find ct.nii.gz files inside the structure # Structure is usually: Totalsegmentator_dataset_small_v201/subject_id/ct.nii.gz files = zf.namelist() ct_files = [f for f in files if f.endswith('ct.nii.gz')] extracted = 0 for i, ct_file in enumerate(ct_files[:3]): # Get first 3 out_name = f"sample_ct_zenodo_{i+1}.nii.gz" out_path = os.path.join(examples_dir, out_name) with zf.open(ct_file) as source, open(out_path, 'wb') as target: shutil.copyfileobj(source, target) print(f" ✓ Extracted {out_name}") extracted += 1 if extracted > 0: success_count += extracted except Exception as e: print(f" ✗ Extraction failed: {e}") # Cleanup zip if os.path.exists(zip_path): print("Cleaning up zip file...") os.remove(zip_path) # 3. Check what we have final_files = [f for f in os.listdir(examples_dir) if f.endswith('.nii.gz')] print(f"\nTotal example files in {examples_dir}: {len(final_files)}") print(final_files) return final_files if __name__ == "__main__": setup_examples()