# CT_Segmentation / download_samples.py
# Commit c4c873f (TDHarshithReddy):
# Add teeth CT sample, upgrade Gradio to 4.44.1, and fix app config
"""
Download sample CT scans for the Hugging Face Space demo.
"""
import os
import urllib.request
import zipfile
import shutil
import time
# Sample volumes served straight from the MONAI Model Zoo repository.
# Every entry follows the same URL / filename / description pattern, so the
# table is generated from the case identifiers.
_MONAI_SAMPLE_ROOT = (
    "https://raw.githubusercontent.com/Project-MONAI/model-zoo/dev/models/"
    "wholeBody_ct_segmentation/sampledata/imagesTr"
)
MONAI_SAMPLES = [
    {
        "url": f"{_MONAI_SAMPLE_ROOT}/{case_id}.nii.gz",
        "filename": f"sample_ct_{case_id}.nii.gz",
        "description": f"MONAI Model Zoo Sample {case_id}",
    }
    for case_id in ("s0037", "s0038")
]

# Fallback source: the small TotalSegmentator dataset archive on Zenodo.
ZENODO_URL = (
    "https://zenodo.org/records/10047263/files/"
    "Totalsegmentator_dataset_small_v201.zip?download=1"
)
def download_file(url, output_path, description, timeout=60):
    """Download *url* to *output_path*, reporting progress on stdout.

    The payload is streamed into ``<output_path>.part`` and only moved to
    *output_path* once it has passed the sanity checks, so a failed or
    rejected download never leaves a file at *output_path* (and never
    clobbers one that was already there).

    Args:
        url: Location to fetch.
        output_path: Destination path for the downloaded file.
        description: Human-readable label used in the progress messages.
        timeout: Socket timeout in seconds for blocking network operations.

    Returns:
        True if the file was downloaded and accepted, False on any failure.
    """
    print(f"Downloading {description}...")
    print(f" Url: {url}")
    partial_path = f"{output_path}.part"
    # Some servers reject the default urllib User-Agent. Setting the header on
    # the request avoids installing a process-wide opener.
    request = urllib.request.Request(url, headers={"User-agent": "Mozilla/5.0"})
    try:
        with urllib.request.urlopen(request, timeout=timeout) as response:
            expected_size = response.headers.get("Content-Length")
            with open(partial_path, "wb") as target:
                shutil.copyfileobj(response, target)

        size = os.path.getsize(partial_path)
        # Reject truncated transfers when the server announced a length.
        if expected_size is not None and size < int(expected_size):
            print(f" ✗ Failed: incomplete download ({size} of {expected_size} bytes)")
            return False

        # A tiny payload may be an error page served with a 200 status.
        # Inspect it as bytes: the expected payload is binary and would not
        # decode as text.
        if size < 1000:
            with open(partial_path, "rb") as f:
                head = f.read(100)
            if b"Not Found" in head:
                print(" ✗ Downloaded file appears to be a 404 page.")
                return False

        os.replace(partial_path, output_path)
        print(f" ✓ Success! Saved to {output_path} ({size/1024/1024:.2f} MB)")
        return True
    except Exception as e:
        # Best-effort contract: any failure is reported and signalled via the
        # return value so the caller can fall back to another source.
        print(f" ✗ Failed: {e}")
        return False
    finally:
        # Remove the temporary file on every non-success path.
        if os.path.exists(partial_path):
            try:
                os.remove(partial_path)
            except OSError:
                pass
def _extract_ct_samples(zip_path, examples_dir, limit=3):
    """Copy up to *limit* ``ct.nii.gz`` archive members into *examples_dir*.

    Members are taken in archive order and written as
    ``sample_ct_zenodo_<n>.nii.gz``. If copying a member fails, its partially
    written output file is removed before the error propagates.

    Returns:
        The number of files extracted.
    """
    extracted = 0
    with zipfile.ZipFile(zip_path, "r") as zf:
        # Expected layout: <dataset_root>/<subject_id>/ct.nii.gz
        ct_files = [name for name in zf.namelist() if name.endswith("ct.nii.gz")]
        for index, ct_file in enumerate(ct_files[:limit], start=1):
            out_name = f"sample_ct_zenodo_{index}.nii.gz"
            out_path = os.path.join(examples_dir, out_name)
            try:
                with zf.open(ct_file) as source, open(out_path, "wb") as target:
                    shutil.copyfileobj(source, target)
            except Exception:
                # Do not leave a truncated volume that would be listed as an example.
                if os.path.exists(out_path):
                    os.remove(out_path)
                raise
            print(f" ✓ Extracted {out_name}")
            extracted += 1
    return extracted


def setup_examples():
    """Populate the ``examples`` directory next to this script with CT samples.

    The direct MONAI Model Zoo samples are tried first; files already present
    are kept and counted as available. Only if none of them is available is
    the Zenodo archive downloaded, and up to three ``ct.nii.gz`` volumes
    extracted from it. The temporary archive is always removed afterwards.

    Returns:
        Sorted list of the ``.nii.gz`` file names present in the examples
        directory.
    """
    examples_dir = os.path.join(os.path.dirname(__file__), "examples")
    os.makedirs(examples_dir, exist_ok=True)
    success_count = 0

    # 1. Try the direct MONAI samples.
    print("\n--- Attempting to download direct samples from MONAI Model Zoo ---")
    for sample in MONAI_SAMPLES:
        dest = os.path.join(examples_dir, sample["filename"])
        if os.path.exists(dest):
            print(f" ✓ {sample['filename']} already exists")
            success_count += 1
        elif download_file(sample["url"], dest, sample["description"]):
            success_count += 1

    # 2. Fall back to the Zenodo archive only when no sample is available,
    #    because the archive is a multi-gigabyte download.
    if success_count == 0:
        print("\n--- Direct downloads failed. Downloading Zenodo subset (WARNING: ~3.2GB) ---")
        zip_path = os.path.join(examples_dir, "temp_zenodo.zip")
        print(f"Downloading Zenodo zip to {zip_path}...")
        try:
            if download_file(ZENODO_URL, zip_path, "Zenodo TotalSegmentator Small Subset"):
                try:
                    print("Extracting random samples from zip...")
                    _extract_ct_samples(zip_path, examples_dir)
                except Exception as e:
                    print(f" ✗ Extraction failed: {e}")
        finally:
            # Remove the temporary archive however the download/extraction ended.
            if os.path.exists(zip_path):
                print("Cleaning up zip file...")
                os.remove(zip_path)

    # 3. Report what is available; sorted so the result is deterministic.
    final_files = sorted(f for f in os.listdir(examples_dir) if f.endswith(".nii.gz"))
    print(f"\nTotal example files in {examples_dir}: {len(final_files)}")
    print(final_files)
    return final_files
# Script entry point: fetch the example files only when this module is run
# directly, not when it is imported.
if __name__ == "__main__":
    setup_examples()