Spaces:
Running
Running
| import os | |
| import shutil | |
| from sklearn.model_selection import train_test_split | |
| # Current raw output folders | |
| IMAGE_DIR = 'data/processed/images' | |
| LABEL_DIR = 'data/processed/labels' | |
| # New YOLO-compliant folders | |
| YOLO_DIR = 'data/yolo_dataset' | |
| def setup_yolo_folders(): | |
| print("Creating YOLO folder structure...") | |
| for split in ['train', 'val']: | |
| os.makedirs(os.path.join(YOLO_DIR, 'images', split), exist_ok=True) | |
| os.makedirs(os.path.join(YOLO_DIR, 'labels', split), exist_ok=True) | |
| # Grab all the images we just generated | |
| images = [f for f in os.listdir(IMAGE_DIR) if f.endswith('.jpg')] | |
| print(f"Found {len(images)} total images. Splitting 80/20...") | |
| # Classic 80/20 split using scikit-learn | |
| train_imgs, val_imgs = train_test_split(images, test_size=0.2, random_state=42) | |
| def copy_files(file_list, split_name): | |
| print(f"Copying {len(file_list)} files to {split_name} set...") | |
| for img_name in file_list: | |
| # Copy Image | |
| shutil.copy( | |
| os.path.join(IMAGE_DIR, img_name), | |
| os.path.join(YOLO_DIR, 'images', split_name, img_name) | |
| ) | |
| # Copy matching Label | |
| label_name = img_name.replace('.jpg', '.txt') | |
| if os.path.exists(os.path.join(LABEL_DIR, label_name)): | |
| shutil.copy( | |
| os.path.join(LABEL_DIR, label_name), | |
| os.path.join(YOLO_DIR, 'labels', split_name, label_name) | |
| ) | |
| copy_files(train_imgs, 'train') | |
| copy_files(val_imgs, 'val') | |
| print("Done! Dataset is formatted and ready for YOLOv8.") | |
| if __name__ == "__main__": | |
| setup_yolo_folders() |