# Scraped file-viewer header: Spaces: Running; file size: 1,697 Bytes; revision a985b94.
import os
import shutil
from sklearn.model_selection import train_test_split
# Current raw output folders: the .jpg images and .txt labels that
# setup_yolo_folders() reads from.
IMAGE_DIR = 'data/processed/images'
LABEL_DIR = 'data/processed/labels'
# New YOLO-compliant root folder; images/{train,val} and labels/{train,val}
# are created beneath it by setup_yolo_folders().
YOLO_DIR = 'data/yolo_dataset'
def setup_yolo_folders():
    """Build a YOLO-style train/val dataset tree from the processed data.

    Creates ``images/{train,val}`` and ``labels/{train,val}`` under
    ``YOLO_DIR``, splits the ``.jpg`` files found in ``IMAGE_DIR`` 80/20 with
    a fixed seed, and copies every image together with its same-stem ``.txt``
    label from ``LABEL_DIR`` (when one exists) into the matching split folder.

    Raises:
        ValueError: If fewer than two ``.jpg`` images are found; an 80/20
            split cannot yield a non-empty train and val set from that.
    """
    print("Creating YOLO folder structure...")
    for split in ['train', 'val']:
        os.makedirs(os.path.join(YOLO_DIR, 'images', split), exist_ok=True)
        os.makedirs(os.path.join(YOLO_DIR, 'labels', split), exist_ok=True)

    # Grab all the images we just generated. os.listdir returns entries in
    # arbitrary order, so sort them: otherwise the seeded split below could
    # assign different files to train/val on different machines.
    images = sorted(f for f in os.listdir(IMAGE_DIR) if f.endswith('.jpg'))
    print(f"Found {len(images)} total images. Splitting 80/20...")

    # Fail early with an actionable message instead of the generic error
    # train_test_split raises when the train set would come out empty.
    if len(images) < 2:
        raise ValueError(
            f"Need at least 2 .jpg images in {IMAGE_DIR!r} to create a "
            f"train/val split, found {len(images)}."
        )

    # Classic 80/20 split using scikit-learn
    train_imgs, val_imgs = train_test_split(images, test_size=0.2, random_state=42)

    def copy_files(file_list, split_name):
        """Copy each image, and its label when present, into ``split_name``."""
        print(f"Copying {len(file_list)} files to {split_name} set...")
        missing_labels = 0
        for img_name in file_list:
            # Copy Image
            shutil.copy(
                os.path.join(IMAGE_DIR, img_name),
                os.path.join(YOLO_DIR, 'images', split_name, img_name)
            )

            # Copy matching Label. Swap only the final extension:
            # str.replace('.jpg', '.txt') would also rewrite any '.jpg'
            # appearing earlier in the file name.
            label_name = os.path.splitext(img_name)[0] + '.txt'
            label_path = os.path.join(LABEL_DIR, label_name)
            if os.path.exists(label_path):
                shutil.copy(
                    label_path,
                    os.path.join(YOLO_DIR, 'labels', split_name, label_name)
                )
            else:
                # Still best-effort (the image is kept), but no longer silent.
                missing_labels += 1

        if missing_labels:
            print(f"Note: {missing_labels} image(s) in {split_name} set had no label file.")

    copy_files(train_imgs, 'train')
    copy_files(val_imgs, 'val')
    print("Done! Dataset is formatted and ready for YOLOv8.")
# Build the dataset only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    setup_yolo_folders()