File size: 1,697 Bytes
a985b94
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import os
import shutil
from sklearn.model_selection import train_test_split

# Input folders read by setup_yolo_folders(): the .jpg images and the
# same-named .txt label files that go with them.
IMAGE_DIR = 'data/processed/images'
LABEL_DIR = 'data/processed/labels'

# Root of the output dataset; setup_yolo_folders() creates
# images/{train,val} and labels/{train,val} beneath it.
YOLO_DIR = 'data/yolo_dataset'

def setup_yolo_folders():
    """Build a train/val dataset tree under ``YOLO_DIR``.

    Creates ``images/{train,val}`` and ``labels/{train,val}`` below
    ``YOLO_DIR``, splits the ``.jpg`` files found in ``IMAGE_DIR`` 80/20
    with a fixed seed, and copies every image into its split folder together
    with the same-named ``.txt`` file from ``LABEL_DIR`` when one exists.
    Images without a label file are still copied; their count is reported.

    Raises:
        FileNotFoundError: if ``IMAGE_DIR`` does not exist.
        ValueError: if fewer than two ``.jpg`` images are found, because an
            80/20 split needs at least one image on each side.
    """
    print("Creating YOLO folder structure...")
    for split in ('train', 'val'):
        os.makedirs(os.path.join(YOLO_DIR, 'images', split), exist_ok=True)
        os.makedirs(os.path.join(YOLO_DIR, 'labels', split), exist_ok=True)

    # os.listdir() returns names in arbitrary, filesystem-dependent order.
    # Sorting makes the seeded split below select the same files on every
    # machine and every run.
    images = sorted(f for f in os.listdir(IMAGE_DIR) if f.endswith('.jpg'))

    print(f"Found {len(images)} total images. Splitting 80/20...")

    # train_test_split cannot build a non-empty train set from fewer than
    # two samples; fail early with a message that names the folder.
    if len(images) < 2:
        raise ValueError(
            f"Need at least 2 .jpg images in {IMAGE_DIR!r} to make an 80/20 "
            f"split, found {len(images)}."
        )

    # Classic 80/20 split using scikit-learn
    train_imgs, val_imgs = train_test_split(images, test_size=0.2, random_state=42)

    def copy_files(file_list, split_name):
        """Copy images and any matching labels into the *split_name* folders.

        Returns the number of images that had no label file.
        """
        print(f"Copying {len(file_list)} files to {split_name} set...")
        missing_labels = 0
        for img_name in file_list:
            # Copy Image
            shutil.copy(
                os.path.join(IMAGE_DIR, img_name),
                os.path.join(YOLO_DIR, 'images', split_name, img_name),
            )
            # Copy matching Label. splitext swaps only the final extension,
            # whereas str.replace would rewrite every '.jpg' in the name.
            label_name = os.path.splitext(img_name)[0] + '.txt'
            label_path = os.path.join(LABEL_DIR, label_name)
            if os.path.exists(label_path):
                shutil.copy(
                    label_path,
                    os.path.join(YOLO_DIR, 'labels', split_name, label_name),
                )
            else:
                # Keep the image (best effort) but make the gap visible.
                missing_labels += 1
        if missing_labels:
            print(
                f"Warning: {missing_labels} image(s) in {split_name} set "
                f"have no label file."
            )
        return missing_labels

    copy_files(train_imgs, 'train')
    copy_files(val_imgs, 'val')
    print("Done! Dataset is formatted and ready for YOLOv8.")

# Build the dataset only when this file is run as a script, not when imported.
if __name__ == "__main__":
    setup_yolo_folders()