File size: 10,463 Bytes
a985b94
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
"""
Robotic Control Middleware — Full Production Scan
Phase 1: Loads Mixed-type Wafer Defect Dataset, runs YOLOv8 on defective wafers.
Phase 2: Loads ALL passed wafers from WM-811K dataset (direct insert, no YOLO needed).
"""

import os
import sys
import pickle
import sqlite3
import random
import cv2
import time
import numpy as np
from datetime import datetime, timedelta
from ultralytics import YOLO

# Fix for old Pandas architecture in WM-811K pickle
import pandas.core.indexes
sys.modules['pandas.indexes'] = pandas.core.indexes
import pandas as pd

# --- CONFIGURATION ---
# Mixed-type wafer defect archive in the local kagglehub cache. The script
# reads its 'arr_0' array as the wafer maps and 'arr_1' as the labels.
NPZ_PATH = os.path.expanduser(
    '~/.cache/kagglehub/datasets/co1d7era/mixedtype-wafer-defect-datasets/versions/4/Wafer_Map_Datasets.npz'
)
# WM-811K pickle in the local kagglehub cache; insert_wm811k_passed() unpickles
# it and reads its 'failureType' and 'waferMap' columns.
WM811K_PATH = os.path.expanduser(
    '~/.cache/kagglehub/datasets/qingyi/wm811k-wafer-map/versions/1/LSWMD.pkl'
)
# Weights file handed to ultralytics.YOLO(); relative to the working directory.
MODEL_PATH = 'middleware/best.pt'
# SQLite database that setup_database() recreates on every run; also relative
# to the working directory.
DB_PATH = os.path.join('middleware', 'wafer_control.db')

# Defect names matching the 8-dim one-hot encoding order in the dataset.
# decode_label() indexes this list by the position of each active label bit.
DEFECT_NAMES = ['Center', 'Donut', 'Edge-Loc', 'Edge-Ring', 'Loc', 'Near-full', 'Random', 'Scratch']


def setup_database():
    """Create a fresh ``wafer_logs`` table and return the open connection.

    Any existing ``wafer_logs`` table in the database at ``DB_PATH`` is
    dropped first, so every call starts from an empty log.

    Returns:
        sqlite3.Connection: Open connection to ``DB_PATH``. The caller owns it
        and is responsible for closing it.

    Raises:
        sqlite3.Error: If the schema cannot be (re)created. The connection is
            closed before the error propagates.
    """
    # Create the directory the database actually lives in, so the directory
    # and DB_PATH cannot drift apart if the path is ever reconfigured.
    db_dir = os.path.dirname(DB_PATH)
    if db_dir:
        os.makedirs(db_dir, exist_ok=True)

    conn = sqlite3.connect(DB_PATH)
    try:
        cursor = conn.cursor()
        cursor.execute('DROP TABLE IF EXISTS wafer_logs')
        cursor.execute('''
            CREATE TABLE wafer_logs (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                wafer_id TEXT,
                batch_id TEXT,
                scan_time TEXT,
                status TEXT,
                ground_truth TEXT,
                defect_type TEXT,
                action TEXT,
                confidence REAL,
                roi_coordinates TEXT,
                defect_area_px INTEGER,
                material_wasted_pct REAL
            )
        ''')
        conn.commit()
    except Exception:
        # Do not leak the handle when the schema setup fails.
        conn.close()
        raise
    return conn


def decode_label(one_hot, names=None):
    """Convert a multi-hot defect label into a human-readable string.

    Args:
        one_hot: Array-like of 0/1 flags (NumPy array, list or tuple). Each
            position set to 1 marks one active defect class.
        names: Optional sequence of class names indexed by flag position.
            Defaults to the module-level ``DEFECT_NAMES``.

    Returns:
        str: ``'Normal'`` when no flag equals 1, otherwise the active class
        names joined with ``'+'`` in positional order (e.g. ``'Center+Loc'``).

    Raises:
        IndexError: If an active position has no entry in ``names``.
    """
    if names is None:
        names = DEFECT_NAMES
    # Coerce first: comparing a plain list to 1 yields a single False, which
    # would silently decode every list/tuple label as 'Normal'.
    active = np.where(np.asarray(one_hot) == 1)[0]
    if len(active) == 0:
        return 'Normal'
    return '+'.join(names[i] for i in active)


def wafer_to_image(wafer_map):
    """Render a wafer map array as a 3-channel uint8 image for YOLOv8.

    Die codes are painted onto a black canvas of the same shape as
    ``wafer_map``: code 1 becomes gray (127), codes 2 and 3 become white
    (255), and every other value stays 0. The grayscale canvas is then
    expanded to three channels with OpenCV's GRAY2BGR conversion.
    """
    # Die code -> gray level. Code 3 is treated like a broken die (2).
    gray_levels = {1: 127, 2: 255, 3: 255}

    canvas = np.zeros(wafer_map.shape, dtype=np.uint8)
    for die_code, level in gray_levels.items():
        canvas[wafer_map == die_code] = level

    # YOLOv8 is fed a 3-channel (BGR) image rather than a single-channel one.
    return cv2.cvtColor(canvas, cv2.COLOR_GRAY2BGR)


def compute_defect_area(coords):
    """Return the bounding-box area in pixels for ``[x1, y1, x2, y2]``.

    Args:
        coords: Sequence of four numbers ``[x1, y1, x2, y2]``. ``None``, an
            empty sequence, or anything that is not exactly four long yields 0.

    Returns:
        The box area ``(x2 - x1) * (y2 - y1)``, or 0 when the box is missing,
        malformed, or degenerate/inverted on either axis.
    """
    try:
        if len(coords) != 4:
            return 0
    except TypeError:
        # None (or any other unsized value) means "no box".
        return 0

    x1, y1, x2, y2 = coords
    # Clamp each side separately: a box inverted on BOTH axes would otherwise
    # produce a positive area from the product of two negative extents.
    width = max(0, x2 - x1)
    height = max(0, y2 - y1)
    return width * height


def _inspect_defective_wafer(model, wafer_map):
    """Run YOLO on one defective wafer map and derive its log fields.

    Returns:
        tuple: ``(defect_type, action, confidence, coords, defect_area,
        material_wasted_pct)``. ``coords`` is ``[x1, y1, x2, y2]`` for the
        first reported box, or ``[]`` when nothing was detected.
    """
    img = wafer_to_image(wafer_map)
    wafer_area_px = img.shape[0] * img.shape[1]

    results = model.predict(source=img, conf=0.25, verbose=False)
    boxes = results[0].boxes

    if len(boxes) > 0:
        # Only the first reported box is logged for the wafer.
        box = boxes[0]
        defect_type = model.names[int(box.cls[0].item())]
        confidence = round(box.conf[0].item(), 2)
        coords = [int(v) for v in box.xyxy[0].tolist()]
        action = "ROUTE_TO_SCRAP" if defect_type in ("Center", "Near-full") else "MOVE_TO_MICRO_STAGE"
    else:
        # Labelled defective but YOLO found nothing (e.g. a mixed pattern it
        # cannot see): still a FAIL, routed for closer inspection.
        defect_type = "Undetected"
        action = "MOVE_TO_MICRO_STAGE"
        confidence = 0.0
        coords = []

    defect_area = compute_defect_area(coords)
    material_wasted_pct = round((defect_area / wafer_area_px) * 100, 2) if defect_area > 0 else 0.0
    return defect_type, action, confidence, coords, defect_area, material_wasted_pct


def run_production_scan(conn, model, wafer_maps, labels, batch_id, start_time):
    """Log every wafer: YOLO inference for defective ones, direct insert for normals.

    Args:
        conn: Open sqlite3 connection that already has the ``wafer_logs`` table.
        model: Detector exposing ``predict(...)`` and ``names`` (an Ultralytics
            YOLO model in this script). Only used for non-normal wafers.
        wafer_maps: Sequence of wafer map arrays.
        labels: Sequence of multi-hot labels, one per wafer map.
        batch_id: Batch identifier stored on every row.
        start_time: ``datetime`` marking the start of the simulated scan window.

    Raises:
        ValueError: If ``labels`` has fewer entries than ``wafer_maps``.

    Rows are committed every 500 wafers and once more at the end.
    """
    total = len(wafer_maps)
    if len(labels) < total:
        # Fail before touching the database instead of hitting an IndexError
        # mid-scan after some batches were already committed.
        raise ValueError(
            f"labels has {len(labels)} entries but wafer_maps has {total}"
        )

    insert_sql = '''
        INSERT INTO wafer_logs
        (wafer_id, batch_id, scan_time, status, ground_truth, defect_type, action, confidence, roi_coordinates, defect_area_px, material_wasted_pct)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    '''
    cursor = conn.cursor()

    for i, (wafer_map, label) in enumerate(zip(wafer_maps, labels)):
        ground_truth = decode_label(label)

        # Spread wafers over a 30-day window by position, then add Gaussian
        # noise (sigma = 4 days) so defects cluster into natural-looking spikes.
        base_day = (i / total) * 30
        noisy_day = base_day + random.gauss(0, 4)
        days_offset = int(max(0, min(29, noisy_day)))
        scan_time = start_time + timedelta(
            days=days_offset,
            seconds=random.randint(0, 68)
        )
        scan_time_str = scan_time.strftime("%Y-%m-%d %H:%M:%S")

        if ground_truth == 'Normal':
            # PASS wafer — no YOLO needed.
            outcome = ("PASS", ground_truth, "None", "ROUTE_TO_ASSEMBLY", 1.0, "[]", 0, 0.0)
        else:
            # Defective wafer — render it and let YOLO localise the defect.
            (defect_type, action, confidence,
             coords, defect_area, material_wasted_pct) = _inspect_defective_wafer(model, wafer_map)
            outcome = ("FAIL", ground_truth, defect_type, action, confidence,
                       str(coords), defect_area, material_wasted_pct)

        cursor.execute(insert_sql, (f"wafer_{i}", batch_id, scan_time_str) + outcome)

        # Commit in batches to bound the size of the open transaction.
        if (i + 1) % 500 == 0:
            conn.commit()
            print(f"  Processed {i + 1}/{total} wafers...")

    conn.commit()


def insert_wm811k_passed(conn, batch_id, start_time):
    """Load every passed wafer from WM-811K and insert it directly into the DB.

    A wafer counts as passed when its ``failureType`` decodes to ``'none'`` or
    is empty (empty entries are mapped to ``'None'``), and its ``waferMap`` is
    a non-empty NumPy array. No inference is run; each row is logged as a
    PASS with confidence 1.0.

    Args:
        conn: Open sqlite3 connection that already has the ``wafer_logs`` table.
        batch_id: Batch identifier stored on every row.
        start_time: ``datetime`` marking the start of the simulated scan window.

    Returns:
        int: Number of passed wafers inserted.
    """
    print("Loading WM-811K dataset...")
    # NOTE(security): pickle.load can execute arbitrary code. WM811K_PATH must
    # point at a trusted, locally downloaded dataset file.
    with open(WM811K_PATH, 'rb') as f:
        wm_df = pickle.load(f, encoding='latin1')

    failure_class = wm_df['failureType'].apply(lambda x: x[0][0] if len(x) > 0 else 'None')
    passed = wm_df[(failure_class == 'None') | (failure_class == 'none')]
    passed = passed[passed['waferMap'].apply(lambda x: isinstance(x, np.ndarray) and x.size > 0)]

    total = len(passed)
    print(f"  Found {total:,} passed wafers in WM-811K")
    print("  Inserting all into database...")

    insert_sql = '''
        INSERT INTO wafer_logs
        (wafer_id, batch_id, scan_time, status, ground_truth, defect_type, action, confidence, roi_coordinates, defect_area_px, material_wasted_pct)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    '''
    flush_every = 10000
    cursor = conn.cursor()
    rows = []

    # Only the index label is needed per wafer, so walk the index instead of
    # iterrows(), which would build a full Series for each row.
    for i, index in enumerate(passed.index):
        # Stable production schedule: low-variance noise for normal wafers.
        base_day = (i / total) * 30
        noisy_day = base_day + random.gauss(0, 0.5)
        days_offset = int(max(0, min(29, noisy_day)))
        scan_time = start_time + timedelta(
            days=days_offset,
            seconds=random.randint(0, 3)
        )
        rows.append((
            f"wm811k_{index}", batch_id, scan_time.strftime("%Y-%m-%d %H:%M:%S"),
            "PASS", "Normal", "None", "ROUTE_TO_ASSEMBLY", 1.0, "[]", 0, 0.0
        ))

        if len(rows) >= flush_every:
            cursor.executemany(insert_sql, rows)
            conn.commit()
            rows = []
            print(f"  Inserted {i + 1:,}/{total:,} passed wafers...")

    # Flush the final partial chunk.
    if rows:
        cursor.executemany(insert_sql, rows)
        conn.commit()

    print(f"  Done! Inserted {total:,} passed wafers.")
    return total


# Script entry point: load both datasets, run the two scan phases against a
# freshly created database, and print a summary. Exits with status 1 if the
# scan fails.
if __name__ == '__main__':
    print("=" * 60)
    print("  ROBOTIC CONTROL MIDDLEWARE — HYBRID PRODUCTION SCAN")
    print("=" * 60)

    # 1. Load Mixed-type dataset
    print("\nPhase 1: Loading Mixed-type Wafer Defect Dataset...")
    # Read both arrays inside the context manager so the .npz file handle is
    # released as soon as they are in memory.
    with np.load(NPZ_PATH) as data:
        X = data['arr_0']
        Y = data['arr_1']

    normals_mixed = sum(1 for y in Y if np.sum(y) == 0)
    defective = len(X) - normals_mixed

    print(f"  Mixed-type: {len(X):,} total ({defective:,} defective + {normals_mixed:,} normal)")
    print(f"  WM-811K:    ~786K passed wafers")

    # 2. Setup
    # Load the model BEFORE touching the database: setup_database() drops the
    # existing wafer_logs table, so a missing or corrupt weights file must
    # fail first, and no connection is left open if loading raises.
    wafer_model = YOLO(MODEL_PATH)
    db_connection = setup_database()

    # Take one timestamp so the batch ID and scan window always agree.
    now = datetime.now()
    batch_id = f"BATCH_{now.strftime('%Y%m%d_%H%M%S')}"
    start_time = now - timedelta(days=30)

    print(f"\n  Batch ID: {batch_id}")
    print(f"  Scan window: {start_time.strftime('%Y-%m-%d')} → {now.strftime('%Y-%m-%d')}")

    scan_failed = False
    try:
        # PHASE 1: YOLOv8 on Mixed-type defective wafers
        print(f"\n{'=' * 60}")
        print(f"  PHASE 1: YOLOv8 Inference ({len(X):,} Mixed-type wafers)")
        print(f"{'=' * 60}\n")

        t0 = time.time()
        run_production_scan(db_connection, wafer_model, X, Y, batch_id, start_time)
        t1 = time.time()
        print(f"\n  Phase 1 complete: {t1 - t0:.1f}s")

        # PHASE 2: All passed wafers from WM-811K
        print(f"\n{'=' * 60}")
        print(f"  PHASE 2: WM-811K Passed Wafers (all ~786K)")
        print(f"{'=' * 60}\n")

        passed_count = insert_wm811k_passed(db_connection, batch_id, start_time)
        t2 = time.time()

        total = len(X) + passed_count
        # Guard the ratio so an empty run reports 0.0% instead of crashing.
        pass_rate = (normals_mixed + passed_count) / total * 100 if total else 0.0
        print(f"\n{'=' * 60}")
        print(f"  SCAN COMPLETE")
        print(f"  Defective (Mixed-type): {defective:,}")
        print(f"  Normal (Mixed-type):    {normals_mixed:,}")
        print(f"  Passed (WM-811K):       {passed_count:,}")
        print(f"  Total records:          {total:,}")
        print(f"  Pass rate:              {pass_rate:.1f}%")
        print(f"  Time elapsed:           {t2 - t0:.1f}s")
        print(f"  Database:               {DB_PATH}")
        print(f"{'=' * 60}")

    except Exception as e:
        print(f"\nError during scan: {e}")
        import traceback
        traceback.print_exc()
        scan_failed = True

    finally:
        db_connection.close()
        print("Database connection closed.")

    # Report failure to the calling shell or scheduler instead of exiting 0.
    if scan_failed:
        sys.exit(1)