| | |
| | |
| |
|
| | |
| | |
| | |
| |
|
| |
|
| | import os, re, glob, argparse, sys, time |
| | from pathlib import Path |
| | import numpy as np |
| | from PIL import Image |
| | import tensorflow as tf |
| | from tensorflow.keras import layers, models, backend as K |
| |
|
| | |
def parse_args():
    """Parse CLI arguments for the CRNN+CTC inference test.

    Returns:
        argparse.Namespace with: weights (str, required), image (str|None),
        gt (str|None), data_root (str|None), samples (int), height (int),
        width (int), ext (str), show (int).
    """
    # BUG FIX: the first positional argument of ArgumentParser is `prog`,
    # not `description` — the original passed the description text as the
    # program name, corrupting the usage/help output.
    p = argparse.ArgumentParser(
        description="Test inference CRNN+CTC dari weights Keras 3 (model_with_ctc.save_weights)."
    )
    p.add_argument("--weights", required=True, help="Path ke *.weights.h5 (hasil save_weights).")
    p.add_argument("--image", help="Uji 1 gambar (PNG/JPG). Nama file jadi GT jika --gt tidak diisi.")
    p.add_argument("--gt", help="Ground truth untuk --image (opsional, default dari nama file).")
    p.add_argument("--data-root", help="Root dataset berisi style0..style59/LABEL.png untuk batch test.")
    p.add_argument("--samples", type=int, default=64, help="Jumlah sampel di batch test.")
    p.add_argument("--height", type=int, default=50)
    p.add_argument("--width", type=int, default=250)
    p.add_argument("--ext", type=str, default="png")
    p.add_argument("--show", type=int, default=12, help="Banyak baris contoh yang ditampilkan.")
    return p.parse_args()
| |
|
| | |
# 36-symbol alphabet (digits then uppercase A-Z); a character's class id is
# its index in CHARSET.
CHARSET = list("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ")
# CTC blank token uses the extra class id just past the last real character.
BLANK_ID = len(CHARSET)
# Vectorized id -> character lookup table (used by ids_to_text).
ID2CHAR = np.array(CHARSET)
| |
|
def collapse_and_strip_blanks(seq_ids, blank_id=BLANK_ID):
    """Greedy CTC decoding step: merge consecutive repeats, then drop blanks.

    Args:
        seq_ids: iterable of per-timestep argmax class ids.
        blank_id: id of the CTC blank token.

    Returns:
        list of class ids with repeats collapsed and blanks removed.
    """
    decoded = []
    last_seen = -1
    for tok in seq_ids:
        # Keep a token only when it differs from the previous timestep
        # (repeat collapse) and is not the blank symbol.
        if tok != last_seen and tok != blank_id:
            decoded.append(tok)
        last_seen = tok
    return decoded
| |
|
def ids_to_text(ids):
    """Map class ids to a string, silently skipping out-of-range ids."""
    valid = [i for i in ids if 0 <= i < len(CHARSET)]
    if not valid:
        return ""
    return "".join(ID2CHAR[valid])
| |
|
def cer(pred, gt):
    """Character error rate: Levenshtein distance between pred and gt,
    normalized by len(gt).

    Empty ground truth is special-cased: 0.0 when pred is also empty,
    otherwise 1.0.
    """
    n_pred, n_gt = len(pred), len(gt)
    if n_gt == 0:
        return 0.0 if n_pred == 0 else 1.0
    # Full edit-distance table; row i / column j = distance between
    # pred[:i] and gt[:j].
    dist = np.zeros((n_pred + 1, n_gt + 1), dtype=np.int32)
    dist[:, 0] = np.arange(n_pred + 1)
    dist[0, :] = np.arange(n_gt + 1)
    for i in range(1, n_pred + 1):
        for j in range(1, n_gt + 1):
            substitute = dist[i - 1, j - 1] + (pred[i - 1] != gt[j - 1])
            delete = dist[i - 1, j] + 1
            insert = dist[i, j - 1] + 1
            dist[i, j] = min(delete, insert, substitute)
    return dist[n_pred, n_gt] / n_gt
| |
|
| | |
def build_models(h=50, w=250, num_classes=len(CHARSET)+1):
    """Build the CRNN and return (training model with CTC loss, inference model).

    The two models share all layers, so loading weights into the training
    model also populates the inference model. Layer order and names must
    match the checkpoint exactly for load_weights to succeed.

    Args:
        h, w: input image height/width in pixels (grayscale, 1 channel).
        num_classes: character classes + 1 for the CTC blank.

    Returns:
        (model_with_ctc, base_model): the CTC-loss training model taking
        [image, labels, input_length, label_length], and the plain
        image -> per-timestep softmax inference model.
    """
    inp = layers.Input(shape=(h, w, 1), name="input")
    # Three conv/BN/pool stages; each 2x2 pool halves H and W, so the
    # feature map leaving the stack is (h//8, w//8, 128).
    x = layers.Conv2D(32, (3,3), activation="relu", padding="same")(inp)
    x = layers.BatchNormalization()(x)
    x = layers.MaxPooling2D((2,2))(x)

    x = layers.Conv2D(64, (3,3), activation="relu", padding="same")(x)
    x = layers.BatchNormalization()(x)
    x = layers.MaxPooling2D((2,2))(x)

    x = layers.Conv2D(128, (3,3), activation="relu", padding="same")(x)
    x = layers.BatchNormalization()(x)
    x = layers.MaxPooling2D((2,2))(x)

    # shp = (batch, h//8, w//8, 128). The reshape makes the WIDTH axis the
    # RNN time dimension and flattens height x channels into the feature
    # vector per timestep: (w//8, (h//8)*128).
    shp = K.int_shape(x)
    x = layers.Reshape((shp[2], shp[1]*shp[3]))(x)

    # Two stacked BiLSTMs over the width/time axis.
    x = layers.Bidirectional(layers.LSTM(128, return_sequences=True, dropout=0.0, recurrent_dropout=0.0))(x)
    x = layers.Bidirectional(layers.LSTM(128, return_sequences=True, dropout=0.0, recurrent_dropout=0.0))(x)

    # Per-timestep class distribution (characters + blank).
    pred = layers.Dense(num_classes, activation="softmax", name="predictions")(x)

    # Extra inputs used only by the CTC loss at training time.
    labels = layers.Input(name="labels", shape=(None,), dtype="int32")
    input_len = layers.Input(name="input_length", shape=(1,), dtype="int32")
    label_len = layers.Input(name="label_length", shape=(1,), dtype="int32")
    def ctc_fn(args):
        # K.ctc_batch_cost(labels, y_pred, input_length, label_length)
        # returns the per-sample CTC loss, shape (batch, 1).
        y_pred, labels_t, in_l, lab_l = args
        return K.ctc_batch_cost(labels_t, y_pred, in_l, lab_l)
    ctc = layers.Lambda(ctc_fn, output_shape=(1,), name="ctc_loss", dtype="float32")([pred, labels, input_len, label_len])

    model_with_ctc = models.Model(inputs=[inp, labels, input_len, label_len], outputs=ctc, name="crnn_ctc_train")
    base_model = models.Model(inputs=inp, outputs=pred, name="crnn_ctc_base")
    return model_with_ctc, base_model
| |
|
| | |
def preprocess_gray(img_pil, h=50, w=250):
    """Convert a PIL image to the network's input tensor.

    Grayscale, bilinear-resized to (w, h), scaled to [0, 1] and then
    normalized to [-1, 1]; returns float32 array of shape (h, w, 1).
    """
    gray = img_pil.convert("L").resize((w, h), Image.BILINEAR)
    pix = np.asarray(gray, dtype=np.float32) / 255.0
    pix = (pix - 0.5) / 0.5
    # Append the trailing channel axis expected by the Conv2D stack.
    return pix[..., None]
| |
|
def list_files(root, ext="png", max_n=64):
    """Collect up to max_n (file_path, label) pairs from root/style0..style59.

    The label is the uppercased file stem and must be exactly five
    characters from [A-Z0-9]; anything else is skipped.
    """
    base = Path(root)
    label_re = re.compile(r"^[A-Z0-9]{5}$")
    found = []
    for style_idx in range(60):
        style_dir = base / f"style{style_idx}"
        if not style_dir.exists():
            continue
        for file_path in glob.glob(str(style_dir / f"*.{ext}")):
            label = Path(file_path).stem.upper()
            if label_re.match(label):
                found.append((file_path, label))
                # Stop as soon as the sample budget is reached.
                if len(found) >= max_n:
                    return found
    return found
| |
|
| | |
def predict_batch(base_model, batch_imgs):
    """Run greedy CTC decoding over a batch.

    batch_imgs: np.array (B,H,W,1) float32 [-1,1]

    Returns a list of B decoded strings.
    """
    # (B, T, num_classes) softmax scores -> per-timestep argmax ids.
    scores = base_model.predict(batch_imgs, verbose=0)
    best_ids = np.argmax(scores, axis=-1)
    return [
        ids_to_text(collapse_and_strip_blanks(row, blank_id=BLANK_ID))
        for row in best_ids
    ]
| |
|
def main():
    """Entry point: load checkpoint weights into the CRNN, then test either
    a single image (--image) or a batch sampled from a dataset (--data-root).

    Exit codes: 0 success/nothing-to-do, 1 missing weights file,
    2 weight-load failure, 3 missing image file.
    """
    args = parse_args()

    # Pin TF/OpenMP thread pools to 1 for quiet, reproducible CPU runs.
    # NOTE(review): tensorflow was already imported at module load, so it may
    # have read these variables before this point — confirm they take effect.
    os.environ.setdefault("TF_NUM_INTRAOP_THREADS", "1")
    os.environ.setdefault("TF_NUM_INTEROP_THREADS", "1")
    os.environ.setdefault("OMP_NUM_THREADS", "1")

    # Validate the checkpoint path and report basic file metadata.
    wpath = Path(args.weights)
    if not wpath.exists():
        print("Weights not found:", wpath); sys.exit(1)
    st = wpath.stat()
    print(f"Found weights: {wpath} | size: {st.st_size/1024:.1f} KB | mtime: {time.ctime(st.st_mtime)}")
    print("TF GPUs:", tf.config.list_physical_devices('GPU'))

    # Rebuild the exact training architecture and load weights into it;
    # base_model shares layers with model_with_ctc, so it is populated too.
    model_with_ctc, base_model = build_models(h=args.height, w=args.width, num_classes=len(CHARSET)+1)
    try:
        model_with_ctc.load_weights(str(wpath))
        print("OK: weights loaded.")
    except Exception as e:
        print("Failed to load weights:", e); sys.exit(2)

    print("Base output shape:", base_model.output_shape)

    # Mode 1: single-image test. Ground truth defaults to the uppercased
    # file stem when --gt is not given.
    if args.image:
        f = Path(args.image)
        if not f.exists():
            print("Image not found:", f); sys.exit(3)
        with Image.open(f) as im:
            x = preprocess_gray(im, h=args.height, w=args.width)
        pred = predict_batch(base_model, np.expand_dims(x, 0))[0]
        gt = args.gt if args.gt else f.stem.upper()
        print(f"\nSingle image:")
        print(f"GT : {gt}")
        print(f"PRED: {pred}")
        sys.exit(0)

    # Mode 2: batch test over up to --samples labeled files under --data-root.
    if args.data_root:
        pairs = list_files(args.data_root, ext=args.ext, max_n=args.samples)
        if not pairs:
            print("No valid files in dataset root."); sys.exit(0)
        print(f"Testing on {len(pairs)} samples from {args.data_root} ...")
        X, GT = [], []
        for f, lbl in pairs:
            with Image.open(f) as im:
                X.append(preprocess_gray(im, h=args.height, w=args.width))
            GT.append(lbl)
        X = np.stack(X, 0).astype(np.float32)

        # Exact-match accuracy plus mean character error rate.
        PRED = predict_batch(base_model, X)
        exact = np.mean([int(p == g) for p, g in zip(PRED, GT)])
        cer_vals = [cer(p, g) for p, g in zip(PRED, GT)]

        # Show a few example rows for eyeballing.
        for i in range(min(args.show, len(PRED))):
            print(f"{i:02d} GT: {GT[i]} | Pred: {PRED[i]}")

        print(f"\nExact match: {exact*100:.2f}% | Mean CER: {float(np.mean(cer_vals)):.4f}\n")
        print(f"Total images tested: {len(PRED)}\n")
        sys.exit(0)

    print("Nothing to test. Provide --image or --data-root.")
    sys.exit(0)
| |
|
| | if __name__ == "__main__": |
| | main() |