k-l-lambda committed on
Commit
23c0a70
·
1 Parent(s): 216c17a

update: sync starry bugfixes

Browse files
README.md CHANGED
@@ -21,6 +21,6 @@ Online sheet music recognition and editing platform.
21
  - Score editing and annotation
22
  - Music set management
23
 
24
- ## Note
25
 
26
- This is a lightweight deployment. ML prediction services (layout, gauge, mask, semantic, OCR) are not included in this Space.
 
21
  - Score editing and annotation
22
  - Music set management
23
 
24
+ ## Included Services
25
 
26
+ This Space runs the full STARRY OMR stack with bundled PostgreSQL, frontend, OMR service, and CPU prediction services for layout, mask, semantic, text localization, OCR, and brackets.
backend/python-services/requirements.txt CHANGED
@@ -3,6 +3,8 @@ numpy>=1.21.0
3
  opencv-python>=4.5.0
4
  Pillow>=8.0.0
5
  PyYAML>=5.4.0
 
 
6
 
7
  # Communication
8
  pyzmq>=22.0.0
 
3
  opencv-python>=4.5.0
4
  Pillow>=8.0.0
5
  PyYAML>=5.4.0
6
+ shapely>=1.8.0
7
+ pyclipper>=1.3.0
8
 
9
  # Communication
10
  pyzmq>=22.0.0
backend/python-services/services/loc_service.py CHANGED
@@ -14,6 +14,8 @@ import torch.nn as nn
14
  import torch.nn.functional as F
15
  import cv2
16
  import logging
 
 
17
  from collections import OrderedDict
18
 
19
  from predictors.torchscript_predictor import resolve_model_path
@@ -261,12 +263,15 @@ class LocService:
261
  """
262
 
263
  def __init__(self, model_path, device='cuda', image_short_side=736,
264
- box_thresh=0.01, class_num=13, **kwargs):
265
  self.device = device
266
  self.model = _load_loc_model(model_path, device)
267
  self.image_short_side = image_short_side
268
  self.box_thresh = box_thresh
 
269
  self.class_num = class_num
 
 
270
 
271
  def resize_image(self, img):
272
  """Resize image keeping aspect ratio, with short side = image_short_side."""
@@ -289,50 +294,95 @@ class LocService:
289
  img = torch.from_numpy(img).permute(2, 0, 1).float().unsqueeze(0)
290
  return img.to(self.device), original_shape
291
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
292
  def represent_boxes(self, pred, out_class, original_shape, resized_shape):
293
  """Post-process model output to extract bounding boxes."""
294
- pred_np = pred.cpu().numpy()[0, 0]
295
  class_np = out_class.cpu().numpy()[0, 0]
296
 
297
- binary = (pred_np > self.box_thresh).astype(np.uint8) * 255
298
- contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
299
 
300
  boxes = []
301
- h_scale = original_shape[0] / resized_shape[0]
302
- w_scale = original_shape[1] / resized_shape[1]
303
 
304
- for contour in contours:
305
- if len(contour) < 4:
 
306
  continue
307
 
308
- rect = cv2.minAreaRect(contour)
309
- box_points = cv2.boxPoints(rect)
310
- box_points = np.int0(box_points)
 
311
 
312
- mask = np.zeros(pred_np.shape, dtype=np.uint8)
313
- cv2.drawContours(mask, [contour], -1, 1, -1)
314
- class_region = class_np * mask
315
- if mask.sum() > 0:
316
- box_class = int(np.argmax(np.bincount(class_region[mask > 0].astype(int))))
317
- else:
318
- box_class = 0
319
 
320
- score_region = pred_np * mask
321
- score = score_region.sum() / max(mask.sum(), 1)
 
322
 
323
- scaled_points = box_points.astype(float)
324
- scaled_points[:, 0] *= w_scale
325
- scaled_points[:, 1] *= h_scale
326
 
327
  boxes.append({
328
- 'x0': float(scaled_points[0, 0]),
329
- 'y0': float(scaled_points[0, 1]),
330
- 'x1': float(scaled_points[1, 0]),
331
- 'y1': float(scaled_points[1, 1]),
332
- 'x2': float(scaled_points[2, 0]),
333
- 'y2': float(scaled_points[2, 1]),
334
- 'x3': float(scaled_points[3, 0]),
335
- 'y3': float(scaled_points[3, 1]),
336
  'score': float(score),
337
  'class': box_class,
338
  })
 
14
  import torch.nn.functional as F
15
  import cv2
16
  import logging
17
+ import pyclipper
18
+ from shapely.geometry import Polygon
19
  from collections import OrderedDict
20
 
21
  from predictors.torchscript_predictor import resolve_model_path
 
263
  """
264
 
265
  def __init__(self, model_path, device='cuda', image_short_side=736,
266
+ box_thresh=0.01, thresh=0.3, class_num=13, **kwargs):
267
  self.device = device
268
  self.model = _load_loc_model(model_path, device)
269
  self.image_short_side = image_short_side
270
  self.box_thresh = box_thresh
271
+ self.thresh = thresh
272
  self.class_num = class_num
273
+ self.min_size = 3
274
+ self.max_candidates = 1000
275
 
276
  def resize_image(self, img):
277
  """Resize image keeping aspect ratio, with short side = image_short_side."""
 
294
  img = torch.from_numpy(img).permute(2, 0, 1).float().unsqueeze(0)
295
  return img.to(self.device), original_shape
296
 
297
def get_mini_boxes(self, contour):
    """Return the ordered corners of a contour's minimum-area rectangle.

    The four corners are sorted by x, then each pair (the two with the
    smallest x and the two with the largest x) is ordered by y. The result
    starts with the small-x/small-y corner, followed by large-x/small-y,
    large-x/large-y and small-x/large-y.

    Args:
        contour: Point set accepted by ``cv2.minAreaRect``.

    Returns:
        A tuple ``(corners, short_side)`` where ``corners`` is a list of
        four points in the order above and ``short_side`` is the smaller
        of the rectangle's two side lengths.
    """
    rect = cv2.minAreaRect(contour)
    low_a, low_b, high_a, high_b = sorted(cv2.boxPoints(rect), key=lambda pt: pt[0])

    # Pair with the smallest x values: the one with the smaller y goes first.
    if low_b[1] > low_a[1]:
        first, fourth = low_a, low_b
    else:
        first, fourth = low_b, low_a

    # Pair with the largest x values: the one with the smaller y goes second.
    if high_b[1] > high_a[1]:
        second, third = high_a, high_b
    else:
        second, third = high_b, high_a

    return [first, second, third, fourth], min(rect[1])
312
+
313
def unclip(self, box, unclip_ratio=1.5):
    """Expand a polygon outward by an amount derived from its area.

    The offset distance is ``area * unclip_ratio / perimeter``.

    Args:
        box: Polygon vertices, accepted by both ``shapely`` ``Polygon`` and
            ``pyclipper`` ``AddPath``.
        unclip_ratio: Multiplier applied to the area/perimeter ratio.

    Returns:
        ``np.array`` of the offset paths, or an empty array when the
        polygon has zero perimeter or the offset produces no path.
    """
    outline = Polygon(box)
    perimeter = outline.length
    if perimeter == 0:
        # Degenerate polygon: nothing to expand (and avoids dividing by zero).
        return np.array([])

    grow_by = outline.area * unclip_ratio / perimeter

    clipper = pyclipper.PyclipperOffset()
    clipper.AddPath(box, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
    paths = clipper.Execute(grow_by)

    if not paths:
        return np.array([])
    return np.array(paths)
323
+
324
def box_score_fast_with_class(self, bitmap, classes, box):
    """Score a box against a map and pick its dominant class.

    Only the axis-aligned window around the box is examined: the box is
    rasterized into a mask of that window, the mean of ``bitmap`` under the
    mask is the score, and the most frequent value of ``classes`` under the
    mask is the class.

    Args:
        bitmap: 2-D score map; its first two dimensions bound the window.
        classes: Per-pixel class map; squeezed before indexing.
        box: ``(N, 2)`` array of x/y vertices. It is not modified.

    Returns:
        A tuple ``(score, box_class)``. ``box_class`` is 0 when the mask
        covers no pixel.
    """
    height, width = bitmap.shape[:2]
    pts = box.copy()  # work on a copy so the caller's array stays intact

    # Integer bounding window of the polygon, clamped to the map extent.
    left = np.clip(np.floor(pts[:, 0].min()).astype(np.int32), 0, width - 1)
    right = np.clip(np.ceil(pts[:, 0].max()).astype(np.int32), 0, width - 1)
    top = np.clip(np.floor(pts[:, 1].min()).astype(np.int32), 0, height - 1)
    bottom = np.clip(np.ceil(pts[:, 1].max()).astype(np.int32), 0, height - 1)

    rows = slice(top, bottom + 1)
    cols = slice(left, right + 1)

    # Rasterize the polygon in window-local coordinates.
    window_mask = np.zeros((bottom - top + 1, right - left + 1), dtype=np.uint8)
    pts[:, 0] -= left
    pts[:, 1] -= top
    cv2.fillPoly(window_mask, pts.reshape(1, -1, 2).astype(np.int32), 1)

    # Majority vote over the class values covered by the mask.
    votes = np.squeeze(classes)[rows, cols][window_mask > 0].astype(np.int32)
    if votes.size:
        box_class = int(np.argmax(np.bincount(votes)))
    else:
        box_class = 0

    mean_score = cv2.mean(bitmap[rows, cols], window_mask)[0]
    return mean_score, box_class
342
+
343
  def represent_boxes(self, pred, out_class, original_shape, resized_shape):
344
  """Post-process model output to extract bounding boxes."""
345
+ pred_np = pred.cpu().detach().numpy()[0, 0]
346
  class_np = out_class.cpu().numpy()[0, 0]
347
 
348
+ binary = (pred_np > self.thresh).astype(np.uint8) * 255
349
+ contours, _ = cv2.findContours(binary, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
350
 
351
  boxes = []
352
+ dest_height, dest_width = original_shape
353
+ bitmap_height, bitmap_width = pred_np.shape
354
 
355
+ for contour in contours[:self.max_candidates]:
356
+ points, short_side = self.get_mini_boxes(contour)
357
+ if short_side < self.min_size:
358
  continue
359
 
360
+ points = np.array(points)
361
+ score, box_class = self.box_score_fast_with_class(pred_np, class_np, points.reshape(-1, 2))
362
+ if self.box_thresh > score:
363
+ continue
364
 
365
+ expanded = self.unclip(points)
366
+ if len(expanded) == 0:
367
+ continue
 
 
 
 
368
 
369
+ box, short_side = self.get_mini_boxes(expanded.reshape(-1, 1, 2))
370
+ if short_side < self.min_size + 2:
371
+ continue
372
 
373
+ box = np.array(box)
374
+ box[:, 0] = np.clip(np.round(box[:, 0] / bitmap_width * dest_width), 0, dest_width)
375
+ box[:, 1] = np.clip(np.round(box[:, 1] / bitmap_height * dest_height), 0, dest_height)
376
 
377
  boxes.append({
378
+ 'x0': float(box[0, 0]),
379
+ 'y0': float(box[0, 1]),
380
+ 'x1': float(box[1, 0]),
381
+ 'y1': float(box[1, 1]),
382
+ 'x2': float(box[2, 0]),
383
+ 'y2': float(box[2, 1]),
384
+ 'x3': float(box[3, 0]),
385
+ 'y3': float(box[3, 1]),
386
  'score': float(score),
387
  'class': box_class,
388
  })
backend/python-services/services/ocr_service.py CHANGED
@@ -163,8 +163,7 @@ class OcrService:
163
 
164
  for box in location:
165
  text_type = TYPE_NAMES[box.get('class', 0)]
166
- crop_box = self.expand_box(box, image.shape, left=20, right=5, top=5, bottom=15) if text_type == 'TempoNumeral' else box
167
- dst_pic = self.perspective_transform(image, crop_box)
168
  if dst_pic is None:
169
  continue
170
 
 
163
 
164
  for box in location:
165
  text_type = TYPE_NAMES[box.get('class', 0)]
166
+ dst_pic = self.perspective_transform(image, box)
 
167
  if dst_pic is None:
168
  continue
169