Spaces:

k-l-lambda
/

starry

Running

App Files Community

k-l-lambda committed 24 days ago

Commit

23c0a70

1 Parent(s): 216c17a

update: sync starry bugfixes

Browse files

Files changed (4) hide show

README.md +2 -2
backend/python-services/requirements.txt +2 -0
backend/python-services/services/loc_service.py +81 -31
backend/python-services/services/ocr_service.py +1 -2

README.md CHANGED Viewed

@@ -21,6 +21,6 @@ Online sheet music recognition and editing platform.
 - Score editing and annotation
 - Music set management
-## Note
-This is a lightweight deployment. ML prediction services (layout, gauge, mask, semantic, OCR) are not included in this Space.

 - Score editing and annotation
 - Music set management
+## Included Services
+This Space runs the full STARRY OMR stack with bundled PostgreSQL, frontend, OMR service, and CPU prediction services for layout, mask, semantic, text localization, OCR, and brackets.

backend/python-services/requirements.txt CHANGED Viewed

@@ -3,6 +3,8 @@ numpy>=1.21.0
 opencv-python>=4.5.0
 Pillow>=8.0.0
 PyYAML>=5.4.0
 # Communication
 pyzmq>=22.0.0

 opencv-python>=4.5.0
 Pillow>=8.0.0
 PyYAML>=5.4.0
+shapely>=1.8.0
+pyclipper>=1.3.0
 # Communication
 pyzmq>=22.0.0

backend/python-services/services/loc_service.py CHANGED Viewed

@@ -14,6 +14,8 @@ import torch.nn as nn
 import torch.nn.functional as F
 import cv2
 import logging
 from collections import OrderedDict
 from predictors.torchscript_predictor import resolve_model_path
@@ -261,12 +263,15 @@ class LocService:
 	"""
 	def __init__(self, model_path, device='cuda', image_short_side=736,
-				 box_thresh=0.01, class_num=13, **kwargs):
 		self.device = device
 		self.model = _load_loc_model(model_path, device)
 		self.image_short_side = image_short_side
 		self.box_thresh = box_thresh
 		self.class_num = class_num
 	def resize_image(self, img):
 		"""Resize image keeping aspect ratio, with short side = image_short_side."""
@@ -289,50 +294,95 @@ class LocService:
 		img = torch.from_numpy(img).permute(2, 0, 1).float().unsqueeze(0)
 		return img.to(self.device), original_shape
 	def represent_boxes(self, pred, out_class, original_shape, resized_shape):
 		"""Post-process model output to extract bounding boxes."""
-		pred_np = pred.cpu().numpy()[0, 0]
 		class_np = out_class.cpu().numpy()[0, 0]
-		binary = (pred_np > self.box_thresh).astype(np.uint8) * 255
-		contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
 		boxes = []
-		h_scale = original_shape[0] / resized_shape[0]
-		w_scale = original_shape[1] / resized_shape[1]
-		for contour in contours:
-			if len(contour) < 4:
 				continue
-			rect = cv2.minAreaRect(contour)
-			box_points = cv2.boxPoints(rect)
-			box_points = np.int0(box_points)
-			mask = np.zeros(pred_np.shape, dtype=np.uint8)
-			cv2.drawContours(mask, [contour], -1, 1, -1)
-			class_region = class_np * mask
-			if mask.sum() > 0:
-				box_class = int(np.argmax(np.bincount(class_region[mask > 0].astype(int))))
-			else:
-				box_class = 0
-			score_region = pred_np * mask
-			score = score_region.sum() / max(mask.sum(), 1)
-			scaled_points = box_points.astype(float)
-			scaled_points[:, 0] *= w_scale
-			scaled_points[:, 1] *= h_scale
 			boxes.append({
-				'x0': float(scaled_points[0, 0]),
-				'y0': float(scaled_points[0, 1]),
-				'x1': float(scaled_points[1, 0]),
-				'y1': float(scaled_points[1, 1]),
-				'x2': float(scaled_points[2, 0]),
-				'y2': float(scaled_points[2, 1]),
-				'x3': float(scaled_points[3, 0]),
-				'y3': float(scaled_points[3, 1]),
 				'score': float(score),
 				'class': box_class,
 			})

 import torch.nn.functional as F
 import cv2
 import logging
+import pyclipper
+from shapely.geometry import Polygon
 from collections import OrderedDict
 from predictors.torchscript_predictor import resolve_model_path
 	"""
 	def __init__(self, model_path, device='cuda', image_short_side=736,
+				 box_thresh=0.01, thresh=0.3, class_num=13, **kwargs):
 		self.device = device
 		self.model = _load_loc_model(model_path, device)
 		self.image_short_side = image_short_side
 		self.box_thresh = box_thresh
+		self.thresh = thresh
 		self.class_num = class_num
+		self.min_size = 3
+		self.max_candidates = 1000
 	def resize_image(self, img):
 		"""Resize image keeping aspect ratio, with short side = image_short_side."""
 		img = torch.from_numpy(img).permute(2, 0, 1).float().unsqueeze(0)
 		return img.to(self.device), original_shape
+	def get_mini_boxes(self, contour):
+		bounding_box = cv2.minAreaRect(contour)
+		points = sorted(list(cv2.boxPoints(bounding_box)), key=lambda x: x[0])
+		if points[1][1] > points[0][1]:
+			index_1, index_4 = 0, 1
+		else:
+			index_1, index_4 = 1, 0
+		if points[3][1] > points[2][1]:
+			index_2, index_3 = 2, 3
+		else:
+			index_2, index_3 = 3, 2
+		return [points[index_1], points[index_2], points[index_3], points[index_4]], min(bounding_box[1])
+	def unclip(self, box, unclip_ratio=1.5):
+		poly = Polygon(box)
+		if poly.length == 0:
+			return np.array([])
+		distance = poly.area * unclip_ratio / poly.length
+		offset = pyclipper.PyclipperOffset()
+		offset.AddPath(box, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
+		expanded = offset.Execute(distance)
+		return np.array(expanded) if expanded else np.array([])
+	def box_score_fast_with_class(self, bitmap, classes, box):
+		h, w = bitmap.shape[:2]
+		box = box.copy()
+		xmin = np.clip(np.floor(box[:, 0].min()).astype(np.int32), 0, w - 1)
+		xmax = np.clip(np.ceil(box[:, 0].max()).astype(np.int32), 0, w - 1)
+		ymin = np.clip(np.floor(box[:, 1].min()).astype(np.int32), 0, h - 1)
+		ymax = np.clip(np.ceil(box[:, 1].max()).astype(np.int32), 0, h - 1)
+		mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8)
+		box[:, 0] -= xmin
+		box[:, 1] -= ymin
+		cv2.fillPoly(mask, box.reshape(1, -1, 2).astype(np.int32), 1)
+		class_crop = np.squeeze(classes)[ymin:ymax + 1, xmin:xmax + 1]
+		class_values = class_crop[mask > 0].astype(np.int32)
+		box_class = int(np.argmax(np.bincount(class_values))) if class_values.size else 0
+		return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0], box_class
 	def represent_boxes(self, pred, out_class, original_shape, resized_shape):
 		"""Post-process model output to extract bounding boxes."""
+		pred_np = pred.cpu().detach().numpy()[0, 0]
 		class_np = out_class.cpu().numpy()[0, 0]
+		binary = (pred_np > self.thresh).astype(np.uint8) * 255
+		contours, _ = cv2.findContours(binary, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
 		boxes = []
+		dest_height, dest_width = original_shape
+		bitmap_height, bitmap_width = pred_np.shape
+		for contour in contours[:self.max_candidates]:
+			points, short_side = self.get_mini_boxes(contour)
+			if short_side < self.min_size:
 				continue
+			points = np.array(points)
+			score, box_class = self.box_score_fast_with_class(pred_np, class_np, points.reshape(-1, 2))
+			if self.box_thresh > score:
+				continue
+			expanded = self.unclip(points)
+			if len(expanded) == 0:
+				continue
+			box, short_side = self.get_mini_boxes(expanded.reshape(-1, 1, 2))
+			if short_side < self.min_size + 2:
+				continue
+			box = np.array(box)
+			box[:, 0] = np.clip(np.round(box[:, 0] / bitmap_width * dest_width), 0, dest_width)
+			box[:, 1] = np.clip(np.round(box[:, 1] / bitmap_height * dest_height), 0, dest_height)
 			boxes.append({
+				'x0': float(box[0, 0]),
+				'y0': float(box[0, 1]),
+				'x1': float(box[1, 0]),
+				'y1': float(box[1, 1]),
+				'x2': float(box[2, 0]),
+				'y2': float(box[2, 1]),
+				'x3': float(box[3, 0]),
+				'y3': float(box[3, 1]),
 				'score': float(score),
 				'class': box_class,
 			})

backend/python-services/services/ocr_service.py CHANGED Viewed

@@ -163,8 +163,7 @@ class OcrService:
 		for box in location:
 			text_type = TYPE_NAMES[box.get('class', 0)]
-			crop_box = self.expand_box(box, image.shape, left=20, right=5, top=5, bottom=15) if text_type == 'TempoNumeral' else box
-			dst_pic = self.perspective_transform(image, crop_box)
 			if dst_pic is None:
 				continue

 		for box in location:
 			text_type = TYPE_NAMES[box.get('class', 0)]
+			dst_pic = self.perspective_transform(image, box)
 			if dst_pic is None:
 				continue