Spaces:
Sleeping
Sleeping
File size: 6,726 Bytes
a37f5d3 4490536 a37f5d3 4490536 a37f5d3 4490536 a37f5d3 4490536 a37f5d3 4490536 a37f5d3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 | import os
import numpy as np
import cv2
from PIL import Image
import open3d as o3d
from utils.utils import read_calib_file #, fill_projected_os1_rings
color_key = {
0: [0, 0, 128], # road
1: [0, 128, 0], # water
2: [0, 0, 0] # other
}
class ImageData():
def __init__(self, file_path, calib_path, label_path=None):
self.image = cv2.imread(file_path)
intrinsics = read_calib_file(calib_path)
focal_len = intrinsics['focal_len'][0]
principal_x = intrinsics['principal_x'][0]
principal_y = intrinsics['principal_y'][0]
pp_mm_x = intrinsics['pp_mm_x'][0]
pp_mm_y = intrinsics['pp_mm_y'][0]
# print(f"{focal_len}, {principal_x}, {principal_y}, {pp_mm_x}, {pp_mm_y}")
self.camera_matrix = self.create_camera_matrix(focal_len, principal_x, principal_y, pp_mm_x, pp_mm_y)
self.dist_coeffs = np.array([0.0, 0.0, 0.0, 0.0, 0.0])
self.semantic_classes = None
self.colour_label = None
self.semantic_classes = {'road': [0, 0, 128], 'water': [0, 128, 0], 'other': [0, 0, 0]} # In the opencv BGR convention
if label_path is not None:
if os.path.exists(label_path):
label_img = cv2.imread(label_path)
else:
# Catch for no label existing #
print(f"Could not load label \'{label_path}\'. Using blank labels.")
label_img = np.zeros(self.image.shape).astype(np.uint8)
self.colour_label = cv2.resize(label_img, (self.image.shape[1], self.image.shape[0]), interpolation=cv2.INTER_NEAREST)
H, W, _ = label_img.shape
self.label_img = np.full((H, W), 2, dtype=np.uint8) # unknown = 255
# Convert to uint8 in case it's float
img = label_img.astype(np.uint8)
for class_idx, color in color_key.items():
# Create mask where all 3 channels match
mask = np.all(img == color, axis=2)
self.label_img[mask] = class_idx
self.label_img = cv2.resize(self.label_img, (self.image.shape[1], self.image.shape[0]), interpolation=cv2.INTER_NEAREST)
def create_camera_matrix(self, focal_length_mm, principal_point_x_pixels, principal_point_y_pixels,
pixels_per_mm_x, pixels_per_mm_y):
"""
Create camera matrix from physical camera parameters.
Args:
focal_length_mm: Focal length in millimeters
principal_point_x_pixels: Principal point X coordinate in pixels
principal_point_y_pixels: Principal point Y coordinate in pixels
pixels_per_mm_x: Pixels per millimeter in X direction
pixels_per_mm_y: Pixels per millimeter in Y direction
Returns:
camera_matrix: 3x3 camera intrinsic matrix
"""
# Check for None or invalid values
if any(x is None for x in [focal_length_mm, principal_point_x_pixels, principal_point_y_pixels,
pixels_per_mm_x, pixels_per_mm_y]):
print("ERROR: One or more input parameters is None!")
return None
if pixels_per_mm_x == 0 or pixels_per_mm_y == 0:
print("ERROR: pixels_per_mm cannot be zero!")
return None
try:
# Convert focal length from mm to pixels
fx = focal_length_mm * pixels_per_mm_x
fy = focal_length_mm * pixels_per_mm_y
# Principal point is already in pixels
cx = principal_point_x_pixels
cy = principal_point_y_pixels
camera_matrix = np.array([
[fx, 0, cx],
[0, fy, cy],
[0, 0, 1]
], dtype=np.float64)
return camera_matrix
except Exception as e:
print(f"ERROR in create_camera_matrix: {e}")
return None
def project_points(self, points, colours, cmap, valid_cam, colour_norm=None, semantic_label=False):
# , beam_id, azimuth
# Project to image coordinates
rvec = np.zeros(3) # No additional rotation
tvec = np.zeros(3) # No additional translation
image_points, _ = cv2.projectPoints(points, rvec, tvec, self.camera_matrix, self.dist_coeffs)
image_points = image_points.reshape(-1, 2)
# Filter points within image bounds
h, w = self.image.shape[0], self.image.shape[1]
valid_img_mask = ((image_points[:, 0] >= 0) & (image_points[:, 0] < w) &
(image_points[:, 1] >= 0) & (image_points[:, 1] < h))
valid_mask = valid_img_mask & valid_cam
points2project = image_points[valid_mask]
if colour_norm is None:
colour2project = colours[valid_mask]/colours[valid_img_mask].max()
else:
colour2project = colours[valid_mask]/colour_norm
if semantic_label:
# Build meta_points: x, y, z, intensity, u, v, semantic_label
valid_3d = points[valid_mask] # (N, 3) xyz in camera frame
valid_intensity = colours[valid_mask] # (N,) intensity
valid_uv = points2project # (N, 2) pixel coords
valid_labels = np.array([
self.label_img[int(pt[1]), int(pt[0])]
for pt in valid_uv
])
meta_points = np.column_stack([
valid_3d, # x, y, z
valid_intensity, # intensity
valid_uv, # u, v
valid_labels # semantic label
])
img_vis = self.image.copy()
mask_img = np.zeros((self.image.shape[0], self.image.shape[1]))
# Draw points
for (point, c) in zip(points2project.astype(int), colour2project):
r, g, b, _ = cmap(c)
colour = (r*255, g*255, b*255)
cv2.circle(img_vis, (int(point[0]), int(point[1])), 5, colour, -1) # -1 = filled circle
cv2.circle(mask_img, (int(point[0]), int(point[1])), 10, 255, -1)
if semantic_label:
return img_vis, image_points, valid_mask, mask_img, meta_points
else:
return img_vis, image_points, valid_mask, mask_img
def get_image_coords(self, points):
# Project to image coordinates
rvec = np.zeros(3) # No additional rotation
tvec = np.zeros(3) # No additional translation
image_points, _ = cv2.projectPoints(points, rvec, tvec, self.camera_matrix, self.dist_coeffs)
image_points = image_points.reshape(-1, 2)
return image_points
|