# MobileSAM / python_onnx / sam_encoder.py
import onnxruntime
import cv2
import numpy as np
class SAMEncoder:
    def __init__(self, model_path):
        # Create the ONNX Runtime inference session for the SAM image encoder.
        self.sess = onnxruntime.InferenceSession(model_path)
        # The encoder expects a fixed 1024x1024 input.
        self.input_shape = (1024, 1024)
    def letterbox(self, image, target_size, color=(114, 114, 114)):
        """
        Resize the image to fit target_size while keeping its aspect ratio,
        filling the remaining area with a constant color.
        :param image: input image (H, W, C)
        :param target_size: target size (width, height)
        :param color: padding color (B, G, R)
        :return: padded image, scale factor, (pad_width, pad_height) padding added on the right/bottom
        """
        original_height, original_width = image.shape[:2]
        target_width, target_height = target_size
        # Compute the scale factor that fits the image inside the target size.
        scale = min(target_width / original_width, target_height / original_height)
        new_width = int(original_width * scale)
        new_height = int(original_height * scale)
        # Resize while preserving the aspect ratio.
        resized_image = cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_LINEAR)
        # Padding is applied to the bottom and right only, so the resized image keeps
        # its top-left origin and no coordinate offset is needed downstream.
        pad_width = target_width - new_width
        pad_height = target_height - new_height
        padded_image = cv2.copyMakeBorder(
            resized_image,
            0, pad_height,
            0, pad_width,
            cv2.BORDER_CONSTANT,
            value=color
        )
        return padded_image, scale, (pad_width, pad_height)
    def preprocess(self, image):
        # Letterbox to the encoder's input size, then convert BGR (OpenCV order) to RGB.
        padded_image, scale, (pad_width, pad_height) = self.letterbox(image, self.input_shape)
        padded_image = cv2.cvtColor(padded_image, cv2.COLOR_BGR2RGB)
        return padded_image, scale
    def encode(self, image):
        padded_image, scale = self.preprocess(image)
        # Normalize with ImageNet mean/std, then convert HWC -> NCHW float32 with a batch dimension.
        mean = np.array([0.485, 0.456, 0.406], dtype=np.float32).reshape((1, 1, 3))
        std = np.array([0.229, 0.224, 0.225], dtype=np.float32).reshape((1, 1, 3))
        padded_image = (padded_image.astype(np.float32) / 255.0 - mean) / std
        padded_image = np.transpose(padded_image, (2, 0, 1))
        padded_image = np.expand_dims(padded_image, axis=0)
        # Run the ONNX session; returns the encoder outputs (image embeddings) plus the letterbox scale.
        return self.sess.run(None, {self.sess.get_inputs()[0].name: padded_image}), scale
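
# Minimal usage sketch. The file names "mobile_sam_encoder.onnx" and "example.jpg" are
# hypothetical placeholders; substitute the actual exported encoder model and input image.
if __name__ == "__main__":
    encoder = SAMEncoder("mobile_sam_encoder.onnx")
    image = cv2.imread("example.jpg")  # BGR image as loaded by OpenCV
    outputs, scale = encoder.encode(image)
    # outputs[0] holds the image embeddings produced by the encoder; scale is needed later
    # to map prompt coordinates into the 1024x1024 letterboxed frame for the decoder.
    print(outputs[0].shape, scale)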