File size: 2,317 Bytes
c183d33
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import onnxruntime
import cv2
import numpy as np

class SAMEncoder:
    """Thin wrapper that feeds BGR images to an ONNX image-encoder model.

    The image is resized (aspect ratio preserved) so that it fits inside
    ``self.input_shape``, anchored in the top-left corner of a padded canvas,
    converted to RGB, normalised per channel and passed to the ONNX session
    as a float32 tensor with a leading batch axis and channels first.
    """

    # Per-channel (R, G, B) normalisation constants, applied after the pixel
    # values have been divided by 255.
    _MEAN = (0.485, 0.456, 0.406)
    _STD = (0.229, 0.224, 0.225)

    def __init__(self, model_path):
        """Create the ONNX Runtime session.

        :param model_path: passed unchanged to ``onnxruntime.InferenceSession``
        """
        self.sess = onnxruntime.InferenceSession(model_path)
        # Handed to ``letterbox`` as ``target_size``, i.e. (width, height).
        self.input_shape = (1024, 1024)

    def letterbox(self, image, target_size, color=(114, 114, 114)):
        """Resize ``image`` to fit ``target_size`` and pad the remainder.

        The aspect ratio is preserved. The resized image is placed in the
        top-left corner of the canvas; all padding goes on the right and
        bottom edges.

        :param image: input image (H, W, C)
        :param target_size: target size as (width, height)
        :param color: fill colour for the padded area (B, G, R)
        :return: ``(padded_image, scale, (pad_left, pad_top))`` where
            ``scale`` is the resize factor applied to both axes and
            ``(pad_left, pad_top)`` is the offset of the resized image inside
            the canvas. The offset is always ``(0, 0)`` because nothing is
            added on the left or top.
        :raises ValueError: if ``image`` has zero height or zero width
        """
        original_height, original_width = image.shape[:2]
        if original_height == 0 or original_width == 0:
            # Fail with a clear message instead of a bare ZeroDivisionError
            # from the scale computation below.
            raise ValueError(
                f"cannot letterbox an empty image of shape {tuple(image.shape)}"
            )
        target_width, target_height = target_size

        # Largest uniform scale at which both sides still fit in the target.
        scale = min(target_width / original_width, target_height / original_height)
        # Truncation can yield 0 for very elongated inputs, which cv2.resize
        # rejects; keep at least one pixel per side.
        new_width = max(1, int(original_width * scale))
        new_height = max(1, int(original_height * scale))

        resized_image = cv2.resize(
            image, (new_width, new_height), interpolation=cv2.INTER_LINEAR
        )

        # Border order is top, bottom, left, right: pad only bottom and right
        # so pixel (0, 0) of the input stays at (0, 0) of the output.
        padded_image = cv2.copyMakeBorder(
            resized_image,
            0, target_height - new_height,
            0, target_width - new_width,
            cv2.BORDER_CONSTANT,
            value=color,
        )

        # Report the offset that was actually applied. The previous code
        # returned the centred offsets although the image is never centred.
        return padded_image, scale, (0, 0)

    def preprocess(self, image):
        """Letterbox ``image`` to ``self.input_shape`` and convert BGR to RGB.

        :param image: input image (H, W, C) in BGR channel order
        :return: ``(padded_rgb_image, scale)``
        """
        padded_image, scale, _ = self.letterbox(image, self.input_shape)

        padded_image = cv2.cvtColor(padded_image, cv2.COLOR_BGR2RGB)
        return padded_image, scale

    def encode(self, image):
        """Preprocess ``image`` and run it through the ONNX session.

        :param image: input image (H, W, C) in BGR channel order
        :return: ``(outputs, scale)`` where ``outputs`` is the value returned
            by ``self.sess.run`` when all model outputs are requested, and
            ``scale`` is the resize factor used by ``letterbox``
        """
        padded_image, scale = self.preprocess(image)

        mean = np.array(self._MEAN, dtype=np.float32).reshape((1, 1, 3))
        std = np.array(self._STD, dtype=np.float32).reshape((1, 1, 3))
        # Scale to [0, 1] first, then normalise each channel.
        padded_image = (padded_image.astype(np.float32) / 255 - mean) / std

        # HWC -> CHW, then add the batch axis: (1, C, H, W).
        padded_image = np.transpose(padded_image, (2, 0, 1))
        padded_image = np.expand_dims(padded_image, axis=0)

        input_name = self.sess.get_inputs()[0].name
        return self.sess.run(None, {input_name: padded_image}), scale