| import cv2 |
| import numpy as np |
| import torch |
|
|
| from thirdparty.MoGe.moge.model.v2 import MoGeModel as MoGeModelV2 |
|
|
|
|
| class MogePipeline: |
| """ |
| Inference pipeline for MoGeModelV2 to estimate the horizontal Field of View (FoV). |
| """ |
|
|
| def __init__( |
| self, |
| model_name: str = "Ruicheng/moge-2-vitl", |
| device: torch.device = torch.device("cuda") |
| ): |
| """ |
| Initializes the pipeline and loads the MoGe model. |
| |
| Args: |
| model_name (str): Path or name of the pre-trained MoGe model. |
| device (torch.device): Device to load the model onto (e.g., 'cuda'). |
| """ |
| self.device = device |
| self.model = MoGeModelV2.from_pretrained(model_name).to(device) |
|
|
| def infer(self, input_image: np.ndarray) -> float: |
| """ |
| Performs inference to estimate the horizontal FoV from an image. |
| |
| Args: |
| input_image (np.ndarray): The input image (H, W, 3) in BGR format. |
| |
| Returns: |
| float: The estimated horizontal FoV in degrees. |
| """ |
|
|
| input_image_rgb = cv2.cvtColor(input_image, cv2.COLOR_BGR2RGB) |
| input_tensor = torch.tensor( |
| input_image_rgb / 255.0, |
| dtype=torch.float32, |
| device=self.device |
| ).permute(2, 0, 1) |
|
|
| |
| output = self.model.infer(input_tensor, resolution_level=1) |
|
|
| intrinsics = output['intrinsics'].cpu().numpy() |
| |
| |
| fov_x_rad = 2 * np.arctan(intrinsics[0, 2] / intrinsics[0, 0]) |
| fov_x_deg = np.rad2deg(fov_x_rad) |
| |
| return fov_x_deg |