| |
| |
| |
| |
| |
|
|
| import unittest |
| from itertools import product |
| from typing import Tuple |
|
|
| from unittest.mock import patch |
|
|
| import torch |
| from pytorch3d.common.compat import meshgrid_ij |
| from pytorch3d.implicitron.models.renderer.base import EvaluationMode |
| from pytorch3d.implicitron.models.renderer.ray_sampler import ( |
| AdaptiveRaySampler, |
| compute_radii, |
| NearFarRaySampler, |
| ) |
|
|
| from pytorch3d.renderer.cameras import ( |
| CamerasBase, |
| FoVOrthographicCameras, |
| FoVPerspectiveCameras, |
| OrthographicCameras, |
| PerspectiveCameras, |
| ) |
| from pytorch3d.renderer.implicit.utils import HeterogeneousRayBundle |
| from tests.common_camera_utils import init_random_cameras |
|
|
| from tests.common_testing import TestCaseMixin |
|
|
# Camera classes exercised by the raysampler tests below; covers both
# FoV (NDC-defined) and screen-space camera conventions.
CAMERA_TYPES = (
    FoVPerspectiveCameras,
    FoVOrthographicCameras,
    OrthographicCameras,
    PerspectiveCameras,
)
|
|
|
|
def unproject_xy_grid_from_ndc_to_world_coord(
    cameras: "CamerasBase", xy_grid: torch.Tensor
) -> torch.Tensor:
    """
    Unproject a grid of xy points from NDC coordinates to world coordinates.

    Each xy location is lifted onto the depth-1 plane (z = 1 appended in NDC)
    and unprojected through the cameras into world space.

    Args:
        cameras: CamerasBase (or any object exposing `unproject_points`).
        xy_grid: A tensor of shape `(batch, ..., 2)` representing the
            x, y coords.

    Returns:
        A tensor of shape `(batch, ..., 3)` representing the world-space
        points of the depth-1 plane corresponding to `xy_grid`.
    """
    # NOTE: the original annotation declared Tuple[Tensor, Tensor] but a
    # single tensor is returned; the annotation is corrected here.
    batch_size = xy_grid.shape[0]
    # Number of rays per batch element: product of all middle dimensions.
    n_rays_per_image = xy_grid.shape[1:-1].numel()
    xy = xy_grid.view(batch_size, -1, 2)
    # Append z = 1 so every point lies on the depth-1 plane in NDC space.
    xyz = torch.cat([xy, xy_grid.new_ones(batch_size, n_rays_per_image, 1)], dim=-1)
    plane_at_depth1 = cameras.unproject_points(xyz, from_ndc=True)
    return plane_at_depth1.view(*xy_grid.shape[:-1], 3)
|
|
|
|
class TestRaysampler(TestCaseMixin, unittest.TestCase):
    """Unit tests for NearFarRaySampler and AdaptiveRaySampler."""

    def test_ndc_raysampler_n_ray_total_is_none(self):
        """
        The samplers are built without `n_rays_total`: the radii/lengths
        computation does not handle the packed representation that
        `n_rays_total` would produce.
        """
        message = (
            "If you introduce the support of `n_rays_total` for {0}, please handle the "
            "packing and unpacking logic for the radii and lengths computation."
        )
        for sampler in (NearFarRaySampler(), AdaptiveRaySampler()):
            self.assertIsNone(
                sampler._training_raysampler._n_rays_total,
                message.format(type(sampler)),
            )
            self.assertIsNone(
                sampler._evaluation_raysampler._n_rays_total,
                message.format(type(sampler)),
            )

    def test_catch_heterogeneous_exception(self):
        """
        When the wrapped raysampler yields a HeterogeneousRayBundle and
        `cast_ray_bundle_as_cone` is enabled, the sampler must raise
        TypeError; with the flag disabled the bundle passes through.
        """
        cameras = init_random_cameras(FoVPerspectiveCameras, 1, random_z=True)

        class FakeSampler:
            # Minimal stand-in exposing the NDC bounds the samplers read.
            def __init__(self):
                self.min_x, self.max_x = 1, 2
                self.min_y, self.max_y = 1, 2

            def __call__(self, **kwargs):
                # Always return a heterogeneous bundle, regardless of input.
                return HeterogeneousRayBundle(
                    torch.rand(3), torch.rand(3), torch.rand(3), torch.rand(1)
                )

        with patch(
            "pytorch3d.implicitron.models.renderer.ray_sampler.NDCMultinomialRaysampler",
            return_value=FakeSampler(),
        ):
            for sampler in [
                AdaptiveRaySampler(cast_ray_bundle_as_cone=True),
                NearFarRaySampler(cast_ray_bundle_as_cone=True),
            ]:
                with self.assertRaises(TypeError):
                    _ = sampler(cameras, EvaluationMode.TRAINING)
            for sampler in [
                AdaptiveRaySampler(cast_ray_bundle_as_cone=False),
                NearFarRaySampler(cast_ray_bundle_as_cone=False),
            ]:
                _ = sampler(cameras, EvaluationMode.TRAINING)

    def test_compute_radii(self):
        """
        `compute_radii` (from pixel sizes in NDC) must agree with radii
        computed from the full unprojected depth-1 grid of pixel centers.
        """
        batch_size = 1
        image_height, image_width = 20, 10
        min_y, max_y, min_x, max_x = -1.0, 1.0, -1.0, 1.0
        y, x = meshgrid_ij(
            torch.linspace(min_y, max_y, image_height, dtype=torch.float32),
            torch.linspace(min_x, max_x, image_width, dtype=torch.float32),
        )
        xy_grid = torch.stack([x, y], dim=-1).view(-1, 2)
        # NDC extent of one pixel along each axis.
        pixel_width = (max_x - min_x) / (image_width - 1)
        pixel_height = (max_y - min_y) / (image_height - 1)

        for cam_type in CAMERA_TYPES:
            cameras = init_random_cameras(cam_type, batch_size, random_z=True)
            radii = compute_radii(
                cameras, xy_grid, pixel_hw_ndc=(pixel_height, pixel_width)
            )
            plane_at_depth1 = unproject_xy_grid_from_ndc_to_world_coord(
                cameras, xy_grid
            )
            # Reference radii computed from neighbor distances on the grid.
            expected_radii = compute_pixel_radii_from_grid(
                plane_at_depth1.reshape(1, image_height, image_width, 3)
            )
            self.assertClose(expected_radii.reshape(-1, 1), radii)

    def test_forward(self):
        """
        With `cast_ray_bundle_as_cone=False`, bins and radii are absent and
        origins / directions / lengths have the expected shapes in both
        training (masked sampling) and evaluation (full grid) modes.
        """
        n_rays_per_image = 16
        image_height, image_width = 20, 20
        kwargs = {
            "image_width": image_width,
            "image_height": image_height,
            "n_pts_per_ray_training": 32,
            "n_pts_per_ray_evaluation": 32,
            "n_rays_per_image_sampled_from_mask": n_rays_per_image,
            "cast_ray_bundle_as_cone": False,
        }

        batch_size = 2
        samplers = [NearFarRaySampler(**kwargs), AdaptiveRaySampler(**kwargs)]
        evaluation_modes = [EvaluationMode.TRAINING, EvaluationMode.EVALUATION]

        for cam_type, sampler, evaluation_mode in product(
            CAMERA_TYPES, samplers, evaluation_modes
        ):
            cameras = init_random_cameras(cam_type, batch_size, random_z=True)
            ray_bundle = sampler(cameras, evaluation_mode)

            shape_out = (
                # Full-grid sampling lays rays out as (height, width).
                # Fixed from (width, height), previously masked by the
                # square 20x20 image used here.
                (batch_size, image_height, image_width)
                if evaluation_mode == EvaluationMode.EVALUATION
                else (batch_size, n_rays_per_image, 1)
            )
            n_pts_per_ray = (
                kwargs["n_pts_per_ray_evaluation"]
                if evaluation_mode == EvaluationMode.EVALUATION
                else kwargs["n_pts_per_ray_training"]
            )
            self.assertIsNone(ray_bundle.bins)
            self.assertIsNone(ray_bundle.pixel_radii_2d)
            self.assertEqual(
                ray_bundle.lengths.shape,
                (*shape_out, n_pts_per_ray),
            )
            self.assertEqual(ray_bundle.directions.shape, (*shape_out, 3))
            self.assertEqual(ray_bundle.origins.shape, (*shape_out, 3))

    def test_forward_with_use_bins(self):
        """
        With `cast_ray_bundle_as_cone=True`, lengths must be the bin
        midpoints and per-pixel radii must be populated with the expected
        shape in both training and evaluation modes.
        """
        n_rays_per_image = 16
        image_height, image_width = 20, 20
        kwargs = {
            "image_width": image_width,
            "image_height": image_height,
            "n_pts_per_ray_training": 32,
            "n_pts_per_ray_evaluation": 32,
            "n_rays_per_image_sampled_from_mask": n_rays_per_image,
            "cast_ray_bundle_as_cone": True,
        }

        batch_size = 1
        samplers = [NearFarRaySampler(**kwargs), AdaptiveRaySampler(**kwargs)]
        evaluation_modes = [EvaluationMode.TRAINING, EvaluationMode.EVALUATION]
        for cam_type, sampler, evaluation_mode in product(
            CAMERA_TYPES, samplers, evaluation_modes
        ):
            cameras = init_random_cameras(cam_type, batch_size, random_z=True)
            ray_bundle = sampler(cameras, evaluation_mode)

            # Lengths are the midpoints of consecutive bin boundaries.
            lengths = 0.5 * (ray_bundle.bins[..., :-1] + ray_bundle.bins[..., 1:])
            self.assertClose(ray_bundle.lengths, lengths)

            shape_out = (
                # Full-grid sampling lays rays out as (height, width).
                # Fixed from (width, height), previously masked by the
                # square 20x20 image used here.
                (batch_size, image_height, image_width)
                if evaluation_mode == EvaluationMode.EVALUATION
                else (batch_size, n_rays_per_image, 1)
            )
            self.assertEqual(ray_bundle.pixel_radii_2d.shape, (*shape_out, 1))
            self.assertEqual(ray_bundle.directions.shape, (*shape_out, 3))
            self.assertEqual(ray_bundle.origins.shape, (*shape_out, 3))
|
|
|
|
| |
def compute_pixel_radii_from_grid(pixel_grid: torch.Tensor) -> torch.Tensor:
    """
    Compute the radii of a conical frustum given the pixel grid.

    For every pixel we measure the Euclidean distance to its right neighbor
    (x direction) and to its bottom neighbor (y direction), then average the
    two and rescale:

        (dx_norm + dy_norm) * 0.5 * 2 / 12**0.5

    where 2 / 12**0.5 matches the variance of the pixel's footprint. The last
    row/column of differences is duplicated so the output keeps the grid's
    spatial resolution.

    Args:
        pixel_grid: A tensor of shape `(..., H, W, dim)` representing the
            full grid of rays pixel_grid.

    Returns:
        The radiis for each pixels and shape `(..., H, W, 1)`.
    """
    # Per-pixel distance to the neighbor along x (columns) and y (rows).
    dx_norm = torch.linalg.norm(
        torch.diff(pixel_grid, dim=-2), dim=-1, keepdim=True
    )
    dy_norm = torch.linalg.norm(
        torch.diff(pixel_grid, dim=-3), dim=-1, keepdim=True
    )
    # diff shrinks the axis by one; repeat the final difference to restore
    # the original H and W.
    dx_norm = torch.cat([dx_norm, dx_norm[..., -1:, :]], dim=-2)
    dy_norm = torch.cat([dy_norm, dy_norm[..., -1:, :, :]], dim=-3)
    return (dx_norm + dy_norm) / 12**0.5
|
|
|
|
class TestRadiiComputationOnFullGrid(TestCaseMixin, unittest.TestCase):
    def test_compute_pixel_radii_from_grid(self):
        """Check the radii on a small hand-crafted 3x3 grid of 3D points."""
        grid = torch.tensor(
            [
                [[0.0, 0, 0], [1.0, 0.0, 0], [3.0, 0.0, 0.0]],
                [[0.0, 0.25, 0], [1.0, 0.25, 0], [3.0, 0.25, 0]],
                [[0.0, 1, 0], [1.0, 1.0, 0], [3.0000, 1.0, 0]],
            ]
        )
        # Neighbor distances along y (constant within a row, last diff
        # repeated) and along x (constant within a column, last diff
        # repeated).
        expected_y_norm = torch.tensor([0.25, 0.75, 0.75]).repeat_interleave(3).reshape(3, 3)
        expected_x_norm = torch.tensor([1.0, 2.0, 2.0]).repeat(3).reshape(3, 3)
        expected_radii = (expected_x_norm + expected_y_norm) / 12**0.5
        self.assertClose(
            compute_pixel_radii_from_grid(grid), expected_radii[..., None]
        )
|
|