| |
| |
| |
| |
| |
|
|
| import unittest |
| from itertools import product |
| from typing import Tuple |
|
|
| from unittest.mock import patch |
|
|
| import torch |
| from pytorch3d.common.compat import meshgrid_ij |
| from pytorch3d.implicitron.models.renderer.base import EvaluationMode |
| from pytorch3d.implicitron.models.renderer.ray_sampler import ( |
| AdaptiveRaySampler, |
| compute_radii, |
| NearFarRaySampler, |
| ) |
|
|
| from pytorch3d.renderer.cameras import ( |
| CamerasBase, |
| FoVOrthographicCameras, |
| FoVPerspectiveCameras, |
| OrthographicCameras, |
| PerspectiveCameras, |
| ) |
| from pytorch3d.renderer.implicit.utils import HeterogeneousRayBundle |
| from tests.common_camera_utils import init_random_cameras |
|
|
| from tests.common_testing import TestCaseMixin |
|
|
# Camera classes exercised by the raysampler tests below; covers both
# FoV (NDC-defined) and screen-space camera conventions.
CAMERA_TYPES = (
    FoVPerspectiveCameras,
    FoVOrthographicCameras,
    OrthographicCameras,
    PerspectiveCameras,
)
|
|
|
|
def unproject_xy_grid_from_ndc_to_world_coord(
    cameras: "CamerasBase", xy_grid: torch.Tensor
) -> torch.Tensor:
    """
    Unproject a grid of xy points from NDC coordinates to world coordinates.

    Each xy location is lifted onto the depth-1 plane (z = 1 appended in NDC)
    and unprojected through the cameras into world space.

    Args:
        cameras: CamerasBase (or any object exposing `unproject_points`).
        xy_grid: A tensor of shape `(batch, ..., 2)` representing the
            x, y coords.

    Returns:
        A tensor of shape `(batch, ..., 3)` representing the world-space
        points of the depth-1 plane corresponding to `xy_grid`.
    """
    # NOTE: the original annotation declared Tuple[Tensor, Tensor] but a
    # single tensor is returned; the annotation is corrected here.
    batch_size = xy_grid.shape[0]
    # Number of rays per batch element: product of all middle dimensions.
    n_rays_per_image = xy_grid.shape[1:-1].numel()
    xy = xy_grid.view(batch_size, -1, 2)
    # Append z = 1 so every point lies on the depth-1 plane in NDC space.
    xyz = torch.cat([xy, xy_grid.new_ones(batch_size, n_rays_per_image, 1)], dim=-1)
    plane_at_depth1 = cameras.unproject_points(xyz, from_ndc=True)
    return plane_at_depth1.view(*xy_grid.shape[:-1], 3)
|
|
|
|
class TestRaysampler(TestCaseMixin, unittest.TestCase):
    """Unit tests for NearFarRaySampler and AdaptiveRaySampler."""

    def test_ndc_raysampler_n_ray_total_is_none(self):
        """
        The samplers are built without `n_rays_total`: the radii/lengths
        computation does not handle the packed representation that
        `n_rays_total` would produce.
        """
        message = (
            "If you introduce the support of `n_rays_total` for {0}, please handle the "
            "packing and unpacking logic for the radii and lengths computation."
        )
        for sampler in (NearFarRaySampler(), AdaptiveRaySampler()):
            self.assertIsNone(
                sampler._training_raysampler._n_rays_total,
                message.format(type(sampler)),
            )
            self.assertIsNone(
                sampler._evaluation_raysampler._n_rays_total,
                message.format(type(sampler)),
            )

    def test_catch_heterogeneous_exception(self):
        """
        When the wrapped raysampler yields a HeterogeneousRayBundle and
        `cast_ray_bundle_as_cone` is enabled, the sampler must raise
        TypeError; with the flag disabled the bundle passes through.
        """
        cameras = init_random_cameras(FoVPerspectiveCameras, 1, random_z=True)

        class FakeSampler:
            # Minimal stand-in exposing the NDC bounds the samplers read.
            def __init__(self):
                self.min_x, self.max_x = 1, 2
                self.min_y, self.max_y = 1, 2

            def __call__(self, **kwargs):
                # Always return a heterogeneous bundle, regardless of input.
                return HeterogeneousRayBundle(
                    torch.rand(3), torch.rand(3), torch.rand(3), torch.rand(1)
                )

        with patch(
            "pytorch3d.implicitron.models.renderer.ray_sampler.NDCMultinomialRaysampler",
            return_value=FakeSampler(),
        ):
            for sampler in [
                AdaptiveRaySampler(cast_ray_bundle_as_cone=True),
                NearFarRaySampler(cast_ray_bundle_as_cone=True),
            ]:
                with self.assertRaises(TypeError):
                    _ = sampler(cameras, EvaluationMode.TRAINING)
            for sampler in [
                AdaptiveRaySampler(cast_ray_bundle_as_cone=False),
                NearFarRaySampler(cast_ray_bundle_as_cone=False),
            ]:
                _ = sampler(cameras, EvaluationMode.TRAINING)

    def test_compute_radii(self):
        """
        `compute_radii` (from pixel sizes in NDC) must agree with radii
        computed from the full unprojected depth-1 grid of pixel centers.
        """
        batch_size = 1
        image_height, image_width = 20, 10
        min_y, max_y, min_x, max_x = -1.0, 1.0, -1.0, 1.0
        y, x = meshgrid_ij(
            torch.linspace(min_y, max_y, image_height, dtype=torch.float32),
            torch.linspace(min_x, max_x, image_width, dtype=torch.float32),
        )
        xy_grid = torch.stack([x, y], dim=-1).view(-1, 2)
        # NDC extent of one pixel along each axis.
        pixel_width = (max_x - min_x) / (image_width - 1)
        pixel_height = (max_y - min_y) / (image_height - 1)

        for cam_type in CAMERA_TYPES:
            cameras = init_random_cameras(cam_type, batch_size, random_z=True)
            radii = compute_radii(
                cameras, xy_grid, pixel_hw_ndc=(pixel_height, pixel_width)
            )
            plane_at_depth1 = unproject_xy_grid_from_ndc_to_world_coord(
                cameras, xy_grid
            )
            # Reference radii computed from neighbor distances on the grid.
            expected_radii = compute_pixel_radii_from_grid(
                plane_at_depth1.reshape(1, image_height, image_width, 3)
            )
            self.assertClose(expected_radii.reshape(-1, 1), radii)

    def test_forward(self):
        """
        With `cast_ray_bundle_as_cone=False`, bins and radii are absent and
        origins / directions / lengths have the expected shapes in both
        training (masked sampling) and evaluation (full grid) modes.
        """
        n_rays_per_image = 16
        image_height, image_width = 20, 20
        kwargs = {
            "image_width": image_width,
            "image_height": image_height,
            "n_pts_per_ray_training": 32,
            "n_pts_per_ray_evaluation": 32,
            "n_rays_per_image_sampled_from_mask": n_rays_per_image,
            "cast_ray_bundle_as_cone": False,
        }

        batch_size = 2
        samplers = [NearFarRaySampler(**kwargs), AdaptiveRaySampler(**kwargs)]
        evaluation_modes = [EvaluationMode.TRAINING, EvaluationMode.EVALUATION]

        for cam_type, sampler, evaluation_mode in product(
            CAMERA_TYPES, samplers, evaluation_modes
        ):
            cameras = init_random_cameras(cam_type, batch_size, random_z=True)
            ray_bundle = sampler(cameras, evaluation_mode)

            shape_out = (
                # Full-grid sampling lays rays out as (height, width).
                # Fixed from (width, height), previously masked by the
                # square 20x20 image used here.
                (batch_size, image_height, image_width)
                if evaluation_mode == EvaluationMode.EVALUATION
                else (batch_size, n_rays_per_image, 1)
            )
            n_pts_per_ray = (
                kwargs["n_pts_per_ray_evaluation"]
                if evaluation_mode == EvaluationMode.EVALUATION
                else kwargs["n_pts_per_ray_training"]
            )
            self.assertIsNone(ray_bundle.bins)
            self.assertIsNone(ray_bundle.pixel_radii_2d)
            self.assertEqual(
                ray_bundle.lengths.shape,
                (*shape_out, n_pts_per_ray),
            )
            self.assertEqual(ray_bundle.directions.shape, (*shape_out, 3))
            self.assertEqual(ray_bundle.origins.shape, (*shape_out, 3))

    def test_forward_with_use_bins(self):
        """
        With `cast_ray_bundle_as_cone=True`, lengths must be the bin
        midpoints and per-pixel radii must be populated with the expected
        shape in both training and evaluation modes.
        """
        n_rays_per_image = 16
        image_height, image_width = 20, 20
        kwargs = {
            "image_width": image_width,
            "image_height": image_height,
            "n_pts_per_ray_training": 32,
            "n_pts_per_ray_evaluation": 32,
            "n_rays_per_image_sampled_from_mask": n_rays_per_image,
            "cast_ray_bundle_as_cone": True,
        }

        batch_size = 1
        samplers = [NearFarRaySampler(**kwargs), AdaptiveRaySampler(**kwargs)]
        evaluation_modes = [EvaluationMode.TRAINING, EvaluationMode.EVALUATION]
        for cam_type, sampler, evaluation_mode in product(
            CAMERA_TYPES, samplers, evaluation_modes
        ):
            cameras = init_random_cameras(cam_type, batch_size, random_z=True)
            ray_bundle = sampler(cameras, evaluation_mode)

            # Lengths are the midpoints of consecutive bin boundaries.
            lengths = 0.5 * (ray_bundle.bins[..., :-1] + ray_bundle.bins[..., 1:])
            self.assertClose(ray_bundle.lengths, lengths)

            shape_out = (
                # Full-grid sampling lays rays out as (height, width).
                # Fixed from (width, height), previously masked by the
                # square 20x20 image used here.
                (batch_size, image_height, image_width)
                if evaluation_mode == EvaluationMode.EVALUATION
                else (batch_size, n_rays_per_image, 1)
            )
            self.assertEqual(ray_bundle.pixel_radii_2d.shape, (*shape_out, 1))
            self.assertEqual(ray_bundle.directions.shape, (*shape_out, 3))
            self.assertEqual(ray_bundle.origins.shape, (*shape_out, 3))
|
|
|
|
| |
def compute_pixel_radii_from_grid(pixel_grid: torch.Tensor) -> torch.Tensor:
    """
    Compute the radii of a conical frustum given the pixel grid.

    For every pixel we measure the Euclidean distance to its right neighbor
    (x direction) and to its bottom neighbor (y direction), then average the
    two and rescale:

        (dx_norm + dy_norm) * 0.5 * 2 / 12**0.5

    where 2 / 12**0.5 matches the variance of the pixel's footprint. The last
    row/column of differences is duplicated so the output keeps the grid's
    spatial resolution.

    Args:
        pixel_grid: A tensor of shape `(..., H, W, dim)` representing the
            full grid of rays pixel_grid.

    Returns:
        The radiis for each pixels and shape `(..., H, W, 1)`.
    """
    # Per-pixel distance to the neighbor along x (columns) and y (rows).
    dx_norm = torch.linalg.norm(
        torch.diff(pixel_grid, dim=-2), dim=-1, keepdim=True
    )
    dy_norm = torch.linalg.norm(
        torch.diff(pixel_grid, dim=-3), dim=-1, keepdim=True
    )
    # diff shrinks the axis by one; repeat the final difference to restore
    # the original H and W.
    dx_norm = torch.cat([dx_norm, dx_norm[..., -1:, :]], dim=-2)
    dy_norm = torch.cat([dy_norm, dy_norm[..., -1:, :, :]], dim=-3)
    return (dx_norm + dy_norm) / 12**0.5
|
|
|
|
class TestRadiiComputationOnFullGrid(TestCaseMixin, unittest.TestCase):
    def test_compute_pixel_radii_from_grid(self):
        """Check the radii on a small hand-crafted 3x3 grid of 3D points."""
        grid = torch.tensor(
            [
                [[0.0, 0, 0], [1.0, 0.0, 0], [3.0, 0.0, 0.0]],
                [[0.0, 0.25, 0], [1.0, 0.25, 0], [3.0, 0.25, 0]],
                [[0.0, 1, 0], [1.0, 1.0, 0], [3.0000, 1.0, 0]],
            ]
        )
        # Neighbor distances along y (constant within a row, last diff
        # repeated) and along x (constant within a column, last diff
        # repeated).
        expected_y_norm = torch.tensor([0.25, 0.75, 0.75]).repeat_interleave(3).reshape(3, 3)
        expected_x_norm = torch.tensor([1.0, 2.0, 2.0]).repeat(3).reshape(3, 3)
        expected_radii = (expected_x_norm + expected_y_norm) / 12**0.5
        self.assertClose(
            compute_pixel_radii_from_grid(grid), expected_radii[..., None]
        )
|
|