| |
| |
| |
| |
| |
|
|
|
|
| import contextlib |
| import dataclasses |
| import itertools |
| import math |
| import os |
| import unittest |
|
|
| import lpips |
| import numpy as np |
| import torch |
|
|
| from pytorch3d.implicitron.dataset.frame_data import FrameData |
| from pytorch3d.implicitron.dataset.json_index_dataset import JsonIndexDataset |
| from pytorch3d.implicitron.evaluation.evaluate_new_view_synthesis import eval_batch |
| from pytorch3d.implicitron.models.base_model import ImplicitronModelBase |
| from pytorch3d.implicitron.models.generic_model import GenericModel |
| from pytorch3d.implicitron.models.model_dbir import ModelDBIR |
| from pytorch3d.implicitron.tools.config import expand_args_fields, registry |
| from pytorch3d.implicitron.tools.metric_utils import calc_psnr, eval_depth |
| from pytorch3d.implicitron.tools.utils import dataclass_to_cuda_ |
|
|
| from .common_resources import get_skateboard_data, provide_lpips_vgg |
|
|
|
|
class TestEvaluation(unittest.TestCase):
    """Tests for Implicitron's new-view-synthesis evaluation utilities.

    Exercises the stand-alone metric helpers (``eval_depth``, ``calc_psnr``)
    and runs the full ``eval_batch`` pipeline on the "skateboard" test data
    with both the DBIR and the generic Implicitron models.

    All tests require a CUDA device: tensors and the LPIPS model are
    explicitly moved to the GPU.
    """

    def setUp(self) -> None:
        # Fix the RNG so that all random masks / perturbations / batch
        # samplings below are reproducible across runs.
        torch.manual_seed(42)

        # get_skateboard_data() is a context manager yielding the dataset
        # root and a path manager; keep it open for the lifetime of the
        # test and close it via addCleanup.
        stack = contextlib.ExitStack()
        dataset_root, path_manager = stack.enter_context(get_skateboard_data())
        self.addCleanup(stack.close)

        # Small 64x64, box-cropped dataset over the "skateboard" category.
        category = "skateboard"
        frame_file = os.path.join(dataset_root, category, "frame_annotations.jgz")
        sequence_file = os.path.join(dataset_root, category, "sequence_annotations.jgz")
        self.image_size = 64
        expand_args_fields(JsonIndexDataset)
        self.dataset = JsonIndexDataset(
            frame_annotations_file=frame_file,
            sequence_annotations_file=sequence_file,
            dataset_root=dataset_root,
            image_height=self.image_size,
            image_width=self.image_size,
            box_crop=True,
            remove_empty_masks=False,
            path_manager=path_manager,
        )
        self.bg_color = (0.0, 0.0, 0.0)

        # Make sure the VGG weights for LPIPS are available locally, then
        # build the LPIPS model used by eval_batch (on the GPU).
        provide_lpips_vgg()
        self.lpips_model = lpips.LPIPS(net="vgg").cuda()

    def test_eval_depth(self) -> None:
        """
        Check that eval_depth correctly masks errors and that, for get_best_scale=True,
        the error with scaled prediction equals the error without scaling the
        predicted depth. Finally, test that the error values are as expected
        for prediction and gt differing by a constant offset.
        """
        # Non-negative "ground truth" depth; clamping zeroes out roughly half
        # of the pixels, which matters for the gt > 0 masking checked below.
        gt = (torch.randn(10, 1, 300, 400, device="cuda") * 5.0).clamp(0.0)
        # Random binary (0/1 float) evaluation mask.
        mask = (torch.rand_like(gt) > 0.5).type_as(gt)

        # Sweep perturbation magnitudes 1e-5 .. 1e0 and two border-crop sizes.
        for diff in 10 ** torch.linspace(-5, 0, 6):
            for crop in (0, 5):
                # Prediction = gt + uniform noise in [-diff, diff].
                pred = gt + (torch.rand_like(gt) - 0.5) * 2 * diff

                # With get_best_scale=True the metric should be invariant to
                # a global rescaling of the prediction: the errors for `pred`
                # and `10 * pred` must coincide.
                mse_depth, abs_depth = eval_depth(
                    pred,
                    gt,
                    crop=crop,
                    mask=mask,
                    get_best_scale=True,
                )
                mse_depth_scale, abs_depth_scale = eval_depth(
                    pred * 10.0,
                    gt,
                    crop=crop,
                    mask=mask,
                    get_best_scale=True,
                )
                self.assertAlmostEqual(
                    float(mse_depth.sum()), float(mse_depth_scale.sum()), delta=1e-4
                )
                self.assertAlmostEqual(
                    float(abs_depth.sum()), float(abs_depth_scale.sum()), delta=1e-4
                )

                # Errors injected only OUTSIDE `mask` must not register when
                # evaluating inside `mask` (error ~ 0), ...
                pred_masked_err = gt + (torch.rand_like(gt) + diff) * (1 - mask)
                mse_depth_masked, abs_depth_masked = eval_depth(
                    pred_masked_err,
                    gt,
                    crop=crop,
                    mask=mask,
                    get_best_scale=True,
                )
                self.assertAlmostEqual(
                    float(mse_depth_masked.sum()), float(0.0), delta=1e-4
                )
                self.assertAlmostEqual(
                    float(abs_depth_masked.sum()), float(0.0), delta=1e-4
                )
                # ... while evaluating with the complementary mask must expose
                # them: the injected noise is at least `diff` per pixel.
                mse_depth_unmasked, abs_depth_unmasked = eval_depth(
                    pred_masked_err,
                    gt,
                    crop=crop,
                    mask=1 - mask,
                    get_best_scale=True,
                )
                self.assertGreater(
                    float(mse_depth_unmasked.sum()),
                    float(diff**2),
                )
                self.assertGreater(
                    float(abs_depth_unmasked.sum()),
                    float(diff),
                )

                # Prediction offset from gt by the constant `diff` inside
                # `mask` only; the expected errors then have a closed form
                # (no best-scale fitting here).
                pred_fix_diff = gt + diff * mask
                for _mask_gt in (mask, None):
                    mse_depth_fix_diff, abs_depth_fix_diff = eval_depth(
                        pred_fix_diff,
                        gt,
                        crop=crop,
                        mask=_mask_gt,
                        get_best_scale=False,
                    )
                    if _mask_gt is not None:
                        # Every evaluated pixel is off by exactly `diff`.
                        expected_err_abs = diff
                        expected_err_mse = diff**2
                    else:
                        # With no explicit mask, the average runs over pixels
                        # with gt > 0, of which only those inside `mask`
                        # carry the offset.
                        # NOTE(review): the gt > 0 normalization is inferred
                        # from this formula — confirm against eval_depth's
                        # implementation.
                        err_mask = (gt > 0.0).float() * mask
                        if crop > 0:
                            # Mirror eval_depth's border cropping.
                            err_mask = err_mask[:, :, crop:-crop, crop:-crop]
                            gt_cropped = gt[:, :, crop:-crop, crop:-crop]
                        else:
                            gt_cropped = gt
                        gt_mass = (gt_cropped > 0.0).float().sum(dim=(1, 2, 3))
                        expected_err_abs = (
                            diff * err_mask.sum(dim=(1, 2, 3)) / (gt_mass)
                        )
                        # mse = diff^2 scaled by the same pixel fraction.
                        expected_err_mse = diff * expected_err_abs
                    self.assertTrue(
                        torch.allclose(
                            abs_depth_fix_diff,
                            expected_err_abs * torch.ones_like(abs_depth_fix_diff),
                            atol=1e-4,
                        )
                    )
                    self.assertTrue(
                        torch.allclose(
                            mse_depth_fix_diff,
                            expected_err_mse * torch.ones_like(mse_depth_fix_diff),
                            atol=1e-4,
                        )
                    )

    def test_psnr(self) -> None:
        """
        Compare against opencv and check that the psnr is above
        the minimum possible value.
        """
        import cv2

        # Random images quantized to uint8 so that calc_psnr and cv2.PSNR
        # operate on exactly the same (rounded) pixel values.
        im1 = torch.rand(100, 3, 256, 256).cuda()
        im1_uint8 = (im1 * 255).to(torch.uint8)
        im1_rounded = im1_uint8.float() / 255
        for max_diff in 10 ** torch.linspace(-5, 0, 6):
            # Perturb each pixel by at most max_diff, then quantize again.
            im2 = im1 + (torch.rand_like(im1) - 0.5) * 2 * max_diff
            im2 = im2.clamp(0.0, 1.0)
            im2_uint8 = (im2 * 255).to(torch.uint8)
            im2_rounded = im2_uint8.float() / 255
            # Our PSNR on the rounded float images ...
            psnr = calc_psnr(im1_rounded, im2_rounded)
            # ... must match OpenCV's PSNR on the same uint8 data.
            psnr_cv2 = cv2.PSNR(
                im1_uint8.cpu().numpy(),
                im2_uint8.cpu().numpy(),
            )
            self.assertAlmostEqual(float(psnr), float(psnr_cv2), delta=1e-4)
            # Per-image PSNR cannot fall below the bound implied by the
            # per-pixel perturbation limit (MSE <= max_diff^2).
            max_mse = max_diff**2
            min_psnr = 10 * math.log10(1.0 / max_mse)
            for _im1, _im2 in zip(im1, im2):
                _psnr = calc_psnr(_im1, _im2)
                # Small epsilon guards against float round-off at equality.
                self.assertGreaterEqual(float(_psnr) + 1e-6, min_psnr)

    def _one_sequence_test(
        self,
        seq_dataset,
        model,
        batch_indices,
        check_metrics=False,
    ):
        """Render and evaluate every batch of one sequence with `model`.

        Loads the batches given by `batch_indices` (no shuffling; the
        batch sampler fully determines the batches), labels the first
        frame of each batch as the evaluation target and the rest as
        known source views, renders with `model` and scores the render
        with `eval_batch`. When `check_metrics` is set, additionally
        runs the metric sanity checks of `_check_metrics`.
        """
        loader = torch.utils.data.DataLoader(
            seq_dataset,
            shuffle=False,
            batch_sampler=batch_indices,
            collate_fn=FrameData.collate,
        )

        for frame_data in loader:
            # The test data carries no frame-type annotations; assign them
            # here: first frame is the unseen target, the rest are known.
            self.assertIsNone(frame_data.frame_type)
            self.assertIsNotNone(frame_data.image_rgb)
            frame_data.frame_type = [
                "train_unseen",
                *(["train_known"] * (len(frame_data.image_rgb) - 1)),
            ]

            # Move the whole FrameData to the GPU and render.
            frame_data = dataclass_to_cuda_(frame_data)
            preds = model(**dataclasses.asdict(frame_data))

            # Score the model's render against the ground-truth target.
            eval_result = eval_batch(
                frame_data,
                preds["implicitron_render"],
                bg_color=self.bg_color,
                lpips_model=self.lpips_model,
            )

            if check_metrics:
                self._check_metrics(
                    frame_data, preds["implicitron_render"], eval_result
                )

    def _check_metrics(self, frame_data, implicitron_render, eval_result):
        """Sanity-check `eval_result` against a heavily corrupted render.

        Perturbs the render's depth and rgb with large noise and
        randomizes its mask, re-evaluates, and asserts that every metric
        is at least as bad for the corrupted render as for the original.
        """
        # Build the corrupted render: large additive noise on depth and
        # image, random binary mask.
        implicitron_render_bad = implicitron_render.clone()
        implicitron_render_bad.depth_render += (
            torch.randn_like(implicitron_render_bad.depth_render) * 100.0
        )
        implicitron_render_bad.image_render += (
            torch.randn_like(implicitron_render_bad.image_render) * 100.0
        )
        implicitron_render_bad.mask_render = (
            torch.randn_like(implicitron_render_bad.mask_render) > 0.0
        ).float()
        eval_result_bad = eval_batch(
            frame_data,
            implicitron_render_bad,
            bg_color=self.bg_color,
            lpips_model=self.lpips_model,
        )

        # Direction of each checked metric: True = lower is better.
        lower_better = {
            "psnr_masked": False,
            "psnr_fg": False,
            "psnr_full_image": False,
            "depth_abs_fg": True,
            "iou": False,
            "rgb_l1_masked": True,
            "rgb_l1_fg": True,
            "lpips_masked": True,
            "lpips_full_image": True,
        }

        for metric in lower_better:
            m_better = eval_result[metric]
            m_worse = eval_result_bad[metric]
            # Skip metrics that are undefined (NaN) for this batch.
            if np.isnan(m_better) or np.isnan(m_worse):
                continue
            _assert = (
                self.assertLessEqual
                if lower_better[metric]
                else self.assertGreaterEqual
            )
            _assert(m_better, m_worse)

    def _get_random_batch_indices(
        self, seq_dataset, n_batches=2, min_batch_size=5, max_batch_size=10
    ):
        """Return `n_batches` random index lists (batches) into `seq_dataset`.

        NOTE(review): torch.randint's `high` bound is exclusive, so the
        drawn batch sizes lie in [min_batch_size, max_batch_size - 1] and
        `max_batch_size` itself is never reached; if it is meant to be
        attainable, this should be `high=max_batch_size + 1`.
        """
        batch_indices = []
        for _ in range(n_batches):
            batch_size = torch.randint(
                low=min_batch_size, high=max_batch_size, size=(1,)
            )
            # A batch is a random sample (without replacement) of indices.
            batch_indices.append(torch.randperm(len(seq_dataset))[:batch_size])

        return batch_indices

    def test_full_eval(self, n_sequences=5):
        """Test evaluation."""
        # Pre-build per-sequence subsets and random batches once, so that
        # both models below are evaluated on identical data.
        seq_datasets = {}
        batch_indices = {}
        for seq in itertools.islice(self.dataset.sequence_names(), n_sequences):
            idx = list(self.dataset.sequence_indices_in_order(seq))
            seq_dataset = torch.utils.data.Subset(self.dataset, idx)
            seq_datasets[seq] = seq_dataset
            batch_indices[seq] = self._get_random_batch_indices(seq_dataset)

        for model_class_type in ["ModelDBIR", "GenericModel"]:
            # Instantiate the model through the Implicitron registry by
            # its registered type name.
            ModelClass = registry.get(ImplicitronModelBase, model_class_type)
            expand_args_fields(ModelClass)
            model = ModelClass(
                render_image_width=self.image_size,
                render_image_height=self.image_size,
                bg_color=self.bg_color,
            )
            model.eval()
            model.cuda()

            for seq in itertools.islice(self.dataset.sequence_names(), n_sequences):
                self._one_sequence_test(
                    seq_datasets[seq],
                    model,
                    batch_indices[seq],
                    # Metric ordering checks are run only for ModelDBIR.
                    check_metrics=(model_class_type == "ModelDBIR"),
                )
|
|