| | |
| | import argparse |
| | import time |
| | import torch |
| | from mmcv import Config |
| | from mmcv.parallel import MMDataParallel |
| | from mmcv.runner import load_checkpoint, wrap_fp16_model |
| | import sys |
| | sys.path.append('.') |
| | from projects.mmdet3d_plugin.datasets.builder import build_dataloader |
| | from projects.mmdet3d_plugin.datasets import custom_build_dataset |
| | |
| | from mmdet3d.models import build_detector |
| | |
| |
|
| |
|
def parse_args():
    """Parse command-line arguments for the benchmark script.

    Returns:
        argparse.Namespace: parsed arguments with fields ``config``,
        ``checkpoint``, ``samples``, ``log_interval`` and ``fuse_conv_bn``.
    """
    parser = argparse.ArgumentParser(description='MMDet benchmark a model')
    parser.add_argument('config', help='test config file path')
    parser.add_argument('--checkpoint', default=None, help='checkpoint file')
    # type=int so values supplied on the command line are numeric, not str;
    # downstream arithmetic ((i + 1) % args.log_interval and the comparison
    # (i + 1) == args.samples) silently misbehaves on str values.
    parser.add_argument(
        '--samples', type=int, default=2000, help='samples to benchmark')
    parser.add_argument(
        '--log-interval', type=int, default=10, help='interval of logging')
    parser.add_argument(
        '--fuse-conv-bn',
        action='store_true',
        help='Whether to fuse conv and bn, this will slightly increase'
        'the inference speed')
    args = parser.parse_args()
    return args
| |
|
| |
|
def main():
    """Benchmark single-GPU inference speed (img/s) of a detector.

    Builds the test dataset/dataloader and the model from the given config,
    optionally loads a checkpoint, then times forward passes and prints a
    running and an overall fps figure.
    """
    args = parse_args()

    cfg = Config.fromfile(args.config)
    # cudnn benchmark mode speeds up inference for fixed input shapes.
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    cfg.model.pretrained = None
    cfg.data.test.test_mode = True

    # Build the test dataset and a single-sample, non-shuffled loader so
    # each iteration times exactly one image.
    print(cfg.data.test)
    dataset = custom_build_dataset(cfg.data.test)
    data_loader = build_dataloader(
        dataset,
        samples_per_gpu=1,
        workers_per_gpu=cfg.data.workers_per_gpu,
        dist=False,
        shuffle=False)

    # Build the model; training config is irrelevant for inference.
    cfg.model.train_cfg = None
    model = build_detector(cfg.model, test_cfg=cfg.get('test_cfg'))
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        wrap_fp16_model(model)
    if args.checkpoint is not None:
        load_checkpoint(model, args.checkpoint, map_location='cpu')

    # NOTE(review): args.fuse_conv_bn is parsed but never applied here —
    # confirm whether conv-bn fusion was intended before this wrap.
    model = MMDataParallel(model, device_ids=[0])
    model.eval()

    # Skip the first few iterations so CUDA context / allocator warm-up
    # does not skew the measurement.
    num_warmup = 5
    pure_inf_time = 0

    for i, data in enumerate(data_loader):
        # Synchronize around the forward pass so perf_counter measures the
        # actual GPU work, not just the async kernel launches.
        torch.cuda.synchronize()
        start_time = time.perf_counter()
        with torch.no_grad():
            model(return_loss=False, rescale=True, **data)

        torch.cuda.synchronize()
        elapsed = time.perf_counter() - start_time

        if i >= num_warmup:
            pure_inf_time += elapsed
            if (i + 1) % args.log_interval == 0:
                fps = (i + 1 - num_warmup) / pure_inf_time
                print(f'Done image [{i + 1:<3}/ {args.samples}], '
                      f'fps: {fps:.1f} img / s')

        if (i + 1) == args.samples:
            # BUGFIX: the final iteration's `elapsed` was previously added to
            # `pure_inf_time` a second time here, while the numerator
            # (i + 1 - num_warmup) counts each timed image once — this
            # deflated the reported overall fps. It is already accumulated
            # in the branch above.
            fps = (i + 1 - num_warmup) / pure_inf_time
            print(f'Overall fps: {fps:.1f} img / s')
            break
| |
|
| |
|
# Script entry point: run the benchmark only when executed directly,
# not when imported as a module.
if __name__ == '__main__':
    main()
| |
|