| | """ |
| | MIT License |
| | |
| | Copyright (c) 2021 Wilson Yan |
| | |
| | Permission is hereby granted, free of charge, to any person obtaining a copy |
| | of this software and associated documentation files (the "Software"), to deal |
| | in the Software without restriction, including without limitation the rights |
| | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
| | copies of the Software, and to permit persons to whom the Software is |
| | furnished to do so, subject to the following conditions: |
| | |
| | The above copyright notice and this permission notice shall be included in all |
| | copies or substantial portions of the Software. |
| | |
| | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
| | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| | SOFTWARE. |
| | |
| | |
| | This file is copied from https://github.com/wilson1yan/VideoGPT/blob/master/videogpt/utils.py |
| | We adapted it to Hugging Face AutoModel for easier model loading. |
| | """ |
| |
|
| |
|
| | |
| | |
def shift_dim(x, src_dim=-1, dest_dim=-1, make_contiguous=True):
    """Move one dimension of ``x`` to a new position.

    The remaining dimensions keep their relative order.

    Args:
        x: Tensor-like object exposing ``shape``, ``permute`` and
            ``contiguous``.
        src_dim: Index of the dimension to move. Negative values count
            from the end.
        dest_dim: Index the moved dimension occupies in the result.
            Negative values count from the end.
        make_contiguous: When true, ``contiguous()`` is called on the
            permuted result before it is returned.

    Returns:
        The permuted ``x``.

    Raises:
        AssertionError: If either index is out of range for ``x``.
    """
    n_dims = len(x.shape)
    if src_dim < 0:
        src_dim += n_dims
    if dest_dim < 0:
        dest_dim += n_dims

    assert 0 <= src_dim < n_dims and 0 <= dest_dim < n_dims

    # Take every dimension except the source in its original order, then
    # drop the source back in at the requested destination slot.
    permutation = [d for d in range(n_dims) if d != src_dim]
    permutation.insert(dest_dim, src_dim)

    x = x.permute(permutation)
    if make_contiguous:
        x = x.contiguous()
    return x
| |
|
| | |
| | |
| | |
| | |
| | |
def view_range(x, i, j, shape):
    """Reshape the dimensions ``[i, j)`` of ``x`` into ``shape``.

    Dimensions before ``i`` and from ``j`` onward are left unchanged.

    Args:
        x: Tensor-like object exposing ``shape`` and ``view``.
        i: First dimension of the range to replace. Negative values count
            from the end.
        j: One past the last dimension of the range. ``None`` means
            "through the last dimension"; negative values count from the
            end.
        shape: Iterable of sizes that replaces dimensions ``i`` to
            ``j - 1``.

    Returns:
        The result of ``x.view`` with the spliced target shape.

    Raises:
        AssertionError: Unless ``0 <= i < j <= len(x.shape)`` after
            normalising negative indices.
    """
    shape = tuple(shape)

    n_dims = len(x.shape)
    if i < 0:
        i += n_dims

    if j is None:
        j = n_dims
    elif j < 0:
        j += n_dims

    assert 0 <= i < j <= n_dims

    # Convert the slices to plain tuples so the concatenation does not
    # depend on the concrete type of ``x.shape``.
    leading = tuple(x.shape[:i])
    trailing = tuple(x.shape[j:])
    return x.view(leading + shape + trailing)
| |
|
| | |
def tensor_slice(x, begin, size):
    """Return the sub-block of ``x`` that starts at ``begin`` with extent ``size``.

    Args:
        x: Indexable array/tensor exposing ``shape``.
        begin: Per-dimension start offsets; each must be non-negative.
        size: Per-dimension extents. ``-1`` means "everything from the
            start offset to the end of that dimension".

    Returns:
        ``x`` indexed with one ``slice(b, b + s)`` per entry of ``begin``
        and ``size``. Dimensions beyond the given entries are kept whole.

    Raises:
        AssertionError: If a start offset is negative, or a resolved size
            is negative.
    """
    assert all(b >= 0 for b in begin)
    size = [
        dim_len - b if s == -1 else s
        for s, b, dim_len in zip(size, begin, x.shape)
    ]
    assert all(s >= 0 for s in size)

    # Index with a tuple: a list of slices is rejected by current NumPy and
    # deprecated for multidimensional indexing in PyTorch.
    slices = tuple(slice(b, b + s) for b, s in zip(begin, size))
    return x[slices]
| |
|
| |
|
| | import math |
| | import numpy as np |
| | import skvideo.io |
def save_video_grid(video, fname, nrow=None):
    """Tile a batch of videos into one grid video and write it to ``fname``.

    Args:
        video: 5-D tensor unpacked as ``(b, c, t, h, w)``. Values are
            scaled by 255 and stored as ``uint8``; they are clipped to
            ``[0, 1]`` first so out-of-range values saturate instead of
            wrapping around.
        fname: Output path handed to ``skvideo.io.vwrite``.
        nrow: Number of grid rows. Defaults to ``ceil(sqrt(b))``.

    Returns:
        None. The grid video is written to ``fname`` and a confirmation
        line is printed.
    """
    b, c, t, h, w = video.shape
    # Reorder to (b, t, h, w, c) so each video can be pasted into the
    # channels-last grid directly.
    video = video.permute(0, 2, 3, 4, 1)
    # detach() lets tensors that still require grad be converted to NumPy.
    frames = video.detach().cpu().numpy()
    frames = (np.clip(frames, 0.0, 1.0) * 255).astype('uint8')

    if nrow is None:
        nrow = math.ceil(math.sqrt(b))
    ncol = math.ceil(b / nrow)
    padding = 1
    video_grid = np.zeros(
        (t, (padding + h) * nrow + padding, (padding + w) * ncol + padding, c),
        dtype='uint8',
    )
    for idx in range(b):
        # Separate names for the cell position keep the channel count ``c``
        # from being overwritten.
        row, col = divmod(idx, ncol)

        start_r = (padding + h) * row
        start_c = (padding + w) * col
        video_grid[:, start_r:start_r + h, start_c:start_c + w] = frames[idx]

    skvideo.io.vwrite(fname, video_grid, inputdict={'-r': '5'})
    print('saved videos to', fname)
| |
|
| |
|