| |
from typing import List, Optional

import cv2
import imageio
import numpy as np
|
|
def rotate_frame(frame: np.ndarray) -> np.ndarray:
    """Return *frame* rotated 90 degrees clockwise."""
    rotated = cv2.rotate(frame, cv2.ROTATE_90_CLOCKWISE)
    return rotated
|
|
def center_crop_image(frame: np.ndarray, crop_percent: float = 1.0) -> np.ndarray:
    """
    Center crop an image to a specified percentage of its original size.

    Args:
        frame (np.ndarray): Input image of shape (H, W[, C]).
        crop_percent (float): Fraction of the original size to keep
            (0 < crop_percent <= 1). A value of 1.0 returns the input
            unchanged.

    Returns:
        np.ndarray: Cropped image (a view into the input, not a copy).

    Raises:
        ValueError: If crop_percent is not in (0, 1].
    """
    # Reject values that would silently produce an empty or oversized crop.
    if not 0.0 < crop_percent <= 1.0:
        raise ValueError(f"crop_percent must be in (0, 1], got {crop_percent}")

    if crop_percent == 1.0:
        return frame

    original_height, original_width = frame.shape[:2]

    new_width = int(original_width * crop_percent)
    new_height = int(original_height * crop_percent)

    # Offsets that center the crop window inside the original image.
    start_x = (original_width - new_width) // 2
    start_y = (original_height - new_height) // 2

    return frame[start_y:start_y + new_height, start_x:start_x + new_width]
|
|
def read_video_frames(
    cap,
    start_frame: Optional[int] = None,
    end_frame: Optional[int] = None,
    interval: int = 1,
    rotate: bool = False,
    crop_percent: float = 1.0
) -> List[np.ndarray]:
    """
    Read frames from a video capture object with optional rotation and cropping.

    Args:
        cap: OpenCV VideoCapture object (or any object exposing the same
            read/set/get interface).
        start_frame (Optional[int]): Starting frame index; None starts at the
            capture's current position.
        end_frame (Optional[int]): Exclusive ending frame index; None reads to
            the end of the video (CAP_PROP_FRAME_COUNT).
        interval (int): Keep every `interval`-th frame (must be >= 1).
        rotate (bool): Whether to rotate kept frames 90 degrees clockwise.
        crop_percent (float): Center-crop fraction applied to kept frames.

    Returns:
        List[np.ndarray]: The selected (possibly rotated/cropped) frames.

    Raises:
        ValueError: If interval < 1.
    """
    # interval == 0 would crash the modulo below; negatives make no sense.
    if interval < 1:
        raise ValueError(f"interval must be >= 1, got {interval}")

    frame_count = 0
    frame_list: List[np.ndarray] = []

    if start_frame is not None:
        # Seek so decoding begins at the requested frame; keep frame_count in
        # absolute frame indices so the interval sampling stays aligned.
        cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)
        frame_count += start_frame

    if end_frame is None:
        # NOTE(review): CAP_PROP_FRAME_COUNT can be 0/unreliable for streams;
        # the read loop below still terminates via the `not ret` check.
        end_frame = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    while frame_count < end_frame:
        ret, frame = cap.read()
        if not ret:
            # Stream ended (or a decode failure) before reaching end_frame.
            break

        if frame_count % interval == 0:
            if rotate:
                frame = rotate_frame(frame)
            frame = center_crop_image(frame, crop_percent=crop_percent)
            frame_list.append(frame)

        frame_count += 1

    return frame_list
|
|
def save_to_video(frames: List[np.ndarray], output_path: str, fps: int = 30):
    """
    Encode a sequence of frames to a video file with the libx264 codec.

    Args:
        frames (List[np.ndarray]): Frames to write, in playback order.
        output_path (str): Destination video file path.
        fps (int): Playback frame rate. Defaults to 30.
    """
    encode_options = {'fps': fps, 'codec': 'libx264'}
    imageio.mimsave(output_path, frames, **encode_options)
|
|
def resize_frames_to_long_side(frames: List[np.ndarray], target_long_side: int) -> List[np.ndarray]:
    """
    Resize frames so the longer side matches the target size, preserving aspect ratio.

    Args:
        frames (List[np.ndarray]): List of frames of shape (H, W[, C]).
        target_long_side (int): Desired length of the longer side. If None,
            the input list is returned unchanged.

    Returns:
        List[np.ndarray]: Resized frames (a new list; inputs are not modified).
    """
    if target_long_side is None:
        return frames

    resized_frames = []

    for frame in frames:
        height, width = frame.shape[:2]

        # Scale by the longer side so the aspect ratio is preserved.
        scale_factor = target_long_side / max(width, height)

        # Clamp to at least 1 pixel so extreme aspect ratios cannot truncate
        # a dimension to 0, which cv2.resize rejects.
        new_width = max(1, int(width * scale_factor))
        new_height = max(1, int(height * scale_factor))

        resized_frame = cv2.resize(frame, (new_width, new_height), interpolation=cv2.INTER_AREA)
        resized_frames.append(resized_frame)

    return resized_frames
|
|
def sample_frames_evenly(video_frames: List[np.ndarray], num_frames: int) -> List[np.ndarray]:
    """
    Pick `num_frames` frames spread evenly across a sequence.

    Args:
        video_frames (List[np.ndarray]): Source frames.
        num_frames (int): How many frames to select.

    Returns:
        List[np.ndarray]: Evenly spaced frames. When `num_frames` is at least
        the sequence length, a shallow copy of the whole sequence is returned.
    """
    frame_total = len(video_frames)
    if num_frames >= frame_total:
        return video_frames.copy()

    # linspace over [0, last index], truncated to ints, always includes both ends.
    sample_positions = np.linspace(0, frame_total - 1, num=num_frames, dtype=int)
    return [video_frames[pos] for pos in sample_positions]
|
|
def wrap_text(text: str, max_width: int, font, font_scale: float) -> List[str]:
    """
    Wrap text into lines that fit within a given pixel width.

    Args:
        text (str): The text to wrap.
        max_width (int): The maximum line width in pixels.
        font: OpenCV font face (e.g. cv2.FONT_HERSHEY_SIMPLEX).
        font_scale (float): Font scale, as passed to cv2.putText.

    Returns:
        List[str]: Lines of wrapped text. A single word wider than
        `max_width` is placed on its own (overflowing) line rather than split.
    """
    words = text.split(' ')
    lines = []
    current_line = ''

    for word in words:
        test_line = current_line + word + ' '
        # Measured width in pixels of the candidate line (thickness 1).
        size = cv2.getTextSize(test_line, font, font_scale, 1)[0]
        if size[0] > max_width:
            # Flush the current line, but never emit an empty line when the
            # first word on a line is already wider than max_width.
            if current_line:
                lines.append(current_line.strip())
            current_line = word + ' '
        else:
            current_line = test_line

    if current_line:
        lines.append(current_line.strip())

    return lines
|
|
def add_overlay_text(frame: np.ndarray, caption: str) -> np.ndarray:
    """
    Draw a caption onto a frame, word-wrapped to the frame width.

    Note: the frame is modified in place and also returned for convenience.

    Args:
        frame (np.ndarray): Input image (modified in place).
        caption (str): Text to overlay.

    Returns:
        np.ndarray: The same frame, with the caption drawn on it.
    """
    font = cv2.FONT_HERSHEY_SIMPLEX
    # Use one scale for both measuring and drawing; the original measured at
    # scale 1.0 but drew at 0.5, making the computed wrap width far too narrow.
    font_scale = 0.5
    frame_width = frame.shape[1]
    y0, dy = 30, 20  # first text baseline and per-line spacing, in pixels
    for i, line in enumerate(wrap_text(caption, frame_width, font, font_scale)):
        y = y0 + i * dy
        cv2.putText(frame, line, (10, y), font, font_scale, (0, 255, 255), 2)
    return frame