| """
|
| Tensor operations for distributed computing.
|
| """
|
| import torch
|
| import numpy as np
|
| from typing import Dict, List, Optional, Union, Tuple
|
|
|
class TensorOps:
    """Stateless utilities for distributed tensor operations.

    Every method is a ``@staticmethod`` operating only on its arguments, so
    the helpers can be used identically on worker and coordinator processes.
    """

    @staticmethod
    def split_tensor(tensor: torch.Tensor, num_parts: int) -> List[torch.Tensor]:
        """Split ``tensor`` along dim 0 into at most ``num_parts`` chunks.

        Note: ``torch.chunk`` may return *fewer* than ``num_parts`` chunks
        when ``tensor.size(0) < num_parts``.  Wrapped in ``list`` so the
        return value matches the annotated ``List`` type.
        """
        return list(torch.chunk(tensor, num_parts))

    @staticmethod
    def merge_tensors(tensors: List[torch.Tensor], dim: int = 0) -> torch.Tensor:
        """Concatenate ``tensors`` along ``dim`` (inverse of :meth:`split_tensor`)."""
        return torch.cat(tensors, dim=dim)

    @staticmethod
    def average_gradients(gradients: List[Dict[str, torch.Tensor]]) -> Dict[str, torch.Tensor]:
        """Element-wise average of per-worker gradient dicts.

        Assumes every dict has the same keys and matching tensor shapes.
        Returns an empty dict for an empty input (previously raised
        ``IndexError`` on ``gradients[0]``).
        """
        if not gradients:
            return {}
        return {
            key: torch.mean(torch.stack([g[key] for g in gradients]), dim=0)
            for key in gradients[0]
        }

    @staticmethod
    def serialize_tensor(tensor: torch.Tensor) -> Dict[str, Union[List, str]]:
        """Serialize a tensor to a JSON-friendly dict (data, shape, dtype).

        ``detach()`` is required: calling ``.numpy()`` on a tensor that
        requires grad raises ``RuntimeError``.
        """
        return {
            'data': tensor.detach().cpu().numpy().tolist(),
            'shape': list(tensor.shape),
            'dtype': str(tensor.dtype)
        }

    @staticmethod
    def deserialize_tensor(tensor_dict: Dict[str, Union[List, str]]) -> torch.Tensor:
        """Rebuild a tensor from the dict produced by :meth:`serialize_tensor`."""
        data = np.array(tensor_dict['data'])
        shape = tensor_dict['shape']
        # 'torch.float32' -> torch.float32 via the attribute name after the dot.
        dtype = getattr(torch, tensor_dict['dtype'].split('.')[-1])
        return torch.tensor(data, dtype=dtype).reshape(shape)

    @staticmethod
    def gradient_clipping(gradients: Dict[str, torch.Tensor], max_norm: float) -> Dict[str, torch.Tensor]:
        """Clip each gradient tensor's L2 norm to ``max_norm``, in place.

        Bug fix: the previous implementation called
        ``torch.nn.utils.clip_grad_norm_(v, max_norm)``, which clips
        ``v.grad`` — ``None`` for raw gradient tensors — making it a silent
        no-op.  We now scale the tensors themselves, using the same
        ``max_norm / (norm + eps)`` coefficient ``clip_grad_norm_`` uses.
        """
        for grad in gradients.values():
            if grad is None:
                continue
            norm = grad.norm()
            clip_coef = max_norm / (norm + 1e-6)
            if clip_coef < 1:
                grad.mul_(clip_coef)
        return gradients

    @staticmethod
    def reduce_precision(tensor: torch.Tensor, bits: int = 16) -> torch.Tensor:
        """Cast ``tensor`` to float16 (``bits=16``) or float32 (``bits=32``).

        Raises:
            ValueError: for any other ``bits`` value.
        """
        if bits == 16:
            return tensor.half()
        elif bits == 32:
            return tensor.float()
        else:
            raise ValueError(f"Unsupported precision bits: {bits}")

    @staticmethod
    def shard_tensor(tensor: torch.Tensor, shard_size: int) -> List[torch.Tensor]:
        """Slice ``tensor`` along dim 0 into shards of ``shard_size`` rows.

        The last shard may be smaller when ``size(0)`` is not a multiple of
        ``shard_size``.
        """
        return [tensor[i:i + shard_size] for i in range(0, tensor.size(0), shard_size)]

    @staticmethod
    def compute_parameter_norm(parameters: Dict[str, torch.Tensor]) -> float:
        """Return the global L2 norm over all parameter tensors.

        Computed as sqrt(sum of squared per-tensor norms), i.e. the norm of
        the flattened concatenation of all parameters.
        """
        return sum(p.norm().item() ** 2 for p in parameters.values()) ** 0.5