import torch


def time_shift_sana(t: torch.Tensor, flow_shift: float = 1.0, sigma: float = 1.0):
    """SANA-style time-shift schedule; reduces to the identity map when flow_shift == sigma == 1."""
    return (1 / flow_shift) / ((1 / flow_shift) + (1 / t - 1) ** sigma)


def get_score_from_velocity(velocity, x, t):
    """Convert a flow-matching velocity into a score under the linear interpolant
    x_t = t * x_1 + (1 - t) * x_0 (alpha_t = t, sigma_t = 1 - t)."""
    alpha_t, d_alpha_t = t, 1
    sigma_t, d_sigma_t = 1 - t, -1
    mean = x
    reverse_alpha_ratio = alpha_t / d_alpha_t
    var = sigma_t**2 - reverse_alpha_ratio * d_sigma_t * sigma_t
    score = (reverse_alpha_ratio * velocity - mean) / var
    return score


def get_velocity_from_cfg(velocity, cfg, cfg_mult):
    """Apply classifier-free guidance; the batch is stacked as [conditional; unconditional]."""
    if cfg_mult == 2:
        cond_v, uncond_v = torch.chunk(velocity, 2, dim=0)
        velocity = uncond_v + cfg * (cond_v - uncond_v)
    return velocity


@torch.compile()
def euler_step(x, v, dt: float, cfg: float, cfg_mult: int):
    """Deterministic Euler update of the probability-flow ODE."""
    with torch.amp.autocast("cuda", enabled=False):
        v = v.to(torch.float32)
        v = get_velocity_from_cfg(v, cfg, cfg_mult)
        x = x + v * dt
    return x


@torch.compile()
def euler_maruyama_step(x, v, t, dt: float, cfg: float, cfg_mult: int):
    """Stochastic Euler-Maruyama update of the reverse SDE with diffusion coefficient 2 * (1 - t)."""
    with torch.amp.autocast("cuda", enabled=False):
        v = v.to(torch.float32)
        v = get_velocity_from_cfg(v, cfg, cfg_mult)
        score = get_score_from_velocity(v, x, t)
        drift = v + (1 - t) * score
        noise_scale = (2.0 * (1.0 - t) * dt) ** 0.5
        x = x + drift * dt + noise_scale * torch.randn_like(x)
    return x


def euler_maruyama(
    input_dim,
    forward_fn,
    c: torch.Tensor,
    cfg: float = 1.0,
    num_sampling_steps: int = 20,
    last_step_size: float = 0.05,
    time_shift: float = 1.0,
):
    """SDE sampler: Euler-Maruyama steps on [0, 1 - last_step_size], followed by a single
    deterministic Euler step to t = 1. The output of forward_fn(x_t, t, c) is treated as a
    prediction of the clean sample and converted to a velocity below."""
    cfg_mult = 2 if cfg > 1.0 else 1
    x_shape = list(c.shape)
    x_shape[0] = x_shape[0] // cfg_mult
    x_shape[-1] = input_dim
    x = torch.randn(x_shape, device=c.device)

    # Uniform time grid on [0, 1 - last_step_size], optionally warped by the SANA time shift.
    t_all = torch.linspace(
        0, 1 - last_step_size, num_sampling_steps + 1, device=c.device, dtype=torch.float32
    )
    t_all = time_shift_sana(t_all, time_shift)
    dt = t_all[1:] - t_all[:-1]

    t = torch.tensor(
        0.0, device=c.device, dtype=torch.float32
    )  # use a tensor to avoid a compile warning
    t_batch = torch.zeros(c.shape[0], device=c.device)
    for i in range(num_sampling_steps):
        t_batch[:] = t
        combined = torch.cat([x] * cfg_mult, dim=0)
        output = forward_fn(combined, t_batch, c)
        # Convert the clean-sample prediction to a velocity, v = (x_1_hat - x_t) / (1 - t),
        # with the denominator clamped for numerical stability near t = 1.
        v = (output - combined) / (1 - t_batch.view(-1, 1)).clamp_min(0.05)
        x = euler_maruyama_step(x, v, t, dt[i], cfg, cfg_mult)
        t += dt[i]

    # Final deterministic step from t = 1 - last_step_size to t = 1.
    combined = torch.cat([x] * cfg_mult, dim=0)
    t_batch[:] = 1 - last_step_size
    output = forward_fn(combined, t_batch, c)
    v = (output - combined) / (1 - t_batch.view(-1, 1)).clamp_min(0.05)
    x = euler_step(x, v, last_step_size, cfg, cfg_mult)
    return torch.cat([x] * cfg_mult, dim=0)


def euler(
    input_dim,
    forward_fn,
    c: torch.Tensor,
    cfg: float = 1.0,
    num_sampling_steps: int = 50,
):
    """ODE sampler: fixed-step Euler integration from t = 0 to t = 1.
    Here the output of forward_fn(x_t, t, c) is used directly as the velocity."""
    cfg_mult = 2 if cfg > 1.0 else 1
    x_shape = list(c.shape)
    x_shape[0] = x_shape[0] // cfg_mult
    x_shape[-1] = input_dim
    x = torch.randn(x_shape, device=c.device)

    dt = 1.0 / num_sampling_steps
    t = 0.0
    t_batch = torch.zeros(c.shape[0], device=c.device)
    for _ in range(num_sampling_steps):
        t_batch[:] = t
        combined = torch.cat([x] * cfg_mult, dim=0)
        v = forward_fn(combined, t_batch, c)
        x = euler_step(x, v, dt, cfg, cfg_mult)
        t += dt
    # Samples are duplicated along the batch dimension to match the shape of c.
    return torch.cat([x] * cfg_mult, dim=0)
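

# ---------------------------------------------------------------------------
# Usage sketch (illustrative, not part of the original module): `ToyNet` and the
# dimensions below are made-up stand-ins for whatever conditional network these
# samplers are normally paired with; only the (x_t, t, c) -> tensor calling
# convention is taken from how `forward_fn` is invoked above. Running this also
# assumes a working torch.compile backend for the decorated step functions.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    class ToyNet(torch.nn.Module):
        """Toy stand-in that maps (x_t, t, c) to a tensor shaped like x_t."""

        def __init__(self, input_dim: int, cond_dim: int):
            super().__init__()
            self.proj = torch.nn.Linear(input_dim + cond_dim + 1, input_dim)

        def forward(self, x, t, c):
            return self.proj(torch.cat([x, c, t.view(-1, 1)], dim=-1))

    input_dim, cond_dim, batch = 8, 16, 4
    net = ToyNet(input_dim, cond_dim)
    # With cfg > 1 the samplers expect conditioning for the conditional and
    # unconditional branches stacked along the batch dimension (2 * batch rows).
    c = torch.randn(batch * 2, cond_dim)

    with torch.no_grad():
        ode_samples = euler(input_dim, net, c, cfg=2.0, num_sampling_steps=10)
        sde_samples = euler_maruyama(input_dim, net, c, cfg=2.0, num_sampling_steps=10)
    print(ode_samples.shape, sde_samples.shape)  # both (batch * 2, input_dim)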