BiliSakura committed on
Commit
ddac775
·
verified ·
1 Parent(s): 4ec11b6

Update all files for BitDance-ImageNet-diffusers

Browse files
Files changed (1) hide show
  1. BitDance_B_1x/transformer/sampling.py +119 -0
BitDance_B_1x/transformer/sampling.py ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+
3
def time_shift_sana(t: torch.Tensor, flow_shift: float = 1., sigma: float = 1.):
    """Apply the SANA-style timestep warp to ``t``.

    Maps t to (1/flow_shift) / ((1/flow_shift) + (1/t - 1) ** sigma).
    With flow_shift == 1 and sigma == 1 this is the identity on (0, 1];
    at t == 0 the 1/t term is inf in torch, so the result is 0.
    """
    inv_shift = 1 / flow_shift
    return inv_shift / (inv_shift + (1 / t - 1) ** sigma)
5
+
6
def get_score_from_velocity(velocity, x, t):
    """Convert a flow-matching velocity at (x, t) into a score estimate.

    Uses the linear interpolation path alpha_t = t, sigma_t = 1 - t
    (hence d_alpha/dt = 1, d_sigma/dt = -1); for this path the variance
    term simplifies to (1 - t)^2 - t * (-1) * (1 - t) = 1 - t.
    """
    alpha, d_alpha = t, 1
    sigma, d_sigma = 1 - t, -1
    ratio = alpha / d_alpha
    variance = sigma ** 2 - ratio * d_sigma * sigma
    return (ratio * velocity - x) / variance
14
+
15
+
16
def get_velocity_from_cfg(velocity, cfg, cfg_mult):
    """Combine classifier-free-guidance velocity halves.

    When ``cfg_mult == 2`` the batch dim stacks [conditional; unconditional]
    halves; the guided velocity uncond + cfg * (cond - uncond) is returned.
    Otherwise ``velocity`` passes through unchanged.
    """
    if cfg_mult != 2:
        return velocity
    cond, uncond = torch.chunk(velocity, 2, dim=0)
    return uncond + cfg * (cond - uncond)
21
+
22
+
23
@torch.compile()
def euler_step(x, v, dt: float, cfg: float, cfg_mult: int):
    """One explicit-Euler ODE update: x <- x + v_guided * dt.

    Autocast is disabled and v is cast to float32 so the integration runs
    in full precision regardless of any surrounding AMP context.
    """
    with torch.amp.autocast("cuda", enabled=False):
        guided = get_velocity_from_cfg(v.to(torch.float32), cfg, cfg_mult)
        x = x + guided * dt
    return x
30
+
31
+
32
@torch.compile()
def euler_maruyama_step(x, v, t, dt: float, cfg: float, cfg_mult: int):
    """One Euler–Maruyama SDE update on the linear flow path.

    The drift is the guided velocity plus (1 - t) * score, and the
    diffusion term adds Gaussian noise with std sqrt(2 * (1 - t) * dt).
    All arithmetic is forced to float32 with autocast disabled.
    """
    with torch.amp.autocast("cuda", enabled=False):
        guided = get_velocity_from_cfg(v.to(torch.float32), cfg, cfg_mult)
        score = get_score_from_velocity(guided, x, t)
        drift = guided + (1 - t) * score
        noise_std = (2.0 * (1.0 - t) * dt) ** 0.5
        x = x + drift * dt + noise_std * torch.randn_like(x)
    return x
42
+
43
+
44
def euler_maruyama(
    input_dim,
    forward_fn,
    c: torch.Tensor,
    cfg: float = 1.0,
    num_sampling_steps: int = 20,
    last_step_size: float = 0.05,
    time_shift: float = 1.,
):
    """Draw samples by Euler–Maruyama SDE integration plus one final ODE step.

    Integrates from t = 0 up to t = 1 - last_step_size with stochastic
    steps, then takes a single deterministic Euler step of size
    ``last_step_size``. When cfg > 1, ``c`` is assumed to stack
    conditional/unconditional halves along dim 0 (cfg_mult == 2).
    Returns a tensor with the same batch size as ``c`` (the sample is
    duplicated cfg_mult times on dim 0).
    """
    cfg_mult = 2 if cfg > 1.0 else 1

    # One latent per condition; c's batch holds cfg_mult copies.
    shape = list(c.shape)
    shape[0] = shape[0] // cfg_mult
    shape[-1] = input_dim
    x = torch.randn(shape, device=c.device)

    # Time grid on [0, 1 - last_step_size], optionally warped SANA-style.
    grid = torch.linspace(
        0, 1 - last_step_size, num_sampling_steps + 1,
        device=c.device, dtype=torch.float32,
    )
    grid = time_shift_sana(grid, time_shift)
    dts = grid[1:] - grid[:-1]

    # Scalar tensor (not a Python float) to avoid torch.compile warnings.
    t = torch.tensor(0.0, device=c.device, dtype=torch.float32)
    t_batch = torch.zeros(c.shape[0], device=c.device)

    for step in range(num_sampling_steps):
        t_batch[:] = t
        doubled = torch.cat([x] * cfg_mult, dim=0)
        prediction = forward_fn(doubled, t_batch, c)
        # Convert the model output (apparently an x1/endpoint prediction) to a
        # velocity (pred - x_t) / (1 - t), clamping the denominator so the
        # late steps stay finite — TODO confirm the x1-prediction assumption.
        v = (prediction - doubled) / (1 - t_batch.view(-1, 1)).clamp_min(0.05)
        x = euler_maruyama_step(x, v, t, dts[step], cfg, cfg_mult)
        t += dts[step]

    # Final deterministic step of size last_step_size.
    # NOTE(review): t_batch is set to 1 - last_step_size although the
    # accumulated t equals grid[-1], which differs when time_shift != 1 —
    # confirm this mismatch is intended.
    doubled = torch.cat([x] * cfg_mult, dim=0)
    t_batch[:] = 1 - last_step_size
    prediction = forward_fn(doubled, t_batch, c)
    v = (prediction - doubled) / (1 - t_batch.view(-1, 1)).clamp_min(0.05)
    x = euler_step(x, v, last_step_size, cfg, cfg_mult)

    return torch.cat([x] * cfg_mult, dim=0)
92
+
93
+
94
def euler(
    input_dim,
    forward_fn,
    c,
    cfg: float = 1.0,
    num_sampling_steps: int = 50,
):
    """Draw samples by fixed-step Euler integration of the flow ODE.

    Steps t from 0 to 1 in ``num_sampling_steps`` uniform increments.
    When cfg > 1, ``c`` is assumed to stack conditional/unconditional
    halves along dim 0 (cfg_mult == 2). Returns a tensor with the same
    batch size as ``c`` (sample duplicated cfg_mult times on dim 0).
    """
    cfg_mult = 2 if cfg > 1.0 else 1

    # One latent per condition; c's batch holds cfg_mult copies.
    shape = list(c.shape)
    shape[0] = shape[0] // cfg_mult
    shape[-1] = input_dim
    x = torch.randn(shape, device=c.device)

    dt = 1.0 / num_sampling_steps
    t = 0
    t_batch = torch.zeros(c.shape[0], device=c.device)
    for _ in range(num_sampling_steps):
        t_batch[:] = t
        doubled = torch.cat([x] * cfg_mult, dim=0)
        velocity = forward_fn(doubled, t_batch, c)
        x = euler_step(x, velocity, dt, cfg, cfg_mult)
        t += dt

    return torch.cat([x] * cfg_mult, dim=0)