Update app.py
Browse files
app.py
CHANGED
|
@@ -188,8 +188,8 @@ class LTX23DistilledA2VPipeline(DistilledPipeline):
|
|
| 188 |
stage_1_output_shape = VideoPixelShape(
|
| 189 |
batch=1,
|
| 190 |
frames=num_frames,
|
| 191 |
-
width=width
|
| 192 |
-
height=height
|
| 193 |
fps=frame_rate,
|
| 194 |
)
|
| 195 |
stage_1_conditionings = combined_image_conditionings(
|
|
@@ -213,39 +213,6 @@ class LTX23DistilledA2VPipeline(DistilledPipeline):
|
|
| 213 |
initial_audio_latent=encoded_audio_latent,
|
| 214 |
)
|
| 215 |
|
| 216 |
-
torch.cuda.synchronize()
|
| 217 |
-
cleanup_memory()
|
| 218 |
-
|
| 219 |
-
upscaled_video_latent = upsample_video(
|
| 220 |
-
latent=video_state.latent[:1],
|
| 221 |
-
video_encoder=video_encoder,
|
| 222 |
-
upsampler=self.model_ledger.spatial_upsampler(),
|
| 223 |
-
)
|
| 224 |
-
stage_2_sigmas = torch.tensor(STAGE_2_DISTILLED_SIGMA_VALUES, device=self.device)
|
| 225 |
-
stage_2_output_shape = VideoPixelShape(batch=1, frames=num_frames, width=width, height=height, fps=frame_rate)
|
| 226 |
-
stage_2_conditionings = combined_image_conditionings(
|
| 227 |
-
images=images,
|
| 228 |
-
height=stage_2_output_shape.height,
|
| 229 |
-
width=stage_2_output_shape.width,
|
| 230 |
-
video_encoder=video_encoder,
|
| 231 |
-
dtype=dtype,
|
| 232 |
-
device=self.device,
|
| 233 |
-
)
|
| 234 |
-
video_state = denoise_video_only(
|
| 235 |
-
output_shape=stage_2_output_shape,
|
| 236 |
-
conditionings=stage_2_conditionings,
|
| 237 |
-
noiser=noiser,
|
| 238 |
-
sigmas=stage_2_sigmas,
|
| 239 |
-
stepper=stepper,
|
| 240 |
-
denoising_loop_fn=denoising_loop,
|
| 241 |
-
components=self.pipeline_components,
|
| 242 |
-
dtype=dtype,
|
| 243 |
-
device=self.device,
|
| 244 |
-
noise_scale=stage_2_sigmas[0],
|
| 245 |
-
initial_video_latent=upscaled_video_latent,
|
| 246 |
-
initial_audio_latent=encoded_audio_latent,
|
| 247 |
-
)
|
| 248 |
-
|
| 249 |
torch.cuda.synchronize()
|
| 250 |
del transformer
|
| 251 |
del video_encoder
|
|
|
|
| 188 |
stage_1_output_shape = VideoPixelShape(
|
| 189 |
batch=1,
|
| 190 |
frames=num_frames,
|
| 191 |
+
width=width,
|
| 192 |
+
height=height,
|
| 193 |
fps=frame_rate,
|
| 194 |
)
|
| 195 |
stage_1_conditionings = combined_image_conditionings(
|
|
|
|
| 213 |
initial_audio_latent=encoded_audio_latent,
|
| 214 |
)
|
| 215 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 216 |
torch.cuda.synchronize()
|
| 217 |
del transformer
|
| 218 |
del video_encoder
|