More poodles
This commit is contained in:
@@ -75,7 +75,6 @@ def generate_video(
|
||||
trim_first_frames: int = 0,
|
||||
debug_latents: bool = False,
|
||||
):
|
||||
|
||||
"""Generate video using Wan pipeline (supports T2V and I2V).
|
||||
|
||||
Args:
|
||||
@@ -108,7 +107,6 @@ def generate_video(
|
||||
discards first 4). Use 2 for more aggressive trimming. Default: 0.
|
||||
debug_latents: If True, print per-temporal-position latent statistics
|
||||
after denoising for diagnosing first-frame artifacts.
|
||||
|
||||
"""
|
||||
import json
|
||||
|
||||
@@ -494,6 +492,7 @@ def generate_video(
|
||||
print(f"\n{Colors.GREEN}Denoising ({steps} steps)...{Colors.RESET}")
|
||||
t3 = time.time()
|
||||
|
||||
# Compile model forward for faster denoising
|
||||
if not no_compile:
|
||||
models_to_compile = (
|
||||
[high_noise_model, low_noise_model] if is_dual else [single_model]
|
||||
@@ -501,9 +500,6 @@ def generate_video(
|
||||
for m in models_to_compile:
|
||||
m._compiled = mx.compile(m)
|
||||
|
||||
|
||||
|
||||
|
||||
# Pre-convert timesteps to Python list to avoid .item() sync each step
|
||||
timestep_list = sched.timesteps.tolist()
|
||||
|
||||
@@ -773,7 +769,6 @@ def main():
|
||||
"--debug-latents", action="store_true",
|
||||
help="Print per-temporal-position latent statistics after denoising (diagnostic)",
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
# Parse guide scale
|
||||
@@ -814,7 +809,6 @@ def main():
|
||||
no_compile=args.no_compile,
|
||||
trim_first_frames=args.trim_first_frames,
|
||||
debug_latents=args.debug_latents,
|
||||
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -146,12 +146,16 @@ For example, for using the distilled [Wan2.2-Lightning](https://huggingface.
|
||||
python -m mlx_video.generate_wan \
|
||||
--model-dir /Volumes/SSD/Wan-AI/Wan2.2-T2V-A14B-MLX \
|
||||
--width 480 \
|
||||
--height 480 \
|
||||
--num-frames 121 \
|
||||
--prompt "Two dogs of the poodle breed sitting on a beach wearing sunglasses, close up, cinematic, sunset" \
|
||||
--height 704 \
|
||||
--num-frames 41 \
|
||||
--prompt "Two dogs of the poodle breed sitting on a beach wearing sunglasses, nodding with their heads, close up, cinematic, sunset" \
|
||||
--steps 4 \
|
||||
--guide-scale 1 \
|
||||
--trim-first-frames 1 \
|
||||
--seed 2391784614 \
|
||||
--lora-high /Volumes/SSD/Wan-AI/lightx2v/Wan2.2-Lightning/Wan2.2-T2V-A14B-4steps-lora-rank64-Seko-V2.0/high_noise_model.safetensors 1 \
|
||||
--lora-low /Volumes/SSD/Wan-AI/lightx2v/Wan2.2-Lightning/Wan2.2-T2V-A14B-4steps-lora-rank64-Seko-V2.0/low_noise_model.safetensors 1
|
||||
```
|
||||
|
||||
Which results in
|
||||

|
||||
@@ -1,5 +1,5 @@
|
||||
from dataclasses import dataclass, field
|
||||
from typing import List, Optional, Tuple, Union
|
||||
from dataclasses import dataclass
|
||||
from typing import Tuple, Union
|
||||
|
||||
from mlx_video.models.ltx.config import BaseModelConfig
|
||||
|
||||
@@ -104,7 +104,7 @@ class WanModelConfig(BaseModelConfig):
|
||||
sample_shift=5.0,
|
||||
sample_guide_scale=(3.5, 3.5),
|
||||
max_area=704 * 1280,
|
||||
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def wan22_ti2v_5b(cls) -> "WanModelConfig":
|
||||
@@ -126,4 +126,4 @@ class WanModelConfig(BaseModelConfig):
|
||||
sample_guide_scale=5.0,
|
||||
sample_fps=24,
|
||||
max_area=704 * 1280,
|
||||
|
||||
)
|
||||
|
||||
@@ -315,11 +315,6 @@ Applied alongside bug fixes to improve inference speed:
|
||||
- **Redundant type cast removal**: MLX type promotion handles `bfloat16 * float32 → float32` automatically — removed 240 unnecessary graph nodes per step (6 casts × 40 blocks)
|
||||
- **Euler scheduler sync fix**: Pre-store sigmas as Python floats to avoid `.item()` evaluation sync
|
||||
|
||||
### TeaCache Integration
|
||||
- Polynomial rescaling stays in MLX lazy graph (Horner's method)
|
||||
- Single `.item()` call on the accumulated distance for the skip/compute decision
|
||||
- Configurable threshold, retention steps, and cutoff steps
|
||||
|
||||
---
|
||||
|
||||
## Resolved: CFG Effectiveness (was Open Investigation)
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
import math
|
||||
|
||||
import mlx.core as mx
|
||||
import mlx.nn as nn
|
||||
import numpy as np
|
||||
@@ -354,7 +353,6 @@ class WanModel(nn.Module):
|
||||
for i, sl in enumerate(seq_lens_list):
|
||||
attn_mask[i, :, :, sl:] = -1e9
|
||||
|
||||
|
||||
kwargs = dict(
|
||||
e=e0,
|
||||
seq_lens=seq_lens_list,
|
||||
|
||||
Reference in New Issue
Block a user