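Refactor VideoEncoder to initialize from VideoEncoderModelConfig, enhancing configuration management. Add methods for weight sanitization and loading from pretrained models, improving model usability and integration with existing workflows. Also remove the stray trailing commas from the VideoEncoderModelConfig field defaults, which had turned each default into a one-element tuple (e.g. `in_channels` defaulted to `(3,)` instead of `3`).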

2026-01-23 17:59:57 +01:00
parent f8f78aeab5
commit ce39e744c3
3 changed files with 110 additions and 45 deletions
--- a/mlx_video/models/ltx/config.py
+++ b/mlx_video/models/ltx/config.py
@@ -277,12 +277,12 @@ class VideoDecoderModelConfig(BaseModelConfig):
@dataclass
 class VideoEncoderModelConfig(BaseModelConfig):
    convolution_dimensions: int = 3
-    in_channels : int = 3,
-    out_channels: int = 128,
-    patch_size: int = 4,
-    norm_layer: Enum = None,
-    latent_log_var: Enum = None,
-    encoder_spatial_padding_mode: Enum = None,
+    in_channels: int = 3
+    out_channels: int = 128
+    patch_size: int = 4
+    norm_layer: Enum = None
+    latent_log_var: Enum = None
+    encoder_spatial_padding_mode: Enum = None
    encoder_blocks: List[tuple] = field(default_factory=lambda: [("res_x", {"num_layers": 4}),
        ("compress_space_res", {"multiplier": 2}),
        ("res_x", {"num_layers": 6}),