Refactor VideoEncoder to initialize from VideoEncoderModelConfig, enhancing configuration management. Add methods for weight sanitization and loading from pretrained models, improving model usability and integration with existing workflows.

This commit is contained in:
Prince Canuma
2026-01-23 17:59:57 +01:00
parent f8f78aeab5
commit ce39e744c3
3 changed files with 110 additions and 45 deletions

View File

@@ -277,12 +277,12 @@ class VideoDecoderModelConfig(BaseModelConfig):
@dataclass
class VideoEncoderModelConfig(BaseModelConfig):
convolution_dimensions: int = 3
in_channels : int = 3,
out_channels: int = 128,
patch_size: int = 4,
norm_layer: Enum = None,
latent_log_var: Enum = None,
encoder_spatial_padding_mode: Enum = None,
in_channels: int = 3
out_channels: int = 128
patch_size: int = 4
norm_layer: Enum = None
latent_log_var: Enum = None
encoder_spatial_padding_mode: Enum = None
encoder_blocks: List[tuple] = field(default_factory=lambda: [("res_x", {"num_layers": 4}),
("compress_space_res", {"multiplier": 2}),
("res_x", {"num_layers": 6}),