Refactor video generation and model loading to use the `from_pretrained` methods of `VideoEncoder` and `VideoDecoder`. Add a `cfg_rescale` parameter to the denoising functions to reduce artifacts. Handle dtypes consistently across audio and video processing to improve precision and match PyTorch behavior.
This commit is contained in:
@@ -10,24 +10,16 @@ from mlx_video.convert import (
|
||||
|
||||
# Audio VAE components
|
||||
from mlx_video.models.ltx.audio_vae import (
|
||||
AudioEncoder,
|
||||
AudioDecoder,
|
||||
Vocoder,
|
||||
AudioProcessor,
|
||||
decode_audio,
|
||||
)
|
||||
|
||||
# Patchifiers
|
||||
from mlx_video.components.patchifiers import (
|
||||
VideoLatentPatchifier,
|
||||
AudioPatchifier,
|
||||
VideoLatentShape,
|
||||
AudioLatentShape,
|
||||
PerChannelStatistics,
|
||||
)
|
||||
|
||||
# Conditioning
|
||||
from mlx_video.conditioning import (
|
||||
VideoConditionByKeyframeIndex,
|
||||
VideoConditionByLatentIndex,
|
||||
)
|
||||
|
||||
@@ -43,17 +35,12 @@ __all__ = [
|
||||
"sanitize_audio_vae_weights",
|
||||
"sanitize_vocoder_weights",
|
||||
# Audio VAE
|
||||
"AudioEncoder",
|
||||
"AudioDecoder",
|
||||
"Vocoder",
|
||||
"AudioProcessor",
|
||||
"decode_audio",
|
||||
# Patchifiers
|
||||
"VideoLatentPatchifier",
|
||||
"AudioPatchifier",
|
||||
"VideoLatentShape",
|
||||
"AudioLatentShape",
|
||||
"PerChannelStatistics",
|
||||
# Conditioning
|
||||
"VideoConditionByKeyframeIndex",
|
||||
"VideoConditionByLatentIndex",
|
||||
]
|
||||
Reference in New Issue
Block a user