Refactor denoising functions in generate.py and utils.py to use float32 for improved precision, aligning with PyTorch behavior. Update calculations for latents and denoised outputs to ensure consistent dtype handling across audio and video processing.
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
import math
|
||||
from typing import Optional, Tuple, Union
|
||||
from typing import Optional, Union
|
||||
|
||||
import mlx.core as mx
|
||||
import mlx.nn as nn
|
||||
@@ -61,6 +61,9 @@ def to_denoised(
|
||||
Given noisy input x_t and velocity prediction v, compute denoised x_0:
|
||||
x_0 = x_t - sigma * v
|
||||
|
||||
Uses float32 for computation precision (matching PyTorch behavior),
|
||||
then converts back to input dtype.
|
||||
|
||||
Args:
|
||||
noisy: Noisy input tensor x_t
|
||||
velocity: Velocity prediction v
|
||||
@@ -69,16 +72,21 @@ def to_denoised(
|
||||
Returns:
|
||||
Denoised tensor x_0
|
||||
"""
|
||||
original_dtype = noisy.dtype
|
||||
|
||||
# Cast to float32 for precision (PyTorch uses calc_dtype=torch.float32)
|
||||
noisy_f32 = noisy.astype(mx.float32)
|
||||
velocity_f32 = velocity.astype(mx.float32)
|
||||
|
||||
if isinstance(sigma, (int, float)):
|
||||
# Convert to array with matching dtype to avoid float32 promotion
|
||||
sigma_arr = mx.array(sigma, dtype=velocity.dtype)
|
||||
return noisy - sigma_arr * velocity
|
||||
sigma_f32 = mx.array(sigma, dtype=mx.float32)
|
||||
else:
|
||||
# sigma is per-sample - ensure dtype matches
|
||||
sigma = sigma.astype(velocity.dtype)
|
||||
while sigma.ndim < velocity.ndim:
|
||||
sigma = mx.expand_dims(sigma, axis=-1)
|
||||
return noisy - sigma * velocity
|
||||
sigma_f32 = sigma.astype(mx.float32)
|
||||
while sigma_f32.ndim < velocity_f32.ndim:
|
||||
sigma_f32 = mx.expand_dims(sigma_f32, axis=-1)
|
||||
|
||||
result = noisy_f32 - sigma_f32 * velocity_f32
|
||||
return result.astype(original_dtype)
|
||||
|
||||
|
||||
def repeat_interleave(x: mx.array, repeats: int, axis: int = -1) -> mx.array:
|
||||
|
||||
Reference in New Issue
Block a user