Added audio
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
# ltx2-mps
|
||||
|
||||
run [LTX-2](https://huggingface.co/Lightricks/LTX-2) video generation on mac using MPS (metal).
|
||||
run [LTX-2](https://huggingface.co/Lightricks/LTX-2) video + audio generation on mac using MPS (metal).
|
||||
|
||||
## what's this about
|
||||
|
||||
@@ -53,6 +53,7 @@ python generate.py "a cat walking through grass" -o output.mp4
|
||||
| `--fps` | 24 | output fps |
|
||||
| `--seed` | random | seed for reproducibility |
|
||||
| `-n` | "" | negative prompt |
|
||||
| `--no-audio` | false | disable audio generation |
|
||||
|
||||
### examples
|
||||
|
||||
|
||||
54
generate.py
54
generate.py
@@ -8,10 +8,10 @@ usage: python generate.py "your prompt" -o output.mp4
|
||||
import argparse
|
||||
import sys
|
||||
|
||||
import imageio
|
||||
import numpy as np
|
||||
import torch
|
||||
from diffusers import LTX2Pipeline
|
||||
from diffusers.pipelines.ltx2.export_utils import encode_video
|
||||
|
||||
|
||||
def main():
|
||||
@@ -26,8 +26,7 @@ def main():
|
||||
parser.add_argument("--frames", type=int, default=25, help="frame count")
|
||||
parser.add_argument("--fps", type=int, default=24, help="output fps")
|
||||
parser.add_argument("--seed", type=int, default=None, help="random seed")
|
||||
parser.add_argument("--crf", type=int, default=10, help="video quality (0-51, lower=better)")
|
||||
parser.add_argument("--prores", action="store_true", help="use prores codec (large files, best quality)")
|
||||
parser.add_argument("--no-audio", action="store_true", help="disable audio generation")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
@@ -63,7 +62,7 @@ def main():
|
||||
generator = torch.Generator(device="cpu")
|
||||
generator.manual_seed(args.seed)
|
||||
|
||||
print(f"\ngenerating...")
|
||||
print(f"\ngenerating{'...' if args.no_audio else ' with audio...'}")
|
||||
print(f" prompt: {args.prompt}")
|
||||
print(f" size: {args.width}x{args.height}, {args.frames} frames")
|
||||
print(f" steps: {args.steps}, guidance: {args.guidance}")
|
||||
@@ -81,39 +80,26 @@ def main():
|
||||
generator=generator,
|
||||
)
|
||||
|
||||
# get video frames as tensor
|
||||
video_frames = result.frames[0]
|
||||
video_tensor = torch.stack([torch.from_numpy(np.array(f)) for f in video_frames])
|
||||
|
||||
# convert to uint8 numpy arrays
|
||||
frames = []
|
||||
for frame in video_frames:
|
||||
frame = np.array(frame, dtype=np.uint8)
|
||||
frames.append(frame)
|
||||
# get audio if available
|
||||
audio = None
|
||||
audio_sample_rate = None
|
||||
if not args.no_audio and result.audio is not None:
|
||||
audio = result.audio[0].float().cpu()
|
||||
audio_sample_rate = pipe.vocoder.config.output_sampling_rate
|
||||
print(f"audio generated ({audio_sample_rate}Hz)")
|
||||
|
||||
# export video
|
||||
if args.prores:
|
||||
output_path = args.output.replace('.mp4', '.mov') if args.output.endswith('.mp4') else args.output
|
||||
writer = imageio.get_writer(
|
||||
output_path,
|
||||
fps=args.fps,
|
||||
codec='prores_ks',
|
||||
pixelformat='yuv422p10le',
|
||||
output_params=['-profile:v', '3'] # prores hq
|
||||
)
|
||||
else:
|
||||
output_path = args.output
|
||||
writer = imageio.get_writer(
|
||||
output_path,
|
||||
fps=args.fps,
|
||||
codec='libx264',
|
||||
quality=None,
|
||||
pixelformat='yuv420p',
|
||||
output_params=['-crf', str(args.crf), '-preset', 'slow']
|
||||
)
|
||||
|
||||
for frame in frames:
|
||||
writer.append_data(frame)
|
||||
writer.close()
|
||||
args.output = output_path
|
||||
# export with audio
|
||||
encode_video(
|
||||
video=video_tensor,
|
||||
fps=args.fps,
|
||||
audio=audio,
|
||||
audio_sample_rate=audio_sample_rate,
|
||||
output_path=args.output
|
||||
)
|
||||
|
||||
print(f"\nsaved to: {args.output}")
|
||||
print(f"seed: {args.seed}")
|
||||
|
||||
@@ -7,5 +7,6 @@ safetensors>=0.4.0
|
||||
sentencepiece>=0.1.99
|
||||
imageio>=2.30.0
|
||||
imageio-ffmpeg>=0.4.9
|
||||
av>=10.0.0
|
||||
# Install diffusers from git for LTX2Pipeline:
|
||||
# pip install git+https://github.com/huggingface/diffusers.git
|
||||
|
||||
Reference in New Issue
Block a user