Added audio
This commit is contained in:
@@ -1,6 +1,6 @@
|
|||||||
# ltx2-mps
|
# ltx2-mps
|
||||||
|
|
||||||
run [LTX-2](https://huggingface.co/Lightricks/LTX-2) video generation on mac using MPS (metal).
|
run [LTX-2](https://huggingface.co/Lightricks/LTX-2) video + audio generation on mac using MPS (Metal Performance Shaders).
|
||||||
|
|
||||||
## what's this about
|
## what's this about
|
||||||
|
|
||||||
@@ -53,6 +53,7 @@ python generate.py "a cat walking through grass" -o output.mp4
|
|||||||
| `--fps` | 24 | output fps |
|
| `--fps` | 24 | output fps |
|
||||||
| `--seed` | random | seed for reproducibility |
|
| `--seed` | random | seed for reproducibility |
|
||||||
| `-n` | "" | negative prompt |
|
| `-n` | "" | negative prompt |
|
||||||
|
| `--no-audio` | false | disable audio generation (audio is generated by default) |
|
||||||
|
|
||||||
### examples
|
### examples
|
||||||
|
|
||||||
|
|||||||
48
generate.py
48
generate.py
@@ -8,10 +8,10 @@ usage: python generate.py "your prompt" -o output.mp4
|
|||||||
import argparse
|
import argparse
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
import imageio
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import torch
|
import torch
|
||||||
from diffusers import LTX2Pipeline
|
from diffusers import LTX2Pipeline
|
||||||
|
from diffusers.pipelines.ltx2.export_utils import encode_video
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
@@ -26,8 +26,7 @@ def main():
|
|||||||
parser.add_argument("--frames", type=int, default=25, help="frame count")
|
parser.add_argument("--frames", type=int, default=25, help="frame count")
|
||||||
parser.add_argument("--fps", type=int, default=24, help="output fps")
|
parser.add_argument("--fps", type=int, default=24, help="output fps")
|
||||||
parser.add_argument("--seed", type=int, default=None, help="random seed")
|
parser.add_argument("--seed", type=int, default=None, help="random seed")
|
||||||
parser.add_argument("--crf", type=int, default=10, help="video quality (0-51, lower=better)")
|
parser.add_argument("--no-audio", action="store_true", help="disable audio generation")
|
||||||
parser.add_argument("--prores", action="store_true", help="use prores codec (large files, best quality)")
|
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
@@ -63,7 +62,7 @@ def main():
|
|||||||
generator = torch.Generator(device="cpu")
|
generator = torch.Generator(device="cpu")
|
||||||
generator.manual_seed(args.seed)
|
generator.manual_seed(args.seed)
|
||||||
|
|
||||||
print(f"\ngenerating...")
|
print(f"\ngenerating{'...' if args.no_audio else ' with audio...'}")
|
||||||
print(f" prompt: {args.prompt}")
|
print(f" prompt: {args.prompt}")
|
||||||
print(f" size: {args.width}x{args.height}, {args.frames} frames")
|
print(f" size: {args.width}x{args.height}, {args.frames} frames")
|
||||||
print(f" steps: {args.steps}, guidance: {args.guidance}")
|
print(f" steps: {args.steps}, guidance: {args.guidance}")
|
||||||
@@ -81,40 +80,27 @@ def main():
|
|||||||
generator=generator,
|
generator=generator,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# get video frames as tensor
|
||||||
video_frames = result.frames[0]
|
video_frames = result.frames[0]
|
||||||
|
video_tensor = torch.stack([torch.from_numpy(np.array(f)) for f in video_frames])
|
||||||
|
|
||||||
# convert to uint8 numpy arrays
|
# get audio if available
|
||||||
frames = []
|
audio = None
|
||||||
for frame in video_frames:
|
audio_sample_rate = None
|
||||||
frame = np.array(frame, dtype=np.uint8)
|
if not args.no_audio and result.audio is not None:
|
||||||
frames.append(frame)
|
audio = result.audio[0].float().cpu()
|
||||||
|
audio_sample_rate = pipe.vocoder.config.output_sampling_rate
|
||||||
|
print(f"audio generated ({audio_sample_rate}Hz)")
|
||||||
|
|
||||||
# export video
|
# export with audio
|
||||||
if args.prores:
|
encode_video(
|
||||||
output_path = args.output.replace('.mp4', '.mov') if args.output.endswith('.mp4') else args.output
|
video=video_tensor,
|
||||||
writer = imageio.get_writer(
|
|
||||||
output_path,
|
|
||||||
fps=args.fps,
|
fps=args.fps,
|
||||||
codec='prores_ks',
|
audio=audio,
|
||||||
pixelformat='yuv422p10le',
|
audio_sample_rate=audio_sample_rate,
|
||||||
output_params=['-profile:v', '3'] # prores hq
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
output_path=args.output
|
output_path=args.output
|
||||||
writer = imageio.get_writer(
|
|
||||||
output_path,
|
|
||||||
fps=args.fps,
|
|
||||||
codec='libx264',
|
|
||||||
quality=None,
|
|
||||||
pixelformat='yuv420p',
|
|
||||||
output_params=['-crf', str(args.crf), '-preset', 'slow']
|
|
||||||
)
|
)
|
||||||
|
|
||||||
for frame in frames:
|
|
||||||
writer.append_data(frame)
|
|
||||||
writer.close()
|
|
||||||
args.output = output_path
|
|
||||||
|
|
||||||
print(f"\nsaved to: {args.output}")
|
print(f"\nsaved to: {args.output}")
|
||||||
print(f"seed: {args.seed}")
|
print(f"seed: {args.seed}")
|
||||||
|
|
||||||
|
|||||||
@@ -7,5 +7,6 @@ safetensors>=0.4.0
|
|||||||
sentencepiece>=0.1.99
|
sentencepiece>=0.1.99
|
||||||
imageio>=2.30.0
|
imageio>=2.30.0
|
||||||
imageio-ffmpeg>=0.4.9
|
imageio-ffmpeg>=0.4.9
|
||||||
|
av>=10.0.0
|
||||||
# Install diffusers from git for LTX2Pipeline:
|
# Install diffusers from git for LTX2Pipeline:
|
||||||
# pip install git+https://github.com/huggingface/diffusers.git
|
# pip install git+https://github.com/huggingface/diffusers.git
|
||||||
|
|||||||
Reference in New Issue
Block a user