#!/usr/bin/env python3 """ clipmaker - high quality video clip generator for ltx-2 usage: clipmaker "your prompt here" # quick preview clipmaker "your prompt" --preset hq # high quality clipmaker "your prompt" --preset max # maximum quality clipmaker --batch prompts.txt # batch from file clipmaker --interactive # interactive mode """ import argparse import os import sys import json from datetime import datetime from pathlib import Path import numpy as np import torch from PIL import Image # quality presets PRESETS = { "preview": { "width": 512, "height": 320, "frames": 25, "steps": 10, "guidance": 4.0, "description": "fast preview (~1 min)" }, "standard": { "width": 768, "height": 448, "frames": 49, "steps": 20, "guidance": 4.0, "description": "balanced quality (~5 min)" }, "hq": { "width": 1024, "height": 576, "frames": 97, "steps": 25, "guidance": 4.0, "description": "high quality (~15 min)" }, "max": { "width": 1024, "height": 576, "frames": 161, "steps": 30, "guidance": 4.0, "description": "maximum quality (~30 min)" }, "cinematic": { "width": 1280, "height": 720, "frames": 97, "steps": 30, "guidance": 4.5, "description": "cinematic 720p (~25 min)" }, } # default negative prompt based on ltx-2 guide DEFAULT_NEGATIVE = "blurry, low quality, distorted, deformed, ugly, bad anatomy, text, watermark, signature, out of frame" # prompt enhancement tips PROMPT_TIPS = """ prompt tips (from ltx-2 guide): - write as flowing paragraph, 4-8 sentences - include: shot type, lighting, action, camera movement, audio - use cinematography terms: dolly, pan, track, handheld, close-up - describe sounds and dialogue in "quotes" - use present tense for actions example: "A cinematic medium shot of a coffee cup on a wooden table, steam rising gently in soft morning light. The camera slowly pushes in as a hand reaches into frame to lift the cup. Warm ambient cafe sounds and soft jazz play in the background. Shallow depth of field, golden hour lighting." """ class ClipMaker: def __init__(self, output_dir="~/Desktop/clips"): self.output_dir = Path(output_dir).expanduser() self.output_dir.mkdir(parents=True, exist_ok=True) self.pipe = None self.device = None def load_model(self): """load the ltx-2 pipeline""" if self.pipe is not None: return from diffusers import LTX2Pipeline self.device = "mps" if torch.backends.mps.is_available() else "cpu" print(f"device: {self.device}") print("loading ltx-2 model...") self.pipe = LTX2Pipeline.from_pretrained( "Lightricks/LTX-2", torch_dtype=torch.bfloat16 ) self.pipe.to(self.device) print("model ready\n") def generate(self, prompt, preset="standard", negative_prompt=None, seed=None, output_path=None, no_audio=False): """generate a video clip""" from diffusers.pipelines.ltx2.export_utils import encode_video self.load_model() # get preset settings if preset not in PRESETS: print(f"unknown preset: {preset}") print(f"available: {', '.join(PRESETS.keys())}") return None settings = PRESETS[preset] # generate output path if not provided if output_path is None: timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") output_path = self.output_dir / f"clip_{timestamp}.mp4" else: output_path = Path(output_path) # set seed if seed is None: seed = torch.randint(0, 2**31, (1,)).item() generator = torch.Generator(device="cpu") generator.manual_seed(seed) # use default negative if not provided if negative_prompt is None: negative_prompt = DEFAULT_NEGATIVE print(f"{'='*60}") print(f"generating clip") print(f"{'='*60}") print(f"preset: {preset} ({settings['description']})") print(f"size: {settings['width']}x{settings['height']}") print(f"frames: {settings['frames']} (~{settings['frames']/24:.1f}s)") print(f"steps: {settings['steps']}") print(f"seed: {seed}") print(f"audio: {'no' if no_audio else 'yes'}") print(f"output: {output_path}") print(f"\nprompt: {prompt[:100]}{'...' if len(prompt) > 100 else ''}") print(f"{'='*60}\n") # generate result = self.pipe( prompt=prompt, negative_prompt=negative_prompt, width=settings["width"], height=settings["height"], num_frames=settings["frames"], num_inference_steps=settings["steps"], guidance_scale=settings["guidance"], generator=generator, ) # get video frames as tensor video_frames = result.frames[0] video_tensor = torch.stack([torch.from_numpy(np.array(f)) for f in video_frames]) # get audio audio = None audio_sr = None if not no_audio and result.audio is not None: audio = result.audio[0].float().cpu() audio_sr = self.pipe.vocoder.config.output_sampling_rate print(f"audio: {audio_sr}Hz") # export encode_video( video=video_tensor, fps=24, audio=audio, audio_sample_rate=audio_sr, output_path=str(output_path) ) # save metadata meta_path = output_path.with_suffix(".json") metadata = { "prompt": prompt, "negative_prompt": negative_prompt, "preset": preset, "settings": settings, "seed": seed, "timestamp": datetime.now().isoformat(), "output": str(output_path), } with open(meta_path, "w") as f: json.dump(metadata, f, indent=2) print(f"\n{'='*60}") print(f"done!") print(f"video: {output_path}") print(f"metadata: {meta_path}") print(f"seed: {seed} (use --seed {seed} to reproduce)") print(f"{'='*60}\n") return output_path def batch_generate(self, prompts_file, preset="standard"): """generate multiple clips from a file""" prompts_path = Path(prompts_file) if not prompts_path.exists(): print(f"file not found: {prompts_file}") return prompts = [] with open(prompts_path) as f: current_prompt = [] for line in f: line = line.strip() if line == "---": # separator between prompts if current_prompt: prompts.append(" ".join(current_prompt)) current_prompt = [] elif line and not line.startswith("#"): # skip comments current_prompt.append(line) if current_prompt: prompts.append(" ".join(current_prompt)) print(f"found {len(prompts)} prompts in {prompts_file}") print(f"preset: {preset}") print() for i, prompt in enumerate(prompts): print(f"\n[{i+1}/{len(prompts)}]") self.generate(prompt, preset=preset) def interactive(self): """interactive prompt mode""" print("\n" + "="*60) print("clipmaker interactive mode") print("="*60) print(PROMPT_TIPS) print("\npresets:", ", ".join(PRESETS.keys())) print("commands: /preset , /tips, /quit\n") current_preset = "standard" while True: try: prompt = input(f"[{current_preset}] > ").strip() except (EOFError, KeyboardInterrupt): print("\nbye!") break if not prompt: continue elif prompt == "/quit": print("bye!") break elif prompt == "/tips": print(PROMPT_TIPS) elif prompt.startswith("/preset"): parts = prompt.split() if len(parts) > 1 and parts[1] in PRESETS: current_preset = parts[1] print(f"preset: {current_preset} - {PRESETS[current_preset]['description']}") else: print(f"presets: {', '.join(PRESETS.keys())}") else: self.generate(prompt, preset=current_preset) def main(): parser = argparse.ArgumentParser( description="clipmaker - hq video clip generator", formatter_class=argparse.RawDescriptionHelpFormatter, epilog=f""" presets: preview - {PRESETS['preview']['description']} standard - {PRESETS['standard']['description']} hq - {PRESETS['hq']['description']} max - {PRESETS['max']['description']} cinematic - {PRESETS['cinematic']['description']} examples: clipmaker "a cat sleeping on a couch" clipmaker "epic sunset timelapse" --preset hq clipmaker --batch prompts.txt --preset standard clipmaker --interactive """ ) parser.add_argument("prompt", nargs="?", help="video prompt") parser.add_argument("--preset", "-p", default="standard", choices=PRESETS.keys(), help="quality preset") parser.add_argument("--output", "-o", help="output path") parser.add_argument("--seed", "-s", type=int, help="random seed") parser.add_argument("--negative", "-n", help="negative prompt") parser.add_argument("--no-audio", action="store_true", help="disable audio") parser.add_argument("--batch", "-b", help="batch generate from file") parser.add_argument("--interactive", "-i", action="store_true", help="interactive mode") parser.add_argument("--output-dir", default="~/Desktop/clips", help="output directory") parser.add_argument("--tips", action="store_true", help="show prompt tips") args = parser.parse_args() if args.tips: print(PROMPT_TIPS) return maker = ClipMaker(output_dir=args.output_dir) if args.interactive: maker.interactive() elif args.batch: maker.batch_generate(args.batch, preset=args.preset) elif args.prompt: maker.generate( prompt=args.prompt, preset=args.preset, negative_prompt=args.negative, seed=args.seed, output_path=args.output, no_audio=args.no_audio, ) else: parser.print_help() if __name__ == "__main__": main()