Added story generator
This commit is contained in:
339
clipmaker.py
Executable file
339
clipmaker.py
Executable file
@@ -0,0 +1,339 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
clipmaker - high quality video clip generator for ltx-2
|
||||
|
||||
usage:
|
||||
clipmaker "your prompt here" # quick preview
|
||||
clipmaker "your prompt" --preset hq # high quality
|
||||
clipmaker "your prompt" --preset max # maximum quality
|
||||
clipmaker --batch prompts.txt # batch from file
|
||||
clipmaker --interactive # interactive mode
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
from PIL import Image
|
||||
|
||||
# quality presets
# each row lists the values in the order given by _PRESET_FIELDS; generate()
# passes width/height/frames/steps/guidance to the pipeline and prints the
# description
_PRESET_FIELDS = ("width", "height", "frames", "steps", "guidance", "description")

_PRESET_ROWS = {
    "preview": (512, 320, 25, 10, 4.0, "fast preview (~1 min)"),
    "standard": (768, 448, 49, 20, 4.0, "balanced quality (~5 min)"),
    "hq": (1024, 576, 97, 25, 4.0, "high quality (~15 min)"),
    "max": (1024, 576, 161, 30, 4.0, "maximum quality (~30 min)"),
    "cinematic": (1280, 720, 97, 30, 4.5, "cinematic 720p (~25 min)"),
}

# name -> settings dict, with the same keys and key order for every preset
PRESETS = {
    name: dict(zip(_PRESET_FIELDS, row)) for name, row in _PRESET_ROWS.items()
}
|
||||
|
||||
# default negative prompt based on ltx-2 guide
# kept as individual terms and joined into the comma-separated prompt string
_NEGATIVE_TERMS = (
    "blurry",
    "low quality",
    "distorted",
    "deformed",
    "ugly",
    "bad anatomy",
    "text",
    "watermark",
    "signature",
    "out of frame",
)
DEFAULT_NEGATIVE = ", ".join(_NEGATIVE_TERMS)
|
||||
|
||||
# prompt enhancement tips (printed by --tips and in interactive mode)
_TIP_BULLETS = (
    "write as flowing paragraph, 4-8 sentences",
    "include: shot type, lighting, action, camera movement, audio",
    "use cinematography terms: dolly, pan, track, handheld, close-up",
    'describe sounds and dialogue in "quotes"',
    "use present tense for actions",
)

# sample prompt shown under the bullet list; line breaks are part of the text
_TIP_EXAMPLE = (
    '"A cinematic medium shot of a coffee cup on a wooden table, steam rising\n'
    "gently in soft morning light. The camera slowly pushes in as a hand\n"
    "reaches into frame to lift the cup. Warm ambient cafe sounds and soft\n"
    'jazz play in the background. Shallow depth of field, golden hour lighting."\n'
)

PROMPT_TIPS = (
    "\nprompt tips (from ltx-2 guide):\n"
    + "".join(f"- {tip}\n" for tip in _TIP_BULLETS)
    + "\nexample:\n"
    + _TIP_EXAMPLE
)
|
||||
|
||||
|
||||
class ClipMaker:
    """generate video clips from text prompts with the diffusers ltx-2 pipeline

    Constructing an instance only prepares the output directory. The model is
    loaded on the first generate() call and reused by later calls.
    """

    # frame rate used both for the printed duration estimate and for encoding
    FPS = 24

    def __init__(self, output_dir="~/Desktop/clips"):
        """prepare the output directory

        Args:
            output_dir: directory for auto-named clips. ``~`` is expanded and
                missing directories are created.
        """
        self.output_dir = Path(output_dir).expanduser()
        self.output_dir.mkdir(parents=True, exist_ok=True)
        self.pipe = None
        self.device = None

    def load_model(self):
        """load the ltx-2 pipeline (no-op when it is already loaded)"""
        if self.pipe is not None:
            return

        # imported here rather than at module level, so importing this module
        # does not require diffusers
        from diffusers import LTX2Pipeline

        self.device = "mps" if torch.backends.mps.is_available() else "cpu"
        print(f"device: {self.device}")

        print("loading ltx-2 model...")
        self.pipe = LTX2Pipeline.from_pretrained(
            "Lightricks/LTX-2",
            torch_dtype=torch.bfloat16
        )
        self.pipe.to(self.device)
        print("model ready\n")

    def _resolve_output_path(self, output_path):
        """return the Path the clip will be written to

        With no explicit path, a timestamped ``clip_*.mp4`` name inside
        ``self.output_dir`` is used. An explicit path gets ``~`` expanded
        (matching how ``output_dir`` is treated) and its parent directory
        created, so a missing folder is caught before generation starts
        rather than when the finished clip is written.
        """
        if output_path is None:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            return self.output_dir / f"clip_{timestamp}.mp4"

        resolved = Path(output_path).expanduser()
        resolved.parent.mkdir(parents=True, exist_ok=True)
        return resolved

    def generate(self, prompt, preset="standard", negative_prompt=None,
                 seed=None, output_path=None, no_audio=False):
        """generate a video clip

        Args:
            prompt: text prompt passed to the pipeline.
            preset: key of PRESETS selecting size, frame count, steps and
                guidance.
            negative_prompt: negative prompt; DEFAULT_NEGATIVE when None.
            seed: integer seed; a random one is drawn (and printed) when None.
            output_path: target video file; auto-named inside the output
                directory when None.
            no_audio: when true, no audio track is passed to the encoder.

        Returns:
            Path of the written video, or None when ``preset`` is unknown.
            A ``.json`` metadata file is written next to the video.
        """
        # validate before the diffusers import and model load, so a bad
        # preset name fails immediately instead of after loading the model
        if preset not in PRESETS:
            print(f"unknown preset: {preset}")
            print(f"available: {', '.join(PRESETS.keys())}")
            return None

        settings = PRESETS[preset]

        from diffusers.pipelines.ltx2.export_utils import encode_video

        self.load_model()

        output_path = self._resolve_output_path(output_path)

        # set seed
        if seed is None:
            seed = torch.randint(0, 2**31, (1,)).item()
        generator = torch.Generator(device="cpu")
        generator.manual_seed(seed)

        # use default negative if not provided
        if negative_prompt is None:
            negative_prompt = DEFAULT_NEGATIVE

        print(f"{'='*60}")
        print("generating clip")
        print(f"{'='*60}")
        print(f"preset: {preset} ({settings['description']})")
        print(f"size: {settings['width']}x{settings['height']}")
        print(f"frames: {settings['frames']} (~{settings['frames']/self.FPS:.1f}s)")
        print(f"steps: {settings['steps']}")
        print(f"seed: {seed}")
        print(f"audio: {'no' if no_audio else 'yes'}")
        print(f"output: {output_path}")
        print(f"\nprompt: {prompt[:100]}{'...' if len(prompt) > 100 else ''}")
        print(f"{'='*60}\n")

        # generate
        result = self.pipe(
            prompt=prompt,
            negative_prompt=negative_prompt,
            width=settings["width"],
            height=settings["height"],
            num_frames=settings["frames"],
            num_inference_steps=settings["steps"],
            guidance_scale=settings["guidance"],
            generator=generator,
        )

        # stack the frames of the first returned video into a single tensor
        video_frames = result.frames[0]
        video_tensor = torch.stack([torch.from_numpy(np.array(f)) for f in video_frames])

        # get audio (left as None when disabled or when the pipeline returned none)
        audio = None
        audio_sr = None
        if not no_audio and result.audio is not None:
            audio = result.audio[0].float().cpu()
            audio_sr = self.pipe.vocoder.config.output_sampling_rate
            print(f"audio: {audio_sr}Hz")

        # export
        encode_video(
            video=video_tensor,
            fps=self.FPS,
            audio=audio,
            audio_sample_rate=audio_sr,
            output_path=str(output_path)
        )

        # save metadata next to the video so the clip can be reproduced
        meta_path = output_path.with_suffix(".json")
        metadata = {
            "prompt": prompt,
            "negative_prompt": negative_prompt,
            "preset": preset,
            "settings": settings,
            "seed": seed,
            "timestamp": datetime.now().isoformat(),
            "output": str(output_path),
        }
        with open(meta_path, "w", encoding="utf-8") as f:
            json.dump(metadata, f, indent=2)

        print(f"\n{'='*60}")
        print("done!")
        print(f"video: {output_path}")
        print(f"metadata: {meta_path}")
        print(f"seed: {seed} (use --seed {seed} to reproduce)")
        print(f"{'='*60}\n")

        return output_path

    @staticmethod
    def _read_prompts(prompts_path):
        """parse a prompts file into a list of prompt strings

        Lines are stripped. A line that is exactly ``---`` ends the current
        prompt, blank lines and lines starting with ``#`` are ignored, and the
        remaining lines of one prompt are joined with single spaces.
        """
        prompts = []
        pending = []
        # explicit encoding: do not depend on the platform default
        with open(prompts_path, encoding="utf-8") as f:
            for raw_line in f:
                line = raw_line.strip()
                if line == "---":  # separator between prompts
                    if pending:
                        prompts.append(" ".join(pending))
                        pending = []
                elif line and not line.startswith("#"):  # skip comments
                    pending.append(line)
        # the last prompt needs no trailing separator
        if pending:
            prompts.append(" ".join(pending))
        return prompts

    def batch_generate(self, prompts_file, preset="standard"):
        """generate multiple clips from a file

        Args:
            prompts_file: path of a text file with prompts separated by
                ``---`` lines (see _read_prompts for the format).
            preset: PRESETS key applied to every prompt.

        Prints a message and returns without generating anything when the
        file does not exist.
        """
        prompts_path = Path(prompts_file)
        if not prompts_path.exists():
            print(f"file not found: {prompts_file}")
            return

        prompts = self._read_prompts(prompts_path)

        print(f"found {len(prompts)} prompts in {prompts_file}")
        print(f"preset: {preset}")
        print()

        total = len(prompts)
        for index, prompt in enumerate(prompts, start=1):
            print(f"\n[{index}/{total}]")
            self.generate(prompt, preset=preset)

    def interactive(self):
        """interactive prompt mode

        Reads prompts from stdin until ``/quit``, EOF or Ctrl-C. Lines that
        start with ``/`` are commands; everything else is generated with the
        currently selected preset.
        """
        print("\n" + "="*60)
        print("clipmaker interactive mode")
        print("="*60)
        print(PROMPT_TIPS)
        print("\npresets:", ", ".join(PRESETS.keys()))
        print("commands: /preset <name>, /tips, /quit\n")

        current_preset = "standard"

        while True:
            try:
                prompt = input(f"[{current_preset}] > ").strip()
            except (EOFError, KeyboardInterrupt):
                print("\nbye!")
                break

            if not prompt:
                continue
            elif prompt == "/quit":
                print("bye!")
                break
            elif prompt == "/tips":
                print(PROMPT_TIPS)
            elif prompt.startswith("/preset"):
                parts = prompt.split()
                if len(parts) > 1 and parts[1] in PRESETS:
                    current_preset = parts[1]
                    print(f"preset: {current_preset} - {PRESETS[current_preset]['description']}")
                else:
                    print(f"presets: {', '.join(PRESETS.keys())}")
            elif prompt.startswith("/"):
                # a mistyped command must not start a multi-minute generation
                print(f"unknown command: {prompt.split()[0]}")
                print("commands: /preset <name>, /tips, /quit")
            else:
                self.generate(prompt, preset=current_preset)
|
||||
|
||||
|
||||
def main(argv=None):
    """command-line entry point

    Args:
        argv: list of argument strings to parse. None (the default) lets
            argparse read ``sys.argv[1:]``, which is what a plain ``main()``
            call has always done.

    Mode precedence: ``--tips``, then ``--interactive``, then ``--batch``,
    then the positional prompt. With none of them the help text is printed.
    """
    # build the preset listing from PRESETS so the help text cannot drift
    # from the actual table
    preset_lines = "\n".join(
        f"{name} - {settings['description']}" for name, settings in PRESETS.items()
    )
    epilog = (
        "\n"
        "presets:\n"
        f"{preset_lines}\n"
        "\n"
        "examples:\n"
        'clipmaker "a cat sleeping on a couch"\n'
        'clipmaker "epic sunset timelapse" --preset hq\n'
        "clipmaker --batch prompts.txt --preset standard\n"
        "clipmaker --interactive\n"
    )

    parser = argparse.ArgumentParser(
        description="clipmaker - hq video clip generator",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=epilog,
    )

    parser.add_argument("prompt", nargs="?", help="video prompt")
    parser.add_argument("--preset", "-p", default="standard",
                        choices=list(PRESETS), help="quality preset")
    parser.add_argument("--output", "-o", help="output path")
    parser.add_argument("--seed", "-s", type=int, help="random seed")
    parser.add_argument("--negative", "-n", help="negative prompt")
    parser.add_argument("--no-audio", action="store_true", help="disable audio")
    parser.add_argument("--batch", "-b", help="batch generate from file")
    parser.add_argument("--interactive", "-i", action="store_true",
                        help="interactive mode")
    parser.add_argument("--output-dir", default="~/Desktop/clips",
                        help="output directory")
    parser.add_argument("--tips", action="store_true", help="show prompt tips")

    args = parser.parse_args(argv)

    if args.tips:
        print(PROMPT_TIPS)
        return

    # nothing to do: show help before ClipMaker() creates the output directory
    if not (args.interactive or args.batch or args.prompt):
        parser.print_help()
        return

    # fail with a non-zero exit status instead of printing and exiting 0
    # (--interactive takes precedence over --batch, so skip the check then)
    if args.batch and not args.interactive and not Path(args.batch).is_file():
        parser.error(f"batch file not found: {args.batch}")

    maker = ClipMaker(output_dir=args.output_dir)

    if args.interactive:
        maker.interactive()
    elif args.batch:
        maker.batch_generate(args.batch, preset=args.preset)
    else:
        maker.generate(
            prompt=args.prompt,
            preset=args.preset,
            negative_prompt=args.negative,
            seed=args.seed,
            output_path=args.output,
            no_audio=args.no_audio,
        )
|
||||
|
||||
|
||||
# run the command-line interface only when executed as a script, not on import
if __name__ == "__main__":
    raise SystemExit(main())
|
||||
Reference in New Issue
Block a user