Added story generator

This commit is contained in:
Norbert Schmidt
2026-01-11 20:01:20 +01:00
parent fc95099449
commit 8174da7490
6 changed files with 846 additions and 0 deletions

339
clipmaker.py Executable file
View File

@@ -0,0 +1,339 @@
#!/usr/bin/env python3
"""
clipmaker - high quality video clip generator for ltx-2
usage:
clipmaker "your prompt here" # quick preview
clipmaker "your prompt" --preset hq # high quality
clipmaker "your prompt" --preset max # maximum quality
clipmaker --batch prompts.txt # batch from file
clipmaker --interactive # interactive mode
"""
import argparse
import os
import sys
import json
from datetime import datetime
from pathlib import Path
import numpy as np
import torch
from PIL import Image
# quality presets
PRESETS = {
"preview": {
"width": 512,
"height": 320,
"frames": 25,
"steps": 10,
"guidance": 4.0,
"description": "fast preview (~1 min)"
},
"standard": {
"width": 768,
"height": 448,
"frames": 49,
"steps": 20,
"guidance": 4.0,
"description": "balanced quality (~5 min)"
},
"hq": {
"width": 1024,
"height": 576,
"frames": 97,
"steps": 25,
"guidance": 4.0,
"description": "high quality (~15 min)"
},
"max": {
"width": 1024,
"height": 576,
"frames": 161,
"steps": 30,
"guidance": 4.0,
"description": "maximum quality (~30 min)"
},
"cinematic": {
"width": 1280,
"height": 720,
"frames": 97,
"steps": 30,
"guidance": 4.5,
"description": "cinematic 720p (~25 min)"
},
}
# default negative prompt based on ltx-2 guide
DEFAULT_NEGATIVE = "blurry, low quality, distorted, deformed, ugly, bad anatomy, text, watermark, signature, out of frame"
# prompt enhancement tips
PROMPT_TIPS = """
prompt tips (from ltx-2 guide):
- write as flowing paragraph, 4-8 sentences
- include: shot type, lighting, action, camera movement, audio
- use cinematography terms: dolly, pan, track, handheld, close-up
- describe sounds and dialogue in "quotes"
- use present tense for actions
example:
"A cinematic medium shot of a coffee cup on a wooden table, steam rising
gently in soft morning light. The camera slowly pushes in as a hand
reaches into frame to lift the cup. Warm ambient cafe sounds and soft
jazz play in the background. Shallow depth of field, golden hour lighting."
"""
class ClipMaker:
def __init__(self, output_dir="~/Desktop/clips"):
self.output_dir = Path(output_dir).expanduser()
self.output_dir.mkdir(parents=True, exist_ok=True)
self.pipe = None
self.device = None
def load_model(self):
"""load the ltx-2 pipeline"""
if self.pipe is not None:
return
from diffusers import LTX2Pipeline
self.device = "mps" if torch.backends.mps.is_available() else "cpu"
print(f"device: {self.device}")
print("loading ltx-2 model...")
self.pipe = LTX2Pipeline.from_pretrained(
"Lightricks/LTX-2",
torch_dtype=torch.bfloat16
)
self.pipe.to(self.device)
print("model ready\n")
def generate(self, prompt, preset="standard", negative_prompt=None,
seed=None, output_path=None, no_audio=False):
"""generate a video clip"""
from diffusers.pipelines.ltx2.export_utils import encode_video
self.load_model()
# get preset settings
if preset not in PRESETS:
print(f"unknown preset: {preset}")
print(f"available: {', '.join(PRESETS.keys())}")
return None
settings = PRESETS[preset]
# generate output path if not provided
if output_path is None:
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
output_path = self.output_dir / f"clip_{timestamp}.mp4"
else:
output_path = Path(output_path)
# set seed
if seed is None:
seed = torch.randint(0, 2**31, (1,)).item()
generator = torch.Generator(device="cpu")
generator.manual_seed(seed)
# use default negative if not provided
if negative_prompt is None:
negative_prompt = DEFAULT_NEGATIVE
print(f"{'='*60}")
print(f"generating clip")
print(f"{'='*60}")
print(f"preset: {preset} ({settings['description']})")
print(f"size: {settings['width']}x{settings['height']}")
print(f"frames: {settings['frames']} (~{settings['frames']/24:.1f}s)")
print(f"steps: {settings['steps']}")
print(f"seed: {seed}")
print(f"audio: {'no' if no_audio else 'yes'}")
print(f"output: {output_path}")
print(f"\nprompt: {prompt[:100]}{'...' if len(prompt) > 100 else ''}")
print(f"{'='*60}\n")
# generate
result = self.pipe(
prompt=prompt,
negative_prompt=negative_prompt,
width=settings["width"],
height=settings["height"],
num_frames=settings["frames"],
num_inference_steps=settings["steps"],
guidance_scale=settings["guidance"],
generator=generator,
)
# get video frames as tensor
video_frames = result.frames[0]
video_tensor = torch.stack([torch.from_numpy(np.array(f)) for f in video_frames])
# get audio
audio = None
audio_sr = None
if not no_audio and result.audio is not None:
audio = result.audio[0].float().cpu()
audio_sr = self.pipe.vocoder.config.output_sampling_rate
print(f"audio: {audio_sr}Hz")
# export
encode_video(
video=video_tensor,
fps=24,
audio=audio,
audio_sample_rate=audio_sr,
output_path=str(output_path)
)
# save metadata
meta_path = output_path.with_suffix(".json")
metadata = {
"prompt": prompt,
"negative_prompt": negative_prompt,
"preset": preset,
"settings": settings,
"seed": seed,
"timestamp": datetime.now().isoformat(),
"output": str(output_path),
}
with open(meta_path, "w") as f:
json.dump(metadata, f, indent=2)
print(f"\n{'='*60}")
print(f"done!")
print(f"video: {output_path}")
print(f"metadata: {meta_path}")
print(f"seed: {seed} (use --seed {seed} to reproduce)")
print(f"{'='*60}\n")
return output_path
def batch_generate(self, prompts_file, preset="standard"):
"""generate multiple clips from a file"""
prompts_path = Path(prompts_file)
if not prompts_path.exists():
print(f"file not found: {prompts_file}")
return
prompts = []
with open(prompts_path) as f:
current_prompt = []
for line in f:
line = line.strip()
if line == "---": # separator between prompts
if current_prompt:
prompts.append(" ".join(current_prompt))
current_prompt = []
elif line and not line.startswith("#"): # skip comments
current_prompt.append(line)
if current_prompt:
prompts.append(" ".join(current_prompt))
print(f"found {len(prompts)} prompts in {prompts_file}")
print(f"preset: {preset}")
print()
for i, prompt in enumerate(prompts):
print(f"\n[{i+1}/{len(prompts)}]")
self.generate(prompt, preset=preset)
def interactive(self):
"""interactive prompt mode"""
print("\n" + "="*60)
print("clipmaker interactive mode")
print("="*60)
print(PROMPT_TIPS)
print("\npresets:", ", ".join(PRESETS.keys()))
print("commands: /preset <name>, /tips, /quit\n")
current_preset = "standard"
while True:
try:
prompt = input(f"[{current_preset}] > ").strip()
except (EOFError, KeyboardInterrupt):
print("\nbye!")
break
if not prompt:
continue
elif prompt == "/quit":
print("bye!")
break
elif prompt == "/tips":
print(PROMPT_TIPS)
elif prompt.startswith("/preset"):
parts = prompt.split()
if len(parts) > 1 and parts[1] in PRESETS:
current_preset = parts[1]
print(f"preset: {current_preset} - {PRESETS[current_preset]['description']}")
else:
print(f"presets: {', '.join(PRESETS.keys())}")
else:
self.generate(prompt, preset=current_preset)
def main():
parser = argparse.ArgumentParser(
description="clipmaker - hq video clip generator",
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog=f"""
presets:
preview - {PRESETS['preview']['description']}
standard - {PRESETS['standard']['description']}
hq - {PRESETS['hq']['description']}
max - {PRESETS['max']['description']}
cinematic - {PRESETS['cinematic']['description']}
examples:
clipmaker "a cat sleeping on a couch"
clipmaker "epic sunset timelapse" --preset hq
clipmaker --batch prompts.txt --preset standard
clipmaker --interactive
"""
)
parser.add_argument("prompt", nargs="?", help="video prompt")
parser.add_argument("--preset", "-p", default="standard",
choices=PRESETS.keys(), help="quality preset")
parser.add_argument("--output", "-o", help="output path")
parser.add_argument("--seed", "-s", type=int, help="random seed")
parser.add_argument("--negative", "-n", help="negative prompt")
parser.add_argument("--no-audio", action="store_true", help="disable audio")
parser.add_argument("--batch", "-b", help="batch generate from file")
parser.add_argument("--interactive", "-i", action="store_true",
help="interactive mode")
parser.add_argument("--output-dir", default="~/Desktop/clips",
help="output directory")
parser.add_argument("--tips", action="store_true", help="show prompt tips")
args = parser.parse_args()
if args.tips:
print(PROMPT_TIPS)
return
maker = ClipMaker(output_dir=args.output_dir)
if args.interactive:
maker.interactive()
elif args.batch:
maker.batch_generate(args.batch, preset=args.preset)
elif args.prompt:
maker.generate(
prompt=args.prompt,
preset=args.preset,
negative_prompt=args.negative,
seed=args.seed,
output_path=args.output,
no_audio=args.no_audio,
)
else:
parser.print_help()
if __name__ == "__main__":
main()