Added story generator

2026-01-11 20:01:20 +01:00
parent fc95099449
commit 8174da7490
6 changed files with 846 additions and 0 deletions
--- a/clipmaker.py
+++ b/clipmaker.py
@@ -0,0 +1,339 @@
+#!/usr/bin/env python3
+"""
+clipmaker - high quality video clip generator for ltx-2
+
+usage:
+    clipmaker "your prompt here"                    # quick preview
+    clipmaker "your prompt" --preset hq             # high quality
+    clipmaker "your prompt" --preset max            # maximum quality
+    clipmaker --batch prompts.txt                   # batch from file
+    clipmaker --interactive                         # interactive mode
+"""
+
+import argparse
+import os
+import sys
+import json
+from datetime import datetime
+from pathlib import Path
+
+import numpy as np
+import torch
+from PIL import Image
+
+# quality presets
+PRESETS = {
+    "preview": {
+        "width": 512,
+        "height": 320,
+        "frames": 25,
+        "steps": 10,
+        "guidance": 4.0,
+        "description": "fast preview (~1 min)"
+    },
+    "standard": {
+        "width": 768,
+        "height": 448,
+        "frames": 49,
+        "steps": 20,
+        "guidance": 4.0,
+        "description": "balanced quality (~5 min)"
+    },
+    "hq": {
+        "width": 1024,
+        "height": 576,
+        "frames": 97,
+        "steps": 25,
+        "guidance": 4.0,
+        "description": "high quality (~15 min)"
+    },
+    "max": {
+        "width": 1024,
+        "height": 576,
+        "frames": 161,
+        "steps": 30,
+        "guidance": 4.0,
+        "description": "maximum quality (~30 min)"
+    },
+    "cinematic": {
+        "width": 1280,
+        "height": 720,
+        "frames": 97,
+        "steps": 30,
+        "guidance": 4.5,
+        "description": "cinematic 720p (~25 min)"
+    },
+}
+
+# default negative prompt based on ltx-2 guide
+DEFAULT_NEGATIVE = "blurry, low quality, distorted, deformed, ugly, bad anatomy, text, watermark, signature, out of frame"
+
+# prompt enhancement tips
+#
+# user-facing help text: printed by the --tips flag, at the start of
+# interactive mode, and by the interactive /tips command. the literal is
+# printed verbatim, so its leading/trailing newlines and indentation are
+# part of the output.
+PROMPT_TIPS = """
+prompt tips (from ltx-2 guide):
+  - write as flowing paragraph, 4-8 sentences
+  - include: shot type, lighting, action, camera movement, audio
+  - use cinematography terms: dolly, pan, track, handheld, close-up
+  - describe sounds and dialogue in "quotes"
+  - use present tense for actions
+
+example:
+  "A cinematic medium shot of a coffee cup on a wooden table, steam rising
+   gently in soft morning light. The camera slowly pushes in as a hand
+   reaches into frame to lift the cup. Warm ambient cafe sounds and soft
+   jazz play in the background. Shallow depth of field, golden hour lighting."
+"""
+
+
+class ClipMaker:
+    def __init__(self, output_dir="~/Desktop/clips"):
+        self.output_dir = Path(output_dir).expanduser()
+        self.output_dir.mkdir(parents=True, exist_ok=True)
+        self.pipe = None
+        self.device = None
+
+    def load_model(self):
+        """load the ltx-2 pipeline"""
+        if self.pipe is not None:
+            return
+
+        from diffusers import LTX2Pipeline
+
+        self.device = "mps" if torch.backends.mps.is_available() else "cpu"
+        print(f"device: {self.device}")
+
+        print("loading ltx-2 model...")
+        self.pipe = LTX2Pipeline.from_pretrained(
+            "Lightricks/LTX-2",
+            torch_dtype=torch.bfloat16
+        )
+        self.pipe.to(self.device)
+        print("model ready\n")
+
+    def generate(self, prompt, preset="standard", negative_prompt=None,
+                 seed=None, output_path=None, no_audio=False):
+        """generate a video clip"""
+        from diffusers.pipelines.ltx2.export_utils import encode_video
+
+        self.load_model()
+
+        # get preset settings
+        if preset not in PRESETS:
+            print(f"unknown preset: {preset}")
+            print(f"available: {', '.join(PRESETS.keys())}")
+            return None
+
+        settings = PRESETS[preset]
+
+        # generate output path if not provided
+        if output_path is None:
+            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+            output_path = self.output_dir / f"clip_{timestamp}.mp4"
+        else:
+            output_path = Path(output_path)
+
+        # set seed
+        if seed is None:
+            seed = torch.randint(0, 2**31, (1,)).item()
+        generator = torch.Generator(device="cpu")
+        generator.manual_seed(seed)
+
+        # use default negative if not provided
+        if negative_prompt is None:
+            negative_prompt = DEFAULT_NEGATIVE
+
+        print(f"{'='*60}")
+        print(f"generating clip")
+        print(f"{'='*60}")
+        print(f"preset: {preset} ({settings['description']})")
+        print(f"size: {settings['width']}x{settings['height']}")
+        print(f"frames: {settings['frames']} (~{settings['frames']/24:.1f}s)")
+        print(f"steps: {settings['steps']}")
+        print(f"seed: {seed}")
+        print(f"audio: {'no' if no_audio else 'yes'}")
+        print(f"output: {output_path}")
+        print(f"\nprompt: {prompt[:100]}{'...' if len(prompt) > 100 else ''}")
+        print(f"{'='*60}\n")
+
+        # generate
+        result = self.pipe(
+            prompt=prompt,
+            negative_prompt=negative_prompt,
+            width=settings["width"],
+            height=settings["height"],
+            num_frames=settings["frames"],
+            num_inference_steps=settings["steps"],
+            guidance_scale=settings["guidance"],
+            generator=generator,
+        )
+
+        # get video frames as tensor
+        video_frames = result.frames[0]
+        video_tensor = torch.stack([torch.from_numpy(np.array(f)) for f in video_frames])
+
+        # get audio
+        audio = None
+        audio_sr = None
+        if not no_audio and result.audio is not None:
+            audio = result.audio[0].float().cpu()
+            audio_sr = self.pipe.vocoder.config.output_sampling_rate
+            print(f"audio: {audio_sr}Hz")
+
+        # export
+        encode_video(
+            video=video_tensor,
+            fps=24,
+            audio=audio,
+            audio_sample_rate=audio_sr,
+            output_path=str(output_path)
+        )
+
+        # save metadata
+        meta_path = output_path.with_suffix(".json")
+        metadata = {
+            "prompt": prompt,
+            "negative_prompt": negative_prompt,
+            "preset": preset,
+            "settings": settings,
+            "seed": seed,
+            "timestamp": datetime.now().isoformat(),
+            "output": str(output_path),
+        }
+        with open(meta_path, "w") as f:
+            json.dump(metadata, f, indent=2)
+
+        print(f"\n{'='*60}")
+        print(f"done!")
+        print(f"video: {output_path}")
+        print(f"metadata: {meta_path}")
+        print(f"seed: {seed} (use --seed {seed} to reproduce)")
+        print(f"{'='*60}\n")
+
+        return output_path
+
+    def batch_generate(self, prompts_file, preset="standard"):
+        """generate multiple clips from a file"""
+        prompts_path = Path(prompts_file)
+        if not prompts_path.exists():
+            print(f"file not found: {prompts_file}")
+            return
+
+        prompts = []
+        with open(prompts_path) as f:
+            current_prompt = []
+            for line in f:
+                line = line.strip()
+                if line == "---":  # separator between prompts
+                    if current_prompt:
+                        prompts.append(" ".join(current_prompt))
+                        current_prompt = []
+                elif line and not line.startswith("#"):  # skip comments
+                    current_prompt.append(line)
+            if current_prompt:
+                prompts.append(" ".join(current_prompt))
+
+        print(f"found {len(prompts)} prompts in {prompts_file}")
+        print(f"preset: {preset}")
+        print()
+
+        for i, prompt in enumerate(prompts):
+            print(f"\n[{i+1}/{len(prompts)}]")
+            self.generate(prompt, preset=preset)
+
+    def interactive(self):
+        """interactive prompt mode"""
+        print("\n" + "="*60)
+        print("clipmaker interactive mode")
+        print("="*60)
+        print(PROMPT_TIPS)
+        print("\npresets:", ", ".join(PRESETS.keys()))
+        print("commands: /preset <name>, /tips, /quit\n")
+
+        current_preset = "standard"
+
+        while True:
+            try:
+                prompt = input(f"[{current_preset}] > ").strip()
+            except (EOFError, KeyboardInterrupt):
+                print("\nbye!")
+                break
+
+            if not prompt:
+                continue
+            elif prompt == "/quit":
+                print("bye!")
+                break
+            elif prompt == "/tips":
+                print(PROMPT_TIPS)
+            elif prompt.startswith("/preset"):
+                parts = prompt.split()
+                if len(parts) > 1 and parts[1] in PRESETS:
+                    current_preset = parts[1]
+                    print(f"preset: {current_preset} - {PRESETS[current_preset]['description']}")
+                else:
+                    print(f"presets: {', '.join(PRESETS.keys())}")
+            else:
+                self.generate(prompt, preset=current_preset)
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="clipmaker - hq video clip generator",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog=f"""
+presets:
+  preview   - {PRESETS['preview']['description']}
+  standard  - {PRESETS['standard']['description']}
+  hq        - {PRESETS['hq']['description']}
+  max       - {PRESETS['max']['description']}
+  cinematic - {PRESETS['cinematic']['description']}
+
+examples:
+  clipmaker "a cat sleeping on a couch"
+  clipmaker "epic sunset timelapse" --preset hq
+  clipmaker --batch prompts.txt --preset standard
+  clipmaker --interactive
+        """
+    )
+
+    parser.add_argument("prompt", nargs="?", help="video prompt")
+    parser.add_argument("--preset", "-p", default="standard",
+                        choices=PRESETS.keys(), help="quality preset")
+    parser.add_argument("--output", "-o", help="output path")
+    parser.add_argument("--seed", "-s", type=int, help="random seed")
+    parser.add_argument("--negative", "-n", help="negative prompt")
+    parser.add_argument("--no-audio", action="store_true", help="disable audio")
+    parser.add_argument("--batch", "-b", help="batch generate from file")
+    parser.add_argument("--interactive", "-i", action="store_true",
+                        help="interactive mode")
+    parser.add_argument("--output-dir", default="~/Desktop/clips",
+                        help="output directory")
+    parser.add_argument("--tips", action="store_true", help="show prompt tips")
+
+    args = parser.parse_args()
+
+    if args.tips:
+        print(PROMPT_TIPS)
+        return
+
+    maker = ClipMaker(output_dir=args.output_dir)
+
+    if args.interactive:
+        maker.interactive()
+    elif args.batch:
+        maker.batch_generate(args.batch, preset=args.preset)
+    elif args.prompt:
+        maker.generate(
+            prompt=args.prompt,
+            preset=args.preset,
+            negative_prompt=args.negative,
+            seed=args.seed,
+            output_path=args.output,
+            no_audio=args.no_audio,
+        )
+    else:
+        parser.print_help()
+
+
+# run the cli only when executed as a script, not when imported
+if __name__ == "__main__":
+    main()