Upgrade to LTX-2.3 with audio generation

- Switch from mlx_video.generate_av to mlx_video.models.ltx_2.generate
- Use prince-canuma/LTX-2.3-distilled model with google/gemma-3-12b-it text encoder
- Add --audio flag for joint audio-video generation
- Add auto-background execution with nohup logging
- Add CLAUDE.md and test stories

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Norbert Schmidt
2026-03-31 13:55:39 +02:00
parent e49c273b94
commit 02b8c27835
5 changed files with 117 additions and 3 deletions

View File

@@ -7,6 +7,26 @@
set -e
# Auto-background: when started from a terminal (stdin is a TTY) and not yet
# re-executed, restart this script under nohup with all output sent to a log
# file, print where the log lives, and hand the prompt back immediately.
# _MLX_BG is exported as the re-entry guard so the nohup'ed child skips this.

#######################################
# Pick the directory that receives generation.log.
# Scans only the leading positional arguments (everything before the first
# `--option`). The second positional is used as the log directory; the first
# one is skipped (presumably the story file -- confirm against the script's
# usage text). State is kept in locals so stray environment variables cannot
# influence the result.
# Arguments: $1 - fallback directory; $2.. - the script's own arguments
# Outputs:   the chosen directory on stdout
#######################################
_mlx_pick_log_dir() {
  local fallback=$1
  shift
  local arg seen_first=0
  for arg in "$@"; do
    # Options start here; no output directory was given positionally.
    case "$arg" in
      --*) break ;;
    esac
    if [ "$seen_first" -eq 1 ]; then
      printf '%s\n' "$arg"
      return 0
    fi
    seen_first=1
  done
  printf '%s\n' "$fallback"
}

if [ -z "${_MLX_BG:-}" ] && [ -t 0 ]; then
  export _MLX_BG=1
  # Determine log file location (need to peek at args for output dir)
  _LOG_DIR=$(_mlx_pick_log_dir "$HOME/Nextcloud/Documents/mlx-video-stories" "$@")
  mkdir -p -- "$_LOG_DIR"
  _LOG="$_LOG_DIR/generation.log"
  echo "Running in background. Log: $_LOG"
  echo "Follow with: tail -f $_LOG"
  # stdin is detached explicitly: not every nohup implementation does it, and
  # a background job that reads from the terminal is stopped by SIGTTIN.
  nohup "$0" "$@" < /dev/null > "$_LOG" 2>&1 &
  echo "PID: $!"
  exit 0
fi
# Default settings
WIDTH=1920
HEIGHT=1088
@@ -14,7 +34,7 @@ FRAMES=121
STRENGTH=0.7
FPS=24
VENV_PYTHON="${VENV_PYTHON:-./venv/bin/python}"
OUTPUT_DIR="./output"
OUTPUT_DIR="$HOME/Nextcloud/Documents/mlx-video-stories"
# Colors
RED='\033[0;31m'
@@ -136,13 +156,16 @@ for i in $(seq 1 $NUM_SCENES); do
if [ $i -eq 1 ]; then
# First scene: Text-to-Video
$VENV_PYTHON -m mlx_video.generate_av \
$VENV_PYTHON -m mlx_video.models.ltx_2.generate \
--prompt "$PROMPT" \
--model-repo prince-canuma/LTX-2.3-distilled \
--text-encoder-repo google/gemma-3-12b-it \
--height $HEIGHT \
--width $WIDTH \
--num-frames $FRAMES \
--fps $FPS \
--seed $((42 + i)) \
--audio \
--output-path "$SCENE_FILE"
else
# Subsequent scenes: Image-to-Video
@@ -162,8 +185,10 @@ for i in $(seq 1 $NUM_SCENES); do
fi
# Generate with I2V
$VENV_PYTHON -m mlx_video.generate_av \
$VENV_PYTHON -m mlx_video.models.ltx_2.generate \
--prompt "$PROMPT" \
--model-repo prince-canuma/LTX-2.3-distilled \
--text-encoder-repo google/gemma-3-12b-it \
--image "$LAST_FRAME" \
--image-strength $STRENGTH \
--height $HEIGHT \
@@ -171,6 +196,7 @@ for i in $(seq 1 $NUM_SCENES); do
--num-frames $FRAMES \
--fps $FPS \
--seed $((42 + i)) \
--audio \
--output-path "$SCENE_FILE"
fi