From decb3eb9e5779a4f7f9cdcae5338d53e6495ca03 Mon Sep 17 00:00:00 2001
From: Prince Canuma <prince.gdt@gmail.com>
Date: Mon, 16 Mar 2026 02:02:13 +0100
Subject: [PATCH] Add librosa dependency and enhance A2V documentation with
 additional pipeline options

---
 README.md      | 16 +++++++++++++---
 pyproject.toml |  1 +
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 8d86c69..80c87ef 100644
--- a/README.md
+++ b/README.md
@@ -88,15 +88,23 @@ uv run mlx_video.generate --pipeline dev --prompt "Waves crashing" --image beach
 
 ### Audio-to-Video (A2V)
 
-Generate video conditioned on an input audio file. The audio is encoded to latent space and frozen during denoising — the transformer's cross-attention reads the audio signal to guide video generation.
+Generate video conditioned on an input audio file. Works with all four pipelines (distilled, dev, dev-two-stage, and dev-two-stage-hq). The audio is encoded to latent space and frozen during denoising — the transformer's cross-attention reads the audio signal to guide video generation.
 
 ```bash
-# A2V - generate video from audio
+# A2V - distilled (default, fastest)
 uv run mlx_video.generate --audio-file music.wav --prompt "A band playing music"
 
-# A2V with dev pipeline
+# A2V - dev (single-stage with CFG)
 uv run mlx_video.generate --pipeline dev --audio-file ocean.wav --prompt "Ocean waves"
 
+# A2V - dev-two-stage (dev + LoRA refinement)
+uv run mlx_video.generate --pipeline dev-two-stage --audio-file music.wav \
+    --prompt "A band playing music" --model-repo prince-canuma/LTX-2-dev
+
+# A2V - dev-two-stage-hq (highest quality)
+uv run mlx_video.generate --pipeline dev-two-stage-hq --audio-file music.wav \
+    --prompt "A band playing music" --model-repo prince-canuma/LTX-2-dev
+
 # A2V + I2V (audio + image conditioning)
 uv run mlx_video.generate --audio-file rain.wav --image forest.jpg --prompt "Rain in forest"
 
@@ -104,6 +112,8 @@ uv run mlx_video.generate --audio-file rain.wav --image forest.jpg --prompt "Rai
 uv run mlx_video.generate --audio-file song.mp3 --audio-start-time 30.0 --prompt "Concert"
 ```
 
+> **Note:** `--audio-file` (A2V) and `--audio` (generate audio) are mutually exclusive. Supported input formats for `--audio-file`: WAV, FLAC, MP3, OGG, and video files with audio tracks.
+
 ### Audio-Video Generation (experimental)
 
 Generate synchronized audio alongside video from scratch:
diff --git a/pyproject.toml b/pyproject.toml
index 7c10195..b20887a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -21,6 +21,7 @@ dependencies = [
     "Pillow>=10.3.0",
     "mlx-vlm",
     "rich>=14.2.0",
+    "librosa>=0.10.0",
 ]
 license = {text="MIT"}
 authors = [