diff --git a/README.md b/README.md
index 190bdf7..99b3f62 100644
--- a/README.md
+++ b/README.md
@@ -19,38 +19,100 @@ uv pip install git+https://github.com/Blaizzy/mlx-video.git
 Supported models:
 
 ### LTX-2
-[LTX-2](https://huggingface.co/Lightricks/LTX-Video) is 19B parameter video generation model from Lightricks
+[LTX-2](https://huggingface.co/Lightricks/LTX-2) is a 19B parameter video generation model from Lightricks.
 
 ## Features
 
-- Text-to-video generation with the LTX-2 19B DiT model
-- Two-stage generation pipeline for high-quality output
+- Text-to-video (T2V) and image-to-video (I2V) generation
+- Three pipeline modes: Distilled, Dev, and Dev Two-Stage
+- Synchronized audio-video generation (experimental)
+- LoRA support (including HuggingFace repos)
+- Prompt enhancement via Gemma
 - 2x spatial upscaling for images and videos
 - Optimized for Apple Silicon using MLX
 
-
 ## Usage
 
-> **ℹ️ Info:** Currently, only the distilled variant is supported. Full LTX-2 feature support is coming soon.
+### Pipelines
 
-### Text-to-Video Generation
+mlx-video supports three pipeline types via the `--pipeline` flag:
+
+| Pipeline | Description | CFG | Stages | Speed |
+|----------|-------------|-----|--------|-------|
+| `distilled` (default) | Fixed sigma schedule, no CFG | No | 2 (8+3 steps) | Fastest |
+| `dev` | Dynamic sigmas, constant CFG | Yes | 1 (30 steps) | Medium |
+| `dev-two-stage` | Dev + LoRA refinement | Yes (stage 1) | 2 (30+3 steps) | Slowest, highest quality |
+
+### Text-to-Video
 
 ```bash
-uv run mlx_video.generate --prompt "Two dogs of the poodle breed wearing sunglasses, close up, cinematic, sunset" -n 100 --width 768
+# Distilled (default) - fast, two-stage
+uv run mlx_video.generate --prompt "Two dogs wearing sunglasses, cinematic, sunset" -n 97 --width 768
+
+# Dev - single-stage with CFG
+uv run mlx_video.generate --pipeline dev --prompt "A cinematic scene" --cfg-scale 3.0
+
+# Dev two-stage - dev + LoRA refinement (highest quality)
+uv run mlx_video.generate --pipeline dev-two-stage \
+    --prompt "Two dogs of the poodle breed wearing sunglasses, close up, cinematic, sunset" \
+    -n 145 --width 1024 --height 768 \
+    --model-repo prince-canuma/LTX-2-dev \
+    --cfg-scale 3.0 --lora-strength 0.8 \
+    --enhance-prompt
 ```
 
 <img src="https://github.com/Blaizzy/mlx-video/raw/main/examples/poodles.gif" width="512" alt="Poodles demo">
 
-With custom settings:
+### Image-to-Video
 
 ```bash
-python -m mlx_video.generate \
-    --prompt "Ocean waves crashing on a beach at sunset" \
-    --height 768 \
-    --width 768 \
-    --num-frames 65 \
-    --seed 123 \
-    --output my_video.mp4
+# Distilled I2V
+uv run mlx_video.generate --prompt "A person dancing" --image photo.jpg
+
+# Dev I2V
+uv run mlx_video.generate --pipeline dev --prompt "Waves crashing" --image beach.png --cfg-scale 3.5
+```
+
+### Audio-Video (experimental)
+
+```bash
+uv run mlx_video.generate --prompt "Ocean waves crashing" --audio
+uv run mlx_video.generate --pipeline dev --prompt "A jazz band playing" --audio --enhance-prompt
+```
+
+### LoRA
+
+LoRA weights can be loaded from a file, directory, or HuggingFace repo:
+
+```bash
+# From HuggingFace repo
+uv run mlx_video.generate --pipeline dev-two-stage \
+    --prompt "Camera dolly out of a forest" \
+    --lora-path Lightricks/LTX-2-19b-LoRA-Camera-Control-Dolly-Out \
+    --lora-strength 1.0
+
+# From local file
+uv run mlx_video.generate --pipeline dev-two-stage \
+    --prompt "A scene" \
+    --lora-path ./my-lora/weights.safetensors
+
+# From local directory (auto-detects .safetensors file)
+uv run mlx_video.generate --pipeline dev-two-stage \
+    --prompt "A scene" \
+    --lora-path ./LTX-2-distilled/lora
+```
+
+### Upscaling
+
+```bash
+# Upscale an image 2x
+uv run mlx_video.upscale --input photo.png --output upscaled.png
+
+# Upscale a video 2x
+uv run mlx_video.upscale --input video.mp4 --output upscaled.mp4
+
+# Upscale with refinement (higher quality, requires a text prompt)
+uv run mlx_video.upscale --input video.mp4 --output upscaled.mp4 --refine --prompt "A cinematic scene"
 ```
 
 ### CLI Options
@@ -58,22 +120,56 @@ python -m mlx_video.generate \
 | Option | Default | Description |
 |--------|---------|-------------|
 | `--prompt`, `-p` | (required) | Text description of the video |
-| `--height`, `-H` | 512 | Output height (must be divisible by 64) |
-| `--width`, `-W` | 512 | Output width (must be divisible by 64) |
-| `--num-frames`, `-n` | 100 | Number of frames |
+| `--pipeline` | `distilled` | Pipeline type: `distilled`, `dev`, or `dev-two-stage` |
+| `--height`, `-H` | 512 | Output height (divisible by 64 for `distilled` and `dev-two-stage`, 32 for `dev`) |
+| `--width`, `-W` | 512 | Output width (divisible by 64 for `distilled` and `dev-two-stage`, 32 for `dev`) |
+| `--num-frames`, `-n` | 33 | Number of frames (must be 1 + 8*k) |
 | `--seed`, `-s` | 42 | Random seed for reproducibility |
 | `--fps` | 24 | Frames per second |
-| `--output`, `-o` | output.mp4 | Output video path |
-| `--save-frames` | false | Save individual frames as images |
+| `--output-path`, `-o` | output.mp4 | Output video path |
 | `--model-repo` | Lightricks/LTX-2 | HuggingFace model repository |
+| `--text-encoder-repo` | None | Separate text encoder repo (if not in model repo) |
+| `--save-frames` | false | Save individual frames as images |
+| `--enhance-prompt` | false | Enhance prompt using Gemma |
+| `--image`, `-i` | None | Conditioning image for I2V |
+| `--image-strength` | 1.0 | Conditioning strength for I2V |
+| `--audio`, `-a` | false | Enable synchronized audio generation |
+| `--tiling` | `auto` | VAE tiling mode: `auto`, `none`, `aggressive`, `conservative` |
+| `--stream` | false | Stream frames as they decode |
+
+**Dev/Dev-Two-Stage options:**
+
+| Option | Default | Description |
+|--------|---------|-------------|
+| `--steps` | 30 | Number of denoising steps |
+| `--cfg-scale` | 3.0 | CFG guidance scale |
+| `--cfg-rescale` | 0.7 | CFG rescale factor (reduces over-saturation) |
+| `--negative-prompt` | (default) | Negative prompt for CFG |
+| `--apg` | false | Use Adaptive Projected Guidance (more stable for I2V) |
+
+**Dev-Two-Stage LoRA options:**
+
+| Option | Default | Description |
+|--------|---------|-------------|
+| `--lora-path` | auto-detect | Path to LoRA file, directory, or HuggingFace repo |
+| `--lora-strength` | 1.0 | LoRA merge strength |
 
 ## How It Works
 
-The pipeline uses a two-stage generation process:
-
-1. **Stage 1**: Generate at half resolution (e.g., 384x384) with 8 denoising steps
+### Distilled Pipeline (default)
+1. **Stage 1**: Generate at half resolution with 8 denoising steps (fixed sigmas)
 2. **Upsample**: 2x spatial upsampling via LatentUpsampler
-3. **Stage 2**: Refine at full resolution (e.g., 768x768) with 3 denoising steps
+3. **Stage 2**: Refine at full resolution with 3 denoising steps
+4. **Decode**: VAE decoder converts latents to RGB video
+
+### Dev Pipeline
+1. **Generate**: Full resolution with configurable steps and constant CFG
+2. **Decode**: VAE decoder converts latents to RGB video
+
+### Dev Two-Stage Pipeline
+1. **Stage 1**: Dev denoising at half resolution with CFG
+2. **Upsample**: 2x spatial upsampling via LatentUpsampler
+3. **Stage 2**: Distilled refinement at full resolution with LoRA weights (3 steps, no CFG)
 4. **Decode**: VAE decoder converts latents to RGB video
 
 ## Requirements
@@ -84,29 +180,10 @@ The pipeline uses a two-stage generation process:
 
 ## Model Specifications
 
-- **Transformer**: 48 layers, 32 attention heads, 128 dim per head
+- **Transformer**: 48 layers, 32 attention heads, 128 dim per head (19B parameters)
 - **Latent channels**: 128
 - **Text encoder**: Gemma 3 with 3840-dim output
-- **RoPE**: Split mode with double precision
-
-## Project Structure
-
-```
-mlx_video/
-├── generate.py             # Video generation pipeline
-├── convert.py              # Weight conversion (PyTorch -> MLX)
-├── postprocess.py          # Video post-processing utilities
-├── utils.py                # Helper functions
-└── models/
-    └── ltx/
-        ├── ltx.py          # Main LTXModel (DiT transformer)
-        ├── config.py       # Model configuration
-        ├── transformer.py  # Transformer blocks
-        ├── attention.py    # Multi-head attention with RoPE
-        ├── text_encoder.py # Text encoder
-        ├── upsampler.py    # 2x spatial upsampler
-        └── video_vae/      # VAE encoder/decoder
-```
+- **Audio**: Synchronized audio-video generation with a separate audio VAE and vocoder
 
 ## License
 
diff --git a/mlx_video/generate.py b/mlx_video/generate.py
index b99ab7b..4df6fbe 100644
--- a/mlx_video/generate.py
+++ b/mlx_video/generate.py
@@ -80,14 +80,25 @@ def load_and_merge_lora(
-        lora_path: Path to the LoRA safetensors file or directory containing one
+        lora_path: LoRA safetensors file, directory containing one, or HuggingFace repo ID
         strength: LoRA strength/coefficient (default 1.0)
     """
-    # Resolve path: if directory, find the safetensors file inside
+    # Resolve path: local file/dir or HuggingFace repo
     lora_file = Path(lora_path)
-    if lora_file.is_dir():
+    if lora_file.is_file():
+        pass  # direct file path
+    elif lora_file.is_dir():
+        # Local directory: find safetensors inside
         candidates = sorted(lora_file.glob("*.safetensors"))
         if not candidates:
             raise FileNotFoundError(f"No .safetensors files found in {lora_path}")
         lora_file = candidates[0]
         console.print(f"[dim]Using LoRA file: {lora_file.name}[/]")
+    else:
+        # Treat as HuggingFace repo ID
+        lora_dir = get_model_path(lora_path)
+        candidates = sorted(lora_dir.glob("*.safetensors"))
+        if not candidates:
+            raise FileNotFoundError(f"No .safetensors files found in {lora_dir}")
+        lora_file = candidates[0]
+        console.print(f"[dim]Using LoRA from repo: {lora_path} ({lora_file.name})[/]")
 
     # Load LoRA weights
     lora_weights = mx.load(str(lora_file))