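Link the text_encoder and tokenizer directories into the output directory during conversion: symlink them when they exist in the local source, otherwise download them from the Hugging Face repo. In LTX2TextEncoder, when no tokenizer directory exists and loading the tokenizer from the text encoder path fails, fall back to the "google/gemma-3-12b-it" tokenizer.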

2026-03-09 18:25:32 +01:00
parent 41ed62f7e8
commit 576e01da14
2 changed files with 31 additions and 1 deletions
--- a/mlx_video/models/ltx/convert.py
+++ b/mlx_video/models/ltx/convert.py
@@ -611,6 +611,33 @@ def convert(source: str, output_path: Path, variant: str = "distilled"):
        else:
            print(f"  {upscaler_file}: not found, skipping")

+    # 8. Link text_encoder and tokenizer directories
+    print("\nLinking text encoder & tokenizer...")
+    for subdir in ["text_encoder", "tokenizer"]:
+        dest = output_path / subdir
+        if dest.exists():
+            print(f"  {subdir}/: already exists, skipping")
+            continue
+
+        local_candidate = source_dir / subdir
+        if local_candidate.is_dir():
+            # Resolve through symlinks to get the real directory
+            real_path = local_candidate.resolve()
+            dest.symlink_to(real_path)
+            print(f"  {subdir}/: symlinked to {real_path}")
+        elif is_hf_repo:
+            from huggingface_hub import snapshot_download
+
+            print(f"  {subdir}/: downloading from {source}...")
+            snapshot_download(
+                repo_id=source,
+                allow_patterns=f"{subdir}/*",
+                local_dir=str(output_path),
+            )
+            print(f"  {subdir}/: done")
+        else:
+            print(f"  {subdir}/: not found in source, skipping")
+
    # Summary
    all_converted = (
        len(transformer_weights)
--- a/mlx_video/models/ltx/text_encoder.py
+++ b/mlx_video/models/ltx/text_encoder.py
@@ -754,7 +754,10 @@ class LTX2TextEncoder(nn.Module):
        if tokenizer_path.exists():
            self.processor = AutoTokenizer.from_pretrained(str(tokenizer_path), trust_remote_code=True)
        else:
-            self.processor = AutoTokenizer.from_pretrained(text_encoder_path, trust_remote_code=True)
+            try:
+                self.processor = AutoTokenizer.from_pretrained(text_encoder_path, trust_remote_code=True)
+            except Exception:
+                self.processor = AutoTokenizer.from_pretrained("google/gemma-3-12b-it", trust_remote_code=True)
        # Set left padding to match official LTX-2 text encoder
        self.processor.padding_side = "left"