Add fully local conversational AI pipeline for Reachy Mini

Local STT (Qwen3-ASR), VLM (Gemma 4 26B-A4B), and TTS (Spark-TTS) running
on Apple Silicon via MLX, with bracket-tag action system for nod, shake,
wiggle, dance, photo, and pre-recorded emotions.
This commit is contained in:
Norbert Schmidt
2026-05-12 09:24:02 +02:00
parent 3a8a8e3145
commit 5a04a7133a
12 changed files with 4074 additions and 0 deletions

23
pyproject.toml Normal file
View File

@@ -0,0 +1,23 @@
# Package metadata in the standard `[project]` table (PEP 621).
[project]
name = "reachy-mlx-vlm"
version = "0.1.0"
description = "Fully local conversational AI for the Reachy Mini robot, running on Apple Silicon with MLX."
readme = "README.md"
requires-python = ">=3.12"
license = { text = "MIT" }
authors = [
    { name = "Norbert Schmidt" },
]
keywords = [
    "reachy-mini",
    "robotics",
    "mlx",
    "llm",
    "tts",
    "stt",
    "apple-silicon",
]
# Trove classifiers used by package indexes for search and filtering.
classifiers = [
    "Programming Language :: Python :: 3",
    "License :: OSI Approved :: MIT License",
    "Operating System :: MacOS",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
]
# Runtime requirements as PEP 508 strings, one per line, sorted by name.
# Every entry sets only a lower bound (`>=`); no upper caps are applied.
dependencies = [
    "einops>=0.8.1",
    "mlx-audio>=0.2.9",
    "mlx-vlm>=0.3.9",
    "numba>=0.63.1",
    # Extras `gstreamer` and `mujoco` are requested from reachy-mini.
    "reachy-mini[gstreamer,mujoco]>=1.2.3",
    "tiktoken>=0.12.0",
]