Local STT (Qwen3-ASR), VLM (Gemma 4 26B-A4B), and TTS (Spark-TTS) running on Apple Silicon via MLX, with a bracket-tag action system for nod, shake, wiggle, dance, photo, and pre-recorded emotions.
24 lines
741 B
TOML
24 lines
741 B
TOML
# Package metadata, declared in the standard `[project]` table.
[project]
name = "reachy-mlx-vlm"
version = "0.1.0"
description = "Fully local conversational AI for the Reachy Mini robot, running on Apple Silicon with MLX."
readme = "README.md"
requires-python = ">=3.12"
license = { text = "MIT" }
authors = [{ name = "Norbert Schmidt" }]
keywords = ["reachy-mini", "robotics", "mlx", "llm", "tts", "stt", "apple-silicon"]
classifiers = [
    "Programming Language :: Python :: 3",
    "License :: OSI Approved :: MIT License",
    "Operating System :: MacOS",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
]
# Runtime requirements as PEP 508 strings, one per line, kept in
# alphabetical order. Each entry pins only a lower bound.
dependencies = [
    "einops>=0.8.1",
    "mlx-audio>=0.2.9",
    "mlx-vlm>=0.3.9",
    "numba>=0.63.1",
    "reachy-mini[gstreamer,mujoco]>=1.2.3",
    "tiktoken>=0.12.0",
]