""" Reachy Mini Controller A centralized server that listens for Robot connections and hosts a Gradio control interface. """ import asyncio import threading import time import queue from dataclasses import dataclass from typing import Optional, Tuple import cv2 import gradio as gr import numpy as np from fastapi import FastAPI, WebSocket, WebSocketDisconnect from fastapi.responses import StreamingResponse import uvicorn from fastrtc import WebRTC, StreamHandler, get_cloudflare_turn_credentials from reachy_mini.utils import create_head_pose # ------------------------------------------------------------------- # 1. Configuration # ------------------------------------------------------------------- AUDIO_SAMPLE_RATE = 16000 # respeaker samplerate # Audio queue configuration MAX_AUDIO_QUEUE_SIZE = 2 # Movement step sizes NUDGE_ANGLE = 5.0 # degrees for head roll / yaw NUDGE_BODY = 0.3 # degrees for body_yaw NUDGE_PITCH = 5.0 # degrees for pitch # Video loop timing FRAME_SLEEP_S = 0.02 # TURN config TURN_TTL_SERVER_MS = 360_000 USE_VIDEO_WEBRTC = True turn_credentials = None# get_cloudflare_turn_credentials() server_turn_credentials = None# get_cloudflare_turn_credentials(ttl=TURN_TTL_SERVER_MS) # ------------------------------------------------------------------- # 2. Data Models # ------------------------------------------------------------------- @dataclass class Movement: name: str x: float = 0 y: float = 0 z: float = 0 roll: float = 0 pitch: float = 0 yaw: float = 0 body_yaw: float = 0 left_antenna: Optional[float] = None right_antenna: Optional[float] = None duration: float = 1.0 # ------------------------------------------------------------------- # 3. Global State # ------------------------------------------------------------------- class GlobalState: """ Singleton-style class to manage shared state between FastAPI (WebSockets) and Gradio (UI). 
""" def __init__(self): # Connection handles self.robot_ws: Optional[WebSocket] = None self.robot_loop: Optional[asyncio.AbstractEventLoop] = None # Video Stream Data self.frame_lock = threading.Lock() self.black_frame = np.zeros((640, 640, 3), dtype=np.uint8) _, buffer = cv2.imencode(".jpg", self.black_frame) self.latest_frame_bytes = buffer.tobytes() self.latest_frame_ts = time.time() # Audio from robot -> browser # Queue of (sample_rate: int, audio_bytes: bytes) self.audio_queue: "queue.Queue[Tuple[int, bytes]]" = queue.Queue() # Audio from operator -> robot self.audio_to_robot_queue: "queue.Queue[bytes]" = queue.Queue() # Live pose state self.pose_lock = threading.Lock() self.current_pose = Movement( name="Current", x=0, y=0, z=0, roll=0, pitch=0, yaw=0, body_yaw=0, left_antenna=0, right_antenna=0, duration=0.2, ) # --- Connection management --- def set_robot_connection(self, ws: WebSocket, loop: asyncio.AbstractEventLoop) -> None: self.robot_ws = ws self.robot_loop = loop def clear_robot_connection(self) -> None: self.robot_ws = None self.robot_loop = None # --- Video --- def update_frame(self, frame_bytes: bytes) -> None: with self.frame_lock: self.latest_frame_bytes = frame_bytes self.latest_frame_ts = time.time() # --- Audio queues --- @staticmethod def _push_bounded(q: queue.Queue, item, max_size: int, description: str) -> None: while q.qsize() >= max_size: try: dropped = q.get_nowait() del dropped print(f"Dropping oldest audio {description}, queue size is {q.qsize()}") except queue.Empty: break q.put(item) def push_audio_from_robot(self, audio_bytes: bytes) -> None: self._push_bounded( self.audio_queue, (AUDIO_SAMPLE_RATE, audio_bytes), MAX_AUDIO_QUEUE_SIZE, "FROM robot", ) def push_audio_to_robot(self, audio_bytes: bytes) -> None: self._push_bounded( self.audio_to_robot_queue, audio_bytes, MAX_AUDIO_QUEUE_SIZE, "TO robot", ) def get_audio_to_robot_blocking(self) -> bytes: try: return self.audio_to_robot_queue.get(timeout=0.2) except queue.Empty: return None # --- Status --- def get_connection_status(self) -> str: return "â Robot Connected" if self.robot_ws else "đ´ Waiting for Robot..." # --- Pose management --- def update_pose( self, dx: float = 0, dy: float = 0, dz: float = 0, droll: float = 0, dpitch: float = 0, dyaw: float = 0, dbody_yaw: float = 0, ) -> Movement: with self.pose_lock: p = self.current_pose new = Movement( name="Current", x=p.x + dx, y=p.y + dy, z=p.z + dz, roll=p.roll + droll, pitch=p.pitch + dpitch, yaw=p.yaw + dyaw, body_yaw=p.body_yaw + dbody_yaw, left_antenna=p.left_antenna, right_antenna=p.right_antenna, duration=0.4, ) # Clamp posed values new.pitch = float(np.clip(new.pitch, -30, 30)) new.yaw = float(np.clip(new.yaw, -180, 180)) new.roll = float(np.clip(new.roll, -40, 40)) new.body_yaw = float(np.clip(new.body_yaw, -3, 3)) new.z = float(np.clip(new.z, -20, 50)) new.x = float(np.clip(new.x, -50, 50)) new.y = float(np.clip(new.y, -50, 50)) self.current_pose = new return new def reset_pose(self) -> Movement: with self.pose_lock: self.current_pose = Movement( name="Current", x=0, y=0, z=0, roll=0, pitch=0, yaw=0, body_yaw=0, left_antenna=0, right_antenna=0, duration=0.3, ) return self.current_pose def get_pose_text(self) -> str: with self.pose_lock: p = self.current_pose return ( "Head position:\n" f" x={p.x:.1f}, y={p.y:.1f}, z={p.z:.1f}\n" f" roll={p.roll:.1f}, pitch={p.pitch:.1f}, yaw={p.yaw:.1f}\n" "Body:\n" f" body_yaw={p.body_yaw:.1f}" ) state = GlobalState() # ------------------------------------------------------------------- # 4. 
# -------------------------------------------------------------------
# 4. Robot commands
# -------------------------------------------------------------------
def send_pose_to_robot(mov: Movement, msg: str = "Move sent"):
    if not (state.robot_ws and state.robot_loop):
        return state.get_pose_text(), "⚠️ Robot not connected"

    pose = create_head_pose(
        x=mov.x,
        y=mov.y,
        z=mov.z,
        roll=mov.roll,
        pitch=mov.pitch,
        yaw=mov.yaw,
        degrees=True,
        mm=True,
    )
    payload = {
        "type": "movement",
        "movement": {
            "head": pose.tolist(),
            "body_yaw": mov.body_yaw,
            "duration": mov.duration,
        },
    }
    if mov.left_antenna is not None and mov.right_antenna is not None:
        payload["movement"]["antennas"] = [
            np.deg2rad(mov.right_antenna),
            np.deg2rad(mov.left_antenna),
        ]

    asyncio.run_coroutine_threadsafe(
        state.robot_ws.send_json(payload),
        state.robot_loop,
    )
    return state.get_pose_text(), f"✅ {msg}"


# -------------------------------------------------------------------
# 5. Video streaming helpers
# -------------------------------------------------------------------
def generate_mjpeg_stream():
    last_timestamp = 0.0
    while True:
        with state.frame_lock:
            current_bytes = state.latest_frame_bytes
            current_timestamp = state.latest_frame_ts

        if current_timestamp > last_timestamp and current_bytes is not None:
            last_timestamp = current_timestamp
            yield (
                b"--frame\r\n"
                b"Content-Type: image/jpeg\r\n\r\n" + current_bytes + b"\r\n"
            )
        else:
            time.sleep(FRAME_SLEEP_S)
            continue
        time.sleep(FRAME_SLEEP_S)


def webrtc_video_generator():
    """Generator for FastRTC WebRTC (mode='receive', modality='video')."""
    last_ts = 0.0
    frame = state.black_frame.copy()
    while True:
        with state.frame_lock:
            ts = state.latest_frame_ts
            frame_bytes = state.latest_frame_bytes

        if ts > last_ts and frame_bytes:
            last_ts = ts
            np_bytes = np.frombuffer(frame_bytes, dtype=np.uint8)
            decoded = cv2.imdecode(np_bytes, cv2.IMREAD_COLOR)
            if decoded is not None:
                frame = decoded
            else:
                frame = state.black_frame.copy()
        yield frame
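# Illustrative client sketch (hypothetical helper, never called by the
# server): viewing the MJPEG endpoint with OpenCV. The URL assumes uvicorn
# serves on localhost:8000; adjust host/port to match the actual launch.
def _example_view_mjpeg(url: str = "http://localhost:8000/video_feed") -> None:
    cap = cv2.VideoCapture(url)  # OpenCV's FFMPEG backend can read MJPEG-over-HTTP
    try:
        while True:
            ok, frame = cap.read()
            if not ok:
                break
            cv2.imshow("Reachy Mini", frame)
            if cv2.waitKey(1) & 0xFF == ord("q"):
                break
    finally:
        cap.release()
        cv2.destroyAllWindows()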
# -------------------------------------------------------------------
# 6. FastAPI endpoints
# -------------------------------------------------------------------
app = FastAPI()


@app.websocket("/robot")
async def robot_endpoint(ws: WebSocket):
    """Endpoint for the Robot to connect to (control channel)."""
    await ws.accept()
    state.set_robot_connection(ws, asyncio.get_running_loop())
    print("[System] Robot Connected")
    try:
        while True:
            msg = await ws.receive()
            if msg.get("type") == "websocket.disconnect":
                break
    except Exception:  # includes WebSocketDisconnect
        print("[System] Robot Disconnected")
    finally:
        state.clear_robot_connection()


@app.get("/video_feed")
def video_feed():
    return StreamingResponse(
        generate_mjpeg_stream(),
        media_type="multipart/x-mixed-replace; boundary=frame",
    )


@app.websocket("/video_stream")
async def stream_endpoint(ws: WebSocket):
    """Endpoint for the Robot/Sim to send video frames."""
    await ws.accept()
    try:
        while True:
            msg = await ws.receive()
            data = msg.get("bytes")
            if data:
                state.update_frame(data)
    except asyncio.CancelledError:
        print("[Video] stream_endpoint cancelled")
    except Exception:
        print("[Video] stream_endpoint closed")
    finally:
        print("[Video] stream_endpoint closed")


@app.websocket("/audio_stream")
async def audio_endpoint(ws: WebSocket):
    """Full-duplex audio channel between the Robot/Sim and the server."""
    await ws.accept()
    print("[Audio] Stream Connected")

    async def robot_to_server():
        try:
            while True:
                data = await ws.receive()
                t = data.get("type")
                if t == "websocket.disconnect":
                    print("[Audio] Disconnected (recv)")
                    break
                if t == "websocket.receive":
                    if data.get("bytes"):
                        state.push_audio_from_robot(data["bytes"])
                    elif data.get("text") == "ping":
                        print("[Audio] Received ping")
        except asyncio.CancelledError:
            print("[Audio] robot_to_server cancelled")
        except Exception as e:
            print(f"[Audio] robot_to_server error: {e}")

    async def server_to_robot():
        loop = asyncio.get_running_loop()
        try:
            while True:
                chunk: Optional[bytes] = await loop.run_in_executor(
                    None, state.get_audio_to_robot_blocking
                )
                if chunk is not None:
                    await ws.send_bytes(chunk)
        except asyncio.CancelledError:
            print("[Audio] server_to_robot cancelled")
        except Exception as e:
            print(f"[Audio] server_to_robot error: {e}")

    try:
        await asyncio.gather(robot_to_server(), server_to_robot())
    except asyncio.CancelledError:
        print("[Audio] audio_endpoint cancelled")
    finally:
        print("[Audio] Stream Closed")
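# Robot-side wire sketch (illustrative; assumes the `websockets` client
# package, which this server does not itself depend on). Both directions of
# /audio_stream carry raw int16 mono PCM at AUDIO_SAMPLE_RATE:
#
#   async with websockets.connect(f"ws://{server}:{port}/audio_stream") as ws:
#       await ws.send(mic_chunk_int16.tobytes())  # robot mic -> operator
#       speaker_bytes = await ws.recv()           # operator -> robot speaker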
# -------------------------------------------------------------------
# 7. FastRTC audio handler
# -------------------------------------------------------------------
class RobotAudioHandler(StreamHandler):
    """
    FastRTC handler that connects browser WebRTC audio to the robot.

    - receive(): browser mic -> state.audio_to_robot_queue -> /audio_stream -> robot
    - emit():    state.audio_queue (robot) -> browser playback
    """

    def __init__(self) -> None:
        super().__init__(
            input_sample_rate=AUDIO_SAMPLE_RATE,
            output_sample_rate=AUDIO_SAMPLE_RATE,
        )

    def receive(self, frame: Tuple[int, np.ndarray]) -> None:
        if frame is None:
            return
        sample_rate, array = frame
        if array is None:
            return

        arr = np.asarray(array)
        # Ensure mono: keep the first channel of multi-channel input
        if arr.ndim > 1:
            arr = arr[0]
        # Convert to int16 PCM
        if arr.dtype != np.int16:
            if np.issubdtype(arr.dtype, np.floating):
                arr = np.clip(arr, -1.0, 1.0)
                arr = (arr * 32767.0).astype(np.int16)
            else:
                arr = arr.astype(np.int16)
        state.push_audio_to_robot(arr.tobytes())

    def emit(self):
        try:
            sample_rate, frame_bytes = state.audio_queue.get(timeout=0.5)
            audio = np.frombuffer(frame_bytes, dtype=np.int16).reshape(1, -1)
            return sample_rate, audio
        except queue.Empty:
            return None

    def copy(self) -> "RobotAudioHandler":
        return RobotAudioHandler()

    def shutdown(self) -> None:
        pass

    def start_up(self) -> None:
        pass


# -------------------------------------------------------------------
# 8. Movement UI helpers
# -------------------------------------------------------------------
def get_pose_string():
    """Return the pose in a format the frontend JS can parse: pitch:X,yaw:Y,roll:Z,body:B."""
    with state.pose_lock:
        p = state.current_pose
    return f"pitch:{p.pitch:.1f},yaw:{p.yaw:.1f},roll:{p.roll:.1f},body:{p.body_yaw:.1f}"


def nudge_pose(dpitch=0, dyaw=0, droll=0, dbody_yaw=0, label="Move"):
    """Apply a relative nudge to the pose, send it to the robot, and return the pose string."""
    mov = state.update_pose(
        dpitch=dpitch,
        dyaw=dyaw,
        droll=droll,
        dbody_yaw=dbody_yaw,
    )
    send_pose_to_robot(mov, label)
    return get_pose_string()


def center_pose():
    """Reset the pose to neutral, send it to the robot, and return the pose string."""
    mov = state.reset_pose()
    send_pose_to_robot(mov, "Reset pose")
    return get_pose_string()
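# Example pose string as consumed by the on-page JS gauges/keyboard overlay
# (format produced by get_pose_string() above):
#
#   "pitch:5.0,yaw:-10.0,roll:0.0,body:0.3"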
# -------------------------------------------------------------------
# 9. Gradio UI
# -------------------------------------------------------------------
CUSTOM_CSS = """
/* Dark theme overrides */
.gradio-container {
    background: linear-gradient(135deg, #0a0a0f 0%, #121218 100%) !important;
    min-height: 100vh;
}
.dark {
    --background-fill-primary: #12121a !important;
    --background-fill-secondary: #1a1a24 !important;
    --border-color-primary: #2a2a3a !important;
    --text-color-subdued: #888 !important;
}

/* Header styling */
#header-row {
    background: transparent !important;
    border: none !important;
    margin-bottom: 1rem;
    display: flex !important;
    justify-content: space-between !important;
    align-items: center !important;
}
#app-title {
    font-size: 1.5rem !important;
    font-weight: 600 !important;
    background: linear-gradient(90deg, #fff, #888) !important;
    -webkit-background-clip: text !important;
    -webkit-text-fill-color: transparent !important;
    border: none !important;
    padding: 0 !important;
    margin: 0 !important;
}

/* Status badge */
#status-box {
    flex-shrink: 0 !important;
    width: auto !important;
    max-width: 200px !important;
    min-width: 160px !important;
    background: rgba(16, 185, 129, 0.15) !important;
    border: 1px solid rgba(16, 185, 129, 0.4) !important;
    border-radius: 9999px !important;
    padding: 0.4rem 1rem !important;
    font-size: 0.875rem !important;
}
#status-box textarea {
    background: transparent !important;
    border: none !important;
    color: #10b981 !important;
    text-align: center !important;
    font-weight: 500 !important;
    padding: 0 !important;
    min-height: unset !important;
    height: auto !important;
    line-height: 1.4 !important;
}

/* Video panel */
#video-column {
    background: #0f0f14 !important;
    border-radius: 1rem !important;
    border: 1px solid #2a2a3a !important;
    overflow: hidden !important;
    min-height: 500px !important;
}
#robot-video {
    border-radius: 0.75rem !important;
    overflow: hidden !important;
}

/* Control panel cards */
.control-card {
    background: rgba(26, 26, 36, 0.8) !important;
    border: 1px solid #2a2a3a !important;
    border-radius: 0.75rem !important;
    padding: 1rem !important;
}

/* Audio section */
#audio-section {
    background: rgba(26, 26, 36, 0.8) !important;
    border: 1px solid #2a2a3a !important;
    border-radius: 0.75rem !important;
}
#listen-btn {
    background: rgba(139, 92, 246, 0.2) !important;
    border: 1px solid rgba(139, 92, 246, 0.3) !important;
    color: #a78bfa !important;
    border-radius: 0.5rem !important;
    transition: all 0.2s !important;
}
#listen-btn:hover {
    background: rgba(139, 92, 246, 0.3) !important;
}

/* Hide the default keyboard buttons */
#keyboard-buttons {
    display: none !important;
}

/* Quick action buttons */
.quick-btn {
    background: #1f1f2e !important;
    border: 1px solid #2a2a3a !important;
    border-radius: 0.5rem !important;
    padding: 0.5rem !important;
    font-size: 0.75rem !important;
    transition: all 0.2s !important;
}
.quick-btn:hover {
    background: #2a2a3a !important;
}

/* Keyboard visualization container */
#keyboard-viz {
    position: fixed;
    bottom: 3.5rem;
    right: 2rem;
    z-index: 1000;
    pointer-events: none;
}

/* Gauges container */
#gauges-viz {
    position: fixed;
    bottom: 3.5rem;
    left: 2rem;
    z-index: 1000;
    pointer-events: none;
}

/* Hide Gradio footer or make room for it */
footer {
    opacity: 0.5;
}

/* Hidden pose state (keep in DOM for JS) */
#pose-state {
    position: absolute !important;
    opacity: 0 !important;
    pointer-events: none !important;
    height: 0 !important;
    overflow: hidden !important;
}
"""

KEYBOARD_VIZ_HTML = """