TEN-framework · plutoless · Nov 24, 2025 · Nov 21, 2025 · Nov 21, 2025 · Nov 21, 2025
@@ -0,0 +1,44 @@
+# ElevenLabs ASR Python Extension
+
+An automatic speech recognition (ASR) extension for Python, built on the ElevenLabs real-time speech-to-text API.
+
+## Configuration Parameters
+
+### Required Parameters
+
+| Parameter | Type | Description |
+|-----------|------|-------------|
+| `api_key` | string | ElevenLabs API key, **must be provided** |
+
+### Optional Parameters
+
+| Parameter | Type | Default Value | Description |
+|-----------|------|---------------|-------------|
+| `ws_url` | string | `wss://api.elevenlabs.io/v1/speech-to-text/realtime` | WebSocket endpoint URL |
+| `sample_rate` | int | `16000` | Audio sample rate in Hz |
+| `audio_format` | string | `pcm_16000` | Audio format |
+| `model_id` | string | `scribe_v2_realtime` | ElevenLabs model ID |
+| `language_code` | string | `en` | Language code |
+| `include_timestamps` | bool | `true` | Whether to include timestamps |
+| `commit_strategy` | string | `manual` | Commit strategy |
+| `enable_logging` | bool | `true` | Whether to enable logging |
+
+## Configuration Examples
+
+### Complete Configuration
+
+```json
+{
+    "params": {
+        "api_key": "your_elevenlabs_api_key_here",
+        "ws_url": "wss://api.elevenlabs.io/v1/speech-to-text/realtime",
+        "sample_rate": 16000,
+        "audio_format": "pcm_16000",
+        "model_id": "scribe_v2_realtime",
+        "language_code": "en",
+        "include_timestamps": true,
+        "commit_strategy": "manual",
+        "enable_logging": true
+    }
+}
+```
@@ -0,0 +1 @@
+from . import addon
@@ -0,0 +1,13 @@
+from ten_runtime import (
+    Addon,
+    register_addon_as_extension,
+    TenEnv,
+)
+from .extension import ElevenLabsASRExtension
+
+
+@register_addon_as_extension("elevenlabs_asr_python")
+class ElevenLabsASRExtensionAddon(Addon):
+    def on_create_instance(self, ten: TenEnv, addon_name: str, context) -> None:
+        ten.log_info("on_create_instance")
+        ten.on_create_instance_done(ElevenLabsASRExtension(addon_name), context)
@@ -0,0 +1,98 @@
+import asyncio
+from typing import Optional
+
+from ten_runtime import AsyncTenEnv
+
+
+class AudioBufferManager:
+    """
+    A minimal async audio buffer providing a producer-consumer queue:
+    - Producer appends bytes via async `push_audio`
+    - Consumer reads fixed-size bytes via async `pull_chunk` (size = threshold)
+
+    Close behavior:
+    - After `close()`, a waiting `pull_chunk` will return the remaining bytes if
+      they are less than the threshold; if no bytes remain, it returns b"" (EOF).
+    """
+
+    def __init__(
+        self, ten_env: Optional[AsyncTenEnv] = None, threshold: int = 1600
+    ):
+        if not isinstance(threshold, int) or threshold <= 0:
+            raise ValueError("threshold must be a positive integer")
+
+        self._buffer = bytearray()
+        self._threshold = threshold
+        self._ten_env = ten_env
+
+        # Concurrency control
+        self._cond = asyncio.Condition()
+        self._closed = False
+
+        if self._ten_env:
+            self._ten_env.log_debug(
+                f"AudioBufferManager initialized. threshold={self._threshold}"
+            )
+
+    # -------------------- Producer API --------------------
+    async def push_audio(self, data: bytes) -> None:
+        """Append audio bytes into the buffer asynchronously."""
+        if not isinstance(data, (bytes, bytearray)):
+            raise TypeError("data must be bytes or bytearray")
+        async with self._cond:
+            self._buffer.extend(data)
+            if not self._closed:
+                self._cond.notify_all()
+
+    # -------------------- Consumer API --------------------
+    async def pull_chunk(self) -> bytes:
+        """
+        Retrieve one chunk asynchronously:
+        - If buffer size >= threshold, return exactly `threshold` bytes.
+        - If closed and remaining bytes < threshold, return the remaining bytes
+          (may be empty to indicate EOF).
+        """
+        async with self._cond:
+            await self._cond.wait_for(
+                lambda: len(self._buffer) >= self._threshold or self._closed
+            )
+
+            if self._closed:
+                if self._buffer:
+                    remaining = bytes(self._buffer)
+                    self._buffer.clear()
+                    if self._ten_env:
+                        self._ten_env.log_debug(
+                            f"pull_chunk: return tail {len(remaining)} bytes on close"
+                        )
+                    return remaining
+                if self._ten_env:
+                    self._ten_env.log_debug("pull_chunk: EOF (empty on close)")
+                return b""
+
+            if len(self._buffer) >= self._threshold:
+                chunk = bytes(self._buffer[: self._threshold])
+                del self._buffer[: self._threshold]
+                return chunk
+
+            return b""
+
+    # -------------------- Utility API --------------------
+
+    def close(self) -> None:
+        """Mark as closed and wake up any waiting consumers."""
+
+        # Non-async method for convenience in any context
+        async def _close():
+            async with self._cond:
+                self._closed = True
+                if self._ten_env:
+                    self._ten_env.log_debug("AudioBufferManager closed")
+                self._cond.notify_all()
+
+        # If inside an event loop, schedule it; otherwise run a new loop to avoid blocking
+        try:
+            loop = asyncio.get_running_loop()
+            loop.create_task(_close())
+        except RuntimeError:
+            asyncio.run(_close())
@@ -0,0 +1,48 @@
+from typing import Any, Dict
+from pydantic import BaseModel, Field
+from ten_ai_base.utils import encrypt
+
+
+class ElevenLabsASRConfig(BaseModel):
+    """ElevenLabs ASR Configuration"""
+
+    # Debugging and dumping
+    dump: bool = False
+    dump_path: str = "/tmp"
+
+    # Additional parameters
+    params: Dict[str, Any] = Field(default_factory=dict)
+
+    def update(self, params: Dict[str, Any]) -> None:
+        """Update configuration with additional parameters."""
+        for key, value in params.items():
+            if hasattr(self, key):
+                setattr(self, key, value)
+
+    def to_json(self) -> str:
+        """Convert config to JSON string."""
+        config_dict = self.model_dump()
+        if config_dict["params"]:
+            for key, value in config_dict["params"].items():
+                if key == "api_key":
+                    config_dict["params"][key] = encrypt(value)
+        return str(config_dict)
+
+    @property
+    def normalized_language(self) -> str:
+        """Convert language code to normalized format for ElevenLabs"""
+        # ElevenLabs uses ISO 639-1 language codes
+        language_map = {
+            "zh": "zh-CN",
+            "en": "en-US",
+            "ja": "ja-JP",
+            "ko": "ko-KR",
+            "de": "de-DE",
+            "fr": "fr-FR",
+            "ru": "ru-RU",
+            "es": "es-ES",
+            "pt": "pt-PT",
+            "it": "it-IT",
+        }
+        params_dict = self.params if isinstance(self.params, dict) else {}
+        return language_map.get(params_dict.get("language_code", ""), "en-US")
@@ -0,0 +1,2 @@
+# NOTE(review): presumably the file name used when dumping inbound PCM audio — confirm against the extension.
+DUMP_FILE_NAME = "elevenlabs_asr_in.pcm"
+# NOTE(review): presumably the error code reported on a timeout — confirm where it is consumed.
+TIMEOUT_CODE = 10105
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		DUMP_FILE_NAME = "elevenlabs_asr_in.pcm"
		TIMEOUT_CODE = 10105