Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# ElevenLabs ASR Python Extension

A speech recognition (ASR) extension built on the ElevenLabs real-time speech-to-text API.

## Configuration Parameters

### Required Parameters

| Parameter | Type | Description |
|-----------|------|-------------|
| `api_key` | string | ElevenLabs API key, **must be provided** |

### Optional Parameters

| Parameter | Type | Default Value | Description |
|-----------|------|---------------|-------------|
| `ws_url` | string | `wss://api.elevenlabs.io/v1/speech-to-text/realtime` | WebSocket endpoint URL |
| `sample_rate` | int | `16000` | Audio sample rate in Hz |
| `audio_format` | string | `pcm_16000` | Audio format |
| `model_id` | string | `scribe_v2_realtime` | ElevenLabs model ID |
| `language_code` | string | `en` | Language code |
| `include_timestamps` | bool | `true` | Whether to include timestamps |
| `commit_strategy` | string | `manual` | Commit strategy |
| `enable_logging` | bool | `true` | Whether to enable logging |

## Configuration Examples

### Complete Configuration

```json
{
  "params": {
    "api_key": "your_elevenlabs_api_key_here",
    "ws_url": "wss://api.elevenlabs.io/v1/speech-to-text/realtime",
    "sample_rate": 16000,
    "audio_format": "pcm_16000",
    "model_id": "scribe_v2_realtime",
    "language_code": "en",
    "include_timestamps": true,
    "commit_strategy": "manual",
    "enable_logging": true
  }
}
```
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from . import addon
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from ten_runtime import (
Addon,
register_addon_as_extension,
TenEnv,
)
from .extension import ElevenLabsASRExtension


@register_addon_as_extension("elevenlabs_asr_python")
class ElevenLabsASRExtensionAddon(Addon):
    """Addon registered as "elevenlabs_asr_python" that builds the ASR extension."""

    def on_create_instance(self, ten: TenEnv, addon_name: str, context) -> None:
        """Build a new ``ElevenLabsASRExtension`` and report it as created.

        Args:
            ten: Environment handle used for logging and for signalling that
                the instance has been created.
            addon_name: Name passed to the extension constructor.
            context: Opaque value forwarded unchanged to
                ``on_create_instance_done``.
        """
        ten.log_info("on_create_instance")
        extension = ElevenLabsASRExtension(addon_name)
        ten.on_create_instance_done(extension, context)
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
import asyncio
from typing import Optional

from ten_runtime import AsyncTenEnv


class AudioBufferManager:
"""
A minimal async audio buffer providing a producer-consumer queue:
- Producer appends bytes via async `push_audio`
- Consumer reads fixed-size bytes via async `pull_chunk` (size = threshold)

Close behavior:
- After `close()`, a waiting `pull_chunk` will return the remaining bytes if
they are less than the threshold; if no bytes remain, it returns b"" (EOF).
"""

def __init__(
self, ten_env: Optional[AsyncTenEnv] = None, threshold: int = 1600
):
if not isinstance(threshold, int) or threshold <= 0:
raise ValueError("threshold must be a positive integer")

self._buffer = bytearray()
self._threshold = threshold
self._ten_env = ten_env

# Concurrency control
self._cond = asyncio.Condition()
self._closed = False

if self._ten_env:
self._ten_env.log_debug(
f"AudioBufferManager initialized. threshold={self._threshold}"
)

# -------------------- Producer API --------------------
async def push_audio(self, data: bytes) -> None:
"""Append audio bytes into the buffer asynchronously."""
if not isinstance(data, (bytes, bytearray)):
raise TypeError("data must be bytes or bytearray")
async with self._cond:
self._buffer.extend(data)
if not self._closed:
self._cond.notify_all()

# -------------------- Consumer API --------------------
async def pull_chunk(self) -> bytes:
"""
Retrieve one chunk asynchronously:
- If buffer size >= threshold, return exactly `threshold` bytes.
- If closed and remaining bytes < threshold, return the remaining bytes
(may be empty to indicate EOF).
"""
async with self._cond:
await self._cond.wait_for(
lambda: len(self._buffer) >= self._threshold or self._closed
)

if self._closed:
if self._buffer:
remaining = bytes(self._buffer)
self._buffer.clear()
if self._ten_env:
self._ten_env.log_debug(
f"pull_chunk: return tail {len(remaining)} bytes on close"
)
return remaining
if self._ten_env:
self._ten_env.log_debug("pull_chunk: EOF (empty on close)")
return b""

if len(self._buffer) >= self._threshold:
chunk = bytes(self._buffer[: self._threshold])
del self._buffer[: self._threshold]
return chunk

return b""

# -------------------- Utility API --------------------

def close(self) -> None:
"""Mark as closed and wake up any waiting consumers."""

# Non-async method for convenience in any context
async def _close():
async with self._cond:
self._closed = True
if self._ten_env:
self._ten_env.log_debug("AudioBufferManager closed")
self._cond.notify_all()

# If inside an event loop, schedule it; otherwise run a new loop to avoid blocking
try:
loop = asyncio.get_running_loop()
loop.create_task(_close())
except RuntimeError:
asyncio.run(_close())
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
from typing import Any, Dict
from pydantic import BaseModel, Field
from ten_ai_base.utils import encrypt


class ElevenLabsASRConfig(BaseModel):
    """ElevenLabs ASR Configuration.

    Holds the dump settings plus a free-form ``params`` dictionary; the
    methods below read the ``api_key`` and ``language_code`` entries from it.
    """

    # Debugging and dumping
    dump: bool = False
    dump_path: str = "/tmp"

    # Additional parameters
    params: Dict[str, Any] = Field(default_factory=dict)

    def update(self, params: Dict[str, Any]) -> None:
        """Overwrite declared fields with the matching entries of ``params``.

        Only keys naming a declared model field (``dump``, ``dump_path``,
        ``params``) are applied; every other key is ignored. Checking the
        declared fields rather than ``hasattr`` keeps keys that collide with
        method or property names (e.g. ``"to_json"``) from reaching
        ``setattr`` and raising.
        """
        declared_fields = type(self).model_fields
        for key, value in params.items():
            if key in declared_fields:
                setattr(self, key, value)

    def to_json(self) -> str:
        """Serialize the config to a JSON string with ``api_key`` masked.

        The ``api_key`` entry of ``params`` (when present) is passed through
        ``encrypt`` on a copy, so the live configuration is never modified.
        """
        snapshot = self
        if isinstance(self.params, dict) and "api_key" in self.params:
            masked_params = dict(self.params)
            masked_params["api_key"] = encrypt(masked_params["api_key"])
            snapshot = self.model_copy(update={"params": masked_params})
        return snapshot.model_dump_json()

    @property
    def normalized_language(self) -> str:
        """Return the region-qualified tag for ``params["language_code"]``.

        Missing or unrecognized codes fall back to ``"en-US"``.
        """
        # Two-letter language code -> region-qualified language tag.
        language_map = {
            "zh": "zh-CN",
            "en": "en-US",
            "ja": "ja-JP",
            "ko": "ko-KR",
            "de": "de-DE",
            "fr": "fr-FR",
            "ru": "ru-RU",
            "es": "es-ES",
            "pt": "pt-PT",
            "it": "it-IT",
        }
        # `params` may have been replaced with a non-dict through `update`.
        params_dict = self.params if isinstance(self.params, dict) else {}
        return language_map.get(params_dict.get("language_code", ""), "en-US")
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# File name for a raw PCM dump of the ASR input audio.
# NOTE(review): presumably joined with the config's `dump_path` when dumping
# is enabled — confirm against the extension code.
DUMP_FILE_NAME = "elevenlabs_asr_in.pcm"
# Numeric code associated with timeouts.
# NOTE(review): the origin and exact meaning of 10105 are not visible here — confirm.
TIMEOUT_CODE = 10105
Loading
Loading