Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ This project _loosely_ adheres to [Semantic Versioning](https://semver.org/spec/
- Added ability to handle multiple agents in launcher. [PR#459](https://github.com/coasys/ad4m/pull/459)
- Added a way to show & add new `AgentInfo` in launcher. [PR#463](https://github.com/coasys/ad4m/pull/463)
- `ad4m-executor` binary prints capability request challenge to stdout to enable app hand-shake [PR#471](https://github.com/coasys/ad4m/pull/471)
- Add ability to select Whisper model size [PR#564](https://github.com/coasys/ad4m/pull/564)

### Changed
- Much improved ADAM Launcher setup flow [PR#440](https://github.com/coasys/ad4m/pull/440) and [PR#444](https://github.com/coasys/ad4m/pull/444):
Expand Down
82 changes: 72 additions & 10 deletions rust-executor/src/ai_service/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ impl AIService {
match model.model_type {
ModelType::Llm => self.spawn_llm_model(model, None).await?,
ModelType::Embedding => self.spawn_embedding_model(model).await,
ModelType::Transcription => Self::load_transcriber_model(&model).await,
ModelType::Transcription => Self::load_transcriber_model(model.id.clone()).await,
};
Ok(())
}
Expand Down Expand Up @@ -868,7 +868,66 @@ impl AIService {
// Whisper / Transcription
// -------------------------------------

pub async fn open_transcription_stream(&self, _model_id: String) -> Result<String> {
/// Map a whisper model identifier string (e.g. "whisper_small") to the
/// corresponding `WhisperSource` variant.
///
/// Returns an error when the identifier does not name a known whisper model.
fn whisper_string_to_model(whisper_string: String) -> Result<WhisperSource> {
    let source = match whisper_string.as_str() {
        "whisper_tiny" => WhisperSource::Tiny,
        "whisper_tiny_quantized" => WhisperSource::QuantizedTiny,
        "whisper_tiny_en" => WhisperSource::TinyEn,
        "whisper_tiny_en_quantized" => WhisperSource::QuantizedTinyEn,
        "whisper_base" => WhisperSource::Base,
        "whisper_base_en" => WhisperSource::BaseEn,
        "whisper_small" => WhisperSource::Small,
        "whisper_small_en" => WhisperSource::SmallEn,
        "whisper_medium" => WhisperSource::Medium,
        "whisper_medium_en" => WhisperSource::MediumEn,
        "whisper_medium_en_quantized_distil" => WhisperSource::QuantizedDistilMediumEn,
        "whisper_large" => WhisperSource::Large,
        "whisper_large_v2" => WhisperSource::LargeV2,
        "whisper_distil_medium_en" => WhisperSource::DistilMediumEn,
        "whisper_distil_large_v2" => WhisperSource::DistilLargeV2,
        "whisper_distil_large_v3" => WhisperSource::DistilLargeV3,
        "whisper_distil_large_v3_quantized" => WhisperSource::QuantizedDistilLargeV3,
        "whisper_large_v3_turbo_quantized" => WhisperSource::QuantizedLargeV3Turbo,
        // Unknown identifier: surface it in the error so the caller can see
        // exactly what string failed to resolve.
        _ => return Err(anyhow!("Unknown whisper model: {}", whisper_string)),
    };
    Ok(source)
}

/// Resolve a `WhisperSource` for `model_id`.
///
/// Resolution order:
/// 1. Treat `model_id` itself as a whisper size string (e.g. "whisper_small").
/// 2. Look the model up in the DB by ID and use its local file name
///    (erroring if the DB entry is not a transcription model).
/// 3. Fall back to the first transcription model in the DB that has a
///    local config.
/// 4. Default to `WhisperSource::Tiny`.
fn get_whisper_model_size(model_id: String) -> Result<WhisperSource> {
    // Try to treat string as model size string first
    if let Ok(model) = Self::whisper_string_to_model(model_id.clone()) {
        return Ok(model);
    }

    // Try to get model from DB by ID
    if let Ok(Some(model)) = Ad4mDb::with_global_instance(|db| db.get_model(model_id.clone())) {
        if model.model_type != ModelType::Transcription {
            return Err(anyhow!("Model '{}' is not a transcription model", model_id));
        }
        // Use filename from local model config
        if let Some(local) = model.local {
            return Self::whisper_string_to_model(local.file_name);
        }
    }

    // If nothing above works, use the first transcription model in the DB
    // that actually has a local config. (Previously the scan stopped at the
    // first transcription model even when it had no local config, silently
    // falling through to the Tiny default.)
    if let Ok(models) = Ad4mDb::with_global_instance(|db| db.get_models()) {
        if let Some(local) = models
            .into_iter()
            .filter(|m| m.model_type == ModelType::Transcription)
            .find_map(|m| m.local)
        {
            return Self::whisper_string_to_model(local.file_name);
        }
    }

    // Default to tiny if nothing found
    Ok(WhisperSource::Tiny)
}

pub async fn open_transcription_stream(&self, model_id: String) -> Result<String> {
let model_size = Self::get_whisper_model_size(model_id)?;
let stream_id = uuid::Uuid::new_v4().to_string();
let stream_id_clone = stream_id.clone();
let (samples_tx, samples_rx) = futures_channel::mpsc::unbounded::<Vec<f32>>();
Expand All @@ -881,7 +940,7 @@ impl AIService {

rt.block_on(async {
let maybe_model = WhisperBuilder::default()
.with_source(WHISPER_MODEL)
.with_source(model_size)
.with_device(Self::new_candle_device())
.build()
.await;
Expand Down Expand Up @@ -974,22 +1033,25 @@ impl AIService {
}
}

async fn load_transcriber_model(model: &crate::types::Model) {
let id = &model.id;
publish_model_status(id.clone(), 0.0, "Loading", false, false).await;
async fn load_transcriber_model(model_id: String) {
publish_model_status(model_id.clone(), 0.0, "Loading", false, false).await;

let model_size = Self::get_whisper_model_size(model_id.clone())
.ok()
.unwrap_or(WHISPER_MODEL);

let _ = WhisperBuilder::default()
.with_source(WHISPER_MODEL)
.with_source(model_size)
.with_device(Self::new_candle_device())
.build_with_loading_handler({
let name = id.clone();
let name = model_id.clone();
move |progress| {
tokio::spawn(handle_progress(name.clone(), progress));
}
})
.await;

publish_model_status(id.clone(), 100.0, "Loaded", true, false).await;
publish_model_status(model_id.clone(), 100.0, "Loaded", true, false).await;
}

pub async fn update_model(&self, model_id: String, model_config: ModelInput) -> Result<()> {
Expand Down Expand Up @@ -1060,7 +1122,7 @@ impl AIService {
// TODO: Handle embedding model updates
}
ModelType::Transcription => {
// TODO: Handle transcription model updates
Self::load_transcriber_model(updated_model.id.clone()).await;
}
}

Expand Down
162 changes: 100 additions & 62 deletions ui/src/components/Login.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -155,15 +155,13 @@ const Login = () => {
}

async function saveModels() {
let whisperModel = "whisper_small";
// add llm model
if (aiMode !== "None") {
const llm = { name: "LLM Model 1", modelType: "LLM" } as ModelInput;
if (aiMode === "Local") {
llm.local = {
fileName: "Qwen2.5.1-Coder-7B-Instruct",
tokenizerSource: "",
modelParameters: "",
};
llm.local = { fileName: "Qwen2.5.1-Coder-7B-Instruct" };
whisperModel = "whisper_large_v3_turbo_quantized";
} else {
llm.api = {
baseUrl: apiUrl,
Expand All @@ -179,23 +177,16 @@ const Login = () => {
// add embedding model
client!.ai.addModel({
name: "bert",
local: {
fileName: "bert",
tokenizerSource: "",
modelParameters: "",
},
local: { fileName: "bert" },
modelType: "EMBEDDING",
});
// add transcription model
// add medium whisper model
client!.ai.addModel({
name: "Transcription Model 1",
local: {
fileName: "whisper",
tokenizerSource: "",
modelParameters: "",
},
name: "Whisper",
local: { fileName: whisperModel },
modelType: "TRANSCRIPTION",
});

setCurrentIndex(6);
}

Expand Down Expand Up @@ -493,39 +484,6 @@ const Login = () => {
Is your computer capable of running Large Language Models
locally?
</j-text>
<j-text>
Regardless of your choice here, we will always download and use
small AI models (such as{" "}
<a
onClick={() =>
open("https://huggingface.co/openai/whisper-small")
}
style={{ cursor: "pointer" }}
>
Whisper small
</a>{" "}
and an{" "}
<a
onClick={() =>
open(
"https://huggingface.co/Snowflake/snowflake-arctic-embed-xs"
)
}
style={{ cursor: "pointer" }}
>
Embedding model
</a>
) to handle basic tasks on all devices.
<br></br>
<br></br>
When it comes to LLMs, it depends on you having either an Apple
Silicon mac (M1 or better) or an nVidia GPU (with enough vRAM).
<br></br>
<br></br>
Alternatively, you can configure ADAM to out-source LLM tasks to a
remote API. If you're unsure, you can select "None" now and add,
remove or change model settings later-on in the <b>AI tab</b>.
</j-text>
</j-flex>

<j-flex gap="400" style={{ padding: "0 10px" }}>
Expand Down Expand Up @@ -594,18 +552,43 @@ const Login = () => {
style={{ marginTop: 30, maxWidth: 350 }}
>
<j-text>
This will download{" "}
<a
onClick={() =>
open(
"https://huggingface.co/bartowski/Qwen2.5.1-Coder-7B-Instruct-GGUF"
)
}
style={{ cursor: "pointer" }}
>
Qwen2.5 Coder 7B Instruct
</a>
{" "}(4.68GB)
This will download
<p>
<a
onClick={() =>
open(
"https://huggingface.co/bartowski/Qwen2.5.1-Coder-7B-Instruct-GGUF"
)
}
style={{ cursor: "pointer" }}
>
Qwen2.5 Coder 7B Instruct (4.68GB)
</a>
</p>
and
<p>
<a
onClick={() =>
open(
"https://huggingface.co/openai/whisper-large-v3-turbo"
)
}
style={{ cursor: "pointer" }}
>Whisper large v3 turbo (809MB)</a>
</p>
and
<p>
<a
onClick={() =>
open(
"https://huggingface.co/Snowflake/snowflake-arctic-embed-xs"
)
}
style={{ cursor: "pointer" }}
>
Bert Embedding model (90MB)
</a>
</p>
</j-text>
</j-flex>
)}
Expand Down Expand Up @@ -690,6 +673,33 @@ const Login = () => {
)}
</j-flex>

<j-text>
This will still download
<p>
<a
onClick={() =>
open(
"https://huggingface.co/openai/whisper-small"
)
}
style={{ cursor: "pointer" }}
>Whisper small (244MB)</a>
</p>
and
<p>
<a
onClick={() =>
open(
"https://huggingface.co/Snowflake/snowflake-arctic-embed-xs"
)
}
style={{ cursor: "pointer" }}
>
Bert Embedding model (90MB)
</a>
</p>
</j-text>

{apiValid && (
<j-flex direction="column" a="center" gap="400">
<j-flex a="center" gap="400">
Expand Down Expand Up @@ -730,6 +740,7 @@ const Login = () => {
</j-text>
)}
</j-flex>

)}

{(!apiModelValid || !apiValid) && (
Expand Down Expand Up @@ -768,6 +779,33 @@ const Login = () => {
Selecting <b>None</b> here and not having any LLM configured
might result in new Synergy features not working in Flux...
</j-text>

<j-text>
This will still download
<p>
<a
onClick={() =>
open(
"https://huggingface.co/openai/whisper-small"
)
}
style={{ cursor: "pointer" }}
>Whisper small (244MB)</a>
</p>
and
<p>
<a
onClick={() =>
open(
"https://huggingface.co/Snowflake/snowflake-arctic-embed-xs"
)
}
style={{ cursor: "pointer" }}
>
Bert Embedding model (90MB)
</a>
</p>
</j-text>
</j-flex>
)}

Expand Down
5 changes: 5 additions & 0 deletions ui/src/components/ModelCard.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,11 @@ export default function ModelCard(props: {
</j-button>
</j-flex>
)}
{modelType == "TRANSCRIPTION" && (
<j-button variant="subtle" size="sm" onClick={editModel}>
<j-icon name="pencil-square" size="sm" />
</j-button>
)}
</j-flex>
</j-box>
<j-flex direction="column" gap="400">
Expand Down
Loading