Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 12 additions & 8 deletions sgl-model-gateway/src/app_context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@ use tracing::{debug, info};

use crate::{
config::RouterConfig,
core::{ConnectionMode, JobQueue, LoadMonitor, WorkerRegistry, WorkerService},
core::{
ConnectionMode, JobQueue, LoadMonitor, WorkerRegistry, WorkerService, UNKNOWN_MODEL_ID,
},
data_connector::{
create_storage, ConversationItemStorage, ConversationStorage, ResponseStorage,
},
Expand Down Expand Up @@ -451,25 +453,27 @@ impl AppContextBuilder {

/// Create tokenizer registry and optionally load tokenizer
/// If a tokenizer is successfully loaded, it is registered with a key derived from
/// tokenizer_path or model_path (falling back to "unknown" if neither exists).
/// tokenizer_path or model_path (falling back to UNKNOWN_MODEL_ID if neither exists).
fn with_tokenizer_registry(mut self, config: &RouterConfig) -> Result<Self, String> {
// Create empty tokenizer registry
let registry = Arc::new(TokenizerRegistry::new());

// Try to load router-level tokenizer if path is provided
if let Some(tokenizer) = Self::maybe_tokenizer(config)? {
// Determine registration key: prefer tokenizer_path, then model_path, finally "unknown"
let tokenizer_key = config
// Determine registration key: prefer tokenizer_path, then model_path, finally UNKNOWN_MODEL_ID
let source = config
.tokenizer_path
.as_ref()
.or(config.model_path.as_ref())
.map(|s| s.as_str())
.unwrap_or("unknown");
.unwrap_or(UNKNOWN_MODEL_ID);

registry.register(tokenizer_key, tokenizer.clone());
let tokenizer_id = TokenizerRegistry::generate_id();
registry.register(&tokenizer_id, source, source, tokenizer.clone());
info!(
"Tokenizer loaded and registered with key '{}' (vocab_size: {})",
tokenizer_key,
"Tokenizer loaded and registered with name '{}' id={} (vocab_size: {})",
source,
tokenizer_id,
tokenizer.vocab_size()
);
}
Expand Down
62 changes: 59 additions & 3 deletions sgl-model-gateway/src/core/job_queue.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ use crate::{
app_context::AppContext,
config::{RouterConfig, RoutingMode},
core::steps::{
McpServerConfigRequest, WasmModuleConfigRequest, WasmModuleRemovalRequest,
WorkerRemovalRequest,
McpServerConfigRequest, TokenizerConfigRequest, TokenizerRemovalRequest,
WasmModuleConfigRequest, WasmModuleRemovalRequest, WorkerRemovalRequest,
},
mcp::McpConfig,
protocols::worker_spec::{JobStatus, WorkerConfigRequest, WorkerUpdateRequest},
Expand Down Expand Up @@ -53,6 +53,12 @@ pub enum Job {
RemoveWasmModule {
request: Box<WasmModuleRemovalRequest>,
},
AddTokenizer {
config: Box<TokenizerConfigRequest>,
},
RemoveTokenizer {
request: Box<TokenizerRemovalRequest>,
},
}

impl Job {
Expand All @@ -67,10 +73,12 @@ impl Job {
Job::RegisterMcpServer { .. } => "RegisterMcpServer",
Job::AddWasmModule { .. } => "AddWasmModule",
Job::RemoveWasmModule { .. } => "RemoveWasmModule",
Job::AddTokenizer { .. } => "AddTokenizer",
Job::RemoveTokenizer { .. } => "RemoveTokenizer",
}
}

/// Get worker URL, MCP server name, or WASM module identifier for logging and status tracking
/// Get worker URL, MCP server name, WASM module, or tokenizer identifier for logging and status tracking
pub fn worker_url(&self) -> &str {
match self {
Job::AddWorker { config } => &config.url,
Expand All @@ -81,6 +89,8 @@ impl Job {
Job::RegisterMcpServer { config } => &config.name,
Job::AddWasmModule { config } => &config.descriptor.name,
Job::RemoveWasmModule { request } => &request.uuid_string,
Job::AddTokenizer { config } => &config.id,
Job::RemoveTokenizer { request } => &request.id,
}
}
}
Expand Down Expand Up @@ -658,6 +668,52 @@ impl JobQueue {
)
.await
}
Job::AddTokenizer { config } => {
let engine = context
.workflow_engine
.get()
.ok_or_else(|| "Workflow engine not initialized".to_string())?;

let mut workflow_context = WorkflowContext::new(WorkflowInstanceId::new());
let config_arc: Arc<TokenizerConfigRequest> = Arc::new(*config.clone());
workflow_context.set_arc("tokenizer_config", config_arc);
workflow_context.set_arc("app_context", Arc::clone(context));

let instance_id = engine
.start_workflow(WorkflowId::new("tokenizer_registration"), workflow_context)
.await
.map_err(|e| {
format!("Failed to start tokenizer registration workflow: {:?}", e)
})?;

debug!(
"Started tokenizer registration workflow for '{}' id={} (instance: {})",
config.name, config.id, instance_id
);

// Allow up to 10 minutes for HuggingFace downloads
let timeout_duration = Duration::from_secs(600);

Self::wait_for_workflow_completion(
engine,
instance_id,
&config.id,
timeout_duration,
)
.await
}
Job::RemoveTokenizer { request } => {
// Tokenizer removal is synchronous and fast
if let Some(entry) = context.tokenizer_registry.remove_by_id(&request.id) {
info!(
"Successfully removed tokenizer '{}' (id: {})",
entry.name, entry.id
);
Ok(format!("Tokenizer '{}' removed successfully", entry.name))
} else {
Err(format!("Tokenizer with id '{}' not found", request.id))
}
}
}
}

Expand Down
7 changes: 2 additions & 5 deletions sgl-model-gateway/src/core/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,8 @@
//! - Workflow steps for multi-step operations
//! - Common utilities

/// Default model identifier used when no model is specified.
///
/// This constant should be used instead of hardcoded "unknown" strings
/// throughout the codebase for consistency.
pub const UNKNOWN_MODEL_ID: &str = "unknown";
// Re-export UNKNOWN_MODEL_ID from protocols for use throughout core
pub use crate::protocols::UNKNOWN_MODEL_ID;

pub mod circuit_breaker;
pub mod error;
Expand Down
7 changes: 6 additions & 1 deletion sgl-model-gateway/src/core/steps/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@
//! - Worker management (registration, removal, updates)
//! - MCP server registration
//! - WASM module registration and removal
//! - Future: Tokenizer fetching, LoRA updates, etc.
//! - Tokenizer registration

pub mod mcp_registration;
pub mod tokenizer_registration;
pub mod wasm_module_registration;
pub mod wasm_module_removal;
pub mod worker;
Expand Down Expand Up @@ -66,6 +67,10 @@ pub use mcp_registration::{
create_mcp_registration_workflow, ConnectMcpServerStep, DiscoverMcpInventoryStep,
McpServerConfigRequest, RegisterMcpServerStep, ValidateRegistrationStep,
};
pub use tokenizer_registration::{
create_tokenizer_registration_workflow, LoadTokenizerStep, TokenizerConfigRequest,
TokenizerRemovalRequest, ValidateTokenizerConfigStep,
};
pub use wasm_module_registration::{
create_wasm_module_registration_workflow, CalculateHashStep, CheckDuplicateStep,
LoadWasmBytesStep, RegisterModuleStep, ValidateDescriptorStep, ValidateWasmComponentStep,
Expand Down
Loading
Loading