Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 2 additions & 93 deletions sgl-model-gateway/src/routers/grpc/pd_router.rs
Original file line number Diff line number Diff line change
@@ -1,27 +1,14 @@
use std::sync::Arc;

use async_trait::async_trait;
use axum::{
body::Body,
extract::Request,
http::{HeaderMap, StatusCode},
response::{IntoResponse, Response},
};
use axum::{http::HeaderMap, response::Response};
use tracing::debug;

use super::{context::SharedComponents, pipeline::RequestPipeline};
use crate::{
app_context::AppContext,
core::{ConnectionMode, WorkerRegistry, WorkerType},
protocols::{
chat::ChatCompletionRequest,
classify::ClassifyRequest,
completion::CompletionRequest,
embedding::EmbeddingRequest,
generate::GenerateRequest,
rerank::RerankRequest,
responses::{ResponsesGetParams, ResponsesRequest},
},
protocols::{chat::ChatCompletionRequest, generate::GenerateRequest},
routers::RouterTrait,
};

Expand Down Expand Up @@ -160,26 +147,6 @@ impl RouterTrait for GrpcPDRouter {
self
}

async fn health_generate(&self, _req: Request<Body>) -> Response {
(
StatusCode::NOT_IMPLEMENTED,
"Health generate not yet implemented for gRPC PD",
)
.into_response()
}

async fn get_server_info(&self, _req: Request<Body>) -> Response {
(StatusCode::NOT_IMPLEMENTED).into_response()
}

async fn get_models(&self, _req: Request<Body>) -> Response {
(StatusCode::NOT_IMPLEMENTED).into_response()
}

async fn get_model_info(&self, _req: Request<Body>) -> Response {
(StatusCode::NOT_IMPLEMENTED).into_response()
}

async fn route_generate(
&self,
headers: Option<&HeaderMap>,
Expand All @@ -198,64 +165,6 @@ impl RouterTrait for GrpcPDRouter {
self.route_chat_impl(headers, body, model_id).await
}

async fn route_completion(
&self,
_headers: Option<&HeaderMap>,
_body: &CompletionRequest,
_model_id: Option<&str>,
) -> Response {
(StatusCode::NOT_IMPLEMENTED).into_response()
}

async fn route_responses(
&self,
_headers: Option<&HeaderMap>,
_body: &ResponsesRequest,
_model_id: Option<&str>,
) -> Response {
(StatusCode::NOT_IMPLEMENTED).into_response()
}

async fn get_response(
&self,
_headers: Option<&HeaderMap>,
_response_id: &str,
_params: &ResponsesGetParams,
) -> Response {
(StatusCode::NOT_IMPLEMENTED).into_response()
}

async fn cancel_response(&self, _headers: Option<&HeaderMap>, _response_id: &str) -> Response {
(StatusCode::NOT_IMPLEMENTED).into_response()
}

async fn route_embeddings(
&self,
_headers: Option<&HeaderMap>,
_body: &EmbeddingRequest,
_model_id: Option<&str>,
) -> Response {
(StatusCode::NOT_IMPLEMENTED).into_response()
}

async fn route_classify(
&self,
_headers: Option<&HeaderMap>,
_body: &ClassifyRequest,
_model_id: Option<&str>,
) -> Response {
(StatusCode::NOT_IMPLEMENTED).into_response()
}

async fn route_rerank(
&self,
_headers: Option<&HeaderMap>,
_body: &RerankRequest,
_model_id: Option<&str>,
) -> Response {
(StatusCode::NOT_IMPLEMENTED).into_response()
}

fn router_type(&self) -> &'static str {
"grpc_pd"
}
Expand Down
64 changes: 1 addition & 63 deletions sgl-model-gateway/src/routers/grpc/router.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,7 @@ use std::sync::Arc;

use async_trait::async_trait;
use axum::{
body::Body,
extract::Request,
http::{HeaderMap, StatusCode},
http::HeaderMap,
response::{IntoResponse, Response},
};
use tracing::debug;
Expand All @@ -27,11 +25,7 @@ use crate::{
core::WorkerRegistry,
protocols::{
chat::ChatCompletionRequest,
classify::ClassifyRequest,
completion::CompletionRequest,
embedding::EmbeddingRequest,
generate::GenerateRequest,
rerank::RerankRequest,
responses::{ResponsesGetParams, ResponsesRequest},
},
routers::RouterTrait,
Expand Down Expand Up @@ -259,26 +253,6 @@ impl RouterTrait for GrpcRouter {
self
}

async fn health_generate(&self, _req: Request<Body>) -> Response {
(
StatusCode::NOT_IMPLEMENTED,
"Health generate not yet implemented for gRPC",
)
.into_response()
}

async fn get_server_info(&self, _req: Request<Body>) -> Response {
(StatusCode::NOT_IMPLEMENTED).into_response()
}

async fn get_models(&self, _req: Request<Body>) -> Response {
(StatusCode::NOT_IMPLEMENTED).into_response()
}

async fn get_model_info(&self, _req: Request<Body>) -> Response {
(StatusCode::NOT_IMPLEMENTED).into_response()
}

async fn route_generate(
&self,
headers: Option<&HeaderMap>,
Expand All @@ -297,15 +271,6 @@ impl RouterTrait for GrpcRouter {
self.route_chat_impl(headers, body, model_id).await
}

async fn route_completion(
&self,
_headers: Option<&HeaderMap>,
_body: &CompletionRequest,
_model_id: Option<&str>,
) -> Response {
(StatusCode::NOT_IMPLEMENTED).into_response()
}

async fn route_responses(
&self,
headers: Option<&HeaderMap>,
Expand All @@ -328,33 +293,6 @@ impl RouterTrait for GrpcRouter {
cancel_response_impl(&self.responses_context, response_id).await
}

async fn route_embeddings(
&self,
_headers: Option<&HeaderMap>,
_body: &EmbeddingRequest,
_model_id: Option<&str>,
) -> Response {
(StatusCode::NOT_IMPLEMENTED).into_response()
}

async fn route_classify(
&self,
_headers: Option<&HeaderMap>,
_body: &ClassifyRequest,
_model_id: Option<&str>,
) -> Response {
(StatusCode::NOT_IMPLEMENTED).into_response()
}

async fn route_rerank(
&self,
_headers: Option<&HeaderMap>,
_body: &RerankRequest,
_model_id: Option<&str>,
) -> Response {
(StatusCode::NOT_IMPLEMENTED).into_response()
}

fn router_type(&self) -> &'static str {
"grpc"
}
Expand Down
63 changes: 0 additions & 63 deletions sgl-model-gateway/src/routers/http/pd_router.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,10 @@ use crate::{
policies::{LoadBalancingPolicy, PolicyRegistry},
protocols::{
chat::{ChatCompletionRequest, ChatMessage, MessageContent},
classify::ClassifyRequest,
common::{InputIds, StringOrArray},
completion::CompletionRequest,
embedding::EmbeddingRequest,
generate::GenerateRequest,
rerank::RerankRequest,
responses::{ResponsesGetParams, ResponsesRequest},
},
routers::{header_utils, RouterTrait},
};
Expand Down Expand Up @@ -1190,66 +1187,6 @@ impl RouterTrait for PDRouter {
self.execute_dual_dispatch(headers, body, context).await
}

async fn route_responses(
&self,
_headers: Option<&HeaderMap>,
_body: &ResponsesRequest,
_model_id: Option<&str>,
) -> Response {
(
StatusCode::NOT_IMPLEMENTED,
"Responses endpoint not implemented for PD router",
)
.into_response()
}

async fn get_response(
&self,
_headers: Option<&HeaderMap>,
_response_id: &str,
_params: &ResponsesGetParams,
) -> Response {
(
StatusCode::NOT_IMPLEMENTED,
"Responses retrieve endpoint not implemented for PD router",
)
.into_response()
}

async fn cancel_response(&self, _headers: Option<&HeaderMap>, _response_id: &str) -> Response {
(
StatusCode::NOT_IMPLEMENTED,
"Responses cancel endpoint not implemented for PD router",
)
.into_response()
}

async fn route_classify(
&self,
_headers: Option<&HeaderMap>,
_body: &ClassifyRequest,
_model_id: Option<&str>,
) -> Response {
(
StatusCode::NOT_IMPLEMENTED,
"Classify endpoint not implemented for PD router",
)
.into_response()
}

async fn route_embeddings(
&self,
_headers: Option<&HeaderMap>,
_body: &EmbeddingRequest,
_model_id: Option<&str>,
) -> Response {
(
StatusCode::NOT_IMPLEMENTED,
"Embeddings endpoint not implemented for PD router",
)
.into_response()
}

async fn route_rerank(
&self,
headers: Option<&HeaderMap>,
Expand Down
Loading
Loading