Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -396,6 +396,12 @@ RUN CMAKE_VERSION=3.31.1 \
&& cp -r "${CMAKE_INSTALLER}/share/"* /usr/local/share/ \
&& rm -rf "${CMAKE_INSTALLER}" "${CMAKE_INSTALLER}.tar.gz"

# Install S3 dependencies
RUN python3 -m pip install \
boto3 \
runai-model-streamer \
runai-model-streamer-s3

# Install just
RUN curl --proto '=https' --tlsv1.2 --retry 3 --retry-delay 2 -sSf https://just.systems/install.sh | \
sed "s|https://github.com|https://${GITHUB_ARTIFACTORY}|g" | \
Expand Down
9 changes: 6 additions & 3 deletions python/sglang/srt/configs/model_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ def __init__(
model_impl: Union[str, ModelImpl] = ModelImpl.AUTO,
sampling_defaults: str = "openai",
quantize_and_serve: bool = False,
device: Optional[str] = None,
Copy link
Collaborator

@b8zhong b8zhong Nov 3, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a way to avoid adding this to model config? @ganochenkodg

Edit: disregard this comment.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ganochenkodg, why would you need this device config?

Copy link
Author

@ganochenkodg ganochenkodg Nov 7, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@hnyls2002 In ModelConfig init function we run self._maybe_pull_model_tokenizer_from_remote() (line 116) that executes create_remote_connector, which requires 2 params - model_path and device (line 812). This device variable i can take in from_server_args method (line 240) and must store somewhere to pass in create_remote_connector.

I can remove storing device as a model parameter and pass it to pull_model_tokenizer function directly, like this:
- self.device = device
- self._maybe_pull_model_tokenizer_from_remote()
+ self._maybe_pull_model_tokenizer_from_remote(device)

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

please do it, the device parameter should not be stored in model config.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@hnyls2002 done, check it now please

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ganochenkodg It still seems. tobe in ModelConfig?

Image

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@b8zhong i left it in parameters, because i have no choice. we doesn't store store it in this.device now, only pass next. ModelConfig init function calls self._maybe_pull_model_tokenizer_from_remote (here) -> which calls client_remote_connector (here) whioch requires device as a parameter. so i added passing device parameter to init function in from_server_args (here), i can't find any other way to take "device" other than passing it here. without device create_remote_connector obviously crashes

) -> None:
# Parse args
self.model_path = model_path
Expand All @@ -124,7 +125,7 @@ def __init__(
self._validate_quantize_and_serve_config()

# Get hf config
self._maybe_pull_model_tokenizer_from_remote()
self._maybe_pull_model_tokenizer_from_remote(device)
self.model_override_args = json.loads(model_override_args)
kwargs = {}
if override_config_file and override_config_file.strip():
Expand All @@ -134,6 +135,7 @@ def __init__(
trust_remote_code=trust_remote_code,
revision=revision,
model_override_args=self.model_override_args,
device=device,
**kwargs,
)
self.hf_text_config = get_hf_text_config(self.hf_config)
Expand Down Expand Up @@ -257,6 +259,7 @@ def from_server_args(
model_impl=server_args.model_impl,
sampling_defaults=server_args.sampling_defaults,
quantize_and_serve=server_args.quantize_and_serve,
device=server_args.device,
override_config_file=server_args.decrypted_config_file,
**kwargs,
)
Expand Down Expand Up @@ -837,7 +840,7 @@ def get_default_sampling_params(self) -> dict[str, Any]:

return default_sampling_params

def _maybe_pull_model_tokenizer_from_remote(self) -> None:
def _maybe_pull_model_tokenizer_from_remote(self, device: str) -> None:
"""
Pull the model config files to a temporary
directory in case of remote.
Expand All @@ -854,7 +857,7 @@ def _maybe_pull_model_tokenizer_from_remote(self) -> None:
# BaseConnector implements __del__() to clean up the local dir.
# Since config files need to exist all the time, so we DO NOT use
# with statement to avoid closing the client.
client = create_remote_connector(self.model_path)
client = create_remote_connector(self.model_path, device)
if is_remote_url(self.model_path):
client.pull_files(allow_pattern=["*config.json"])
self.model_weights = self.model_path
Expand Down
12 changes: 12 additions & 0 deletions python/sglang/srt/connector/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import enum
import logging
import os

from sglang.srt.connector.base_connector import (
BaseConnector,
Expand All @@ -23,6 +24,7 @@ class ConnectorType(str, enum.Enum):


def create_remote_connector(url, device, **kwargs) -> BaseConnector:
url = verify_if_url_is_gcs_bucket(url)
connector_type = parse_connector_type(url)
if connector_type == "redis":
return RedisConnector(url)
Expand All @@ -34,6 +36,16 @@ def create_remote_connector(url, device, **kwargs) -> BaseConnector:
raise ValueError(f"Invalid connector type: {url}")


def verify_if_url_is_gcs_bucket(url):
if url.startswith("gs://"):
os.environ["RUNAI_STREAMER_S3_ENDPOINT"] = "https://storage.googleapis.com"
os.environ["AWS_ENDPOINT_URL"] = "https://storage.googleapis.com"
os.environ["RUNAI_STREAMER_S3_USE_VIRTUAL_ADDRESSING"] = "0"
os.environ["AWS_EC2_METADATA_DISABLED"] = "true"
url = url.replace("gs://", "s3://", 1)
return url


def get_connector_type(client: BaseConnector) -> ConnectorType:
if isinstance(client, BaseKVConnector):
return ConnectorType.KV
Expand Down
4 changes: 3 additions & 1 deletion python/sglang/srt/connector/s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,9 @@ def pull_files(
return

for file in files:
destination_file = os.path.join(self.local_dir, file.removeprefix(base_dir))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

critical

The original code file.removeprefix(base_dir) can lead to incorrect file paths if base_dir does not end with a /. This can cause files to be saved in the root directory instead of the intended subdirectory. The corrected code file.removeprefix(f"{base_dir}/") ensures that the prefix always includes the trailing slash, preventing this issue.

It's important to ensure that base_dir always ends with a / to avoid unexpected behavior. If base_dir is guaranteed to always end with a /, then the change is correct. However, if there's a possibility that base_dir might not end with a /, it would be safer to add a check to ensure that it does before calling removeprefix.

I'm marking this as critical because the bug causes all files to be saved in the root folder, which is a major issue.

Suggested change
destination_file = os.path.join(self.local_dir, file.removeprefix(base_dir))
destination_file = os.path.join(self.local_dir, file.removeprefix(f"{base_dir}/"))

destination_file = os.path.join(
self.local_dir, file.removeprefix(f"{base_dir}/")
)
local_dir = Path(destination_file).parent
os.makedirs(local_dir, exist_ok=True)
self.client.download_file(bucket_name, file, destination_file)
Expand Down
1 change: 1 addition & 0 deletions python/sglang/srt/managers/tokenizer_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,7 @@ def __init__(
tokenizer_mode=server_args.tokenizer_mode,
trust_remote_code=server_args.trust_remote_code,
revision=server_args.revision,
device=server_args.device,
)
self._initialize_multi_item_delimiter_text()

Expand Down
6 changes: 4 additions & 2 deletions python/sglang/srt/utils/hf_transformers_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,7 @@ def get_config(
trust_remote_code: bool,
revision: Optional[str] = None,
model_override_args: Optional[dict] = None,
device: Optional[str] = None,
**kwargs,
):
is_gguf = check_gguf_file(model)
Expand All @@ -249,7 +250,7 @@ def get_config(
# BaseConnector implements __del__() to clean up the local dir.
# Since config files need to exist all the time, so we DO NOT use
# with statement to avoid closing the client.
client = create_remote_connector(model)
client = create_remote_connector(model, device)
client.pull_files(ignore_pattern=["*.pt", "*.safetensors", "*.bin"])
model = client.get_local_dir()

Expand Down Expand Up @@ -411,6 +412,7 @@ def get_tokenizer(
tokenizer_mode: str = "auto",
trust_remote_code: bool = False,
tokenizer_revision: Optional[str] = None,
device: Optional[str] = None,
**kwargs,
) -> Union[PreTrainedTokenizer, PreTrainedTokenizerFast]:
"""Gets a tokenizer for the given model name via Huggingface."""
Expand All @@ -437,7 +439,7 @@ def get_tokenizer(
# BaseConnector implements __del__() to clean up the local dir.
# Since config files need to exist all the time, so we DO NOT use
# with statement to avoid closing the client.
client = create_remote_connector(tokenizer_name)
client = create_remote_connector(tokenizer_name, device)
client.pull_files(ignore_pattern=["*.pt", "*.safetensors", "*.bin"])
tokenizer_name = client.get_local_dir()

Expand Down
Loading