Merged

Changes from all 22 commits
85da556  Fix phi4-mm prompt issue in bench_serving (ZailiWang, Oct 15, 2025)
3a6d149  update (ZailiWang, Oct 15, 2025)
d773b51  Merge branch 'main' of https://github.com/ZailiWang/sglang into mmmu-… (ZailiWang, Oct 15, 2025)
08aadf4  [temp] repo/branch name change (ZailiWang, Oct 15, 2025)
c3010eb  Merge branch 'main' of https://github.com/ZailiWang/sglang into mmmu-… (ZailiWang, Oct 16, 2025)
ed6a60d  Merge branch 'main' of https://github.com/ZailiWang/sglang into mmmu-… (ZailiWang, Oct 17, 2025)
b95f18f  Merge branch 'sgl-project:main' into mmmu-load (ZailiWang, Oct 17, 2025)
30dbf9a  change back repo/branch name (ZailiWang, Oct 17, 2025)
6abc3bc  Merge branch 'mmmu-load' of https://github.com/ZailiWang/sglang into … (ZailiWang, Oct 17, 2025)
6851fe6  gemini suggestion (ZailiWang, Oct 17, 2025)
d49e917  remove duplicated bracket (ZailiWang, Oct 17, 2025)
d8d3f4a  Merge branch 'main' into mmmu-load (ZailiWang, Oct 18, 2025)
0df4fb2  Merge branch 'main' into mmmu-load (ZailiWang, Oct 20, 2025)
5da6c0a  Adaptation for wrap prompt or not (ZailiWang, Oct 20, 2025)
81a8037  lint (ZailiWang, Oct 20, 2025)
df0c570  lint(cont) (ZailiWang, Oct 20, 2025)
d74a86b  Merge branch 'main' into mmmu-load (ZailiWang, Oct 20, 2025)
b4ebeb0  Merge branch 'main' into mmmu-load (ZailiWang, Oct 21, 2025)
72a932c  Merge branch 'main' into mmmu-load (ZailiWang, Oct 21, 2025)
a02b5ae  Merge branch 'main' into mmmu-load (ZailiWang, Oct 22, 2025)
533c5c5  Merge branch 'main' into mmmu-load (ZailiWang, Oct 22, 2025)
7292378  Merge branch 'main' into mmmu-load (ZailiWang, Oct 23, 2025)
python/sglang/bench_serving.py: 35 changes (27 additions, 8 deletions)

--- a/python/sglang/bench_serving.py
+++ b/python/sglang/bench_serving.py
@@ -764,6 +764,7 @@ def get_dataset(args, tokenizer, model_id=None):
             image_content=args.image_content,
             image_format=args.image_format,
             image_resolution=args.image_resolution,
+            backend=args.backend,
         )
     elif args.dataset_name == "generated-shared-prefix":
         assert not tokenize_prompt
@@ -781,6 +782,7 @@ def get_dataset(args, tokenizer, model_id=None):
         input_requests = sample_mmmu_requests(
             num_requests=args.num_prompts,
             processor=processor,
+            backend=args.backend,
             fixed_output_len=args.random_output_len,
             random_sample=True,
         )
@@ -1009,6 +1011,7 @@ async def get_mooncake_request_over_time(
 def sample_mmmu_requests(
     num_requests: int,
     processor: AutoProcessor | AutoTokenizer,
+    backend: str,
     fixed_output_len: Optional[int] = None,
     random_sample: bool = True,
 ) -> List[DatasetRow]:
@@ -1081,7 +1084,7 @@ def sample_mmmu_requests(
             text_prompt = f"Question: {question}\n\nAnswer: "
             output_len = fixed_output_len if fixed_output_len is not None else 256
             data_row = create_mm_data_row(
-                text_prompt, [image], [image_data], output_len, processor
+                text_prompt, [image], [image_data], output_len, processor, backend
             )
             filtered_dataset.append(data_row)
 
@@ -1316,13 +1319,19 @@ def parse_image_resolution(image_resolution: str) -> Tuple[int, int]:
     )
 
 
-def create_mm_data_row(text_prompt, images: list, images_base64, output_len, processor):
+def create_mm_data_row(
+    text_prompt, images: list, images_base64, output_len, processor, backend
+):
     try:
-        content_items = [
-            {"type": "image", "image": {"url": image_base64}}
-            for image_base64 in images_base64
-        ]
-        content_items.append({"type": "text", "text": text_prompt})
+        if type(processor).__name__ == "Phi4MMProcessor":
+            # <|endoftext10|> is the image token used in the phi-4-multimodal model.
+            content_items = text_prompt.replace("image 1", "|endoftext10|")
+        else:
+            content_items = [
+                {"type": "image", "image": {"url": image_base64}}
+                for image_base64 in images_base64
+            ]
+            content_items.append({"type": "text", "text": text_prompt})
         prompt_str = processor.apply_chat_template(
             [{"role": "user", "content": content_items}],
             add_generation_prompt=True,
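
The Phi-4-MM branch rewrites the prompt string instead of building structured content items. Below is a minimal sketch of what the substitution does, assuming the MMMU question text embeds `<image 1>`-style placeholders; that assumption explains why replacing only the inner `image 1` with `|endoftext10|` yields the model's `<|endoftext10|>` image token without doubling the angle brackets (compare commit d49e917, "remove duplicated bracket"):

```python
# Standalone illustration of the Phi4MMProcessor branch above; the sample
# question is invented, but the replace() call is the one from the diff.
text_prompt = "Question: What is shown in <image 1>?\n\nAnswer: "

phi4_prompt = text_prompt.replace("image 1", "|endoftext10|")
assert phi4_prompt == "Question: What is shown in <|endoftext10|>?\n\nAnswer: "
```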
@@ -1362,8 +1371,16 @@ def create_mm_data_row(text_prompt, images: list, images_base64, output_len, processor):
     # Vision tokens = total tokens - text tokens
     vision_prompt_len = prompt_len - text_prompt_len
 
+    use_raw_prompt = backend in [
+        "sglang-oai",
+        "sglang-oai-chat",
+        "vllm",
+        "vllm-chat",
+        "lmdeploy",
+        "lmdeploy-chat",
+    ]
     return DatasetRow(
-        prompt=text_prompt,
+        prompt=text_prompt if use_raw_prompt else prompt_str,
         prompt_len=prompt_len,
         output_len=output_len,
         text_prompt_len=text_prompt_len,
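
The new `use_raw_prompt` flag looks like commit 5da6c0a ("Adaptation for wrap prompt or not") in action: for the listed backends, presumably because their OpenAI-style endpoints apply the model's chat template server-side, the benchmark sends the raw question text, while other backends receive the locally templated `prompt_str`. A hedged, self-contained restatement of that selection (names are local to this sketch):

```python
# Backends whose endpoints wrap the prompt themselves, per the diff above.
RAW_PROMPT_BACKENDS = {
    "sglang-oai", "sglang-oai-chat",
    "vllm", "vllm-chat",
    "lmdeploy", "lmdeploy-chat",
}

def choose_prompt(backend: str, text_prompt: str, templated_prompt: str) -> str:
    """Return the raw text for backends that apply the chat template server-side."""
    return text_prompt if backend in RAW_PROMPT_BACKENDS else templated_prompt

assert choose_prompt("vllm-chat", "raw", "templated") == "raw"
assert choose_prompt("sglang", "raw", "templated") == "templated"
```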
@@ -1382,6 +1399,7 @@ def sample_image_requests(
     image_content: str,
     image_format: str,
     image_resolution: str,
+    backend: str,
 ) -> List[DatasetRow]:
     """Generate requests with images.
 
@@ -1447,6 +1465,7 @@ def _gen_random_image_data_uri(
             list(images_base64),
             int(output_lens[i]),
             processor,
+            backend,
         )
 
         dataset.append(data_row)
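
Putting the pieces together, here is a self-contained sketch of the prompt path through `create_mm_data_row` after this change. The processor class below is a hypothetical stand-in whose only job is to carry the name the diff checks for and a toy `apply_chat_template`; the real code receives a HuggingFace `AutoProcessor`, and the wrapper format here is invented for illustration:

```python
class Phi4MMProcessor:
    """Hypothetical stub: the name matches the type check in the diff."""

    def apply_chat_template(self, messages, add_generation_prompt=True, **kwargs):
        # Invented template; real templates come from the model's processor config.
        return f"<|user|>{messages[0]['content']}<|end|><|assistant|>"

def build_prompt(text_prompt: str, processor, backend: str) -> str:
    # Mirrors the branch structure of create_mm_data_row after this PR.
    if type(processor).__name__ == "Phi4MMProcessor":
        content_items = text_prompt.replace("image 1", "|endoftext10|")
    else:
        content_items = [{"type": "text", "text": text_prompt}]
    prompt_str = processor.apply_chat_template(
        [{"role": "user", "content": content_items}], add_generation_prompt=True
    )
    use_raw_prompt = backend in [
        "sglang-oai", "sglang-oai-chat", "vllm",
        "vllm-chat", "lmdeploy", "lmdeploy-chat",
    ]
    return text_prompt if use_raw_prompt else prompt_str

question = "Question: What is in <image 1>?\n\nAnswer: "
print(build_prompt(question, Phi4MMProcessor(), "sglang"))      # templated prompt
print(build_prompt(question, Phi4MMProcessor(), "sglang-oai"))  # raw prompt
```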