Skip to content

Commit fe7f91e

Browse files
authored
[Feat] Add received_time in serving_base (#13432)
Signed-off-by: zhanghaotong <zhanghaotong.zht@antgroup.com>
1 parent cef5ba6 commit fe7f91e

File tree

3 files changed

+16
-1
lines changed

3 files changed

+16
-1
lines changed

python/sglang/srt/entrypoints/openai/serving_base.py

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -88,6 +88,9 @@ async def handle_request(
8888
"""Handle the specific request type with common pattern
8989
If you want to override this method, you should be careful to record the validation time.
9090
"""
91+
received_time = time.time()
92+
received_time_perf = time.perf_counter()
93+
9194
try:
9295
# Validate request
9396
validation_start = time.perf_counter()
@@ -103,6 +106,12 @@ async def handle_request(
103106
if hasattr(adapted_request, "validation_time"):
104107
adapted_request.validation_time = validation_time
105108

109+
if hasattr(adapted_request, "received_time"):
110+
adapted_request.received_time = received_time
111+
112+
if hasattr(adapted_request, "received_time_perf"):
113+
adapted_request.received_time_perf = received_time_perf
114+
106115
# Note(Xinyuan): raw_request below is only used for detecting the connection of the client
107116
if hasattr(request, "stream") and request.stream:
108117
return await self._handle_streaming_request(

python/sglang/srt/managers/io_struct.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,12 @@ class GenerateReqInput(BaseReq):
204204
# Validation step duration
205205
validation_time: Optional[float] = None
206206

207+
# For metrics
208+
received_time: Optional[float] = None
209+
210+
# Perf_counter equivalents for accurate time calculations
211+
received_time_perf: Optional[float] = None
212+
207213
# For data parallel rank routing
208214
data_parallel_rank: Optional[int] = None
209215

python/sglang/srt/managers/tokenizer_manager.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -431,7 +431,7 @@ async def generate_request(
431431
obj: Union[GenerateReqInput, EmbeddingReqInput],
432432
request: Optional[fastapi.Request] = None,
433433
):
434-
created_time = time.time()
434+
created_time = obj.received_time if obj.received_time else time.time()
435435
self.auto_create_handle_loop()
436436
obj.normalize_batch_and_arguments()
437437

0 commit comments

Comments
 (0)