Skip to content

Commit cc8f531

Browse files
authored
[Feature] Add timestamp for profiler (#4726)
* [Feature] Add timestamp for profiler * fix bug for offline inference * fix for CI * fix * fix CI
1 parent 876e4a8 commit cc8f531

File tree

7 files changed

+17
-1
lines changed

7 files changed

+17
-1
lines changed

fastdeploy/engine/async_llm.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -402,6 +402,7 @@ async def add_request(
402402

403403
try:
404404
request = Request.from_dict(prompt)
405+
request.llm_engine_recv_req_timestamp = time.time()
405406

406407
# Check if already preprocessed by AsyncEngineClient
407408
is_preprocessed = prompt.get("_preprocessed", False)

fastdeploy/engine/common_engine.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -839,6 +839,7 @@ def _insert_zmq_task_to_scheduler(self):
839839
err_msg = None
840840
try:
841841
request = Request.from_dict(data)
842+
request.llm_engine_recv_req_timestamp = time.time()
842843
start_span("ENQUEUE_ZMQ", data, trace.SpanKind.PRODUCER)
843844
main_process_metrics.requests_number.inc()
844845
self.llm_logger.debug(f"Receive request: {request}")

fastdeploy/engine/engine.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,7 @@ def add_requests(self, task, sampling_params=None, **kwargs):
249249
if sampling_params is not None:
250250
task.update(asdict(sampling_params))
251251
request = Request.from_dict(task)
252+
request.llm_engine_recv_req_timestamp = time.time()
252253
llm_logger.info(f"Receive request {request}")
253254
if sampling_params is not None:
254255
request.sampling_params = sampling_params

fastdeploy/engine/request.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -399,6 +399,9 @@ class RequestMetrics:
399399
model_forward_time: Optional[float] = None
400400
model_execute_time: Optional[float] = None
401401
request_start_time: Optional[float] = None
402+
llm_engine_recv_req_timestamp: Optional[float] = None
403+
llm_engine_send_req_to_engine_timestamp: Optional[float] = None
404+
llm_engine_recv_token_timestamp: Optional[float] = None
402405

403406
def to_dict(self):
404407
"""
@@ -413,6 +416,9 @@ def to_dict(self):
413416
"model_forward_time": self.model_forward_time,
414417
"model_execute_time": self.model_execute_time,
415418
"request_start_time": self.request_start_time,
419+
"llm_engine_recv_req_timestamp": self.llm_engine_recv_req_timestamp,
420+
"llm_engine_send_req_to_engine_timestamp": self.llm_engine_send_req_to_engine_timestamp,
421+
"llm_engine_recv_token_timestamp": self.llm_engine_recv_token_timestamp,
416422
}
417423

418424
@classmethod

fastdeploy/output/token_processor.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -630,15 +630,20 @@ def _process_batch_output(self):
630630
time_in_queue=task.schedule_start_time - task.preprocess_end_time,
631631
preprocess_cost_time=task.preprocess_end_time - task.preprocess_start_time,
632632
request_start_time=task.arrival_time,
633+
llm_engine_recv_req_timestamp=task.llm_engine_recv_req_timestamp,
634+
llm_engine_send_req_to_engine_timestamp=task.inference_start_time,
635+
llm_engine_recv_token_timestamp=time.time(),
633636
)
634-
635637
self._record_first_token_metrics(task, current_time)
636638

637639
else:
638640
metrics = RequestMetrics(
639641
arrival_time=time.time(),
640642
request_start_time=task.arrival_time,
641643
model_execute_time=time.time() - task.inference_start_time,
644+
llm_engine_recv_req_timestamp=task.llm_engine_recv_req_timestamp,
645+
llm_engine_send_req_to_engine_timestamp=task.inference_start_time,
646+
llm_engine_recv_token_timestamp=time.time(),
642647
)
643648
self.number_of_output_tokens += len(token_ids)
644649
self._record_metrics(task, current_time, token_ids)

tests/output/test_get_save_output_v1.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ def __init__(self):
6868
self.prefill_chunk_info = None
6969
self.prefill_chunk_num = 0
7070
self.pooling_params = None
71+
self.llm_engine_recv_req_timestamp = time.time()
7172

7273
def get(self, key: str, default_value=None):
7374
if hasattr(self, key):

tests/output/test_process_batch_output.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ def __init__(self):
4646
self.disaggregate_info = None
4747
self.prefill_chunk_info = None
4848
self.prefill_chunk_num = 0
49+
self.llm_engine_recv_req_timestamp = time.time()
4950

5051
def get(self, key: str, default_value=None):
5152
if hasattr(self, key):

0 commit comments

Comments
 (0)