Skip to content

Commit c6ca163

Browse files
hebiao064 and nanjiangwill
authored and committed
[RL] Fix illegal memory for _import_static_state (sgl-project#7733)
Co-authored-by: nanjiangwill <willjiang2018@gmail.com>
1 parent a3700fb commit c6ca163

1 file changed

Lines changed: 2 additions & 0 deletions

File tree

python/sglang/srt/managers/scheduler.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2346,6 +2346,7 @@ def release_memory_occupation(self, recv_req: ReleaseMemoryOccupationReqInput):
2346 2346
self.stashed_model_static_state = _export_static_state(
2347 2347
self.tp_worker.worker.model_runner.model
2348 2348
)
2349+
torch.distributed.barrier(self.tp_cpu_group)
2349 2350
self.memory_saver_adapter.pause(GPU_MEMORY_TYPE_WEIGHTS)
2350 2351

2351 2352
return ReleaseMemoryOccupationReqOutput()
@@ -2357,6 +2358,7 @@ def resume_memory_occupation(self, recv_req: ResumeMemoryOccupationReqInput):
2357 2358

2358 2359
if GPU_MEMORY_TYPE_WEIGHTS in tags:
2359 2360
self.memory_saver_adapter.resume(GPU_MEMORY_TYPE_WEIGHTS)
2361+
torch.distributed.barrier(self.tp_cpu_group)
2360 2362
_import_static_state(
2361 2363
self.tp_worker.worker.model_runner.model,
2362 2364
self.stashed_model_static_state,

0 commit comments

Comments
 (0)