[Perf] Move gc.freeze logic from EngineCoreProc to EngineCore for better coverage (#27896)

Signed-off-by: Jialin Ouyang <Jialin.Ouyang@gmail.com>
2025-11-10 15:34:18 -08:00
parent d17ecc6b19
commit b30372cbd0
5 changed files with 30 additions and 14 deletions
--- a/vllm/entrypoints/openai/api_server.py
+++ b/vllm/entrypoints/openai/api_server.py
@@ -1,8 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-
 import asyncio
-import gc
 import hashlib
 import importlib
 import inspect
@@ -118,6 +116,7 @@ from vllm.reasoning import ReasoningParserManager
 from vllm.tasks import POOLING_TASKS
 from vllm.usage.usage_lib import UsageContext
 from vllm.utils.argparse_utils import FlexibleArgumentParser
+from vllm.utils.gc_utils import freeze_gc_heap
 from vllm.utils.network_utils import is_valid_ipv6_address
 from vllm.utils.system_utils import decorate_logs, set_ulimit
 from vllm.v1.engine.exceptions import EngineDeadError
@@ -153,8 +152,7 @@ async def lifespan(app: FastAPI):

        # Mark the startup heap as static so that it's ignored by GC.
        # Reduces pause times of oldest generation collections.
-        gc.collect()
-        gc.freeze()
+        freeze_gc_heap()
        try:
            yield
        finally: