[Perf] Move gc.freeze logic from EngineCoreProc to EngineCore for better coverage (#27896)

Signed-off-by: Jialin Ouyang <Jialin.Ouyang@gmail.com>
This commit is contained in:
Jialin Ouyang
2025-11-10 15:34:18 -08:00
committed by GitHub
parent d17ecc6b19
commit b30372cbd0
5 changed files with 30 additions and 14 deletions

View File

@@ -1,8 +1,6 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import asyncio
import gc
import hashlib
import importlib
import inspect
@@ -118,6 +116,7 @@ from vllm.reasoning import ReasoningParserManager
from vllm.tasks import POOLING_TASKS
from vllm.usage.usage_lib import UsageContext
from vllm.utils.argparse_utils import FlexibleArgumentParser
from vllm.utils.gc_utils import freeze_gc_heap
from vllm.utils.network_utils import is_valid_ipv6_address
from vllm.utils.system_utils import decorate_logs, set_ulimit
from vllm.v1.engine.exceptions import EngineDeadError
@@ -153,8 +152,7 @@ async def lifespan(app: FastAPI):
# Mark the startup heap as static so that it's ignored by GC.
# Reduces pause times of oldest generation collections.
gc.collect()
gc.freeze()
freeze_gc_heap()
try:
yield
finally: