Signed-off-by: morrison-turnansky <mturnans@redhat.com>
Signed-off-by: adabeyta <aabeyta@redhat.com>
Signed-off-by: Morrison Turnansky <mturnans@redhat.com>
Co-authored-by: adabeyta <aabeyta@redhat.com>
Co-authored-by: Luka Govedič <ProExpertProg@users.noreply.github.com>
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
committed by
GitHub
parent
00d3310d2d
commit
0838b52e2e
@@ -77,6 +77,7 @@ from vllm.config.observability import DetailedTraceModules
|
||||
from vllm.config.parallel import DistributedExecutorBackend, ExpertPlacementStrategy
|
||||
from vllm.config.scheduler import SchedulerPolicy
|
||||
from vllm.config.utils import get_field
|
||||
from vllm.config.vllm import OptimizationLevel
|
||||
from vllm.logger import init_logger, suppress_logging
|
||||
from vllm.platforms import CpuArchEnum, current_platform
|
||||
from vllm.plugins import load_general_plugins
|
||||
@@ -560,6 +561,7 @@ class EngineArgs:
|
||||
stream_interval: int = SchedulerConfig.stream_interval
|
||||
|
||||
kv_sharing_fast_prefill: bool = CacheConfig.kv_sharing_fast_prefill
|
||||
optimization_level: OptimizationLevel = VllmConfig.optimization_level
|
||||
|
||||
kv_offloading_size: float | None = CacheConfig.kv_offloading_size
|
||||
kv_offloading_backend: KVOffloadingBackend | None = (
|
||||
@@ -1114,6 +1116,10 @@ class EngineArgs:
|
||||
"--structured-outputs-config", **vllm_kwargs["structured_outputs_config"]
|
||||
)
|
||||
|
||||
vllm_group.add_argument(
|
||||
"--optimization-level", **vllm_kwargs["optimization_level"]
|
||||
)
|
||||
|
||||
# Other arguments
|
||||
parser.add_argument(
|
||||
"--disable-log-stats",
|
||||
@@ -1733,7 +1739,6 @@ class EngineArgs:
|
||||
compilation_config.max_cudagraph_capture_size = (
|
||||
self.max_cudagraph_capture_size
|
||||
)
|
||||
|
||||
config = VllmConfig(
|
||||
model_config=model_config,
|
||||
cache_config=cache_config,
|
||||
@@ -1750,6 +1755,7 @@ class EngineArgs:
|
||||
kv_events_config=self.kv_events_config,
|
||||
ec_transfer_config=self.ec_transfer_config,
|
||||
additional_config=self.additional_config,
|
||||
optimization_level=self.optimization_level,
|
||||
)
|
||||
|
||||
return config
|
||||
|
||||
Reference in New Issue
Block a user