[Bugfix][Core] Fix get decoding config from ray (#4335)

2024-04-27 19:30:08 +08:00
parent 3da24c2df7
commit 7134303cbb
6 changed files with 174 additions and 3 deletions
--- a/vllm/engine/async_llm_engine.py
+++ b/vllm/engine/async_llm_engine.py
@@ -7,7 +7,7 @@ from typing import (Any, AsyncIterator, Callable, Dict, Iterable, List,

 from transformers import PreTrainedTokenizer

-from vllm.config import ModelConfig
+from vllm.config import DecodingConfig, ModelConfig
 from vllm.engine.arg_utils import AsyncEngineArgs
 from vllm.engine.llm_engine import LLMEngine
 from vllm.executor.ray_utils import initialize_ray_cluster, ray
@@ -697,6 +697,14 @@ class AsyncLLMEngine:
        else:
            return self.engine.get_model_config()

+    async def get_decoding_config(self) -> DecodingConfig:
+        """Return the engine's decoding config, via Ray if engine_use_ray."""
+        if self.engine_use_ray:
+            return await self.engine.get_decoding_config.remote(  # type: ignore
+            )
+        else:  # no Ray: call the engine directly
+            return self.engine.get_decoding_config()
+
    async def do_log_stats(self) -> None:
        if self.engine_use_ray:
            await self.engine.do_log_stats.remote()  # type: ignore