[CORE] Adding support for insertion of soft-tuned prompts (#4645)
Co-authored-by: Swapnil Parekh <swapnilp@ibm.com>
Co-authored-by: Joe G <joseph.granados@h2o.ai>
Co-authored-by: Antoni Baum <antoni.baum@protonmail.com>
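For background, a soft-tuned prompt (a "prompt adapter", as in prompt tuning) is a small set of trained virtual-token embeddings prepended to the embedded input at inference time. The sketch below illustrates the general technique only; it is not code from this commit, and the function name is hypothetical.

    import torch

    def insert_soft_prompt(token_embeds: torch.Tensor,
                           soft_prompt: torch.Tensor) -> torch.Tensor:
        # token_embeds: [batch, seq_len, hidden] -- embeddings of the real tokens
        # soft_prompt:  [num_virtual_tokens, hidden] -- trained adapter weights
        batch = token_embeds.size(0)
        virtual = soft_prompt.unsqueeze(0).expand(batch, -1, -1)
        # Prepend the virtual tokens so the model attends to them
        # before the real prompt tokens.
        return torch.cat([virtual, token_embeds], dim=1)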
@@ -7,6 +7,7 @@ from vllm.config import CacheConfig, ModelConfig, SchedulerConfig
 from vllm.executor.executor_base import ExecutorAsyncBase, ExecutorBase
 from vllm.logger import init_logger
 from vllm.lora.request import LoRARequest
+from vllm.prompt_adapter.request import PromptAdapterRequest
 from vllm.sequence import ExecuteModelRequest, SamplerOutput
 from vllm.utils import (get_distributed_init_method, get_ip, get_open_port,
                         make_async)
@@ -48,6 +49,7 @@ class CPUExecutor(ExecutorBase):
             lora_config=self.lora_config,
             multimodal_config=self.multimodal_config,
             kv_cache_dtype=self.cache_config.cache_dtype,
+            prompt_adapter_config=self.prompt_adapter_config,
             is_driver_worker=True,
         )
         self.driver_worker.init_device()
@@ -90,6 +92,19 @@ class CPUExecutor(ExecutorBase):
     def list_loras(self) -> Set[int]:
         return self.driver_worker.list_loras()
 
+    def add_prompt_adapter(
+            self, prompt_adapter_request: PromptAdapterRequest) -> bool:
+        return self.driver_worker.add_prompt_adapter(prompt_adapter_request)
+
+    def remove_prompt_adapter(self, prompt_adapter_id: int) -> bool:
+        return self.driver_worker.remove_prompt_adapter(prompt_adapter_id)
+
+    def list_prompt_adapters(self) -> Set[int]:
+        return self.driver_worker.list_prompt_adapters()
+
+    def pin_prompt_adapter(self, prompt_adapter_id: int) -> bool:
+        return self.driver_worker.pin_prompt_adapter(prompt_adapter_id)
+
     def check_health(self) -> None:
         # CPUExecutor will always be healthy as long as
         # it's running.
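For context, a minimal sketch of how these new executor hooks could be driven. The PromptAdapterRequest field names below are assumptions based on its use elsewhere in vLLM, not something this diff confirms, and `executor` stands for an already-initialized CPUExecutor.

    from vllm.prompt_adapter.request import PromptAdapterRequest

    # Hypothetical request; field names are assumed, not shown in this diff.
    request = PromptAdapterRequest(
        prompt_adapter_name="my_soft_prompt",
        prompt_adapter_id=1,
        prompt_adapter_local_path="/path/to/adapter",
        prompt_adapter_num_virtual_tokens=8,
    )

    # Each call simply forwards to the driver worker, as the diff shows.
    executor.add_prompt_adapter(request)      # -> True on success
    print(executor.list_prompt_adapters())    # -> {1}
    executor.pin_prompt_adapter(1)            # keep the adapter resident
    executor.remove_prompt_adapter(1)         # -> True on success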