[Hardware][Intel] Isolate CPUModelRunner and ModelRunner for better maintenance (#3824)
This commit is contained in:
@@ -25,6 +25,7 @@ class CPUExecutor(ExecutorBase):
|
||||
assert lora_config is None, "cpu backend doesn't support LoRA"
|
||||
model_config = _verify_and_get_model_config(model_config)
|
||||
cache_config = _verify_and_get_cache_config(cache_config)
|
||||
scheduler_config = _verify_and_get_scheduler_config(scheduler_config)
|
||||
|
||||
self.model_config = model_config
|
||||
self.cache_config = cache_config
|
||||
@@ -116,6 +117,15 @@ def _verify_and_get_model_config(config: ModelConfig) -> ModelConfig:
|
||||
return config
|
||||
|
||||
|
||||
def _verify_and_get_scheduler_config(
        config: SchedulerConfig) -> SchedulerConfig:
    """Return ``config`` with chunked prefill switched off for the CPU backend.

    When ``config.chunked_prefill_enabled`` is truthy, a warning is logged and
    the flag is reset to ``False`` on the object that was passed in (the
    config is modified in place, not copied). Otherwise the config is
    returned untouched.

    Args:
        config: Scheduler configuration to check.

    Returns:
        The same ``config`` object that was passed in.
    """
    # Nothing to adjust when chunked prefill was never requested.
    if not config.chunked_prefill_enabled:
        return config

    logger.warning("Chunked prefill is not supported on CPU, disable it.")
    config.chunked_prefill_enabled = False
    return config
|
||||
|
||||
|
||||
def _verify_and_get_cache_config(config: CacheConfig) -> CacheConfig:
|
||||
_GB = 1 << 30
|
||||
if config.enable_prefix_caching:
|
||||
|
||||
Reference in New Issue
Block a user