[Hardware][Intel] Isolate CPUModelRunner and ModelRunner for better maintenance (#3824)

This commit is contained in:
bigPYJ1151
2024-04-12 02:56:49 +08:00
committed by GitHub
parent 08ccee1e83
commit 8afca50889
5 changed files with 443 additions and 61 deletions

View File

@@ -25,6 +25,7 @@ class CPUExecutor(ExecutorBase):
assert lora_config is None, "cpu backend doesn't support LoRA"
model_config = _verify_and_get_model_config(model_config)
cache_config = _verify_and_get_cache_config(cache_config)
scheduler_config = _verify_and_get_scheduler_config(scheduler_config)
self.model_config = model_config
self.cache_config = cache_config
@@ -116,6 +117,15 @@ def _verify_and_get_model_config(config: ModelConfig) -> ModelConfig:
return config
def _verify_and_get_scheduler_config(
        config: SchedulerConfig) -> SchedulerConfig:
    """Switch off chunked prefill on ``config`` when it is enabled.

    When ``config.chunked_prefill_enabled`` is truthy, a warning is logged
    and the flag is reset to ``False`` on the object that was passed in
    (the config is modified in place, not copied).

    Args:
        config: Scheduler configuration to check.

    Returns:
        The same ``config`` object, with ``chunked_prefill_enabled`` set to
        ``False`` if it was previously truthy.
    """
    # Nothing to adjust: hand the config back untouched.
    if not config.chunked_prefill_enabled:
        return config

    logger.warning("Chunked prefill is not supported on CPU, disable it.")
    config.chunked_prefill_enabled = False
    return config
def _verify_and_get_cache_config(config: CacheConfig) -> CacheConfig:
_GB = 1 << 30
if config.enable_prefix_caching: