[FlashInfer] Upgrade to 0.2.0 (#11194)
Signed-off-by: Bowen Wang <abmfy@icloud.com>
Signed-off-by: youkaichao <youkaichao@gmail.com>
Co-authored-by: youkaichao <youkaichao@gmail.com>
This commit is contained in:
@@ -310,14 +310,15 @@ class ModelConfig:
|
||||
(self.hf_text_config.model_type in ["gemma2", "cohere2"]))
|
||||
|
||||
if (not self.disable_sliding_window and has_interleaved_attention):
|
||||
if envs.VLLM_ATTENTION_BACKEND == "XFORMERS":
|
||||
if (backend :=
|
||||
envs.VLLM_ATTENTION_BACKEND) in ("XFORMERS", "FLASHINFER"):
|
||||
sliding_window_len_min = get_min_sliding_window(
|
||||
self.hf_text_config.sliding_window)
|
||||
|
||||
logger.warning_once(
|
||||
f"{self.hf_text_config.model_type} has interleaved "
|
||||
"attention, which is currently not supported by the "
|
||||
"XFORMERS backend. Disabling sliding window and capping "
|
||||
f"{backend} backend. Disabling sliding window and capping "
|
||||
"the max length to the sliding window size "
|
||||
f"({sliding_window_len_min}).")
|
||||
self.disable_sliding_window = True
|
||||
@@ -3310,7 +3311,7 @@ _current_vllm_config: Optional[VllmConfig] = None
|
||||
|
||||
|
||||
@contextmanager
|
||||
def set_current_vllm_config(vllm_config: VllmConfig):
|
||||
def set_current_vllm_config(vllm_config: VllmConfig, check_compile=False):
|
||||
"""
|
||||
Temporarily set the current VLLM config.
|
||||
Used during model initialization.
|
||||
@@ -3330,7 +3331,8 @@ def set_current_vllm_config(vllm_config: VllmConfig):
|
||||
vllm_config.compilation_config.enabled_custom_ops)
|
||||
logger.debug("disabled custom ops: %s",
|
||||
vllm_config.compilation_config.disabled_custom_ops)
|
||||
if vllm_config.compilation_config.level == CompilationLevel.PIECEWISE \
|
||||
if check_compile and \
|
||||
vllm_config.compilation_config.level == CompilationLevel.PIECEWISE \
|
||||
and compilation_counter.num_models_seen == num_models_seen:
|
||||
# If the model supports compilation,
|
||||
# compilation_counter.num_models_seen should be increased
|
||||
|
||||
Reference in New Issue
Block a user