[Hardware][CPU] Use current_platform.is_cpu() instead of vllm.utils.is_cpu() (#9536)

This commit is contained in:
wangshuai09
2024-10-22 15:50:43 +08:00
committed by GitHub
parent 0d02747f2e
commit 3ddbe25502
17 changed files with 60 additions and 64 deletions

View File

@@ -21,7 +21,7 @@ from vllm.model_executor.models import ModelRegistry
from vllm.multimodal.base import NestedTensors
from vllm.platforms import current_platform
from vllm.sequence import IntermediateTensors
from vllm.utils import is_cpu, is_pin_memory_available
from vllm.utils import is_pin_memory_available
logger = init_logger(__name__)
@@ -474,7 +474,7 @@ def make_empty_intermediate_tensors_factory(keys: List[str], hidden_size: int):
class LLMWrapper(nn.Module):
"""
To align with the key names of LoRA trained with PEFT, we need to add an
To align with the key names of LoRA trained with PEFT, we need to add an
additional layer to the llm's implementation.
"""
@@ -515,7 +515,7 @@ def get_vit_attn_backend() -> _Backend:
"so we use xformers backend instead. You can run "
"`pip install flash-attn` to use flash-attention backend.")
selected_backend = _Backend.XFORMERS
elif is_cpu():
elif current_platform.is_cpu():
selected_backend = _Backend.TORCH_SDPA
else:
selected_backend = _Backend.XFORMERS