VLLM_USE_TRITON_FLASH_ATTN V0 variable deprecation (#27611)
Signed-off-by: Andreas Karatzas <akaratza@amd.com> Signed-off-by: Andreas Karatzas <Andreas.Karatzas@amd.com>
This commit is contained in:
@@ -5,7 +5,6 @@ image, embedding, and video support for different VLMs in vLLM.
|
||||
"""
|
||||
|
||||
import math
|
||||
import os
|
||||
from collections import defaultdict
|
||||
from pathlib import PosixPath
|
||||
|
||||
@@ -38,13 +37,6 @@ from .vlm_utils.types import (
|
||||
VLMTestType,
|
||||
)
|
||||
|
||||
# This hack is needed for phi3v & paligemma models
|
||||
# ROCm Triton FA can run into shared memory issues with these models,
|
||||
# use other backends in the meantime
|
||||
# FIXME (mattwong, gshtrasb, hongxiayan)
|
||||
if current_platform.is_rocm():
|
||||
os.environ["VLLM_USE_TRITON_FLASH_ATTN"] = "0"
|
||||
|
||||
COMMON_BROADCAST_SETTINGS = {
|
||||
"test_type": VLMTestType.IMAGE,
|
||||
"dtype": "half",
|
||||
|
||||
@@ -11,7 +11,6 @@ from huggingface_hub import snapshot_download
|
||||
from vllm.assets.image import ImageAsset
|
||||
from vllm.lora.request import LoRARequest
|
||||
from vllm.multimodal.image import rescale_image_size
|
||||
from vllm.platforms import current_platform
|
||||
|
||||
from ....conftest import (
|
||||
IMAGE_ASSETS,
|
||||
@@ -46,12 +45,6 @@ models = [model_path]
|
||||
|
||||
target_dtype = "half"
|
||||
|
||||
# ROCm Triton FA can run into shared memory issues with these models,
|
||||
# use other backends in the meantime
|
||||
# FIXME (mattwong, gshtrasb, hongxiayan)
|
||||
if current_platform.is_rocm():
|
||||
os.environ["VLLM_USE_TRITON_FLASH_ATTN"] = "0"
|
||||
|
||||
|
||||
def run_test(
|
||||
hf_runner: type[HfRunner],
|
||||
|
||||
@@ -14,7 +14,6 @@ from vllm.assets.image import ImageAsset
|
||||
from vllm.logprobs import SampleLogprobs
|
||||
from vllm.lora.request import LoRARequest
|
||||
from vllm.multimodal.image import convert_image_mode, rescale_image_size
|
||||
from vllm.platforms import current_platform
|
||||
|
||||
from ....conftest import (
|
||||
IMAGE_ASSETS,
|
||||
@@ -68,12 +67,6 @@ def vllm_to_hf_output(
|
||||
|
||||
target_dtype = "half"
|
||||
|
||||
# ROCm Triton FA can run into shared memory issues with these models,
|
||||
# use other backends in the meantime
|
||||
# FIXME (mattwong, gshtrasb, hongxiayan)
|
||||
if current_platform.is_rocm():
|
||||
os.environ["VLLM_USE_TRITON_FLASH_ATTN"] = "0"
|
||||
|
||||
|
||||
def run_test(
|
||||
hf_runner: type[HfRunner],
|
||||
|
||||
Reference in New Issue
Block a user