[Chore] Update more locations to use attention_config.backend (#31153)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
@@ -104,7 +104,6 @@ def run_benchmark_with_batch_invariant(
     random.seed(seed)
 
     # Set environment variables
-    os.environ["VLLM_ATTENTION_BACKEND"] = backend
     if batch_invariant:
         os.environ["VLLM_BATCH_INVARIANT"] = "1"
     else:
@@ -140,6 +139,7 @@ def run_benchmark_with_batch_invariant(
         max_model_len=max_model_len,
         dtype="bfloat16",
         tensor_parallel_size=tp_size,
+        attention_config={"backend": backend},
         enable_prefix_caching=False,
     )
     init_time = time.perf_counter() - start_init
@@ -557,7 +557,8 @@ def test_rms_group_quant(
     # To capture subprocess logs, we need to know whether spawn or fork is used.
     # Force spawn as it is more general.
     monkeypatch.setenv("VLLM_WORKER_MULTIPROC_METHOD", "spawn")
-    monkeypatch.setenv("VLLM_ATTENTION_BACKEND", backend.name)
+
+    model_kwargs["attention_config"] = {"backend": backend.name}
 
     compilation_config = CompilationConfig(
         # Testing properties
Reference in New Issue
Block a user