[Performance][B200] silu_mul_quant: pack scales in int32 (#28358)
Signed-off-by: Varun Sundar Rabindranath <vsundarr@redhat.com> Co-authored-by: Varun Sundar Rabindranath <vsundarr@redhat.com>
This commit is contained in:
committed by
GitHub
parent
fdfd5075aa
commit
fe1cd7704d
@@ -1384,3 +1384,16 @@ def image_urls(request, local_asset_server) -> list[str]:
|
||||
"""Indirect fixture: takes a list of names, returns list of full URLs."""
|
||||
names: list[str] = request.param
|
||||
return [local_asset_server.url_for(name) for name in names]
|
||||
|
||||
|
||||
@pytest.fixture
def disable_deepgemm_ue8m0(monkeypatch):
    """Run the requesting test with ``VLLM_USE_DEEP_GEMM_E8M0`` set to ``"0"``.

    The environment variable is set inside a ``monkeypatch.context()`` block
    that stays open while the test runs. ``is_deep_gemm_e8m0_used`` has its
    cache cleared once after the variable is set and once more after the
    test finishes.
    """
    # Imported lazily, as in the original fixture, so vllm.utils.deep_gemm is
    # only loaded when a test actually requests this fixture.
    from vllm.utils.deep_gemm import is_deep_gemm_e8m0_used

    reset_e8m0_cache = is_deep_gemm_e8m0_used.cache_clear

    with monkeypatch.context() as env_patch:
        env_patch.setenv("VLLM_USE_DEEP_GEMM_E8M0", "0")
        # Drop any previously cached answer so the override is picked up
        # (presumably the helper memoizes its result -- confirm in deep_gemm).
        reset_e8m0_cache()

        yield

        # Clear the cache again so the next caller is evaluated against the
        # default VLLM_USE_DEEP_GEMM_E8M0 setting rather than the override.
        reset_e8m0_cache()
|
||||
|
||||
Reference in New Issue
Block a user