[Hardware][Gaudi][Feature] Support Contiguous Cache Fetch (#12139)

Signed-off-by: yuzhou <yuzhou@habana.ai>
Signed-off-by: zhouyu5 <yu.zhou@intel.com>
Co-authored-by: Cody Yu <hao.yu.cody@gmail.com>
This commit is contained in:
Yu-Zhou
2025-02-19 11:40:19 +08:00
committed by GitHub
parent 00b69c2d27
commit d0a7a2769d
4 changed files with 80 additions and 47 deletions

View File

@@ -89,6 +89,7 @@ if TYPE_CHECKING:
VLLM_RAY_PER_WORKER_GPUS: float = 1.0
VLLM_RAY_BUNDLE_INDICES: str = ""
VLLM_CUDART_SO_PATH: Optional[str] = None
VLLM_USE_HPU_CONTIGUOUS_CACHE_FETCH: bool = True
def get_default_cache_root():
@@ -585,6 +586,13 @@ environment_variables: Dict[str, Callable[[], Any]] = {
# specify the path through environment variable VLLM_CUDART_SO_PATH.
"VLLM_CUDART_SO_PATH":
lambda: os.getenv("VLLM_CUDART_SO_PATH", None),
# Contiguous cache fetch avoids the costly gather operation on Gaudi3; it
# only applies to the HPU contiguous cache. Enabled by default; read from
# the VLLM_CONTIGUOUS_PA variable, where "1" or "true" (any case) turns it on.
"VLLM_USE_HPU_CONTIGUOUS_CACHE_FETCH":
lambda: os.environ.get("VLLM_CONTIGUOUS_PA", "true").lower() in
("1", "true"),
}
# end-env-vars-definition