[SpecDecode][Kernel] Flashinfer Rejection Sampling (#7244)

This commit is contained in:
Lily Liu
2024-09-01 21:23:29 -07:00
committed by GitHub
parent f8d60145b4
commit e6a26ed037
9 changed files with 306 additions and 109 deletions

View File

@@ -31,6 +31,7 @@ if TYPE_CHECKING:
VLLM_TRACE_FUNCTION: int = 0
VLLM_ATTENTION_BACKEND: Optional[str] = None
VLLM_USE_FLASHINFER_SAMPLER: bool = False
VLLM_USE_FLASHINFER_REJECTION_SAMPLER: bool = False
VLLM_PP_LAYER_PARTITION: Optional[str] = None
VLLM_CPU_KVCACHE_SPACE: int = 0
VLLM_CPU_OMP_THREADS_BIND: str = ""