[Core] Use flashinfer sampling kernel when available (#7137)

Co-authored-by: Michael Goin <michael@neuralmagic.com>
This commit is contained in:
Peng Guanwen
2024-08-19 11:24:03 +08:00
committed by GitHub
parent ff7ec82c4d
commit f710fb5265
5 changed files with 129 additions and 27 deletions

View File

@@ -192,7 +192,9 @@ steps:
- vllm/model_executor/layers
- vllm/sampling_metadata.py
- tests/samplers
-command: pytest -v -s samplers
+commands:
+- pytest -v -s samplers
+- VLLM_USE_FLASHINFER_SAMPLER=1 pytest -v -s samplers
- label: LogitsProcessor Test # 5min
mirror_hardwares: [amd]