[Core] Use flashinfer sampling kernel when available (#7137)
Co-authored-by: Michael Goin <michael@neuralmagic.com>
This commit is contained in:
@@ -192,7 +192,9 @@ steps:
|
||||
- vllm/model_executor/layers
|
||||
- vllm/sampling_metadata.py
|
||||
- tests/samplers
|
||||
- command: pytest -v -s samplers
|
||||
+ commands:
|
||||
+ - pytest -v -s samplers
|
||||
+ - VLLM_USE_FLASHINFER_SAMPLER=1 pytest -v -s samplers
|
||||
|
||||
- label: LogitsProcessor Test # 5min
|
||||
mirror_hardwares: [amd]
|
||||
|
||||
Reference in New Issue
Block a user