[Bugfix] Allow vllm to still work if triton is not installed. (#6786)

Signed-off-by: Thomas Parnell <tpa@zurich.ibm.com>
Authored by Thomas Parnell on 2024-07-29 23:51:27 +02:00; committed by GitHub.
parent 7f8d612d24
commit 9a7e2d0534
13 changed files with 65 additions and 37 deletions
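
The core of the fix is to stop importing triton unconditionally at module load
time and instead gate triton-backed code paths behind an availability check, so
that importing vllm succeeds even when triton is absent. A minimal sketch of
that pattern, assuming a HAS_TRITON flag exposed from vllm.triton_utils (the
flag name and module path are assumptions inferred from the import paths in the
diff below, not confirmed by this page):

    # Detect triton without importing it, so this module is always safe to load.
    from importlib.util import find_spec

    HAS_TRITON = find_spec("triton") is not None

    # Callers guard triton-backed code paths on the flag:
    if HAS_TRITON:
        import triton
        import triton.language as tl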


@@ -5,11 +5,12 @@ import torch
 import triton
 import triton.language as tl
 
-from vllm.model_executor.layers.ops.sample import (
-    MAX_TRITON_N_COLS, _uniform_to_exponential, get_num_triton_sampler_splits,
-    sample)
+from vllm.model_executor.layers.ops.sample import (_uniform_to_exponential,
+                                                   sample)
 from vllm.model_executor.sampling_metadata import SamplingTensors
 from vllm.model_executor.utils import set_random_seed
+from vllm.triton_utils.sample import (MAX_TRITON_N_COLS,
+                                      get_num_triton_sampler_splits)
 
 SINGLE_SPLIT_VOCAB_SIZE = 32000  # llama/mistral/mixtral vocab size
 MULTI_SPLIT_VOCAB_SIZE = MAX_TRITON_N_COLS + 100
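
The hunk above shows the mechanical half of the change: MAX_TRITON_N_COLS and
get_num_triton_sampler_splits now come from vllm.triton_utils.sample, which can
be imported without triton, while the triton kernels themselves stay in
vllm.model_executor.layers.ops.sample. A plausible sketch of the relocated
helpers; the constant's value and the ceil-division logic here are assumptions,
not copied from the commit:

    # vllm/triton_utils/sample.py -- illustrative sketch only.
    import math

    # Assumed upper bound on the number of columns a single Triton sampler
    # kernel launch can handle; the real value lives in the vLLM source.
    MAX_TRITON_N_COLS = 131072

    def get_num_triton_sampler_splits(n_cols: int) -> int:
        # Number of column-wise splits needed so each launch stays within
        # MAX_TRITON_N_COLS.
        return math.ceil(n_cols / MAX_TRITON_N_COLS)

Under this sketch, the test constant MULTI_SPLIT_VOCAB_SIZE = MAX_TRITON_N_COLS
+ 100 would exercise the multi-split path, since a vocab just over the
single-launch limit yields two splits.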