[Bugfix] Allow vllm to still work if triton is not installed. (#6786)
Signed-off-by: Thomas Parnell <tpa@zurich.ibm.com>
This commit is contained in:
@@ -5,11 +5,12 @@ import torch
|
||||
import triton
|
||||
import triton.language as tl
|
||||
|
||||
from vllm.model_executor.layers.ops.sample import (
|
||||
MAX_TRITON_N_COLS, _uniform_to_exponential, get_num_triton_sampler_splits,
|
||||
sample)
|
||||
from vllm.model_executor.layers.ops.sample import (_uniform_to_exponential,
|
||||
sample)
|
||||
from vllm.model_executor.sampling_metadata import SamplingTensors
|
||||
from vllm.model_executor.utils import set_random_seed
|
||||
from vllm.triton_utils.sample import (MAX_TRITON_N_COLS,
|
||||
get_num_triton_sampler_splits)
|
||||
|
||||
SINGLE_SPLIT_VOCAB_SIZE = 32000 # llama/mistral/mixtral vocab size
|
||||
MULTI_SPLIT_VOCAB_SIZE = MAX_TRITON_N_COLS + 100
|
||||
|
||||
Reference in New Issue
Block a user