diff --git a/vllm/v1/worker/gpu/block_table.py b/vllm/v1/worker/gpu/block_table.py index ff24e88ed..b31e9b179 100644 --- a/vllm/v1/worker/gpu/block_table.py +++ b/vllm/v1/worker/gpu/block_table.py @@ -3,10 +3,9 @@ from collections.abc import Iterable import torch -import triton -import triton.language as tl from vllm.attention.backends.utils import PAD_SLOT_ID +from vllm.triton_utils import tl, triton from vllm.utils.math_utils import cdiv from vllm.v1.utils import CpuGpuBuffer diff --git a/vllm/v1/worker/gpu/input_batch.py b/vllm/v1/worker/gpu/input_batch.py index 89f375649..8313b32d2 100644 --- a/vllm/v1/worker/gpu/input_batch.py +++ b/vllm/v1/worker/gpu/input_batch.py @@ -7,9 +7,8 @@ import numba import numba.types as types import numpy as np import torch -import triton -import triton.language as tl +from vllm.triton_utils import tl, triton from vllm.utils import random_uuid from vllm.utils.math_utils import cdiv from vllm.v1.utils import CpuGpuBuffer