[Misc] Use deprecated for seed_everything (#31780)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
@@ -8,10 +8,9 @@ import torch
|
||||
|
||||
import vllm.model_executor.layers.activation # noqa F401
|
||||
from vllm.model_executor.custom_op import CustomOp
|
||||
from vllm.platforms import current_platform
|
||||
from vllm.triton_utils import triton
|
||||
from vllm.utils.argparse_utils import FlexibleArgumentParser
|
||||
from vllm.utils.torch_utils import STR_DTYPE_TO_TORCH_DTYPE
|
||||
from vllm.utils.torch_utils import STR_DTYPE_TO_TORCH_DTYPE, set_random_seed
|
||||
|
||||
batch_size_range = [1, 16, 128]
|
||||
seq_len_range = [1, 16, 64, 1024, 4096]
|
||||
@@ -30,7 +29,7 @@ def benchmark_activation(
|
||||
device = "cuda"
|
||||
num_tokens = batch_size * seq_len
|
||||
dim = intermediate_size
|
||||
current_platform.seed_everything(42)
|
||||
set_random_seed(42)
|
||||
torch.set_default_device(device)
|
||||
|
||||
if func_name == "gelu_and_mul":
|
||||
|
||||
@@ -6,9 +6,8 @@ import time
|
||||
import torch
|
||||
|
||||
from vllm.model_executor.layers.layernorm import RMSNorm
|
||||
from vllm.platforms import current_platform
|
||||
from vllm.utils.argparse_utils import FlexibleArgumentParser
|
||||
from vllm.utils.torch_utils import STR_DTYPE_TO_TORCH_DTYPE
|
||||
from vllm.utils.torch_utils import STR_DTYPE_TO_TORCH_DTYPE, set_random_seed
|
||||
|
||||
|
||||
@torch.inference_mode()
|
||||
@@ -22,7 +21,7 @@ def main(
|
||||
num_warmup_iters: int = 5,
|
||||
num_iters: int = 100,
|
||||
) -> None:
|
||||
current_platform.seed_everything(seed)
|
||||
set_random_seed(seed)
|
||||
torch.set_default_device("cuda")
|
||||
|
||||
layer = RMSNorm(hidden_size).to(dtype=dtype)
|
||||
|
||||
@@ -24,6 +24,7 @@ from vllm.platforms import current_platform
|
||||
from vllm.transformers_utils.config import get_config
|
||||
from vllm.triton_utils import triton
|
||||
from vllm.utils.argparse_utils import FlexibleArgumentParser
|
||||
from vllm.utils.torch_utils import set_random_seed
|
||||
|
||||
FP8_DTYPE = current_platform.fp8_dtype()
|
||||
|
||||
@@ -431,7 +432,7 @@ def merge_unique_dicts(list1, list2):
|
||||
class BenchmarkWorker:
|
||||
def __init__(self, seed: int) -> None:
|
||||
torch.set_default_device("cuda")
|
||||
current_platform.seed_everything(seed)
|
||||
set_random_seed(seed)
|
||||
self.seed = seed
|
||||
# Get the device ID to allocate tensors and kernels
|
||||
# on the respective GPU. This is required for Ray to work
|
||||
@@ -451,7 +452,7 @@ class BenchmarkWorker:
|
||||
block_quant_shape: list[int] = None,
|
||||
use_deep_gemm: bool = False,
|
||||
) -> tuple[dict[str, int], float]:
|
||||
current_platform.seed_everything(self.seed)
|
||||
set_random_seed(self.seed)
|
||||
dtype_str = _get_config_dtype_str(
|
||||
dtype, use_int8_w8a16=use_int8_w8a16, use_fp8_w8a8=use_fp8_w8a8
|
||||
)
|
||||
|
||||
@@ -18,6 +18,7 @@ from vllm.model_executor.layers.fused_moe.moe_permute_unpermute import (
|
||||
from vllm.model_executor.layers.fused_moe.utils import _fp8_quantize
|
||||
from vllm.platforms import current_platform
|
||||
from vllm.utils.argparse_utils import FlexibleArgumentParser
|
||||
from vllm.utils.torch_utils import set_random_seed
|
||||
|
||||
FP8_DTYPE = current_platform.fp8_dtype()
|
||||
|
||||
@@ -261,7 +262,7 @@ def benchmark_unpermute(
|
||||
class BenchmarkWorker:
|
||||
def __init__(self, seed: int) -> None:
|
||||
torch.set_default_device("cuda")
|
||||
current_platform.seed_everything(seed)
|
||||
set_random_seed(seed)
|
||||
self.seed = seed
|
||||
# Get the device ID to allocate tensors and kernels
|
||||
# on the respective GPU. This is required for Ray to work
|
||||
@@ -279,7 +280,7 @@ class BenchmarkWorker:
|
||||
use_int8_w8a16: bool,
|
||||
use_customized_permute: bool = False,
|
||||
) -> tuple[dict[str, int], float]:
|
||||
current_platform.seed_everything(self.seed)
|
||||
set_random_seed(self.seed)
|
||||
|
||||
permute_time = benchmark_permute(
|
||||
num_tokens,
|
||||
|
||||
@@ -37,9 +37,9 @@ import numpy as np
|
||||
import torch
|
||||
|
||||
from vllm.model_executor.layers.rotary_embedding import get_rope
|
||||
from vllm.platforms import current_platform
|
||||
from vllm.transformers_utils.config import get_config
|
||||
from vllm.utils.argparse_utils import FlexibleArgumentParser
|
||||
from vllm.utils.torch_utils import set_random_seed
|
||||
|
||||
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
||||
|
||||
@@ -94,7 +94,7 @@ def benchmark_mrope(
|
||||
benchmark_iter: int = 100,
|
||||
csv_writer=None,
|
||||
):
|
||||
current_platform.seed_everything(seed)
|
||||
set_random_seed(seed)
|
||||
torch.set_default_device(device)
|
||||
# the parameters to compute the q k v size based on tp_size
|
||||
mrope_helper_class = get_rope(
|
||||
|
||||
@@ -13,6 +13,7 @@ from vllm.utils.argparse_utils import FlexibleArgumentParser
|
||||
from vllm.utils.torch_utils import (
|
||||
STR_DTYPE_TO_TORCH_DTYPE,
|
||||
create_kv_caches_with_random,
|
||||
set_random_seed,
|
||||
)
|
||||
|
||||
logger = init_logger(__name__)
|
||||
@@ -38,7 +39,7 @@ def main(
|
||||
device: str = "cuda",
|
||||
kv_cache_dtype: str | None = None,
|
||||
) -> None:
|
||||
current_platform.seed_everything(seed)
|
||||
set_random_seed(seed)
|
||||
|
||||
scale = float(1.0 / (head_size**0.5))
|
||||
query = torch.empty(
|
||||
|
||||
@@ -6,9 +6,8 @@ import time
|
||||
import torch
|
||||
|
||||
from vllm import _custom_ops as ops
|
||||
from vllm.platforms import current_platform
|
||||
from vllm.utils.argparse_utils import FlexibleArgumentParser
|
||||
from vllm.utils.torch_utils import STR_DTYPE_TO_TORCH_DTYPE
|
||||
from vllm.utils.torch_utils import STR_DTYPE_TO_TORCH_DTYPE, set_random_seed
|
||||
|
||||
|
||||
@torch.inference_mode()
|
||||
@@ -23,7 +22,7 @@ def main(
|
||||
num_warmup_iters: int = 5,
|
||||
num_iters: int = 100,
|
||||
) -> None:
|
||||
current_platform.seed_everything(seed)
|
||||
set_random_seed(seed)
|
||||
torch.set_default_device("cuda")
|
||||
|
||||
x = torch.randn(num_tokens, hidden_size, dtype=dtype)
|
||||
|
||||
@@ -8,11 +8,11 @@ from tabulate import tabulate
|
||||
|
||||
from vllm import _custom_ops as ops
|
||||
from vllm.logger import init_logger
|
||||
from vllm.platforms import current_platform
|
||||
from vllm.utils.argparse_utils import FlexibleArgumentParser
|
||||
from vllm.utils.torch_utils import (
|
||||
STR_DTYPE_TO_TORCH_DTYPE,
|
||||
create_kv_caches_with_random,
|
||||
set_random_seed,
|
||||
)
|
||||
|
||||
logger = init_logger(__name__)
|
||||
@@ -36,7 +36,7 @@ def run_benchmark(
|
||||
if kv_cache_dtype == "fp8" and head_size % 16:
|
||||
raise ValueError("fp8 kv-cache requires head_size to be a multiple of 16.")
|
||||
|
||||
current_platform.seed_everything(42)
|
||||
set_random_seed(42)
|
||||
torch.set_default_device(device)
|
||||
|
||||
# create random key / value tensors [T, H, D].
|
||||
|
||||
@@ -11,11 +11,11 @@ from vllm.attention.ops.triton_reshape_and_cache_flash import (
|
||||
triton_reshape_and_cache_flash,
|
||||
)
|
||||
from vllm.logger import init_logger
|
||||
from vllm.platforms import current_platform
|
||||
from vllm.utils.argparse_utils import FlexibleArgumentParser
|
||||
from vllm.utils.torch_utils import (
|
||||
STR_DTYPE_TO_TORCH_DTYPE,
|
||||
create_kv_caches_with_random_flash,
|
||||
set_random_seed,
|
||||
)
|
||||
|
||||
logger = init_logger(__name__)
|
||||
@@ -49,7 +49,7 @@ def run_benchmark(
|
||||
if implementation == "triton" and kv_cache_layout == "HND":
|
||||
return float("nan") # Triton does not support HND layout yet.
|
||||
|
||||
current_platform.seed_everything(42)
|
||||
set_random_seed(42)
|
||||
torch.set_default_device(device)
|
||||
|
||||
# create random key / value tensors [T, H, D].
|
||||
|
||||
@@ -23,9 +23,9 @@ import torch
|
||||
from vllm.model_executor.layers.fused_moe.batched_deep_gemm_moe import (
|
||||
persistent_masked_m_silu_mul_quant,
|
||||
)
|
||||
from vllm.platforms import current_platform
|
||||
from vllm.triton_utils import tl, triton
|
||||
from vllm.utils.deep_gemm import is_deep_gemm_e8m0_used
|
||||
from vllm.utils.torch_utils import set_random_seed
|
||||
|
||||
|
||||
@triton.jit
|
||||
@@ -207,7 +207,7 @@ def benchmark(
|
||||
):
|
||||
def generate_data(seed_offset=0):
|
||||
"""Generate input data with given seed offset"""
|
||||
current_platform.seed_everything(42 + seed_offset)
|
||||
set_random_seed(42 + seed_offset)
|
||||
y = torch.rand((E, T, 2 * H), dtype=torch.bfloat16, device="cuda").contiguous()
|
||||
|
||||
if gen_strategy == "random_imbalanced":
|
||||
|
||||
Reference in New Issue
Block a user