[Platform] Deprecate seed_everything (#31659)
Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
This commit is contained in:
@@ -10,7 +10,7 @@ from tqdm import tqdm
|
||||
|
||||
from vllm.config import VllmConfig, set_current_vllm_config
|
||||
from vllm.model_executor.layers.fused_moe.config import FUSED_MOE_UNQUANTIZED_CONFIG
|
||||
- from vllm.platforms import current_platform
|
||||
+ from vllm.utils.torch_utils import set_random_seed
|
||||
|
||||
from .common import (
|
||||
Config,
|
||||
@@ -40,7 +40,7 @@ def rank_worker(
|
||||
config: Config,
|
||||
weights: WeightTensors,
|
||||
):
|
||||
- current_platform.seed_everything(pgi.rank)
|
||||
+ set_random_seed(pgi.rank)
|
||||
|
||||
# sanity check
|
||||
from vllm import envs
|
||||
|
||||
@@ -9,7 +9,7 @@ from typing import Any
|
||||
import torch
|
||||
|
||||
from vllm.config import VllmConfig
|
||||
- from vllm.platforms import current_platform
|
||||
+ from vllm.utils.torch_utils import set_random_seed
|
||||
|
||||
from .common import Config, RankTensors, WeightTensors, make_modular_kernel
|
||||
from .parallel_utils import ProcessGroupInfo, parallel_launch_with_config
|
||||
@@ -82,7 +82,7 @@ def rank_worker(
|
||||
config: Config,
|
||||
weights: WeightTensors,
|
||||
):
|
||||
- current_platform.seed_everything(pgi.rank)
|
||||
+ set_random_seed(pgi.rank)
|
||||
|
||||
# sanity check
|
||||
from vllm import envs
|
||||
|
||||
@@ -21,6 +21,7 @@ from vllm.model_executor.layers.fused_moe.fused_batched_moe import (
|
||||
from vllm.model_executor.layers.fused_moe.fused_moe import fused_topk
|
||||
from vllm.platforms import current_platform
|
||||
from vllm.triton_utils import tl
|
||||
+ from vllm.utils.torch_utils import set_random_seed
|
||||
|
||||
MNK_FACTORS = [
|
||||
(1, 128, 128),
|
||||
@@ -115,7 +116,7 @@ def test_batched_mm(
|
||||
):
|
||||
"""Note: float8_e4m3fn is not supported on CUDA architecture < 89,
|
||||
and those tests will be skipped on unsupported hardware."""
|
||||
- current_platform.seed_everything(7)
|
||||
+ set_random_seed(7)
|
||||
|
||||
use_fp8_w8a8 = dtype == torch.float8_e4m3fn
|
||||
|
||||
@@ -252,7 +253,7 @@ def test_fused_moe_batched_experts(
|
||||
):
|
||||
"""Note: float8_e4m3fn is not supported on CUDA architecture < 89,
|
||||
and those tests will be skipped on unsupported hardware."""
|
||||
- current_platform.seed_everything(7)
|
||||
+ set_random_seed(7)
|
||||
|
||||
use_fp8_w8a8 = dtype == torch.float8_e4m3fn
|
||||
|
||||
|
||||
@@ -8,6 +8,7 @@ from tests.kernels.allclose_default import get_default_atol, get_default_rtol
|
||||
from vllm._custom_ops import cpu_fused_moe, cpu_prepack_moe_weight
|
||||
from vllm.model_executor.layers.activation import SiluAndMul, SwigluOAIAndMul
|
||||
from vllm.platforms import current_platform
|
||||
+ from vllm.utils.torch_utils import set_random_seed
|
||||
|
||||
if not current_platform.is_cpu():
|
||||
pytest.skip("skipping CPU-only tests", allow_module_level=True)
|
||||
@@ -114,7 +115,7 @@ def test_cpu_fused_moe(
|
||||
act: str,
|
||||
isa: str,
|
||||
):
|
||||
- current_platform.seed_everything(0)
|
||||
+ set_random_seed(0)
|
||||
|
||||
topk_num = max(expert_num // 2, 1)
|
||||
up_dim = 2 * intermediate_size
|
||||
|
||||
@@ -20,6 +20,7 @@ from vllm.model_executor.layers.fused_moe.cutlass_moe import (
|
||||
from vllm.model_executor.layers.fused_moe.fused_moe import fused_experts, fused_topk
|
||||
from vllm.model_executor.layers.fused_moe.utils import moe_kernel_quantize_input
|
||||
from vllm.platforms import current_platform
|
||||
+ from vllm.utils.torch_utils import set_random_seed
|
||||
|
||||
NUM_EXPERTS = [40, 64]
|
||||
TOP_KS = [6, 8]
|
||||
@@ -277,7 +278,7 @@ def test_cutlass_moe_8_bit_no_graph(
|
||||
workspace_init,
|
||||
ep_size: int | None = None,
|
||||
):
|
||||
- current_platform.seed_everything(7)
|
||||
+ set_random_seed(7)
|
||||
monkeypatch.setenv("VLLM_FUSED_MOE_CHUNK_SIZE", "8192")
|
||||
with set_current_vllm_config(vllm_config):
|
||||
mt = MOETensors8Bit.make_moe_tensors_8bit(m, k, n, e, per_act_token, per_out_ch)
|
||||
@@ -332,7 +333,7 @@ def test_cutlass_moe_8_bit_cuda_graph(
|
||||
monkeypatch,
|
||||
workspace_init,
|
||||
):
|
||||
- current_platform.seed_everything(7)
|
||||
+ set_random_seed(7)
|
||||
monkeypatch.setenv("VLLM_FUSED_MOE_CHUNK_SIZE", "8192")
|
||||
with set_current_vllm_config(vllm_config):
|
||||
dtype = torch.half
|
||||
@@ -469,7 +470,7 @@ def test_run_cutlass_moe_fp8(
|
||||
ep_size: int,
|
||||
workspace_init,
|
||||
):
|
||||
- current_platform.seed_everything(7)
|
||||
+ set_random_seed(7)
|
||||
with set_current_vllm_config(vllm_config):
|
||||
mt = MOETensors8Bit.make_moe_tensors_8bit(
|
||||
m, k, n, e, per_act_token, per_out_channel
|
||||
|
||||
@@ -22,13 +22,13 @@ from vllm.model_executor.layers.fused_moe.config import (
|
||||
)
|
||||
from vllm.model_executor.layers.fused_moe.fused_moe import fused_experts
|
||||
from vllm.model_executor.layers.fused_moe.modular_kernel import FusedMoEModularKernel
|
||||
- from vllm.platforms import current_platform
|
||||
from vllm.utils.deep_gemm import (
|
||||
get_mk_alignment_for_contiguous_layout,
|
||||
is_deep_gemm_e8m0_used,
|
||||
is_deep_gemm_supported,
|
||||
)
|
||||
from vllm.utils.import_utils import has_deep_ep, has_deep_gemm
|
||||
+ from vllm.utils.torch_utils import set_random_seed
|
||||
from vllm.v1.worker.workspace import init_workspace_manager
|
||||
|
||||
from ...utils import multi_gpu_test
|
||||
@@ -367,7 +367,7 @@ def _test_deepep_deepgemm_moe(
|
||||
device = torch.device(f"cuda:{pgi.local_rank}")
|
||||
init_workspace_manager(device)
|
||||
|
||||
- current_platform.seed_everything(pgi.rank)
|
||||
+ set_random_seed(pgi.rank)
|
||||
|
||||
w1 = w1.to(device=torch.cuda.current_device())
|
||||
w2 = w2.to(device=torch.cuda.current_device())
|
||||
@@ -456,7 +456,7 @@ def test_ht_deepep_deepgemm_moe(
|
||||
"""
|
||||
|
||||
m, n, k = mnk
|
||||
- current_platform.seed_everything(7)
|
||||
+ set_random_seed(7)
|
||||
|
||||
if topk > num_experts:
|
||||
pytest.skip(f"Skipping test: topk={topk} > E={num_experts}")
|
||||
@@ -531,7 +531,7 @@ def test_ll_deepep_deepgemm_moe(
|
||||
assert not is_deep_gemm_e8m0_used()
|
||||
|
||||
m, n, k = mnk
|
||||
- current_platform.seed_everything(7)
|
||||
+ set_random_seed(7)
|
||||
|
||||
if topk > num_experts:
|
||||
pytest.skip(f"Skipping test: topk={topk} > E={num_experts}")
|
||||
|
||||
@@ -20,8 +20,8 @@ from vllm.model_executor.layers.fused_moe.modular_kernel import FusedMoEModularK
|
||||
from vllm.model_executor.layers.quantization.utils.fp8_utils import (
|
||||
per_token_group_quant_fp8,
|
||||
)
|
||||
- from vllm.platforms import current_platform
|
||||
from vllm.utils.import_utils import has_deep_ep
|
||||
+ from vllm.utils.torch_utils import set_random_seed
|
||||
from vllm.v1.worker.workspace import init_workspace_manager
|
||||
|
||||
from ...utils import multi_gpu_test
|
||||
@@ -446,7 +446,7 @@ def test_deep_ep_moe(
|
||||
low_latency_mode = False
|
||||
use_fp8_dispatch = False
|
||||
|
||||
- current_platform.seed_everything(7)
|
||||
+ set_random_seed(7)
|
||||
world_size, dp_size = world_dp_size
|
||||
config = TestConfig(dtype=dtype, topk=topk, m=m, k=k, n=n, num_experts=num_experts)
|
||||
|
||||
@@ -507,7 +507,7 @@ def test_low_latency_deep_ep_moe(
|
||||
f"hidden sizes {DeepEPLLPrepareAndFinalize.SUPPORTED_HIDDEN_SIZES}"
|
||||
)
|
||||
|
||||
- current_platform.seed_everything(7)
|
||||
+ set_random_seed(7)
|
||||
world_size, dp_size = world_dp_size
|
||||
config = TestConfig(dtype=dtype, topk=topk, m=m, k=k, n=n, num_experts=num_experts)
|
||||
|
||||
|
||||
@@ -22,6 +22,7 @@ from vllm.model_executor.layers.quantization.utils.flashinfer_utils import (
|
||||
from vllm.model_executor.layers.quantization.utils.fp8_utils import input_to_float8
|
||||
from vllm.model_executor.models.llama4 import Llama4MoE
|
||||
from vllm.platforms import current_platform
|
||||
+ from vllm.utils.torch_utils import set_random_seed
|
||||
|
||||
try:
|
||||
from vllm.utils.flashinfer import has_flashinfer_cutlass_fused_moe
|
||||
@@ -158,7 +159,7 @@ def test_flashinfer_per_tensor_moe_fp8_no_graph(
|
||||
):
|
||||
if not current_platform.has_device_capability(100):
|
||||
pytest.skip("Test is only supported for sm >= 100")
|
||||
- current_platform.seed_everything(7)
|
||||
+ set_random_seed(7)
|
||||
monkeypatch.setenv("VLLM_FUSED_MOE_CHUNK_SIZE", "8192")
|
||||
with set_current_vllm_config(vllm_config):
|
||||
td = TestData.make_moe_tensors_8bit(m, k, n, e, reorder=True)
|
||||
@@ -222,7 +223,7 @@ def test_flashinfer_cutlass_moe_fp8_no_graph(
|
||||
monkeypatch,
|
||||
workspace_init,
|
||||
):
|
||||
- current_platform.seed_everything(7)
|
||||
+ set_random_seed(7)
|
||||
monkeypatch.setenv("VLLM_FUSED_MOE_CHUNK_SIZE", "8192")
|
||||
with set_current_vllm_config(vllm_config):
|
||||
td = TestData.make_moe_tensors_8bit(
|
||||
|
||||
@@ -23,6 +23,7 @@ from vllm.model_executor.layers.fused_moe.fused_moe import fused_topk
|
||||
from vllm.model_executor.layers.fused_moe.modular_kernel import FusedMoEModularKernel
|
||||
from vllm.platforms import current_platform
|
||||
from vllm.utils.flashinfer import has_flashinfer_cutlass_fused_moe
|
||||
+ from vllm.utils.torch_utils import set_random_seed
|
||||
|
||||
if not has_flashinfer_cutlass_fused_moe() or not current_platform.has_device_capability(
|
||||
100
|
||||
@@ -60,7 +61,7 @@ def test_flashinfer_fp4_moe_no_graph(
|
||||
activation: str,
|
||||
workspace_init,
|
||||
):
|
||||
- current_platform.seed_everything(7)
|
||||
+ set_random_seed(7)
|
||||
with set_current_vllm_config(
|
||||
VllmConfig(parallel_config=ParallelConfig(pipeline_parallel_size=1))
|
||||
):
|
||||
|
||||
@@ -19,6 +19,7 @@ from vllm.model_executor.layers.fused_moe.fused_moe import (
|
||||
fused_grouped_topk,
|
||||
)
|
||||
from vllm.platforms import current_platform
|
||||
+ from vllm.utils.torch_utils import set_random_seed
|
||||
|
||||
|
||||
@pytest.mark.skipif(
|
||||
@@ -52,7 +53,7 @@ def test_grouped_topk(
|
||||
)
|
||||
get_cached_compilation_config.cache_clear()
|
||||
|
||||
- current_platform.seed_everything(0)
|
||||
+ set_random_seed(0)
|
||||
hidden_states = torch.randn((n_token, n_hidden), dtype=dtype, device="cuda")
|
||||
gating_output = torch.randn((n_token, n_expert), dtype=dtype, device="cuda")
|
||||
e_score_correction_bias = torch.randn(
|
||||
|
||||
@@ -15,7 +15,7 @@ from vllm.config import VllmConfig, set_current_vllm_config
|
||||
from vllm.platforms import current_platform
|
||||
from vllm.utils.flashinfer import has_flashinfer_cutlass_fused_moe
|
||||
from vllm.utils.import_utils import has_deep_ep, has_deep_gemm, has_pplx
|
||||
- from vllm.utils.torch_utils import cuda_device_count_stateless
|
||||
+ from vllm.utils.torch_utils import cuda_device_count_stateless, set_random_seed
|
||||
from vllm.v1.worker.workspace import init_workspace_manager
|
||||
|
||||
from .modular_kernel_tools.common import (
|
||||
@@ -82,7 +82,7 @@ def rank_worker(
|
||||
device = torch.device(f"cuda:{pgi.local_rank}")
|
||||
init_workspace_manager(device)
|
||||
|
||||
- current_platform.seed_everything(pgi.rank)
|
||||
+ set_random_seed(pgi.rank)
|
||||
|
||||
# sanity check
|
||||
from vllm import envs
|
||||
|
||||
@@ -34,6 +34,7 @@ from vllm.model_executor.layers.fused_moe.prepare_finalize import (
|
||||
)
|
||||
from vllm.model_executor.layers.utils import shuffle_weight
|
||||
from vllm.platforms import current_platform
|
||||
+ from vllm.utils.torch_utils import set_random_seed
|
||||
|
||||
MNK = [
|
||||
(1, 512, 384),
|
||||
@@ -211,7 +212,7 @@ def test_oai_triton_moe(
|
||||
unfused: bool,
|
||||
workspace_init,
|
||||
):
|
||||
- current_platform.seed_everything(0)
|
||||
+ set_random_seed(0)
|
||||
(
|
||||
w1,
|
||||
w2,
|
||||
|
||||
@@ -60,6 +60,7 @@ from vllm.model_executor.layers.quantization.utils.quant_utils import quantize_w
|
||||
from vllm.model_executor.models.mixtral import MixtralMoE
|
||||
from vllm.platforms import current_platform
|
||||
from vllm.scalar_type import ScalarType, scalar_types
|
||||
+ from vllm.utils.torch_utils import set_random_seed
|
||||
from vllm.v1.worker.workspace import init_workspace_manager
|
||||
|
||||
NUM_EXPERTS = [8, 64, 192]
|
||||
@@ -234,7 +235,7 @@ def test_fused_moe(
|
||||
monkeypatch,
|
||||
workspace_init,
|
||||
):
|
||||
- current_platform.seed_everything(7)
|
||||
+ set_random_seed(7)
|
||||
|
||||
monkeypatch.setenv("VLLM_FUSED_MOE_CHUNK_SIZE", str(chunk_size))
|
||||
|
||||
|
||||
@@ -14,12 +14,13 @@ from vllm.model_executor.layers.fused_moe.moe_align_block_size import (
|
||||
)
|
||||
from vllm.platforms import current_platform
|
||||
from vllm.utils.math_utils import round_up
|
||||
+ from vllm.utils.torch_utils import set_random_seed
|
||||
|
||||
NUM_TOKENS = [1, 3, 256, 2256, 4096]
|
||||
NUM_EXPERTS = [32, 160, 256, 257]
|
||||
TOP_KS = [1, 2, 16, 32]
|
||||
BLOCK_SIZES = [32, 128]
|
||||
- current_platform.seed_everything(0)
|
||||
+ set_random_seed(0)
|
||||
|
||||
|
||||
def _group_tokens_by_expert(
|
||||
|
||||
@@ -17,11 +17,12 @@ from vllm.model_executor.layers.fused_moe.moe_permute_unpermute import (
|
||||
moe_unpermute,
|
||||
)
|
||||
from vllm.platforms import current_platform
|
||||
+ from vllm.utils.torch_utils import set_random_seed
|
||||
|
||||
NUM_EXPERTS = [16, 64, 256]
|
||||
TOP_KS = [2, 6, 8]
|
||||
EP_SIZE = [1, 4, 16]
|
||||
- current_platform.seed_everything(0)
|
||||
+ set_random_seed(0)
|
||||
|
||||
if current_platform.is_rocm():
|
||||
pytest.skip(
|
||||
@@ -226,7 +227,7 @@ def test_moe_permute_unpermute(
|
||||
n_local_expert, expert_map, _ = determine_expert_map(ep_size, ep_rank, n_expert)
|
||||
expert_map = expert_map.cuda()
|
||||
start_expert = n_local_expert * ep_rank
|
||||
- current_platform.seed_everything(0)
|
||||
+ set_random_seed(0)
|
||||
hidden_states = torch.randn((n_token, n_hidden), device="cuda").to(dtype)
|
||||
gating_output = torch.randn((n_token, n_expert), device="cuda").to(dtype)
|
||||
topk_weights, topk_ids, token_expert_indices = fused_topk(
|
||||
|
||||
@@ -16,6 +16,7 @@ from vllm.model_executor.layers.fused_moe.config import nvfp4_moe_quant_config
|
||||
from vllm.model_executor.layers.fused_moe.cutlass_moe import cutlass_moe_fp4
|
||||
from vllm.model_executor.layers.fused_moe.fused_moe import fused_topk
|
||||
from vllm.platforms import current_platform
|
||||
+ from vllm.utils.torch_utils import set_random_seed
|
||||
|
||||
if not current_platform.has_device_capability(100):
|
||||
pytest.skip(
|
||||
@@ -42,7 +43,7 @@ MNK_FACTORS = [
|
||||
def test_cutlass_fp4_moe_no_graph(
|
||||
m: int, n: int, k: int, e: int, topk: int, dtype: torch.dtype, workspace_init
|
||||
):
|
||||
- current_platform.seed_everything(7)
|
||||
+ set_random_seed(7)
|
||||
with set_current_vllm_config(
|
||||
VllmConfig(parallel_config=ParallelConfig(pipeline_parallel_size=1))
|
||||
):
|
||||
|
||||
@@ -14,6 +14,7 @@ from vllm.model_executor.layers.fused_moe.fused_moe import fused_topk
|
||||
from vllm.model_executor.layers.fused_moe.modular_kernel import FusedMoEModularKernel
|
||||
from vllm.platforms import current_platform
|
||||
from vllm.utils.math_utils import cdiv
|
||||
+ from vllm.utils.torch_utils import set_random_seed
|
||||
|
||||
from ...utils import multi_gpu_test
|
||||
from .parallel_utils import ProcessGroupInfo, parallel_launch
|
||||
@@ -290,7 +291,7 @@ def test_cutlass_moe_pplx(
|
||||
world_dp_size: tuple[int, int],
|
||||
use_internode: bool,
|
||||
):
|
||||
- current_platform.seed_everything(7)
|
||||
+ set_random_seed(7)
|
||||
|
||||
with set_current_vllm_config(vllm_config):
|
||||
dtype = torch.half
|
||||
|
||||
@@ -44,8 +44,8 @@ from vllm.model_executor.layers.fused_moe.modular_kernel import FusedMoEModularK
|
||||
from vllm.model_executor.layers.fused_moe.topk_weight_and_reduce import (
|
||||
TopKWeightAndReduceDelegate,
|
||||
)
|
||||
- from vllm.platforms import current_platform
|
||||
from vllm.utils.math_utils import round_up
|
||||
+ from vllm.utils.torch_utils import set_random_seed
|
||||
from vllm.v1.worker.workspace import init_workspace_manager
|
||||
|
||||
from ...utils import multi_gpu_test
|
||||
@@ -184,7 +184,7 @@ def test_fused_moe_batched_experts(
|
||||
dtype: torch.dtype,
|
||||
workspace_init,
|
||||
):
|
||||
- current_platform.seed_everything(7)
|
||||
+ set_random_seed(7)
|
||||
|
||||
a = torch.randn((m, k), device="cuda", dtype=dtype) / 10
|
||||
w1 = torch.randn((e, 2 * n, k), device="cuda", dtype=dtype) / 10
|
||||
@@ -491,7 +491,7 @@ def test_pplx_prepare_finalize_slow(
|
||||
if per_act_token_quant and block_shape is not None:
|
||||
pytest.skip("Skip illegal quantization combination")
|
||||
|
||||
- current_platform.seed_everything(7)
|
||||
+ set_random_seed(7)
|
||||
m, n, k = mnk
|
||||
world_size, dp_size = world_dp_size
|
||||
device = "cuda"
|
||||
@@ -809,7 +809,7 @@ def test_pplx_moe_slow(
|
||||
block_shape: list[int] | None,
|
||||
use_internode: bool,
|
||||
):
|
||||
- current_platform.seed_everything(7)
|
||||
+ set_random_seed(7)
|
||||
m, n, k = mnk
|
||||
world_size, dp_size = world_dp_size
|
||||
|
||||
@@ -888,7 +888,7 @@ def _pplx_test_loop(
|
||||
new_vllm_config.parallel_config.enable_expert_parallel = True
|
||||
_set_vllm_config(new_vllm_config, pgi.world_size, pgi.rank, pgi.local_rank)
|
||||
|
||||
- current_platform.seed_everything(7)
|
||||
+ set_random_seed(7)
|
||||
combos = itertools.product(
|
||||
PPLX_COMBOS, NUM_EXPERTS, TOP_KS, DTYPES, [False, True], [None, [128, 128]]
|
||||
)
|
||||
@@ -982,7 +982,7 @@ def test_pplx_prepare_finalize(
|
||||
world_dp_size: tuple[int, int],
|
||||
use_internode: bool,
|
||||
):
|
||||
- current_platform.seed_everything(7)
|
||||
+ set_random_seed(7)
|
||||
world_size, dp_size = world_dp_size
|
||||
parallel_launch(
|
||||
world_size * dp_size,
|
||||
@@ -1005,7 +1005,7 @@ def test_pplx_moe(
|
||||
use_internode: bool,
|
||||
use_shared_experts: bool,
|
||||
):
|
||||
- current_platform.seed_everything(7)
|
||||
+ set_random_seed(7)
|
||||
world_size, dp_size = world_dp_size
|
||||
parallel_launch(
|
||||
world_size,
|
||||
|
||||
@@ -13,6 +13,7 @@ from vllm.model_executor.layers.fused_moe.batched_deep_gemm_moe import (
|
||||
from vllm.platforms import current_platform
|
||||
from vllm.utils.deep_gemm import DeepGemmQuantScaleFMT, has_deep_gemm
|
||||
from vllm.utils.math_utils import cdiv, round_up
|
||||
+ from vllm.utils.torch_utils import set_random_seed
|
||||
|
||||
if current_platform.is_fp8_fnuz():
|
||||
pytest.skip(
|
||||
@@ -201,7 +202,7 @@ def token_random(E, T, H2, tokens_per_expert):
|
||||
@torch.inference_mode()
|
||||
def test_silu_mul_fp8_quant_deep_gemm(E: int, T: int, H: int, fp8_type: torch.dtype):
|
||||
group_size = 128
|
||||
- current_platform.seed_everything(42)
|
||||
+ set_random_seed(42)
|
||||
|
||||
tokens_per_expert = torch.randint(
|
||||
low=0,
|
||||
|
||||
@@ -11,6 +11,7 @@ from vllm.model_executor.layers.quantization.utils.fp8_utils import (
|
||||
from vllm.platforms import current_platform
|
||||
from vllm.triton_utils import triton
|
||||
from vllm.utils.deep_gemm import is_deep_gemm_e8m0_used
|
||||
+ from vllm.utils.torch_utils import set_random_seed
|
||||
|
||||
FLOAT8_DTYPE = torch.float8_e4m3fn
|
||||
GROUP_SIZE = 128
|
||||
@@ -72,7 +73,7 @@ def reference(x: torch.Tensor, use_ue8m0: bool) -> tuple[torch.Tensor, torch.Ten
|
||||
reason="ROCm does not support DeepGemm.",
|
||||
)
|
||||
def test_silu_mul_fp8_quant_deep_gemm(T: int, N: int):
|
||||
- current_platform.seed_everything(42)
|
||||
+ set_random_seed(42)
|
||||
|
||||
input = torch.rand((T, N), dtype=torch.bfloat16, device="cuda")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user