[Bugfix] Add `init_workspace_manager` to MoE kernel benchmarks (#31042)
Signed-off-by: mgoin <mgoin64@gmail.com>
This commit is contained in:
@@ -15,6 +15,7 @@ from vllm.model_executor.layers.fused_moe.cutlass_moe import cutlass_moe_fp8
|
||||
from vllm.model_executor.layers.fused_moe.fused_moe import fused_experts, fused_topk
|
||||
from vllm.platforms import current_platform
|
||||
from vllm.utils.argparse_utils import FlexibleArgumentParser
|
||||
from vllm.v1.worker.workspace import init_workspace_manager
|
||||
|
||||
# Weight shapes for different models: [num_experts, topk, hidden_size,
|
||||
# intermediate_size]
|
||||
@@ -297,6 +298,10 @@ def bench_run(
|
||||
|
||||
|
||||
def main(args):
|
||||
# Initialize workspace manager (required for CUTLASS MoE kernels)
|
||||
device = torch.device("cuda:0")
|
||||
init_workspace_manager(device)
|
||||
|
||||
print("Benchmarking models:")
|
||||
for i, model in enumerate(args.models):
|
||||
print(f"[{i}] {model}")
|
||||
|
||||
@@ -21,6 +21,7 @@ from vllm.model_executor.layers.fused_moe.cutlass_moe import cutlass_moe_fp4
|
||||
from vllm.model_executor.layers.fused_moe.fused_moe import fused_experts, fused_topk
|
||||
from vllm.scalar_type import scalar_types
|
||||
from vllm.utils.argparse_utils import FlexibleArgumentParser
|
||||
from vllm.v1.worker.workspace import init_workspace_manager
|
||||
|
||||
WEIGHT_SHAPES_MOE = {
|
||||
"nvidia/DeepSeek-R1-FP4": [
|
||||
@@ -441,6 +442,10 @@ def bench_run(
|
||||
|
||||
|
||||
def main(args):
|
||||
# Initialize workspace manager (required for CUTLASS MoE kernels)
|
||||
device = torch.device("cuda:0")
|
||||
init_workspace_manager(device)
|
||||
|
||||
print("Benchmarking models:")
|
||||
for i, model in enumerate(args.models):
|
||||
print(f"[{i}] {model}")
|
||||
@@ -14,6 +14,7 @@ from vllm.model_executor.layers.fused_moe.fused_moe import (
|
||||
fused_topk,
|
||||
)
|
||||
from vllm.utils.argparse_utils import FlexibleArgumentParser
|
||||
from vllm.v1.worker.workspace import init_workspace_manager
|
||||
|
||||
DEFAULT_MODELS = [
|
||||
"mistralai/Mixtral-8x7B-Instruct-v0.1",
|
||||
@@ -364,6 +365,10 @@ def bench_run(
|
||||
|
||||
|
||||
def main(args):
|
||||
# Initialize workspace manager (required for CUTLASS MoE kernels)
|
||||
device = torch.device("cuda:0")
|
||||
init_workspace_manager(device)
|
||||
|
||||
print("Benchmarking models:")
|
||||
for i, model in enumerate(args.models):
|
||||
print(f"[{i}] {model}")
|
||||
|
||||
Reference in New Issue
Block a user