[Refactor] Remove dead code for attention benchmark script (#35418)

Signed-off-by: yewentao256 <zhyanwentao@126.com>
Author: Wentao Ye
Committed by: GitHub
Date: 2026-02-26 12:53:46 -05:00
Parent commit: d940607629
Commit: 05970c772c

2 changed files with 0 additions and 95 deletions

Changed file 1 of 2:

@@ -15,7 +15,6 @@ from .common import (
     BenchmarkConfig,
     BenchmarkResult,
     MockLayer,
-    MockModelConfig,
     ResultsFormatter,
     get_attention_scale,
     is_mla_backend,
@@ -36,7 +35,6 @@ __all__ = [
     "ResultsFormatter",
     # Mock objects
     "MockLayer",
-    "MockModelConfig",
     # Utilities
     "setup_mla_dims",
     "get_attention_scale",

Changed file 2 of 2:

@@ -10,7 +10,6 @@ from dataclasses import asdict, dataclass
 from pathlib import Path
 from typing import Any
-import numpy as np
 import torch
 from batch_spec import get_batch_type, parse_batch_spec
 from rich.console import Console
@@ -62,10 +61,7 @@ class MockHfConfig:
 # Import AttentionLayerBase at module level to avoid circular dependencies
 try:
     from vllm.model_executor.layers.attention_layer_base import AttentionLayerBase
-    _HAS_ATTENTION_LAYER_BASE = True
 except ImportError:
-    _HAS_ATTENTION_LAYER_BASE = False
     AttentionLayerBase = object  # Fallback
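Judging by the hunk's line counts (10 lines down to 7), the deletion here is the _HAS_ATTENTION_LAYER_BASE flag, which was assigned on both import paths but not consulted elsewhere in the shown code. A minimal sketch of the guarded import that presumably remains (an assumption based on the diff, not the exact resulting file):

# Assumed post-cleanup form: guarded import with a plain fallback base class.
try:
    from vllm.model_executor.layers.attention_layer_base import AttentionLayerBase
except ImportError:
    AttentionLayerBase = object  # Fallback when vLLM internals are unavailable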
@@ -167,95 +163,6 @@ class MockLayer(AttentionLayerBase):
         return self._kv_cache_spec

-
-class MockModelConfig:
-    """Mock model configuration."""
-
-    def __init__(
-        self,
-        num_q_heads: int,
-        num_kv_heads: int,
-        head_dim: int,
-        dtype: torch.dtype = torch.float16,
-        max_model_len: int = 32768,
-    ):
-        self._n_q = num_q_heads
-        self._n_kv = num_kv_heads
-        self._d = head_dim
-        self.dtype = dtype
-        self.max_model_len = max_model_len
-
-    def get_num_attention_heads(self, _=None) -> int:
-        return self._n_q
-
-    def get_num_kv_heads(self, _=None) -> int:
-        return self._n_kv
-
-    def get_head_size(self) -> int:
-        return self._d
-
-    def get_num_layers(self) -> int:
-        """Mock method for layer count queries."""
-        return 1
-
-    def get_sliding_window_for_layer(self, _layer_idx: int):
-        """Mock method for sliding window queries."""
-        return None
-
-    def get_logits_soft_cap_for_layer(self, _layer_idx: int):
-        """Mock method for logits soft cap queries."""
-        return None
-
-    def get_sm_scale_for_layer(self, _layer_idx: int) -> float:
-        """Mock method for SM scale queries."""
-        return 1.0 / (self.get_head_size() ** 0.5)
-
-
-class MockParallelConfig:
-    """Mock parallel configuration."""
-    pass
-
-
-class MockCompilationConfig:
-    """Mock compilation configuration."""
-
-    def __init__(self):
-        self.full_cuda_graph = False
-        self.static_forward_context = {}
-
-
-class MockVLLMConfig:
-    """Mock VLLM configuration."""
-
-    def __init__(self):
-        self.compilation_config = MockCompilationConfig()
-
-
-class MockRunner:
-    """Mock GPU runner for metadata builders."""
-
-    def __init__(
-        self,
-        seq_lens: np.ndarray,
-        query_start_locs: np.ndarray,
-        device: torch.device,
-        num_q_heads: int,
-        num_kv_heads: int,
-        head_dim: int,
-        dtype: torch.dtype,
-    ):
-        self.model_config = MockModelConfig(num_q_heads, num_kv_heads, head_dim, dtype)
-        self.parallel_config = MockParallelConfig()
-        self.vllm_config = MockVLLMConfig()
-        self.seq_lens_np = seq_lens
-        self.query_start_loc_np = query_start_locs
-        self.device = device
-        self.attention_chunk_size = None
-        self.num_query_heads = num_q_heads
-        self.num_kv_heads = num_kv_heads
-        self.dtype = dtype

 @dataclass
 class ParameterSweep:
     """Configuration for sweeping a backend parameter."""