Remove ScaledActivation for AWQ (#10057)

Signed-off-by: mgoin <michael@neuralmagic.com>
2024-11-06 09:27:06 -05:00
parent 406d4cc480
commit 399c798608
34 changed files with 19 additions and 124 deletions
--- a/vllm/model_executor/layers/activation.py
+++ b/vllm/model_executor/layers/activation.py
@@ -9,7 +9,6 @@ import torch.nn.functional as F
 from vllm.distributed import (divide, get_tensor_model_parallel_rank,
                              get_tensor_model_parallel_world_size)
 from vllm.model_executor.custom_op import CustomOp
-from vllm.model_executor.layers.quantization import QuantizationConfig
 from vllm.model_executor.utils import set_weight_attrs
 from vllm.utils import LazyDict

@@ -277,28 +276,14 @@ _ACTIVATION_REGISTRY = LazyDict({
 })


-def get_act_fn(
-    act_fn_name: str,
-    quant_config: Optional[QuantizationConfig] = None,
-    intermediate_size: Optional[int] = None,
-    input_is_parallel: bool = True,
-    params_dtype: Optional[torch.dtype] = None,
-) -> nn.Module:
+def get_act_fn(act_fn_name: str) -> nn.Module:
    """Get an activation function by name."""
    act_fn_name = act_fn_name.lower()
    if act_fn_name not in _ACTIVATION_REGISTRY:
        raise ValueError(
            f"Activation function {act_fn_name!r} is not supported.")

-    act_fn = _ACTIVATION_REGISTRY[act_fn_name]
-    if (quant_config is not None
-            and act_fn_name in quant_config.get_scaled_act_names()):
-        if intermediate_size is None:
-            raise ValueError("intermediate_size must be specified for scaled "
-                             "activation functions.")
-        return ScaledActivation(act_fn, intermediate_size, input_is_parallel,
-                                params_dtype)
-    return act_fn
+    return _ACTIVATION_REGISTRY[act_fn_name]


 _ACTIVATION_AND_MUL_REGISTRY = LazyDict({
@@ -307,25 +292,11 @@ _ACTIVATION_AND_MUL_REGISTRY = LazyDict({
 })


-def get_act_and_mul_fn(
-    act_fn_name: str,
-    quant_config: Optional[QuantizationConfig] = None,
-    intermediate_size: Optional[int] = None,
-    input_is_parallel: bool = True,
-    params_dtype: Optional[torch.dtype] = None,
-) -> nn.Module:
+def get_act_and_mul_fn(act_fn_name: str) -> nn.Module:
    """Get an activation-and-mul (i.e. SiluAndMul) function by name."""
    act_fn_name = act_fn_name.lower()
    if act_fn_name not in _ACTIVATION_AND_MUL_REGISTRY:
        raise ValueError(
            f"Activation function {act_fn_name!r} is not supported.")

-    act_fn = _ACTIVATION_AND_MUL_REGISTRY[act_fn_name]
-    if (quant_config is not None
-            and act_fn_name in quant_config.get_scaled_act_names()):
-        if intermediate_size is None:
-            raise ValueError("intermediate_size must be specified for scaled "
-                             "activation functions.")
-        return ScaledActivation(act_fn, intermediate_size, input_is_parallel,
-                                params_dtype)
-    return act_fn
+    return _ACTIVATION_AND_MUL_REGISTRY[act_fn_name]