Convert formatting to use ruff instead of yapf + isort (#26247)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
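The reformat is purely mechanical, so behaviour is unchanged; for example, the repeated rewrite of "(x.shape[:-1] + (d, ))" to "x.shape[:-1] + (d,)" in the hunks below only changes spelling, not the resulting tuple. A minimal sanity check (the tensor shape here is an arbitrary illustration, not taken from the diff):

import torch

x = torch.randn(4, 16)  # any (num_tokens, 2 * d) activation input
d = x.shape[-1] // 2

# yapf spelling (before) and ruff spelling (after) build the same shape tuple.
assert (x.shape[:-1] + (d, )) == x.shape[:-1] + (d,) == (4, 8)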
@@ -1,6 +1,7 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Custom activation functions."""

import math
from typing import Optional

@@ -8,8 +9,11 @@ import torch
import torch.nn as nn
import torch.nn.functional as F

from vllm.distributed import (divide, get_tensor_model_parallel_rank,
                              get_tensor_model_parallel_world_size)
from vllm.distributed import (
    divide,
    get_tensor_model_parallel_rank,
    get_tensor_model_parallel_world_size,
)
from vllm.logger import init_logger
from vllm.model_executor.custom_op import CustomOp
from vllm.model_executor.utils import set_weight_attrs
@@ -32,7 +36,7 @@ class FatreluAndMul(CustomOp):
        return: (num_tokens, d) or (batch_size, seq_len, d)
    """

    def __init__(self, threshold: float = 0.):
    def __init__(self, threshold: float = 0.0):
        super().__init__()
        self.threshold = threshold
        if current_platform.is_cuda_alike():
@@ -49,7 +53,7 @@ class FatreluAndMul(CustomOp):

    def forward_cuda(self, x: torch.Tensor) -> torch.Tensor:
        d = x.shape[-1] // 2
        output_shape = (x.shape[:-1] + (d, ))
        output_shape = x.shape[:-1] + (d,)
        out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
        self.op(out, x, self.threshold)
        return out
@@ -72,6 +76,7 @@ class SiluAndMul(CustomOp):
            self.op = torch.ops._C.silu_and_mul
        elif current_platform.is_xpu():
            from vllm._ipex_ops import ipex_ops

            self.op = ipex_ops.silu_and_mul
        elif current_platform.is_cpu():
            self._forward_method = self.forward_native
@@ -83,14 +88,14 @@ class SiluAndMul(CustomOp):

    def forward_cuda(self, x: torch.Tensor) -> torch.Tensor:
        d = x.shape[-1] // 2
        output_shape = (x.shape[:-1] + (d, ))
        output_shape = x.shape[:-1] + (d,)
        out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
        self.op(out, x)
        return out

    def forward_xpu(self, x: torch.Tensor) -> torch.Tensor:
        d = x.shape[-1] // 2
        output_shape = (x.shape[:-1] + (d, ))
        output_shape = x.shape[:-1] + (d,)
        out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
        self.op(out, x)
        return out
@@ -113,6 +118,7 @@ class MulAndSilu(CustomOp):
            self.op = torch.ops._C.mul_and_silu
        elif current_platform.is_xpu():
            from vllm._ipex_ops import ipex_ops

            self.op = ipex_ops.silu_and_mul
        elif current_platform.is_cpu():
            self._forward_method = self.forward_native
@@ -124,7 +130,7 @@ class MulAndSilu(CustomOp):

    def forward_cuda(self, x: torch.Tensor) -> torch.Tensor:
        d = x.shape[-1] // 2
        output_shape = (x.shape[:-1] + (d, ))
        output_shape = x.shape[:-1] + (d,)
        out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
        self.op(out, x)
        return out
@@ -156,10 +162,8 @@ class GeluAndMulSparse(CustomOp):

        # Sparsity.
        if activation_sparsity == 0.0:
            raise ValueError(
                "activation_sparsity is 0.0. Please use GeluAndMul.")
        target_sparsity_tensor = torch.tensor(activation_sparsity,
                                              dtype=torch.float32)
            raise ValueError("activation_sparsity is 0.0. Please use GeluAndMul.")
        target_sparsity_tensor = torch.tensor(activation_sparsity, dtype=torch.float32)
        normal_dist = torch.distributions.normal.Normal(0, 1)
        self.std_multiplier = normal_dist.icdf(target_sparsity_tensor)

@@ -207,6 +211,7 @@ class GeluAndMul(CustomOp):
                self.op = torch.ops._C.gelu_tanh_and_mul
        elif current_platform.is_xpu():
            from vllm._ipex_ops import ipex_ops

            if approximate == "none":
                self.op = ipex_ops.gelu_and_mul
            else:
@@ -219,20 +224,20 @@ class GeluAndMul(CustomOp):

    def forward_cuda(self, x: torch.Tensor) -> torch.Tensor:
        d = x.shape[-1] // 2
        output_shape = (x.shape[:-1] + (d, ))
        output_shape = x.shape[:-1] + (d,)
        out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
        self.op(out, x)
        return out

    def forward_xpu(self, x: torch.Tensor) -> torch.Tensor:
        d = x.shape[-1] // 2
        output_shape = (x.shape[:-1] + (d, ))
        output_shape = x.shape[:-1] + (d,)
        out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
        self.op(out, x)
        return out

    def extra_repr(self) -> str:
        return f'approximate={repr(self.approximate)}'
        return f"approximate={repr(self.approximate)}"


@CustomOp.register("swigluoai_and_mul")
@@ -255,7 +260,7 @@ class SwigluOAIAndMul(CustomOp):

    def forward_cuda(self, x: torch.Tensor) -> torch.Tensor:
        d = x.shape[-1] // 2
        output_shape = (x.shape[:-1] + (d, ))
        output_shape = x.shape[:-1] + (d,)
        out = torch.empty(output_shape, dtype=x.dtype, device=x.device)
        torch.ops._C.swigluoai_and_mul(out, x, self.alpha, self.limit)
        return out
@@ -266,20 +271,19 @@ class SwigluOAIAndMul(CustomOp):

@CustomOp.register("gelu_new")
class NewGELU(CustomOp):

    def __init__(self):
        super().__init__()
        if current_platform.is_cuda_alike() or current_platform.is_cpu():
            self.op = torch.ops._C.gelu_new
        elif current_platform.is_xpu():
            from vllm._ipex_ops import ipex_ops

            self.op = ipex_ops.gelu_new

    def forward_native(self, x: torch.Tensor) -> torch.Tensor:
        """PyTorch-native implementation equivalent to forward()."""
        c = math.sqrt(2.0 / math.pi)
        return 0.5 * x * (1.0 + torch.tanh(c *
                                           (x + 0.044715 * torch.pow(x, 3.0))))
        return 0.5 * x * (1.0 + torch.tanh(c * (x + 0.044715 * torch.pow(x, 3.0))))

    def forward_cuda(self, x: torch.Tensor) -> torch.Tensor:
        out = torch.empty_like(x)
@@ -292,19 +296,18 @@ class NewGELU(CustomOp):

@CustomOp.register("gelu_fast")
class FastGELU(CustomOp):

    def __init__(self):
        super().__init__()
        if current_platform.is_cuda_alike() or current_platform.is_cpu():
            self.op = torch.ops._C.gelu_fast
        elif current_platform.is_xpu():
            from vllm._ipex_ops import ipex_ops

            self.op = ipex_ops.gelu_fast

    def forward_native(self, x: torch.Tensor) -> torch.Tensor:
        """PyTorch-native implementation equivalent to forward()."""
        return 0.5 * x * (1.0 + torch.tanh(x * 0.7978845608 *
                                           (1.0 + 0.044715 * x * x)))
        return 0.5 * x * (1.0 + torch.tanh(x * 0.7978845608 * (1.0 + 0.044715 * x * x)))

    def forward_cuda(self, x: torch.Tensor) -> torch.Tensor:
        out = torch.empty_like(x)
@@ -324,6 +327,7 @@ class QuickGELU(CustomOp):
            self.op = torch.ops._C.gelu_quick
        elif current_platform.is_xpu():
            from vllm._ipex_ops import ipex_ops

            self.op = ipex_ops.gelu_quick

    def forward_native(self, x: torch.Tensor) -> torch.Tensor:
@@ -355,7 +359,7 @@ class ReLUSquaredActivation(CustomOp):
        return torch.square(F.relu(x))

    def forward_cuda(self, x: torch.Tensor) -> torch.Tensor:
        #TODO : implement cuda kernels
        # TODO : implement cuda kernels
        return self.forward_native(x)


@@ -378,12 +382,15 @@ class XIELU(CustomOp):
    ):
        super().__init__()
        self.alpha_p = nn.Parameter(
            torch.log(torch.exp(torch.tensor(alpha_p_init, dtype=dtype)) -
                      1).unsqueeze(0))
            torch.log(torch.exp(torch.tensor(alpha_p_init, dtype=dtype)) - 1).unsqueeze(
                0
            )
        )
        self.alpha_n = nn.Parameter(
            torch.log(
                torch.exp(torch.tensor(alpha_n_init - beta, dtype=dtype)) -
                1).unsqueeze(0))
                torch.exp(torch.tensor(alpha_n_init - beta, dtype=dtype)) - 1
            ).unsqueeze(0)
        )
        self.register_buffer("beta", torch.tensor(beta, dtype=dtype))
        self.register_buffer("eps", torch.tensor(eps, dtype=dtype))
        self.with_vector_loads = with_vector_loads
@@ -403,8 +410,10 @@ class XIELU(CustomOp):
                    self._xielu_cuda_fn = allow_in_graph(self._xielu_cuda)
                    msg += " Enabled torch._dynamo for xIELU CUDA."
                except Exception as err:
                    msg += (f" Could not enable torch._dynamo for xIELU ({err}) - "
                            "this may result in slower performance.")
                    msg += (
                        f" Could not enable torch._dynamo for xIELU ({err}) - "
                        "this may result in slower performance."
                    )
                    self._xielu_cuda_fn = self._xielu_cuda
                logger.warning_once(msg)
            except Exception as err:
@@ -421,14 +430,12 @@ class XIELU(CustomOp):
        return torch.where(
            x > 0,
            alpha_p * x * x + self.beta * x,
            (torch.expm1(torch.min(x, self.eps)) - x) * alpha_n +
            self.beta * x,
            (torch.expm1(torch.min(x, self.eps)) - x) * alpha_n + self.beta * x,
        )

    def _xielu_cuda(self, x: torch.Tensor) -> torch.Tensor:
        """Firewall function to prevent torch.compile from seeing .item()"""
        assert self._xielu_cuda_obj is not None, (
            "XIELU CUDA object must not be None")
        assert self._xielu_cuda_obj is not None, "XIELU CUDA object must not be None"
        original_shape = x.shape
        # CUDA kernel expects 3D tensors, reshape if needed
        while x.dim() < 3:
@@ -486,14 +493,14 @@ class ScaledActivation(nn.Module):
        self.input_is_parallel = input_is_parallel
        if input_is_parallel:
            tp_size = get_tensor_model_parallel_world_size()
            intermediate_size_per_partition = divide(intermediate_size,
                                                     tp_size)
            intermediate_size_per_partition = divide(intermediate_size, tp_size)
        else:
            intermediate_size_per_partition = intermediate_size
        if params_dtype is None:
            params_dtype = torch.get_default_dtype()
        self.scales = nn.Parameter(
            torch.empty(intermediate_size_per_partition, dtype=params_dtype))
            torch.empty(intermediate_size_per_partition, dtype=params_dtype)
        )
        set_weight_attrs(self.scales, {"weight_loader": self.weight_loader})

    def forward(self, x: torch.Tensor) -> torch.Tensor:
@@ -510,30 +517,21 @@ class ScaledActivation(nn.Module):
        param_data.copy_(loaded_weight)


_ACTIVATION_REGISTRY = LazyDict({
    "gelu":
    lambda: nn.GELU(),
    "gelu_fast":
    lambda: FastGELU(),
    "gelu_new":
    lambda: NewGELU(),
    "gelu_pytorch_tanh":
    lambda: nn.GELU(approximate="tanh"),
    "relu":
    lambda: nn.ReLU(),
    "relu2":
    lambda: ReLUSquaredActivation(),
    "silu":
    lambda: nn.SiLU(),
    "quick_gelu":
    lambda: QuickGELU(),
    "tanh":
    lambda: nn.Tanh(),
    "sigmoid":
    lambda: nn.Sigmoid(),
    "xielu":
    lambda: XIELU(),
})
_ACTIVATION_REGISTRY = LazyDict(
    {
        "gelu": lambda: nn.GELU(),
        "gelu_fast": lambda: FastGELU(),
        "gelu_new": lambda: NewGELU(),
        "gelu_pytorch_tanh": lambda: nn.GELU(approximate="tanh"),
        "relu": lambda: nn.ReLU(),
        "relu2": lambda: ReLUSquaredActivation(),
        "silu": lambda: nn.SiLU(),
        "quick_gelu": lambda: QuickGELU(),
        "tanh": lambda: nn.Tanh(),
        "sigmoid": lambda: nn.Sigmoid(),
        "xielu": lambda: XIELU(),
    }
)


def get_act_fn(act_fn_name: str) -> nn.Module:
@@ -547,29 +545,25 @@ def get_act_fn(act_fn_name: str) -> nn.Module:
        act_fn_name = activation_name

    if act_fn_name not in _ACTIVATION_REGISTRY:
        raise ValueError(
            f"Activation function {act_fn_name!r} is not supported.")
        raise ValueError(f"Activation function {act_fn_name!r} is not supported.")

    return _ACTIVATION_REGISTRY[act_fn_name]


_ACTIVATION_AND_MUL_REGISTRY = LazyDict({
    "gelu":
    lambda: GeluAndMul(),
    "silu":
    lambda: SiluAndMul(),
    "geglu":
    lambda: GeluAndMul(),
    "swigluoai":
    lambda *args, **kwargs: SwigluOAIAndMul(*args, **kwargs),
})
_ACTIVATION_AND_MUL_REGISTRY = LazyDict(
    {
        "gelu": lambda: GeluAndMul(),
        "silu": lambda: SiluAndMul(),
        "geglu": lambda: GeluAndMul(),
        "swigluoai": lambda *args, **kwargs: SwigluOAIAndMul(*args, **kwargs),
    }
)


def get_act_and_mul_fn(act_fn_name: str) -> nn.Module:
    """Get an activation-and-mul (i.e. SiluAndMul) function by name."""
    act_fn_name = act_fn_name.lower()
    if act_fn_name not in _ACTIVATION_AND_MUL_REGISTRY:
        raise ValueError(
            f"Activation function {act_fn_name!r} is not supported.")
        raise ValueError(f"Activation function {act_fn_name!r} is not supported.")

    return _ACTIVATION_AND_MUL_REGISTRY[act_fn_name]
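For context, the reformatted registries keep the same lookup API. A minimal usage sketch, assuming vLLM is installed and that the file shown here is vllm/model_executor/layers/activation.py (the module path is an assumption, not stated in the diff):

import torch

from vllm.model_executor.layers.activation import get_act_and_mul_fn, get_act_fn

act = get_act_fn("gelu_new")        # plain activation module (NewGELU)
gated = get_act_and_mul_fn("silu")  # fused gate-and-multiply module (SiluAndMul)

x = torch.randn(4, 2 * 128)  # the *AndMul ops expect 2 * d features in the last dim
print(act(torch.randn(4, 128)).shape)  # torch.Size([4, 128])
print(gated(x).shape)                  # torch.Size([4, 128])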