[Models] Step-3.5-Flash (#33523)

Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
Co-authored-by: i-zhangmingming <i-zhangmingming@stepfun.com>
Co-authored-by: xiewuxun <xiewuxun@stepfun.com>
Co-authored-by: zetaohong <i-hongzetao@stepfun.com>
Co-authored-by: Jee Jee Li <pandaleefree@gmail.com>
This commit is contained in:
csy0225
2026-02-02 10:21:18 +08:00
committed by GitHub
parent a01ef3fa51
commit c3b40dc3e7
18 changed files with 3107 additions and 4 deletions

View File

@@ -17,6 +17,8 @@ from vllm.model_executor.layers.activation import (
QuickGELU,
SiluAndMul,
SwigluOAIAndMul,
SwigluStepAndMul,
swiglustep_and_mul_triton,
)
from vllm.utils.torch_utils import set_random_seed
@@ -36,6 +38,7 @@ CUDA_DEVICES = [f"cuda:{i}" for i in range(1 if torch.cuda.device_count() == 1 e
"gelu_tanh",
"fatrelu",
"swigluoai_and_mul",
"swiglustep_and_mul",
],
)
@pytest.mark.parametrize("num_tokens", NUM_TOKENS)
@@ -75,9 +78,12 @@ def test_act_and_mul(
elif activation == "swigluoai_and_mul":
layer = SwigluOAIAndMul()
fn = torch.ops._C.swigluoai_and_mul
elif activation == "swiglustep_and_mul":
layer = SwigluStepAndMul()
fn = swiglustep_and_mul_triton
out = layer(x)
ref_out = layer.forward_native(x)
-if activation == "swigluoai_and_mul":
+if activation in ["swigluoai_and_mul", "swiglustep_and_mul"]:
rtol = {
# For fp16, change the relative tolerance from 1e-3 to 2e-3
torch.float16: 2e-3,
@@ -104,7 +110,7 @@ def test_act_and_mul(
opcheck(fn, (out, x, threshold))
elif activation == "swigluoai_and_mul":
opcheck(fn, (out, x, layer.alpha, layer.limit))
-else:
+elif activation != "swiglustep_and_mul":
opcheck(fn, (out, x))