[V1] v1 engine + full CUDA graph support for PLaMo2 (#23998)

Signed-off-by: Hemmi Shinichi <shemmi@preferred.jp>
Signed-off-by: nopperl <54780682+nopperl@users.noreply.github.com>
Co-authored-by: Hemmi Shinichi <shemmi@preferred.jp>
Co-authored-by: Thomas Parnell <tom.parnell@gmail.com>
This commit is contained in:
nopperl
2025-09-04 00:24:02 +09:00
committed by GitHub
parent 6d80ae83e1
commit fa4311d85f
6 changed files with 349 additions and 125 deletions

View File

@@ -25,8 +25,7 @@ SSM_MODELS = [
HYBRID_MODELS = [
"ai21labs/Jamba-tiny-dev",
# skipping until vLLM implementation issues are resolved
# "pfnet/plamo-2-1b",
"pfnet/plamo-2-1b",
"Zyphra/Zamba2-1.2B-instruct",
"hmellor/tiny-random-BambaForCausalLM",
"ibm-granite/granite-4.0-tiny-preview",
@@ -37,6 +36,7 @@ HYBRID_MODELS = [
V1_SUPPORTED_MODELS = [
"state-spaces/mamba-130m-hf",
"ai21labs/Jamba-tiny-dev",
"pfnet/plamo-2-1b",
"yujiepan/mamba2-codestral-v0.1-tiny-random",
"Zyphra/Zamba2-1.2B-instruct",
"hmellor/tiny-random-BambaForCausalLM",
@@ -47,6 +47,7 @@ V1_SUPPORTED_MODELS = [
FULL_CUDA_GRAPH_MODELS = [
"ai21labs/Jamba-tiny-dev",
"pfnet/plamo-2-1b",
"Zyphra/Zamba2-1.2B-instruct",
]