[V1] v1 engine + full CUDA graph support for PLaMo2 (#23998)
Signed-off-by: Hemmi Shinichi <shemmi@preferred.jp> Signed-off-by: nopperl <54780682+nopperl@users.noreply.github.com> Co-authored-by: Hemmi Shinichi <shemmi@preferred.jp> Co-authored-by: Thomas Parnell <tom.parnell@gmail.com>
This commit is contained in:
@@ -25,8 +25,7 @@ SSM_MODELS = [
|
||||
|
||||
HYBRID_MODELS = [
|
||||
"ai21labs/Jamba-tiny-dev",
|
||||
# skipping until vLLM implementation issues are resolved
|
||||
# "pfnet/plamo-2-1b",
|
||||
"pfnet/plamo-2-1b",
|
||||
"Zyphra/Zamba2-1.2B-instruct",
|
||||
"hmellor/tiny-random-BambaForCausalLM",
|
||||
"ibm-granite/granite-4.0-tiny-preview",
|
||||
@@ -37,6 +36,7 @@ HYBRID_MODELS = [
|
||||
V1_SUPPORTED_MODELS = [
|
||||
"state-spaces/mamba-130m-hf",
|
||||
"ai21labs/Jamba-tiny-dev",
|
||||
"pfnet/plamo-2-1b",
|
||||
"yujiepan/mamba2-codestral-v0.1-tiny-random",
|
||||
"Zyphra/Zamba2-1.2B-instruct",
|
||||
"hmellor/tiny-random-BambaForCausalLM",
|
||||
@@ -47,6 +47,7 @@ V1_SUPPORTED_MODELS = [
|
||||
|
||||
FULL_CUDA_GRAPH_MODELS = [
|
||||
"ai21labs/Jamba-tiny-dev",
|
||||
"pfnet/plamo-2-1b",
|
||||
"Zyphra/Zamba2-1.2B-instruct",
|
||||
]
|
||||
|
||||
|
||||
Reference in New Issue
Block a user