[V1] v1 engine + full CUDA graph support for PLaMo2 (#23998)

Signed-off-by: Hemmi Shinichi <shemmi@preferred.jp>
Signed-off-by: nopperl <54780682+nopperl@users.noreply.github.com>
Co-authored-by: Hemmi Shinichi <shemmi@preferred.jp>
Co-authored-by: Thomas Parnell <tom.parnell@gmail.com>
2025-09-04 00:24:02 +09:00
parent 6d80ae83e1
commit fa4311d85f
6 changed files with 349 additions and 125 deletions
--- a/tests/models/language/generation/test_hybrid.py
+++ b/tests/models/language/generation/test_hybrid.py
@@ -25,8 +25,7 @@ SSM_MODELS = [

 HYBRID_MODELS = [
    "ai21labs/Jamba-tiny-dev",
-    # skipping until vLLM implementation issues are resolved
-    # "pfnet/plamo-2-1b",
+    "pfnet/plamo-2-1b",
    "Zyphra/Zamba2-1.2B-instruct",
    "hmellor/tiny-random-BambaForCausalLM",
    "ibm-granite/granite-4.0-tiny-preview",
@@ -37,6 +36,7 @@ HYBRID_MODELS = [
 V1_SUPPORTED_MODELS = [
    "state-spaces/mamba-130m-hf",
    "ai21labs/Jamba-tiny-dev",
+    "pfnet/plamo-2-1b",
    "yujiepan/mamba2-codestral-v0.1-tiny-random",
    "Zyphra/Zamba2-1.2B-instruct",
    "hmellor/tiny-random-BambaForCausalLM",
@@ -47,6 +47,7 @@ V1_SUPPORTED_MODELS = [

 FULL_CUDA_GRAPH_MODELS = [
    "ai21labs/Jamba-tiny-dev",
+    "pfnet/plamo-2-1b",
    "Zyphra/Zamba2-1.2B-instruct",
 ]