Refactor Transformers backend to use mixins (#26906)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
Harry Mellor
2025-10-16 22:50:39 +01:00
committed by GitHub
parent b2f78cbad4
commit fb5e10d3fb
17 changed files with 1510 additions and 1248 deletions

View File

@@ -211,11 +211,7 @@ def test_embed_loading(vllm_runner, model):
def test_pooling(hf_runner, vllm_runner, example_prompts, arch):
model = get_model(arch)
-    vllm_kwargs = dict(
-        max_model_len=None,
-        model_impl="transformers",
-        compilation_config=dict(cudagraph_capture_sizes=[8]),
-    )
+    vllm_kwargs = dict(max_model_len=None, model_impl="transformers")
hf_kwargs = dict()
if arch == "TransformersEmbeddingModel":