Refactor Transformers backend to use mixins (#26906)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
Harry Mellor
2025-10-16 22:50:39 +01:00
committed by GitHub
parent b2f78cbad4
commit fb5e10d3fb
17 changed files with 1510 additions and 1248 deletions

View File

@@ -211,11 +211,7 @@ def test_embed_loading(vllm_runner, model):
def test_pooling(hf_runner, vllm_runner, example_prompts, arch):
model = get_model(arch)
-    vllm_kwargs = dict(
-        max_model_len=None,
-        model_impl="transformers",
-        compilation_config=dict(cudagraph_capture_sizes=[8]),
-    )
+    vllm_kwargs = dict(max_model_len=None, model_impl="transformers")
hf_kwargs = dict()
if arch == "TransformersEmbeddingModel":