Refactor Transformers backend to use mixins (#26906)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
@@ -211,11 +211,7 @@ def test_embed_loading(vllm_runner, model):
 def test_pooling(hf_runner, vllm_runner, example_prompts, arch):
     model = get_model(arch)
 
-    vllm_kwargs = dict(
-        max_model_len=None,
-        model_impl="transformers",
-        compilation_config=dict(cudagraph_capture_sizes=[8]),
-    )
+    vllm_kwargs = dict(max_model_len=None, model_impl="transformers")
 
     hf_kwargs = dict()
     if arch == "TransformersEmbeddingModel":
Reference in New Issue
Block a user