[Frontend] Re-enable running MaxSim on GPU (#38620)

Signed-off-by: wang.yuqi <yuqi.wang@daocloud.io>
This commit is contained in:
wang.yuqi
2026-04-03 00:03:13 +08:00
committed by GitHub
parent d9408ffba3
commit a9b4f07ba2
12 changed files with 207 additions and 54 deletions

View File

@@ -4,12 +4,12 @@
import pytest
import torch
from vllm.entrypoints.pooling.scoring.utils import compute_maxsim_score
from vllm.pooling_params import LateInteractionParams, PoolingParams
from vllm.v1.pool.late_interaction import (
LATE_INTERACTION_MODE_CACHE_QUERY,
build_late_interaction_doc_params,
build_late_interaction_query_params,
compute_maxsim_score,
)
from vllm.v1.worker.gpu.pool.late_interaction_runner import LateInteractionRunner