[VLM] Support caching in merged multi-modal processor (#11396)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Cyrus Leung
2024-12-28 01:22:48 +08:00
committed by GitHub
parent 5ce4627a7e
commit 101418096f
20 changed files with 1459 additions and 452 deletions

View File

@@ -201,6 +201,7 @@ VLM_TEST_SETTINGS = {
vllm_output_post_proc=model_utils.fuyu_vllm_to_hf_output,
num_logprobs=10,
image_size_factors=[(), (0.25,), (0.25, 0.25, 0.25), (0.25, 0.2, 0.15)],
marks=[large_gpu_mark(min_gb=48)],
),
"glm4": VLMTestInfo(
models=["THUDM/glm-4v-9b"],
@@ -212,7 +213,7 @@ VLM_TEST_SETTINGS = {
dtype="bfloat16",
get_stop_token_ids=lambda tok: [151329, 151336, 151338],
patch_hf_runner=model_utils.glm_patch_hf_runner,
marks=[large_gpu_mark(min_gb=48)],
marks=[large_gpu_mark(min_gb=32)],
),
"h2ovl": VLMTestInfo(
models = [
@@ -261,6 +262,7 @@ VLM_TEST_SETTINGS = {
dtype="bfloat16",
use_tokenizer_eos=True,
patch_hf_runner=model_utils.internvl_patch_hf_runner,
marks=[large_gpu_mark(min_gb=32)],
),
"llava_next": VLMTestInfo(
models=["llava-hf/llava-v1.6-mistral-7b-hf"],