[Core] Interface for accessing model from VllmRunner (#10353)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
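The change replaces direct access to engine internals with the new VllmRunner.apply_model(func) hook: tests hand a callback to the runner, which runs it against the loaded model and returns the results as a list (hence the torch.concat calls in the hunks below). A minimal sketch of the pattern, inferred from this diff; the runner construction is assumed, not shown here:

# Before this commit, tests dug through engine internals:
#     visual = llm.llm_engine.model_executor.driver_worker. \
#         model_runner.model.visual
# After, they pass a callback that receives the unwrapped model:
def get_visual(model):
    return model.visual

# visuals = vllm_model.apply_model(get_visual)  # vllm_model: VllmRunner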
@@ -5,7 +5,6 @@ import pytest
 import torch
 from PIL import Image
 
-from vllm.entrypoints.llm import LLM
 from vllm.multimodal.image import rescale_image_size
 from vllm.multimodal.video import rescale_video_size, sample_frames_from_video
 
@@ -69,7 +68,7 @@ class Qwen2VLPromptVideoEmbeddingInput(TypedDict):
 
 def batch_make_image_embeddings(
         image_batches: List[Union[Image.Image, List[Image.Image]]], processor,
-        llm: LLM) -> List[Qwen2VLPromptImageEmbeddingInput]:
+        llm: VllmRunner) -> List[Qwen2VLPromptImageEmbeddingInput]:
     """batched image embeddings for Qwen2-VL
 
     This will infer all images' embeddings in a single batch,
@@ -106,16 +105,18 @@ def batch_make_image_embeddings(
     image_grid_thw = preprocess_result["image_grid_thw"]
 
     # pixel values to embeddings & grid_thws
-    with torch.no_grad():
-        visual = llm.llm_engine.model_executor.driver_worker. \
-            model_runner.model.visual
+    def get_image_embeds(model):
+        with torch.no_grad():
+            visual = model.visual
 
-        pixel_values_on_device = pixel_values.to(visual.device,
-                                                 dtype=visual.dtype)
-        image_grid_thw_on_device = image_grid_thw.to(visual.device,
-                                                     dtype=torch.int64)
-        image_embeds = visual(pixel_values_on_device,
-                              grid_thw=image_grid_thw_on_device)
+            pixel_values_on_device = pixel_values.to(visual.device,
+                                                     dtype=visual.dtype)
+            image_grid_thw_on_device = image_grid_thw.to(visual.device,
+                                                         dtype=torch.int64)
+            return visual(pixel_values_on_device,
+                          grid_thw=image_grid_thw_on_device)
+
+    image_embeds = torch.concat(llm.apply_model(get_image_embeds))
 
     # split into original batches
     result: List[Qwen2VLPromptImageEmbeddingInput] = []
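apply_model evidently returns a list of per-worker results, which is why the embeddings above are stitched together with torch.concat. A self-contained toy reproducing the callback-and-concat contract; FakeRunner, FakeModel, and FakeVisual are stand-ins for illustration, not vLLM code:

from typing import Callable, List

import torch
import torch.nn as nn


class FakeVisual(nn.Module):
    # Stand-in for the Qwen2-VL vision tower: any tensor-in/tensor-out op.
    def forward(self, pixel_values: torch.Tensor) -> torch.Tensor:
        return pixel_values * 2


class FakeModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.visual = FakeVisual()


class FakeRunner:
    """Mimics the assumed contract of VllmRunner.apply_model: call func
    on each worker's model and return the results as a list."""

    def __init__(self, models: List[nn.Module]):
        self.models = models

    def apply_model(self, func: Callable):
        return [func(model) for model in self.models]


def get_image_embeds(model):
    with torch.no_grad():
        return model.visual(torch.ones(2, 4))


runner = FakeRunner([FakeModel()])
image_embeds = torch.concat(runner.apply_model(get_image_embeds))
print(image_embeds.shape)  # torch.Size([2, 4])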
@@ -150,7 +151,7 @@ def batch_make_image_embeddings(
 
 def batch_make_video_embeddings(
         video_batches: PromptVideoInput, processor,
-        llm: LLM) -> List[Qwen2VLPromptVideoEmbeddingInput]:
+        llm: VllmRunner) -> List[Qwen2VLPromptVideoEmbeddingInput]:
     """batched video embeddings for Qwen2-VL
 
     A NDArray represents a single video's all frames.
@@ -187,16 +188,18 @@ def batch_make_video_embeddings(
     video_grid_thw = preprocess_result["video_grid_thw"]
 
     # pixel values to embeddings & grid_thws
-    with torch.no_grad():
-        visual = llm.llm_engine.model_executor.driver_worker.\
-            model_runner.model.visual
+    def get_image_embeds(model):
+        with torch.no_grad():
+            visual = model.visual
 
-        pixel_values_on_device = pixel_values.to(visual.device,
-                                                 dtype=visual.dtype)
-        video_grid_thw_on_device = video_grid_thw.to(visual.device,
-                                                     dtype=torch.int64)
-        video_embeds = visual(pixel_values_on_device,
-                              grid_thw=video_grid_thw_on_device)
+            pixel_values_on_device = pixel_values.to(visual.device,
+                                                     dtype=visual.dtype)
+            video_grid_thw_on_device = video_grid_thw.to(visual.device,
+                                                         dtype=torch.int64)
+            return visual(pixel_values_on_device,
+                          grid_thw=video_grid_thw_on_device)
+
+    video_embeds = torch.concat(llm.apply_model(get_image_embeds))
 
     # split into original batches
     result: List[Qwen2VLPromptVideoEmbeddingInput] = []
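The video hunk mirrors the image hunk, down to the inner callback keeping the copied name get_image_embeds. Note how the callback closes over the preprocessed tensors from the enclosing function; a standalone sketch that makes the capture explicit, with a hypothetical factory name:

import torch


def make_video_embeds_callback(pixel_values: torch.Tensor,
                               video_grid_thw: torch.Tensor):
    # The returned closure captures the host-side tensors and moves them
    # to the worker's device/dtype before running the vision tower,
    # exactly as the test above does inline.
    def get_video_embeds(model):
        with torch.no_grad():
            visual = model.visual
            pixels = pixel_values.to(visual.device, dtype=visual.dtype)
            grid = video_grid_thw.to(visual.device, dtype=torch.int64)
            return visual(pixels, grid_thw=grid)

    return get_video_embeds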
@@ -278,9 +281,9 @@ def run_embedding_input_test(
             max_tokens,
             num_logprobs=num_logprobs,
             images=batch_make_image_embeddings(
-                images, processor, vllm_model.model) if images else None,
+                images, processor, vllm_model) if images else None,
             videos=batch_make_video_embeddings(
-                videos, processor, vllm_model.model) if videos else None)
+                videos, processor, vllm_model) if videos else None)
         for prompts, images, videos in inputs
     ]
 