[Model]: add some tests for aria model (#10770)
Signed-off-by: xffxff <1247714429@qq.com>
Signed-off-by: Isotr0py <2037008807@qq.com>
Co-authored-by: Isotr0py <2037008807@qq.com>
This commit is contained in:
@@ -29,6 +29,8 @@ def run_test(
|
||||
postprocess_inputs: Callable[[BatchEncoding], BatchEncoding],
|
||||
comparator: Callable[..., None],
|
||||
get_stop_token_ids: Optional[Callable[[AutoTokenizer], List[int]]],
|
||||
stop_str: Optional[List[str]],
|
||||
tokenizer_mode: str,
|
||||
limit_mm_per_prompt: Dict[str, int],
|
||||
model_kwargs: Optional[Dict[str, Any]],
|
||||
patch_hf_runner: Optional[Callable[[HfRunner], HfRunner]],
|
||||
@@ -50,11 +52,14 @@ def run_test(
|
||||
# vLLM needs a fresh new process without cuda initialization.
|
||||
# if we run HF first, the cuda initialization will be done and it
|
||||
# will hurt multiprocessing backend with fork method (the default method).
|
||||
vllm_kwargs = {}
|
||||
vllm_kwargs: Dict[str, Any] = {}
|
||||
if get_stop_token_ids is not None:
|
||||
vllm_kwargs["stop_token_ids"] = get_stop_token_ids(tokenizer)
|
||||
if stop_str:
|
||||
vllm_kwargs["stop"] = stop_str
|
||||
|
||||
with vllm_runner(model,
|
||||
tokenizer_mode=tokenizer_mode,
|
||||
max_model_len=max_model_len,
|
||||
max_num_seqs=max_num_seqs,
|
||||
dtype=dtype,
|
||||
@@ -85,6 +90,8 @@ def run_test(
|
||||
hf_kwargs = {}
|
||||
if use_tokenizer_eos:
|
||||
hf_kwargs["eos_token_id"] = tokenizer.eos_token_id
|
||||
if stop_str:
|
||||
hf_kwargs["stop_strings"] = stop_str
|
||||
|
||||
with hf_model, torch.no_grad():
|
||||
for prompts, media in inputs:
|
||||
@@ -138,4 +145,4 @@ def process_runner_outputs(
|
||||
def process_outputs(output_processor, model, outputs_per_image):
    """Apply a model-specific post-processor to a runner's outputs.

    Args:
        output_processor: Callable invoked as ``output_processor(res, model)``
            once for every individual result.
        model: Value forwarded unchanged as the second argument of
            ``output_processor``.
        outputs_per_image: Iterable of iterables of results; the two-level
            nesting is preserved in the return value.

    Returns:
        A list of lists containing the processed results, in the same order
        as the input.
    """
    # Outer loop walks the groups, inner loop walks the results within each
    # group, so the output mirrors the shape of ``outputs_per_image``.
    return [[output_processor(res, model) for res in outputs]
            for outputs in outputs_per_image]
|
||||
@@ -97,6 +97,9 @@ class VLMTestInfo(NamedTuple):
|
||||
|
||||
# Optional callable which gets a list of token IDs from the model tokenizer
|
||||
get_stop_token_ids: Optional[Callable[[AutoTokenizer], List[int]]] = None
|
||||
# Optional list of strings to stop generation, useful when stop tokens are
|
||||
# not special tokens in the tokenizer
|
||||
stop_str: Optional[List[str]] = None
|
||||
|
||||
# Exposed options for HF runner
|
||||
model_kwargs: Optional[Dict[str, Any]] = None
|
||||
@@ -148,6 +151,8 @@ class VLMTestInfo(NamedTuple):
|
||||
|
||||
marks: Optional[List[MarkDecorator]] = None
|
||||
|
||||
tokenizer_mode: str = "auto"
|
||||
|
||||
def get_non_parametrized_runner_kwargs(self):
|
||||
"""Returns a dictionary of expandable kwargs for items that are used
|
||||
in all test types, which are NOT used when creating the parametrized
|
||||
@@ -166,8 +171,10 @@ class VLMTestInfo(NamedTuple):
|
||||
"postprocess_inputs": self.postprocess_inputs,
|
||||
"comparator": self.comparator,
|
||||
"get_stop_token_ids": self.get_stop_token_ids,
|
||||
"stop_str": self.stop_str,
|
||||
"model_kwargs": self.model_kwargs,
|
||||
"patch_hf_runner": self.patch_hf_runner,
|
||||
"tokenizer_mode": self.tokenizer_mode
|
||||
}
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user