[Model] Add Mistral Tokenization to improve robustness and chat encoding (#7739)

This commit is contained in:
Patrick von Platen
2024-08-27 14:40:02 +02:00
committed by GitHub
parent 9606c7197d
commit 6fc4e6e07a
12 changed files with 275 additions and 60 deletions

View File

@@ -30,9 +30,11 @@ def test_models(
hf_outputs = hf_model.generate_greedy_logprobs_limit(
example_prompts, max_tokens, num_logprobs)
-with vllm_runner(model, dtype=dtype) as vllm_model:
+with vllm_runner(model, dtype=dtype,
+                 tokenizer_mode="mistral") as vllm_model:
vllm_outputs = vllm_model.generate_greedy_logprobs(
example_prompts, max_tokens, num_logprobs)
check_logprobs_close(
outputs_0_lst=hf_outputs,
outputs_1_lst=vllm_outputs,