[Model] Add Mistral Tokenization to improve robustness and chat encoding (#7739)

This commit is contained in:
Patrick von Platen
2024-08-27 14:40:02 +02:00
committed by GitHub
parent 9606c7197d
commit 6fc4e6e07a
12 changed files with 275 additions and 60 deletions

View File

@@ -30,9 +30,11 @@ def test_models(
hf_outputs = hf_model.generate_greedy_logprobs_limit(
example_prompts, max_tokens, num_logprobs)
-with vllm_runner(model, dtype=dtype) as vllm_model:
+with vllm_runner(model, dtype=dtype,
+                 tokenizer_mode="mistral") as vllm_model:
vllm_outputs = vllm_model.generate_greedy_logprobs(
example_prompts, max_tokens, num_logprobs)
check_logprobs_close(
outputs_0_lst=hf_outputs,
outputs_1_lst=vllm_outputs,