44 lines
1.4 KiB
Python
44 lines
1.4 KiB
Python
# SPDX-License-Identifier: Apache-2.0
|
|
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
|
import pytest
|
|
|
|
from ...utils import dummy_hf_overrides
|
|
|
|
# Model names that the `model` argument of test_dummy_generate is
# parametrized over.
MODELS = ["xai-org/grok-2"]
|
|
|
|
|
|
def _grok2_dummy_overrides(hf_config):
    """Return ``hf_config`` with dummy overrides and fixed text-config sizes.

    The config is first passed through ``dummy_hf_overrides`` with
    ``model_arch="Grok1ForCausalLM"``; the text config of the result is then
    updated in place with the dimension values listed below.

    Kept as a module-level function because it is handed to the runner as
    the ``hf_overrides`` callable.
    """
    overridden = dummy_hf_overrides(hf_config, model_arch="Grok1ForCausalLM")

    # NOTE(review): presumably chosen to keep the dummy-weight model small;
    # confirm against what dummy_hf_overrides already sets.
    size_overrides = {
        "hidden_size": 256,
        "intermediate_size": 512,
        "moe_intermediate_size": 256,
        "num_attention_heads": 4,
        "num_key_value_heads": 2,
        "head_dim": 64,
    }
    overridden.get_text_config().update(size_overrides)

    return overridden
|
|
|
|
|
|
@pytest.mark.parametrize("model", MODELS)
def test_dummy_generate(vllm_runner, monkeypatch, model: str) -> None:
    """Greedily generate one token with dummy-format weights and check output.

    The runner is built with ``load_format="dummy"``, eager mode and the
    ``_grok2_dummy_overrides`` config hook. The test passes when the returned
    token ids are longer than the tokenized prompt and the returned string is
    not ``None``.
    """
    runner_kwargs = {
        "load_format": "dummy",
        "max_model_len": 128,
        "hf_overrides": _grok2_dummy_overrides,
        "enforce_eager": True,
    }

    with monkeypatch.context() as env_patch:
        # Must be set before the runner is constructed.
        # NOTE(review): presumably required because hf_overrides is a
        # callable rather than a plain dict - confirm.
        env_patch.setenv("VLLM_ALLOW_INSECURE_SERIALIZATION", "1")

        with vllm_runner(model, **runner_kwargs) as runner:
            text = "Hello from Grok-2"

            # Measure the prompt length with the runner's own tokenizer
            # before generating.
            tok = runner.get_llm().get_tokenizer()
            n_prompt_tokens = len(tok.encode(text))

            results = runner.generate_greedy([text], max_tokens=1)
            generated_ids, generated_text = results[0]

            assert n_prompt_tokens < len(generated_ids)
            assert generated_text is not None
|