"""Original test, the LoRA model has the common target modules, not all"""
iftorch.cuda.device_count()<tp_size:
pytest.skip(f"Not enough GPUs for tensor parallelism {tp_size}")
prompts=["Write a story about a sheep and a goat."]
llm=vllm.LLM(
MODEL_PATH,
enable_lora=True,
max_num_seqs=16,
max_loras=4,
distributed_executor_backend="ray",
tensor_parallel_size=tp_size,
)
expected_jamba_output=[
"""Once upon a time, in a lush green meadow, there lived a sheep named Clara and a goat named Billy. Clara was a gentle creature, always nibbling on the soft grass and humming"""# noqa: E501