diff --git a/tests/lora/test_layers.py b/tests/lora/test_layers.py index c9c551143..d3c1f3deb 100644 --- a/tests/lora/test_layers.py +++ b/tests/lora/test_layers.py @@ -469,7 +469,7 @@ def test_lm_head_logits_processor( @torch.inference_mode() -@pytest.mark.parametrize("vocab_size", [512, 32000, 258049, 300000]) +@pytest.mark.parametrize("vocab_size", [258049, 300000]) @pytest.mark.parametrize("device", DEVICES) def test_lm_head_logits_processor_invalid_vocab_size( default_vllm_config, dist_init, vocab_size, device @@ -489,7 +489,7 @@ def test_lm_head_logits_processor_invalid_vocab_size( logits_processor, 1024, torch.float16, device, None ) - with pytest.raises(ValueError, match="vocab size must be > 32000 and <= 258048"): + with pytest.raises(ValueError, match="vocab size must be <= 258048"): lora_logits_processor.create_lora_weights(max_loras, lora_config) diff --git a/vllm/lora/layers/logits_processor.py b/vllm/lora/layers/logits_processor.py index 217c46fbe..237a61eac 100644 --- a/vllm/lora/layers/logits_processor.py +++ b/vllm/lora/layers/logits_processor.py @@ -88,10 +88,8 @@ class LogitsProcessorWithLoRA(BaseLayerWithLoRA): model_config: PretrainedConfig | None = None, ) -> None: # TODO: Verify if this condition can be further relaxed - if self.base_layer.vocab_size <= 32000 or self.base_layer.vocab_size > 258048: - raise ValueError( - "When using LoRA, vocab size must be > 32000 and <= 258048" - ) + if self.base_layer.vocab_size > 258048: + raise ValueError("When using LoRA, vocab size must be <= 258048") self.lora_a_stacked = torch.zeros( ( max_loras,