[V0 Deprecation] Remove V0 sampling metadata (#25345)
Signed-off-by: Woosuk Kwon <woosuk@thinkingmachines.ai>
This commit is contained in:
@@ -54,7 +54,6 @@ from vllm.model_executor.models.mamba_cache import (MambaCacheManager,
|
||||
from vllm.model_executor.models.utils import (
|
||||
AutoWeightsLoader, WeightsMapper, make_empty_intermediate_tensors_factory,
|
||||
make_layers, maybe_prefix)
|
||||
from vllm.model_executor.sampling_metadata import SamplingMetadata
|
||||
from vllm.sequence import IntermediateTensors
|
||||
from vllm.transformers_utils.configs import NemotronHConfig
|
||||
from vllm.utils import LayerBlockType
|
||||
@@ -622,10 +621,8 @@ class NemotronHForCausalLM(nn.Module, HasInnerState, SupportsLoRA, SupportsPP,
|
||||
def compute_logits(
|
||||
self,
|
||||
hidden_states: torch.Tensor,
|
||||
sampling_metadata: SamplingMetadata,
|
||||
) -> Optional[torch.Tensor]:
|
||||
logits = self.logits_processor(self.lm_head, hidden_states,
|
||||
sampling_metadata)
|
||||
logits = self.logits_processor(self.lm_head, hidden_states)
|
||||
return logits
|
||||
|
||||
def load_weights(self, weights: Iterable[tuple[str,
|
||||
|
||||
Reference in New Issue
Block a user