[Chore] Remove Sampler from Model Code (#17084)

Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
Author: Woosuk Kwon
Date: 2025-04-24 02:49:33 -07:00
Committed by: GitHub
Commit: b411418ff0
Parent: 2bc0f72ae5
103 changed files with 48 additions and 1099 deletions
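The hunks below show one representative model, SkyworkR1VChatModel: the commit deletes the cached `sampler` property and the `sample()` method, drops the `SamplerOutput`/`get_sampler` import, and narrows the `forward` return annotation, so the model's public surface now ends at `compute_logits`. The same mechanical edit is repeated across the 103 touched files. As a rough sketch of the resulting contract (not vLLM's actual classes — `DummyLanguageModel` and `run_decode_step` are hypothetical names, and greedy argmax stands in for vLLM's real Sampler):

```python
# Minimal sketch of the post-commit model contract; not vLLM's actual code.
import torch
import torch.nn as nn


class DummyLanguageModel(nn.Module):
    """After this commit, a model only produces hidden states and logits."""

    def __init__(self, vocab_size: int = 32, hidden_size: int = 16):
        super().__init__()
        self.embed = nn.Embedding(vocab_size, hidden_size)
        self.lm_head = nn.Linear(hidden_size, vocab_size, bias=False)

    def forward(self, input_ids: torch.Tensor) -> torch.Tensor:
        # Returns hidden states only; no self.sampler, no SamplerOutput.
        return self.embed(input_ids)

    def compute_logits(self, hidden_states: torch.Tensor) -> torch.Tensor:
        # The model's responsibility ends here.
        return self.lm_head(hidden_states)


def run_decode_step(model: DummyLanguageModel,
                    input_ids: torch.Tensor) -> torch.Tensor:
    """Sampling now lives outside the model, in the runner/engine layer."""
    hidden_states = model(input_ids)
    logits = model.compute_logits(hidden_states[:, -1])
    # Greedy argmax stands in for vLLM's Sampler here.
    return torch.argmax(logits, dim=-1)


if __name__ == "__main__":
    model = DummyLanguageModel()
    next_tokens = run_decode_step(model, torch.tensor([[1, 2, 3]]))
    print(next_tokens.shape)  # torch.Size([1])
```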


@@ -8,7 +8,6 @@
 # --------------------------------------------------------
 from abc import ABC, abstractmethod
 from collections.abc import Iterable, Mapping, Sequence
-from functools import cached_property
 from typing import Literal, Optional, Set, Tuple, TypedDict, TypeVar, Union

 import torch
@@ -21,7 +20,6 @@ from vllm.config import VllmConfig
 from vllm.model_executor.layers.linear import ReplicatedLinear
 from vllm.model_executor.layers.quantization import QuantizationConfig
 from vllm.model_executor.layers.quantization.awq import AWQConfig
-from vllm.model_executor.layers.sampler import SamplerOutput, get_sampler
 from vllm.model_executor.models.intern_vit import (InternVisionModel,
                                                    InternVisionPatchModel)
 from vllm.model_executor.sampling_metadata import SamplingMetadata
@@ -699,13 +697,6 @@ class SkyworkR1VChatModel(nn.Module, SupportsMultiModal, SupportsPP):
                 (llm_quant_config is not None):
             quant_config.modules_to_not_convert.append("vision_model")

-    @cached_property
-    def sampler(self):
-        if hasattr(self.language_model, "sampler"):
-            return self.language_model.sampler
-        return get_sampler()
-
     def _init_vision_model(
         self,
         config: PretrainedConfig,
@@ -908,7 +899,7 @@ class SkyworkR1VChatModel(nn.Module, SupportsMultiModal, SupportsPP):
         intermediate_tensors: Optional[IntermediateTensors] = None,
         inputs_embeds: Optional[torch.Tensor] = None,
         **kwargs: object,
-    ) -> Union[SamplerOutput, IntermediateTensors]:
+    ) -> IntermediateTensors:
         if intermediate_tensors is not None:
             input_ids = None
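The only change in this hunk is the return annotation: with the sampler gone, `forward` never returns a `SamplerOutput`, so the `Union` collapses to `IntermediateTensors`. Picking tokens from the logits is no longer the model's job, as sketched above.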
@@ -946,13 +937,6 @@ class SkyworkR1VChatModel(nn.Module, SupportsMultiModal, SupportsPP):
         return self.language_model.compute_logits(hidden_states,
                                                   sampling_metadata)

-    def sample(
-        self,
-        logits: torch.Tensor,
-        sampling_metadata: SamplingMetadata,
-    ) -> Optional[SamplerOutput]:
-        return self.language_model.sample(logits, sampling_metadata)
-
     def load_weights(self, weights: Iterable[Tuple[str,
                                                    torch.Tensor]]) -> Set[str]:
         skip_prefixes = [