[Core] Pipeline Parallel Support (#4412)

Signed-off-by: Muralidhar Andoorveedu <muralidhar.andoorveedu@centml.ai>
2024-07-02 10:58:08 -07:00
parent 15aba081f3
commit c5832d2ae9
82 changed files with 1096 additions and 400 deletions
--- a/vllm/model_executor/models/phi3v.py
+++ b/vllm/model_executor/models/phi3v.py
@@ -35,7 +35,7 @@ from vllm.model_executor.models.clip import CLIPVisionModel
 from vllm.model_executor.models.llama import LlamaModel
 from vllm.model_executor.sampling_metadata import SamplingMetadata
 from vllm.multimodal import MULTIMODAL_REGISTRY
-from vllm.sequence import SamplerOutput
+from vllm.sequence import IntermediateTensors, SamplerOutput

 from .clip import dummy_image_for_clip, dummy_seq_data_for_clip
 from .interfaces import SupportsVision
@@ -381,9 +381,13 @@ class Phi3VForCausalLM(nn.Module, SupportsVision):

        return None

-    def forward(self, input_ids: torch.Tensor, positions: torch.Tensor,
+    def forward(self,
+                input_ids: torch.Tensor,
+                positions: torch.Tensor,
                kv_caches: List[torch.Tensor],
-                attn_metadata: AttentionMetadata, **kwargs: object):
+                attn_metadata: AttentionMetadata,
+                intermediate_tensors: Optional[IntermediateTensors] = None,
+                **kwargs: object):
        image_input = self._parse_and_validate_image_input(**kwargs)

        if image_input is not None:
@@ -398,6 +402,7 @@ class Phi3VForCausalLM(nn.Module, SupportsVision):
                                   positions,
                                   kv_caches,
                                   attn_metadata,
+                                   intermediate_tensors,
                                   inputs_embeds=inputs_embeds)

        return hidden_states