Inclusion of InternVLChatModel in PP_SUPPORTED_MODELS (Pipeline Parallelism) (#7860)
commit 8685ba1a1e
parent 288a938872
@@ -12,6 +12,7 @@ from vllm.model_executor.layers.quantization import QuantizationConfig
 from vllm.model_executor.model_loader.loader import build_model
 from vllm.model_executor.models import ModelRegistry
 from vllm.multimodal.base import NestedTensors
+from vllm.sequence import IntermediateTensors
 from vllm.utils import is_pin_memory_available
 
 
@@ -279,3 +280,18 @@ def is_pp_missing_parameter(name: str, model: torch.nn.Module) -> bool:
         if name.startswith(missing_layer_name):
             return True
     return False
+
+
+def make_empty_intermediate_tensors_factory(keys: List[str], hidden_size: int):
+
+    def make_empty_intermediate_tensors(
+            batch_size: int, dtype: torch.dtype,
+            device: torch.device) -> IntermediateTensors:
+        return IntermediateTensors({
+            key: torch.zeros((batch_size, hidden_size),
+                             dtype=dtype,
+                             device=device)
+            for key in keys
+        })
+
+    return make_empty_intermediate_tensors
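For readers tracing how this helper enables pipeline parallelism: on every pipeline stage after the first, the model needs correctly shaped placeholder activations before the real hidden states arrive from the previous stage, and the factory above builds exactly those. Below is a minimal usage sketch, assuming the function lands in vllm.model_executor.models.utils and using illustrative key names ("hidden_states", "residual") and sizes; none of these specifics are asserted by the diff itself.

# Hypothetical usage sketch -- module path, key names, and sizes are
# assumptions for illustration, not taken from this commit.
import torch

from vllm.model_executor.models.utils import (
    make_empty_intermediate_tensors_factory)

# The factory closes over the tensor keys and the hidden size once,
# at model construction time.
make_empty = make_empty_intermediate_tensors_factory(
    keys=["hidden_states", "residual"], hidden_size=4096)

# Each call then materializes zero-filled placeholders for a concrete
# batch; non-first pipeline stages use these until real activations
# arrive from the preceding stage.
placeholders = make_empty(batch_size=8,
                          dtype=torch.float16,
                          device=torch.device("cuda"))
# Every requested key now maps to a zero tensor of shape
# (batch_size, hidden_size), i.e. (8, 4096) here.

With InternVLChatModel registered in PP_SUPPORTED_MODELS, the model can then be sharded across stages, e.g. by launching vLLM with --pipeline-parallel-size 2 (flag shown for context; not part of this diff).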