[V1] Multiprocessing Tensor Parallel Support for v1 (#9856)
Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com>
parent bc192a2b09
commit 28b3a1c7e5
@@ -5,6 +5,7 @@ from typing import Optional
 import torch
 import torch.nn as nn
 
+import vllm.envs as envs
 from vllm.distributed import (tensor_model_parallel_all_gather,
                               tensor_model_parallel_gather)
 from vllm.model_executor.layers.vocab_parallel_embedding import (
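
The new import pulls in vLLM's centralized environment-variable module so the logits processor can consult the VLLM_USE_V1 flag. As a rough sketch of how such a module-level flag can be derived from the process environment (illustrative only; the real vllm/envs.py resolves its variables lazily through a registry rather than eagerly like this):

import os

# Illustrative sketch, not vLLM's actual envs module: expose VLLM_USE_V1 as
# a boolean read once from the environment.
VLLM_USE_V1: bool = os.environ.get("VLLM_USE_V1", "0") == "1"
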
@@ -42,7 +43,9 @@ class LogitsProcessor(nn.Module):
         # Soft cap the logits. Used in Gemma 2.
         self.soft_cap = soft_cap
         # Whether to use gather or all-gather to gather the logits.
-        self.use_gather = not current_platform.is_tpu()
+        self.use_gather = not current_platform.is_tpu(
+        ) and not envs.VLLM_USE_V1
 
     def forward(
         self,
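
With this change, use_gather is False not only on TPU but also whenever the V1 engine is enabled, routing the forward pass through tensor_model_parallel_all_gather so that every tensor-parallel rank ends up with the full logits, rather than rank 0 alone collecting them, which V1's multiprocessing workers appear to require. Below is a minimal sketch of the difference between the two collectives using raw torch.distributed; the helper name _assemble_logits is hypothetical and this is not vLLM's actual implementation (it assumes an initialized process group and a vocab-sharded logits tensor):

from typing import Optional

import torch
import torch.distributed as dist


def _assemble_logits(local_logits: torch.Tensor,
                     use_gather: bool) -> Optional[torch.Tensor]:
    # Combine per-rank vocab shards of shape [num_tokens, vocab/world_size]
    # into a full [num_tokens, vocab] tensor.
    world_size = dist.get_world_size()
    if world_size == 1:
        return local_logits
    if use_gather:
        # gather: only dst rank 0 materializes the full logits; every other
        # rank returns None. Cheaper, but only rank 0 can sample.
        shards = ([torch.empty_like(local_logits) for _ in range(world_size)]
                  if dist.get_rank() == 0 else None)
        dist.gather(local_logits, gather_list=shards, dst=0)
        return torch.cat(shards, dim=-1) if shards is not None else None
    # all-gather: every rank materializes the full logits, which the TPU
    # path relies on and which the V1 path now uses as well.
    shards = [torch.empty_like(local_logits) for _ in range(world_size)]
    dist.all_gather(shards, local_logits)
    return torch.cat(shards, dim=-1)

Gather remains the default off TPU and outside V1 because it is the cheaper collective: only one rank has to hold the full [num_tokens, vocab] tensor.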