[Core] Optimize SPMD architecture with delta + serialization optimization (#7109)
This commit is contained in:
@@ -1,3 +1,4 @@
|
||||
+from array import array
|
||||
from itertools import count
|
||||
from typing import Callable, Dict, List, Optional
|
||||
from typing import Sequence as GenericSequence
|
||||
@@ -9,7 +10,8 @@ import torch
|
||||
from vllm.engine.arg_utils import EngineArgs
|
||||
from vllm.model_executor.utils import set_random_seed
|
||||
from vllm.sampling_params import SamplingParams
|
||||
-from vllm.sequence import (CompletionSequenceGroupOutput, Logprob,
|
||||
+from vllm.sequence import (VLLM_TOKEN_ID_ARRAY_TYPE,
|
||||
+CompletionSequenceGroupOutput, Logprob,
|
||||
SamplerOutput, SequenceData, SequenceGroupMetadata,
|
||||
SequenceOutput)
|
||||
from vllm.utils import get_distributed_init_method, get_ip, get_open_port
|
||||
@@ -138,8 +140,9 @@ def create_seq_group_metadata_from_prompts(
|
||||
seq_data={
|
||||
i:
|
||||
SequenceData(
|
||||
-prompt_token_ids=prompt_token_ids[:],
|
||||
-output_token_ids=cont_token_ids[:],
|
||||
+array(VLLM_TOKEN_ID_ARRAY_TYPE, prompt_token_ids[:]),
|
||||
+_output_token_ids=array(VLLM_TOKEN_ID_ARRAY_TYPE,
|
||||
+cont_token_ids[:]),
|
||||
),
|
||||
},
|
||||
sampling_params=SamplingParams(temperature=0.0, ),
|
||||
|
||||
Reference in New Issue
Block a user