[Core] Optimize SPMD architecture with delta + serialization optimization (#7109)

This commit is contained in:
SangBin Cho
2024-08-18 17:57:20 -07:00
committed by GitHub
parent 200a2ffa6b
commit ff7ec82c4d
36 changed files with 722 additions and 346 deletions

View File

@@ -1,6 +1,9 @@
from array import array
import pytest
from vllm.sequence import (CompletionSequenceGroupOutput, SamplerOutput,
from vllm.sequence import (VLLM_TOKEN_ID_ARRAY_TYPE,
CompletionSequenceGroupOutput, SamplerOutput,
SequenceData, SequenceOutput)
from .core.utils import create_dummy_prompt
@@ -54,7 +57,7 @@ def test_sampler_output_eq(sample_outputs):
def test_sequence_data_prefill():
seq_data = SequenceData(prompt_token_ids=[1, 2, 3, 4])
seq_data = SequenceData(array(VLLM_TOKEN_ID_ARRAY_TYPE, [1, 2, 3, 4]))
assert seq_data.get_num_uncomputed_tokens() == 4
assert seq_data.get_num_computed_tokens() == 0
# advance by 2