[Core] Optimize SPMD architecture with delta + serialization optimization (#7109)
This commit is contained in:
@@ -1,6 +1,9 @@
|
||||
from array import array
|
||||
|
||||
import pytest
|
||||
|
||||
from vllm.sequence import (CompletionSequenceGroupOutput, SamplerOutput,
|
||||
from vllm.sequence import (VLLM_TOKEN_ID_ARRAY_TYPE,
|
||||
CompletionSequenceGroupOutput, SamplerOutput,
|
||||
SequenceData, SequenceOutput)
|
||||
|
||||
from .core.utils import create_dummy_prompt
|
||||
@@ -54,7 +57,7 @@ def test_sampler_output_eq(sample_outputs):
|
||||
|
||||
|
||||
def test_sequence_data_prefill():
|
||||
seq_data = SequenceData(prompt_token_ids=[1, 2, 3, 4])
|
||||
seq_data = SequenceData(array(VLLM_TOKEN_ID_ARRAY_TYPE, [1, 2, 3, 4]))
|
||||
assert seq_data.get_num_uncomputed_tokens() == 4
|
||||
assert seq_data.get_num_computed_tokens() == 0
|
||||
# advance by 2
|
||||
|
||||
Reference in New Issue
Block a user