[Core] Optimize SPMD architecture with delta + serialization optimization (#7109)
This commit is contained in:
@@ -1,4 +1,5 @@
|
||||
import functools
|
||||
from array import array
|
||||
from collections import UserDict
|
||||
from dataclasses import dataclass
|
||||
from typing import (TYPE_CHECKING, Callable, Dict, Mapping, Optional, Protocol,
|
||||
@@ -21,6 +22,10 @@ logger = init_logger(__name__)
|
||||
|
||||
C = TypeVar("C", bound=PretrainedConfig, default=PretrainedConfig)
|
||||
|
||||
# NOTE: This must match VLLM_TOKEN_ID_ARRAY_TYPE in sequence.py.
|
||||
# We cannot import it here because of circular dependencies.
|
||||
VLLM_TOKEN_ID_ARRAY_TYPE = "l"
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class InputContext:
|
||||
@@ -118,7 +123,8 @@ class InputRegistry:
|
||||
# Avoid circular import
|
||||
from vllm.sequence import SequenceData
|
||||
|
||||
dummy_seq_data = SequenceData([0] * seq_len)
|
||||
dummy_seq_data = SequenceData(
|
||||
array(VLLM_TOKEN_ID_ARRAY_TYPE, [0]) * seq_len)
|
||||
dummy_multi_modal_data = None
|
||||
|
||||
return dummy_seq_data, dummy_multi_modal_data
|
||||
|
||||
Reference in New Issue
Block a user