[BugFix] Fix use of per-request seed with pipeline parallel (#6698)
This commit is contained in:
@@ -411,14 +411,6 @@ class Sequence:
|
||||
f"num_blocks={self.n_blocks}, ")
|
||||
|
||||
|
||||
@dataclass
class SequenceGroupState:
    """Mutable state tied to a specific sequence group"""

    # torch.Generator used in seeded sampling
    generator: Optional = None  # type: ignore
|
||||
|
||||
|
||||
class SequenceGroup:
|
||||
"""A group of sequences that are generated from the same prompt.
|
||||
|
||||
@@ -461,7 +453,6 @@ class SequenceGroup:
|
||||
time_in_queue=None)
|
||||
self.lora_request = lora_request
|
||||
self.prompt_logprobs: Optional[PromptLogprobs] = None
|
||||
self.state = SequenceGroupState()
|
||||
self.embeddings = embeddings
|
||||
self.pooling_params = pooling_params
|
||||
self.prompt_adapter_request = prompt_adapter_request
|
||||
@@ -648,7 +639,6 @@ class SequenceGroupMetadata:
|
||||
lora_request: LoRA request.
|
||||
computed_block_nums: The block numbers that are already computed,
|
||||
used in prefix caching.
|
||||
state: Internal state tied to this sequence group.
|
||||
multi_modal_data: Multi modal data.
|
||||
encoder_seq_data: Optional sequence data for encoder prompt
|
||||
(SequenceGroup.encoder_seq). Should be None
|
||||
@@ -674,7 +664,6 @@ class SequenceGroupMetadata:
|
||||
token_chunk_size: Optional[int] = None,
|
||||
lora_request: Optional[LoRARequest] = None,
|
||||
computed_block_nums: Optional[List[int]] = None,
|
||||
state: Optional[SequenceGroupState] = None,
|
||||
multi_modal_data: Optional["MultiModalDataDict"] = None,
|
||||
encoder_seq_data: Optional[SequenceData] = None,
|
||||
cross_block_table: Optional[List[int]] = None,
|
||||
@@ -690,7 +679,6 @@ class SequenceGroupMetadata:
|
||||
self.prompt_adapter_request = prompt_adapter_request
|
||||
self.computed_block_nums = computed_block_nums
|
||||
self.multi_modal_data = multi_modal_data
|
||||
self.state = SequenceGroupState() if state is None else state
|
||||
self.encoder_seq_data = encoder_seq_data
|
||||
self.cross_block_table = cross_block_table
|
||||
self._token_chunk_size = token_chunk_size
|
||||
|
||||
Reference in New Issue
Block a user