[Performance] Optimize e2e overheads: Reduce python allocations (#7162)

This commit is contained in:
Alexander Matveev
2024-08-09 00:34:28 -04:00
committed by GitHub
parent 73388c07a4
commit e02ac55617
11 changed files with 549 additions and 124 deletions

View File

@@ -1,10 +1,12 @@
 from vllm.model_executor.parameter import (BasevLLMParameter,
                                            PackedvLLMParameter)
-from vllm.model_executor.sampling_metadata import SamplingMetadata
+from vllm.model_executor.sampling_metadata import (SamplingMetadata,
+                                                   SamplingMetadataCache)
 from vllm.model_executor.utils import set_random_seed
 
 __all__ = [
     "SamplingMetadata",
+    "SamplingMetadataCache",
     "set_random_seed",
     "BasevLLMParameter",
     "PackedvLLMParameter",