[Performance] Optimize e2e overheads: Reduce python allocations (#7162)
This commit is contained in:
committed by
GitHub
parent
73388c07a4
commit
e02ac55617
@@ -1,10 +1,12 @@
 from vllm.model_executor.parameter import (BasevLLMParameter,
                                            PackedvLLMParameter)
-from vllm.model_executor.sampling_metadata import SamplingMetadata
+from vllm.model_executor.sampling_metadata import (SamplingMetadata,
+                                                   SamplingMetadataCache)
 from vllm.model_executor.utils import set_random_seed
 
 __all__ = [
     "SamplingMetadata",
+    "SamplingMetadataCache",
     "set_random_seed",
     "BasevLLMParameter",
     "PackedvLLMParameter",
Reference in New Issue
Block a user