Fix early CUDA initialization caused by the module-level get_architecture_class_name import (#3770)
Signed-off-by: Lei Wen <wenlei03@qiyi.com>
Co-authored-by: Lei Wen <wenlei03@qiyi.com>
This commit is contained in:
@@ -13,7 +13,6 @@ from vllm.engine.ray_utils import initialize_ray_cluster
 from vllm.executor.executor_base import ExecutorBase
 from vllm.logger import init_logger
 from vllm.lora.request import LoRARequest
-from vllm.model_executor.model_loader import get_architecture_class_name
 from vllm.outputs import RequestOutput
 from vllm.sampling_params import SamplingParams
 from vllm.sequence import (MultiModalData, SamplerOutput, Sequence,
@@ -115,6 +114,8 @@ class LLMEngine:
 
         # If usage stat is enabled, collect relevant info.
         if is_usage_stats_enabled():
+            from vllm.model_executor.model_loader import (
+                get_architecture_class_name)
             usage_message.report_usage(
                 get_architecture_class_name(model_config),
                 usage_context,
|
|||||||
Reference in New Issue
Block a user