[CI] Disable non-lazy string operation on logging (#4326)
Co-authored-by: Danny Guinther <dguinther@neuralmagic.com>
This commit is contained in:
@@ -96,29 +96,38 @@ class LLMEngine:
|
||||
usage_context: UsageContext = UsageContext.ENGINE_CONTEXT,
|
||||
) -> None:
|
||||
logger.info(
|
||||
f"Initializing an LLM engine (v{vllm.__version__}) with config: "
|
||||
f"model={model_config.model!r}, "
|
||||
f"speculative_config={speculative_config!r}, "
|
||||
f"tokenizer={model_config.tokenizer!r}, "
|
||||
f"skip_tokenizer_init={model_config.skip_tokenizer_init}, "
|
||||
f"tokenizer_mode={model_config.tokenizer_mode}, "
|
||||
f"revision={model_config.revision}, "
|
||||
f"tokenizer_revision={model_config.tokenizer_revision}, "
|
||||
f"trust_remote_code={model_config.trust_remote_code}, "
|
||||
f"dtype={model_config.dtype}, "
|
||||
f"max_seq_len={model_config.max_model_len}, "
|
||||
f"download_dir={load_config.download_dir!r}, "
|
||||
f"load_format={load_config.load_format}, "
|
||||
f"tensor_parallel_size={parallel_config.tensor_parallel_size}, "
|
||||
f"disable_custom_all_reduce="
|
||||
f"{parallel_config.disable_custom_all_reduce}, "
|
||||
f"quantization={model_config.quantization}, "
|
||||
f"enforce_eager={model_config.enforce_eager}, "
|
||||
f"kv_cache_dtype={cache_config.cache_dtype}, "
|
||||
f"quantization_param_path={model_config.quantization_param_path}, "
|
||||
f"device_config={device_config.device}, "
|
||||
f"decoding_config={decoding_config!r}, "
|
||||
f"seed={model_config.seed})")
|
||||
"Initializing an LLM engine (v%s) with config: "
|
||||
"model=%r, speculative_config=%r, tokenizer=%r, "
|
||||
"skip_tokenizer_init=%s, tokenizer_mode=%s, revision=%s, "
|
||||
"tokenizer_revision=%s, trust_remote_code=%s, dtype=%s, "
|
||||
"max_seq_len=%d, download_dir=%r, load_format=%s, "
|
||||
"tensor_parallel_size=%d, disable_custom_all_reduce=%s"
|
||||
"quantization=%s, enforce_eager=%s, kv_cache_dtype=%s, "
|
||||
"quantization_param_path=%s, device_config=%s, "
|
||||
"decoding_config=%r, seed=%d)",
|
||||
vllm.__version__,
|
||||
model_config.model,
|
||||
speculative_config,
|
||||
model_config.tokenizer,
|
||||
model_config.skip_tokenizer_init,
|
||||
model_config.tokenizer_mode,
|
||||
model_config.revision,
|
||||
model_config.tokenizer_revision,
|
||||
model_config.trust_remote_code,
|
||||
model_config.dtype,
|
||||
model_config.max_model_len,
|
||||
load_config.download_dir,
|
||||
load_config.load_format,
|
||||
parallel_config.tensor_parallel_size,
|
||||
parallel_config.disable_custom_all_reduce,
|
||||
model_config.quantization,
|
||||
model_config.enforce_eager,
|
||||
cache_config.cache_dtype,
|
||||
model_config.quantization_param_path,
|
||||
device_config.device,
|
||||
decoding_config,
|
||||
model_config.seed,
|
||||
)
|
||||
# TODO(woosuk): Print more configs in debug mode.
|
||||
|
||||
self.model_config = model_config
|
||||
@@ -237,8 +246,10 @@ class LLMEngine:
|
||||
|
||||
if self.cache_config.num_gpu_blocks_override is not None:
|
||||
num_gpu_blocks_override = self.cache_config.num_gpu_blocks_override
|
||||
logger.info(f"Overriding {num_gpu_blocks=} with "
|
||||
f"{num_gpu_blocks_override=}")
|
||||
logger.info(
|
||||
"Overriding num_gpu_blocks=%d with "
|
||||
"num_gpu_blocks_override=%d", num_gpu_blocks,
|
||||
num_gpu_blocks_override)
|
||||
num_gpu_blocks = num_gpu_blocks_override
|
||||
|
||||
self.cache_config.num_gpu_blocks = num_gpu_blocks
|
||||
|
||||
Reference in New Issue
Block a user