[Platform] Move async output check to platform (#10768)

Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
This commit is contained in:
wangxiyuan
2024-12-10 01:24:46 +08:00
committed by GitHub
parent e691b26f6f
commit aea2fc38c3
10 changed files with 66 additions and 22 deletions

View File

@@ -513,11 +513,10 @@ class ModelConfig:
# Reminder: Please update docs/source/usage/compatibility_matrix.rst
# If the feature combo becomes valid
if device_config.device_type not in ("cuda", "tpu", "xpu", "hpu"):
if not current_platform.is_async_output_supported(self.enforce_eager):
logger.warning(
"Async output processing is only supported for CUDA, TPU, XPU "
"and HPU."
"Disabling it for other platforms.")
"Async output processing is not supported on the "
"current platform type %s.", current_platform.device_type)
self.use_async_output_proc = False
return
@@ -527,16 +526,6 @@ class ModelConfig:
self.use_async_output_proc = False
return
# Reminder: Please update docs/source/usage/compatibility_matrix.rst
# If the feature combo becomes valid
if device_config.device_type == "cuda" and self.enforce_eager:
logger.warning(
"To see benefits of async output processing, enable CUDA "
"graph. Since, enforce-eager is enabled, async output "
"processor cannot be used")
self.use_async_output_proc = not self.enforce_eager
return
# Async postprocessor is not necessary with embedding mode
# since there is no token generation
if self.task == "embedding":