[V1][Quantization] Add CUDA graph compatible v1 GGUF support (#18646)
Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
Signed-off-by: Isotr0py <2037008807@qq.com>
This commit is contained in:
@@ -1291,14 +1291,6 @@ class EngineArgs:
|
||||
recommend_to_remove=False)
|
||||
return False
|
||||
|
||||
# Some quantization is not compatible with torch.compile.
|
||||
V1_UNSUPPORTED_QUANT = ["gguf"]
|
||||
if model_config.quantization in V1_UNSUPPORTED_QUANT:
|
||||
_raise_or_fallback(
|
||||
feature_name=f"--quantization {model_config.quantization}",
|
||||
recommend_to_remove=False)
|
||||
return False
|
||||
|
||||
# No Embedding Models so far.
|
||||
if model_config.task not in ["generate"]:
|
||||
_raise_or_fallback(feature_name=f"--task {model_config.task}",
|
||||
|
||||
Reference in New Issue
Block a user