[Core][Performance] Add XGrammar support for guided decoding and set it as default (#10785)

Signed-off-by: Aaron Pham <contact@aarnphm.xyz>
Signed-off-by: mgoin <michael@neuralmagic.com>
Co-authored-by: mgoin <michael@neuralmagic.com>
This commit is contained in:
Aaron Pham
2024-12-03 02:17:00 -05:00
committed by GitHub
parent 3257d449fa
commit 9323a3153b
11 changed files with 385 additions and 33 deletions

View File

@@ -168,7 +168,7 @@ class EngineArgs:
scheduler_delay_factor: float = 0.0
enable_chunked_prefill: Optional[bool] = None
guided_decoding_backend: str = 'outlines'
guided_decoding_backend: str = 'xgrammar'
# Speculative decoding configuration.
speculative_model: Optional[str] = None
speculative_model_quantization: Optional[str] = None
@@ -364,11 +364,12 @@ class EngineArgs:
parser.add_argument(
'--guided-decoding-backend',
type=str,
default='outlines',
choices=['outlines', 'lm-format-enforcer'],
default='xgrammar',
choices=['outlines', 'lm-format-enforcer', 'xgrammar'],
help='Which engine will be used for guided decoding'
' (JSON schema / regex etc) by default. Currently support '
'https://github.com/outlines-dev/outlines and '
'https://github.com/outlines-dev/outlines, '
'https://github.com/mlc-ai/xgrammar, and '
'https://github.com/noamgat/lm-format-enforcer.'
' Can be overridden per request via guided_decoding_backend'
' parameter.')