[Misc] Clean up model registry (#37457)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Cyrus Leung
2026-03-19 02:24:44 +08:00
committed by GitHub
parent 0ef7f79054
commit f3732bd931

View File

@@ -124,8 +124,8 @@ _TEXT_GENERATION_MODELS = {
"GPTNeoXForCausalLM": ("gpt_neox", "GPTNeoXForCausalLM"), "GPTNeoXForCausalLM": ("gpt_neox", "GPTNeoXForCausalLM"),
"GraniteForCausalLM": ("granite", "GraniteForCausalLM"), "GraniteForCausalLM": ("granite", "GraniteForCausalLM"),
"GraniteMoeForCausalLM": ("granitemoe", "GraniteMoeForCausalLM"), "GraniteMoeForCausalLM": ("granitemoe", "GraniteMoeForCausalLM"),
"GraniteMoeHybridForCausalLM": ("granitemoehybrid", "GraniteMoeHybridForCausalLM"), # noqa: E501 "GraniteMoeHybridForCausalLM": ("granitemoehybrid", "GraniteMoeHybridForCausalLM"),
"GraniteMoeSharedForCausalLM": ("granitemoeshared", "GraniteMoeSharedForCausalLM"), # noqa: E501 "GraniteMoeSharedForCausalLM": ("granitemoeshared", "GraniteMoeSharedForCausalLM"),
"GritLM": ("gritlm", "GritLM"), "GritLM": ("gritlm", "GritLM"),
"Grok1ModelForCausalLM": ("grok1", "GrokForCausalLM"), "Grok1ModelForCausalLM": ("grok1", "GrokForCausalLM"),
"Grok1ForCausalLM": ("grok1", "GrokForCausalLM"), "Grok1ForCausalLM": ("grok1", "GrokForCausalLM"),
@@ -143,7 +143,7 @@ _TEXT_GENERATION_MODELS = {
"JAISLMHeadModel": ("jais", "JAISLMHeadModel"), "JAISLMHeadModel": ("jais", "JAISLMHeadModel"),
"Jais2ForCausalLM": ("jais2", "Jais2ForCausalLM"), "Jais2ForCausalLM": ("jais2", "Jais2ForCausalLM"),
"JambaForCausalLM": ("jamba", "JambaForCausalLM"), "JambaForCausalLM": ("jamba", "JambaForCausalLM"),
"KimiLinearForCausalLM": ("kimi_linear", "KimiLinearForCausalLM"), # noqa: E501 "KimiLinearForCausalLM": ("kimi_linear", "KimiLinearForCausalLM"),
"Lfm2ForCausalLM": ("lfm2", "Lfm2ForCausalLM"), "Lfm2ForCausalLM": ("lfm2", "Lfm2ForCausalLM"),
"Lfm2MoeForCausalLM": ("lfm2_moe", "Lfm2MoeForCausalLM"), "Lfm2MoeForCausalLM": ("lfm2_moe", "Lfm2MoeForCausalLM"),
"LlamaForCausalLM": ("llama", "LlamaForCausalLM"), "LlamaForCausalLM": ("llama", "LlamaForCausalLM"),
@@ -249,17 +249,14 @@ _EMBEDDING_MODELS = {
# [Multimodal] # [Multimodal]
"CLIPModel": ("clip", "CLIPEmbeddingModel"), "CLIPModel": ("clip", "CLIPEmbeddingModel"),
"ColPaliForRetrieval": ("colpali", "ColPaliModel"), "ColPaliForRetrieval": ("colpali", "ColPaliModel"),
"LlamaNemotronVLModel": ("nemotron_vl", "LlamaNemotronVLForEmbedding"),
"LlavaNextForConditionalGeneration": ( "LlavaNextForConditionalGeneration": (
"llava_next", "llava_next",
"LlavaNextForConditionalGeneration", "LlavaNextForConditionalGeneration",
), ),
"Phi3VForCausalLM": ("phi3v", "Phi3VForCausalLM"), "Phi3VForCausalLM": ("phi3v", "Phi3VForCausalLM"),
"Qwen2VLForConditionalGeneration": ("qwen2_vl", "Qwen2VLForConditionalGeneration"), # noqa: E501 "Qwen2VLForConditionalGeneration": ("qwen2_vl", "Qwen2VLForConditionalGeneration"),
"SiglipModel": ("siglip", "SiglipEmbeddingModel"), "SiglipModel": ("siglip", "SiglipEmbeddingModel"),
"LlamaNemotronVLModel": (
"nemotron_vl",
"LlamaNemotronVLForEmbedding",
),
# Technically Terratorch models work on images, both in # Technically Terratorch models work on images, both in
# input and output. I am adding it here because it piggy-backs on embedding # input and output. I am adding it here because it piggy-backs on embedding
# models for the time being. # models for the time being.
@@ -304,7 +301,7 @@ _SEQUENCE_CLASSIFICATION_MODELS = {
"bert_with_rope", "bert_with_rope",
"GteNewForSequenceClassification", "GteNewForSequenceClassification",
), ),
"JambaForSequenceClassification": ("jamba", "JambaForSequenceClassification"), # noqa: E501 "JambaForSequenceClassification": ("jamba", "JambaForSequenceClassification"),
"LlamaBidirectionalForSequenceClassification": ( "LlamaBidirectionalForSequenceClassification": (
"llama", "llama",
"LlamaBidirectionalForSequenceClassification", "LlamaBidirectionalForSequenceClassification",
@@ -368,13 +365,13 @@ _MULTIMODAL_MODELS = {
"fireredasr2", "fireredasr2",
"FireRedASR2ForConditionalGeneration", "FireRedASR2ForConditionalGeneration",
), ),
"FunASRForConditionalGeneration": ("funasr", "FunASRForConditionalGeneration"), # noqa: E501 "FunASRForConditionalGeneration": ("funasr", "FunASRForConditionalGeneration"),
"FunAudioChatForConditionalGeneration": ( "FunAudioChatForConditionalGeneration": (
"funaudiochat", "funaudiochat",
"FunAudioChatForConditionalGeneration", "FunAudioChatForConditionalGeneration",
), ),
"FuyuForCausalLM": ("fuyu", "FuyuForCausalLM"), "FuyuForCausalLM": ("fuyu", "FuyuForCausalLM"),
"Gemma3ForConditionalGeneration": ("gemma3_mm", "Gemma3ForConditionalGeneration"), # noqa: E501 "Gemma3ForConditionalGeneration": ("gemma3_mm", "Gemma3ForConditionalGeneration"),
"Gemma3nForConditionalGeneration": ( "Gemma3nForConditionalGeneration": (
"gemma3n_mm", "gemma3n_mm",
"Gemma3nForConditionalGeneration", "Gemma3nForConditionalGeneration",
@@ -383,7 +380,7 @@ _MULTIMODAL_MODELS = {
"GLM4VForCausalLM": ("glm4v", "GLM4VForCausalLM"), "GLM4VForCausalLM": ("glm4v", "GLM4VForCausalLM"),
"Glm4vForConditionalGeneration": ("glm4_1v", "Glm4vForConditionalGeneration"), "Glm4vForConditionalGeneration": ("glm4_1v", "Glm4vForConditionalGeneration"),
"Glm4vMoeForConditionalGeneration": ("glm4_1v", "Glm4vMoeForConditionalGeneration"), "Glm4vMoeForConditionalGeneration": ("glm4_1v", "Glm4vMoeForConditionalGeneration"),
"GlmOcrForConditionalGeneration": ("glm_ocr", "GlmOcrForConditionalGeneration"), # noqa: E501 "GlmOcrForConditionalGeneration": ("glm_ocr", "GlmOcrForConditionalGeneration"),
"GraniteSpeechForConditionalGeneration": ( "GraniteSpeechForConditionalGeneration": (
"granite_speech", "granite_speech",
"GraniteSpeechForConditionalGeneration", "GraniteSpeechForConditionalGeneration",
@@ -393,13 +390,7 @@ _MULTIMODAL_MODELS = {
"hunyuan_vision", "hunyuan_vision",
"HunYuanVLForConditionalGeneration", "HunYuanVLForConditionalGeneration",
), ),
"StepVLForConditionalGeneration": ("step_vl", "StepVLForConditionalGeneration"),
"InternVLChatModel": ("internvl", "InternVLChatModel"), "InternVLChatModel": ("internvl", "InternVLChatModel"),
"NemotronH_Nano_VL_V2": ("nano_nemotron_vl", "NemotronH_Nano_VL_V2"),
"OpenCUAForConditionalGeneration": (
"opencua",
"OpenCUAForConditionalGeneration",
),
"InternS1ForConditionalGeneration": ( "InternS1ForConditionalGeneration": (
"interns1", "interns1",
"InternS1ForConditionalGeneration", "InternS1ForConditionalGeneration",
@@ -417,24 +408,22 @@ _MULTIMODAL_MODELS = {
"Idefics3ForConditionalGeneration", "Idefics3ForConditionalGeneration",
), ),
"IsaacForConditionalGeneration": ("isaac", "IsaacForConditionalGeneration"), "IsaacForConditionalGeneration": ("isaac", "IsaacForConditionalGeneration"),
"SmolVLMForConditionalGeneration": ("smolvlm", "SmolVLMForConditionalGeneration"), # noqa: E501
"KananaVForConditionalGeneration": ("kanana_v", "KananaVForConditionalGeneration"), "KananaVForConditionalGeneration": ("kanana_v", "KananaVForConditionalGeneration"),
"KeyeForConditionalGeneration": ("keye", "KeyeForConditionalGeneration"), "KeyeForConditionalGeneration": ("keye", "KeyeForConditionalGeneration"),
"KeyeVL1_5ForConditionalGeneration": ( "KeyeVL1_5ForConditionalGeneration": (
"keye_vl1_5", "keye_vl1_5",
"KeyeVL1_5ForConditionalGeneration", "KeyeVL1_5ForConditionalGeneration",
), ),
"RForConditionalGeneration": ("rvl", "RForConditionalGeneration"), "KimiVLForConditionalGeneration": ("kimi_vl", "KimiVLForConditionalGeneration"),
"KimiVLForConditionalGeneration": ("kimi_vl", "KimiVLForConditionalGeneration"), # noqa: E501 "KimiK25ForConditionalGeneration": ("kimi_k25", "KimiK25ForConditionalGeneration"),
"KimiK25ForConditionalGeneration": ("kimi_k25", "KimiK25ForConditionalGeneration"), # noqa: E501 "MoonshotKimiaForCausalLM": ("kimi_audio", "KimiAudioForConditionalGeneration"),
"MoonshotKimiaForCausalLM": ("kimi_audio", "KimiAudioForConditionalGeneration"), # noqa: E501
"LightOnOCRForConditionalGeneration": ( "LightOnOCRForConditionalGeneration": (
"lightonocr", "lightonocr",
"LightOnOCRForConditionalGeneration", "LightOnOCRForConditionalGeneration",
), ),
"Lfm2VlForConditionalGeneration": ("lfm2_vl", "Lfm2VLForConditionalGeneration"), "Lfm2VlForConditionalGeneration": ("lfm2_vl", "Lfm2VLForConditionalGeneration"),
"Llama4ForConditionalGeneration": ("mllama4", "Llama4ForConditionalGeneration"),
"Llama_Nemotron_Nano_VL": ("nemotron_vl", "LlamaNemotronVLChatModel"), "Llama_Nemotron_Nano_VL": ("nemotron_vl", "LlamaNemotronVLChatModel"),
"Llama4ForConditionalGeneration": ("mllama4", "Llama4ForConditionalGeneration"), # noqa: E501
"LlavaForConditionalGeneration": ("llava", "LlavaForConditionalGeneration"), "LlavaForConditionalGeneration": ("llava", "LlavaForConditionalGeneration"),
"LlavaNextForConditionalGeneration": ( "LlavaNextForConditionalGeneration": (
"llava_next", "llava_next",
@@ -448,7 +437,7 @@ _MULTIMODAL_MODELS = {
"llava_onevision", "llava_onevision",
"LlavaOnevisionForConditionalGeneration", "LlavaOnevisionForConditionalGeneration",
), ),
"MantisForConditionalGeneration": ("llava", "MantisForConditionalGeneration"), # noqa: E501 "MantisForConditionalGeneration": ("llava", "MantisForConditionalGeneration"),
"MiDashengLMModel": ("midashenglm", "MiDashengLMModel"), "MiDashengLMModel": ("midashenglm", "MiDashengLMModel"),
"MiniMaxVL01ForConditionalGeneration": ( "MiniMaxVL01ForConditionalGeneration": (
"minimax_vl_01", "minimax_vl_01",
@@ -462,7 +451,9 @@ _MULTIMODAL_MODELS = {
), ),
"MolmoForCausalLM": ("molmo", "MolmoForCausalLM"), "MolmoForCausalLM": ("molmo", "MolmoForCausalLM"),
"Molmo2ForConditionalGeneration": ("molmo2", "Molmo2ForConditionalGeneration"), "Molmo2ForConditionalGeneration": ("molmo2", "Molmo2ForConditionalGeneration"),
"NemotronH_Nano_VL_V2": ("nano_nemotron_vl", "NemotronH_Nano_VL_V2"),
"NVLM_D": ("nvlm_d", "NVLM_D_Model"), "NVLM_D": ("nvlm_d", "NVLM_D_Model"),
"OpenCUAForConditionalGeneration": ("opencua", "OpenCUAForConditionalGeneration"),
"OpenPanguVLForConditionalGeneration": ( "OpenPanguVLForConditionalGeneration": (
"openpangu_vl", "openpangu_vl",
"OpenPanguVLForConditionalGeneration", "OpenPanguVLForConditionalGeneration",
@@ -481,9 +472,9 @@ _MULTIMODAL_MODELS = {
), ),
"Phi3VForCausalLM": ("phi3v", "Phi3VForCausalLM"), "Phi3VForCausalLM": ("phi3v", "Phi3VForCausalLM"),
"Phi4MMForCausalLM": ("phi4mm", "Phi4MMForCausalLM"), "Phi4MMForCausalLM": ("phi4mm", "Phi4MMForCausalLM"),
"PixtralForConditionalGeneration": ("pixtral", "PixtralForConditionalGeneration"), # noqa: E501 "PixtralForConditionalGeneration": ("pixtral", "PixtralForConditionalGeneration"),
"QwenVLForConditionalGeneration": ("qwen_vl", "QwenVLForConditionalGeneration"), # noqa: E501 "QwenVLForConditionalGeneration": ("qwen_vl", "QwenVLForConditionalGeneration"),
"Qwen2VLForConditionalGeneration": ("qwen2_vl", "Qwen2VLForConditionalGeneration"), # noqa: E501 "Qwen2VLForConditionalGeneration": ("qwen2_vl", "Qwen2VLForConditionalGeneration"),
"Qwen2_5_VLForConditionalGeneration": ( "Qwen2_5_VLForConditionalGeneration": (
"qwen2_5_vl", "qwen2_5_vl",
"Qwen2_5_VLForConditionalGeneration", "Qwen2_5_VLForConditionalGeneration",
@@ -508,33 +499,30 @@ _MULTIMODAL_MODELS = {
"qwen3_asr", "qwen3_asr",
"Qwen3ASRForConditionalGeneration", "Qwen3ASRForConditionalGeneration",
), ),
"Qwen3ASRRealtimeGeneration": ( "Qwen3ASRRealtimeGeneration": ("qwen3_asr_realtime", "Qwen3ASRRealtimeGeneration"),
"qwen3_asr_realtime", "Qwen3VLForConditionalGeneration": ("qwen3_vl", "Qwen3VLForConditionalGeneration"),
"Qwen3ASRRealtimeGeneration",
),
"Qwen3VLForConditionalGeneration": ("qwen3_vl", "Qwen3VLForConditionalGeneration"), # noqa: E501
"Qwen3VLMoeForConditionalGeneration": ( "Qwen3VLMoeForConditionalGeneration": (
"qwen3_vl_moe", "qwen3_vl_moe",
"Qwen3VLMoeForConditionalGeneration", "Qwen3VLMoeForConditionalGeneration",
), ),
"Qwen3_5ForConditionalGeneration": ( "Qwen3_5ForConditionalGeneration": ("qwen3_5", "Qwen3_5ForConditionalGeneration"),
"qwen3_5",
"Qwen3_5ForConditionalGeneration",
),
"Qwen3_5MoeForConditionalGeneration": ( "Qwen3_5MoeForConditionalGeneration": (
"qwen3_5", "qwen3_5",
"Qwen3_5MoeForConditionalGeneration", "Qwen3_5MoeForConditionalGeneration",
), ),
"RForConditionalGeneration": ("rvl", "RForConditionalGeneration"),
"SkyworkR1VChatModel": ("skyworkr1v", "SkyworkR1VChatModel"), "SkyworkR1VChatModel": ("skyworkr1v", "SkyworkR1VChatModel"),
"Step3VLForConditionalGeneration": ("step3_vl", "Step3VLForConditionalGeneration"), # noqa: E501 "SmolVLMForConditionalGeneration": ("smolvlm", "SmolVLMForConditionalGeneration"),
"TarsierForConditionalGeneration": ("tarsier", "TarsierForConditionalGeneration"), # noqa: E501 "StepVLForConditionalGeneration": ("step_vl", "StepVLForConditionalGeneration"),
"Step3VLForConditionalGeneration": ("step3_vl", "Step3VLForConditionalGeneration"),
"TarsierForConditionalGeneration": ("tarsier", "TarsierForConditionalGeneration"),
"Tarsier2ForConditionalGeneration": ( "Tarsier2ForConditionalGeneration": (
"qwen2_vl", "qwen2_vl",
"Tarsier2ForConditionalGeneration", "Tarsier2ForConditionalGeneration",
), ),
"UltravoxModel": ("ultravox", "UltravoxModel"), "UltravoxModel": ("ultravox", "UltravoxModel"),
"VoxtralForConditionalGeneration": ("voxtral", "VoxtralForConditionalGeneration"), # noqa: E501 "VoxtralForConditionalGeneration": ("voxtral", "VoxtralForConditionalGeneration"),
"VoxtralRealtimeGeneration": ("voxtral_realtime", "VoxtralRealtimeGeneration"), # noqa: E501 "VoxtralRealtimeGeneration": ("voxtral_realtime", "VoxtralRealtimeGeneration"),
# [Encoder-decoder] # [Encoder-decoder]
"CohereASRForConditionalGeneration": ( "CohereASRForConditionalGeneration": (
"cohere_asr", "cohere_asr",
@@ -544,7 +532,7 @@ _MULTIMODAL_MODELS = {
"nemotron_parse", "nemotron_parse",
"NemotronParseForConditionalGeneration", "NemotronParseForConditionalGeneration",
), ),
"WhisperForConditionalGeneration": ("whisper", "WhisperForConditionalGeneration"), # noqa: E501 "WhisperForConditionalGeneration": ("whisper", "WhisperForConditionalGeneration"),
} }
_SPECULATIVE_DECODING_MODELS = { _SPECULATIVE_DECODING_MODELS = {
@@ -654,14 +642,17 @@ _PREVIOUSLY_SUPPORTED_MODELS = {
"Phi4MultimodalForCausalLM": "0.12.0", "Phi4MultimodalForCausalLM": "0.12.0",
# encoder-decoder models except whisper # encoder-decoder models except whisper
# have been removed for V0 deprecation. # have been removed for V0 deprecation.
"BartModel": "0.10.2",
"BartForConditionalGeneration": "0.10.2",
"DonutForConditionalGeneration": "0.10.2", "DonutForConditionalGeneration": "0.10.2",
"Florence2ForConditionalGeneration": "0.10.2",
"MBartForConditionalGeneration": "0.10.2",
"MllamaForConditionalGeneration": "0.10.2", "MllamaForConditionalGeneration": "0.10.2",
} }
_OOT_SUPPORTED_MODELS = {
"BartModel": "https://github.com/vllm-project/bart-plugin",
"BartForConditionalGeneration": "https://github.com/vllm-project/bart-plugin",
"Florence2ForConditionalGeneration": "https://github.com/vllm-project/bart-plugin",
"MBartForConditionalGeneration": "https://github.com/vllm-project/bart-plugin",
}
@dataclass(frozen=True) @dataclass(frozen=True)
class _ModelInfo: class _ModelInfo:
@@ -958,6 +949,14 @@ class _ModelRegistry:
"Please use an older version of vLLM if you want to " "Please use an older version of vLLM if you want to "
"use this model architecture." "use this model architecture."
) )
if arch in _OOT_SUPPORTED_MODELS:
plugin_url = _OOT_SUPPORTED_MODELS[arch]
raise ValueError(
f"Model architecture {arch} is not supported in-tree anymore. "
f"Please install the plugin at {plugin_url} if you want to "
"use this model architecture."
)
raise ValueError( raise ValueError(
f"Model architectures {architectures} are not supported for now. " f"Model architectures {architectures} are not supported for now. "