Add support for LoRA adapters in Nemotron-H models (#30802)

Signed-off-by: Daniel Serebrenik <daserebrenik@nvidia.com>
Author: danisereb
Date: 2026-01-19 16:30:44 +02:00
Committed by: GitHub
Parent: c88860d759
Commit: aa7f37ccfa
10 changed files with 497 additions and 27 deletions
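For context, here is a minimal sketch of how a LoRA adapter could be applied to a Nemotron-H model through vLLM's existing LoRA API once this support lands. The model name and adapter path are illustrative placeholders, not taken from this commit.

from vllm import LLM, SamplingParams
from vllm.lora.request import LoRARequest

# Placeholder checkpoint and adapter path (assumptions, not from this commit).
llm = LLM(
    model="nvidia/Nemotron-H-8B-Base-8K",
    enable_lora=True,  # turn on vLLM's LoRA machinery
)

outputs = llm.generate(
    ["Summarize the Nemotron-H architecture in one sentence."],
    SamplingParams(max_tokens=64),
    # LoRARequest(adapter name, integer id, local path to adapter weights)
    lora_request=LoRARequest("my-adapter", 1, "/path/to/lora/adapter"),
)
print(outputs[0].outputs[0].text)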

@@ -747,6 +747,9 @@ class NemotronHForCausalLM(
    MixtureOfExperts,
    SupportsMambaPrefixCaching,
):
    # Relevant only if self.has_moe is True
    is_non_gated_moe: bool = True
    hf_to_vllm_mapper = WeightsMapper(
        orig_to_new_prefix={"backbone": "model"},
        orig_to_new_substr={"A_log": "A", "embeddings": "embed_tokens"},
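The hf_to_vllm_mapper above renames Hugging Face checkpoint weight names to the names vLLM's model definition expects. A minimal sketch of that renaming, assuming plain prefix and substring replacement (illustrative only, not vLLM's actual WeightsMapper class):

# Sketch of the renaming performed when loading checkpoint weights,
# assuming simple prefix and substring substitution.
def map_name(name: str) -> str:
    prefix_map = {"backbone": "model"}
    substr_map = {"A_log": "A", "embeddings": "embed_tokens"}
    for old, new in prefix_map.items():
        if name.startswith(old):
            name = new + name[len(old):]
    for old, new in substr_map.items():
        name = name.replace(old, new)
    return name

# e.g. the HF name "backbone.embeddings.weight" maps to vLLM's
# "model.embed_tokens.weight", and Mamba's "A_log" parameter to "A".
assert map_name("backbone.embeddings.weight") == "model.embed_tokens.weight"
assert map_name("backbone.layers.0.mixer.A_log") == "model.layers.0.mixer.A"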