[Feature] Enhance EAGLE Architecture with Proper RMS Norms (#14990)

Signed-off-by: Bryan Lu <yuzhelu@amazon.com>
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com>
This commit is contained in:
Bryan Lu
2025-03-26 01:24:07 -07:00
committed by GitHub
parent 5aefd6ac31
commit 781d056280
3 changed files with 70 additions and 12 deletions

View File

@@ -800,10 +800,18 @@ class ModelConfig:
@property
def is_deepseek_mla(self) -> bool:
    """Return whether the text config describes a DeepSeek MLA model.

    The result is ``True`` when ``self.hf_text_config`` reports one of
    the DeepSeek model types (directly, or as the model wrapped by an
    EAGLE module) and its ``kv_lora_rank`` is not ``None``.

    Missing attributes are treated as "not MLA" rather than raising.
    """
    text_config = self.hf_text_config
    model_type = getattr(text_config, "model_type", None)
    if model_type is None:
        return False

    # A missing ``kv_lora_rank`` attribute counts the same as ``None``.
    has_kv_lora_rank = getattr(text_config, "kv_lora_rank",
                               None) is not None

    if model_type in ('deepseek_v2', 'deepseek_v3', 'deepseek_mtp'):
        return has_kv_lora_rank

    if model_type == 'eagle':
        # If the model is an EAGLE module, check for the underlying
        # architecture. ``kv_lora_rank`` is still read from the outer
        # config. NOTE(review): presumably the EAGLE config mirrors
        # that attribute from the wrapped model -- confirm.
        wrapped = getattr(text_config, "model", None)
        wrapped_type = getattr(wrapped, "model_type", None)
        return (wrapped_type in ('deepseek_v2', 'deepseek_v3')
                and has_kv_lora_rank)

    return False
def get_head_size(self) -> int:
# TODO remove hard code