[Frontend] Using matryoshka_dimensions to control the allowed output dimensions. (#16970)

This commit is contained in:
wang.yuqi
2025-04-24 22:06:28 +08:00
committed by GitHub
parent b724afe343
commit 67309a1cb5
8 changed files with 172 additions and 76 deletions

View File

@@ -1248,6 +1248,10 @@ class ModelConfig:
return (hasattr(self.hf_config, "matryoshka_dimensions")
or getattr(self.hf_config, "is_matryoshka", False))
@property
def matryoshka_dimensions(self):
    """Return ``hf_config.matryoshka_dimensions``, or ``None`` if unset.

    The value is read straight from the underlying HF config object; when
    that object does not define a ``matryoshka_dimensions`` attribute the
    property yields ``None`` instead of raising.
    """
    hf_config = self.hf_config
    return getattr(hf_config, "matryoshka_dimensions", None)
# Literal type listing the permitted block-size values.
# NOTE(review): name suggests this is the cache block size — confirm at use sites.
BlockSize = Literal[1, 8, 16, 32, 64, 128]
# Literal type listing the permitted cache dtype strings.
# NOTE(review): presumably "auto" defers the choice to another setting — confirm.
CacheDType = Literal["auto", "fp8", "fp8_e4m3", "fp8_e5m2"]