[Frontend] Use matryoshka_dimensions to control the allowed output dimensions. (#16970)
This commit is contained in:
@@ -1248,6 +1248,10 @@ class ModelConfig:
|
||||
return (hasattr(self.hf_config, "matryoshka_dimensions")
|
||||
or getattr(self.hf_config, "is_matryoshka", False))
|
||||
|
||||
@property
def matryoshka_dimensions(self):
    """Expose the ``matryoshka_dimensions`` attribute of the HF config.

    Returns:
        The value stored on ``self.hf_config.matryoshka_dimensions``, or
        ``None`` when the config does not define that attribute.
    """
    hf_config = self.hf_config
    # Fall back to None so callers can tell "not declared" apart from a
    # declared value without having to catch AttributeError themselves.
    return getattr(hf_config, "matryoshka_dimensions", None)
|
||||
|
||||
|
||||
# Literal type alias: a value annotated as BlockSize must be one of these ints.
BlockSize = Literal[1, 8, 16, 32, 64, 128]
|
||||
# Literal type alias restricting a value to these four strings.
# NOTE(review): presumably names the cache data type option — confirm at use site.
CacheDType = Literal["auto", "fp8", "fp8_e4m3", "fp8_e5m2"]
|
||||
|
||||
Reference in New Issue
Block a user