[Frontend] [Core] Integrate Tensorizer into S3 loading machinery, allow passing arbitrary arguments during save/load (#19619)

Signed-off-by: Sanger Steel <sangersteel@gmail.com>
Co-authored-by: Eta <esyra@coreweave.com>
This commit is contained in:
Sanger Steel
2025-07-08 01:47:43 -04:00
committed by GitHub
parent e34d130c16
commit 72d14d0eed
18 changed files with 814 additions and 196 deletions

View File

@@ -58,7 +58,8 @@ def parse_type(return_type: Callable[[str], T]) -> Callable[[str], T]:
def _parse_type(val: str) -> T:
try:
if return_type is json.loads and not re.match("^{.*}$", val):
if return_type is json.loads and not re.match(
r"(?s)^\s*{.*}\s*$", val):
return cast(T, nullable_kvs(val))
return return_type(val)
except ValueError as e:
@@ -80,7 +81,7 @@ def optional_type(
def union_dict_and_str(val: str) -> Optional[Union[str, dict[str, str]]]:
    """Parse a CLI value that may be either a plain string or a JSON object.

    Args:
        val: Raw argument text.

    Returns:
        ``str(val)`` unchanged when the text is not wrapped in ``{...}``;
        otherwise whatever ``optional_type(json.loads)`` produces for it.
    """
    # (?s) lets "." span newlines and the \s* padding tolerates surrounding
    # whitespace, so pretty-printed / multi-line JSON objects are still
    # recognised as JSON rather than being passed through as a raw string.
    if not re.match(r"(?s)^\s*{.*}\s*$", val):
        return str(val)
    return optional_type(json.loads)(val)
@@ -1001,11 +1002,42 @@ class EngineArgs:
override_attention_dtype=self.override_attention_dtype,
)
def valid_tensorizer_config_provided(self) -> bool:
    """Report whether ``model_loader_extra_config`` lacks a Tensorizer path.

    NOTE: the return value reads inverted relative to the method name.
    ``True`` means that neither ``tensorizer_uri`` nor ``tensorizer_dir``
    was supplied in ``self.model_loader_extra_config`` (including when the
    config is empty or unset); ``False`` means one of those keys is
    present.

    Side effect: when the config object exposes a ``to_serializable()``
    method (per the original docstring, a TensorizerConfig object rather
    than a dict), ``self.model_loader_extra_config`` is replaced by the
    result of that call before the keys are inspected.

    Returns:
        ``False`` if the (possibly converted) config contains
        ``tensorizer_uri`` or ``tensorizer_dir``; ``True`` otherwise.
    """
    extra_config = self.model_loader_extra_config
    if not extra_config:
        # Nothing was passed, so no Tensorizer location can be present.
        return True

    if hasattr(extra_config, "to_serializable"):
        # Normalise a config object to its dict form and store it back so
        # later readers of the attribute see the serializable version.
        extra_config = extra_config.to_serializable()
        self.model_loader_extra_config = extra_config

    location_keys = ("tensorizer_uri", "tensorizer_dir")
    return not any(key in extra_config for key in location_keys)
def create_load_config(self) -> LoadConfig:
if self.quantization == "bitsandbytes":
self.load_format = "bitsandbytes"
if (self.load_format == "tensorizer"
and self.valid_tensorizer_config_provided()):
logger.info("Inferring Tensorizer args from %s", self.model)
self.model_loader_extra_config = {"tensorizer_dir": self.model}
else:
logger.info(
"Using Tensorizer args from --model-loader-extra-config. "
"Note that you can now simply pass the S3 directory in the "
"model tag instead of providing the JSON string.")
return LoadConfig(
load_format=self.load_format,
download_dir=self.download_dir,