[Bugfix] Update Run:AI Model Streamer Loading Integration (#23845)

Signed-off-by: Omer Dayan (SW-GPU) <omer@run.ai>
Signed-off-by: Peter Schuurman <psch@google.com>
Co-authored-by: Omer Dayan (SW-GPU) <omer@run.ai>
Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk>
This commit is contained in:
pwschuurman
2025-09-09 21:37:17 -07:00
committed by GitHub
parent 009d689b0c
commit 4377b1ae3b
7 changed files with 187 additions and 122 deletions

View File

@@ -1053,9 +1053,10 @@ class EngineArgs:
SpeculatorsConfig)
if self.speculative_config is None:
-            hf_config = get_config(self.hf_config_path or self.model,
-                                   self.trust_remote_code, self.revision,
-                                   self.code_revision, self.config_format)
+            hf_config = get_config(
+                self.hf_config_path or target_model_config.model,
+                self.trust_remote_code, self.revision, self.code_revision,
+                self.config_format)
# if loading a SpeculatorsConfig, load the speculative_config
# details from the config directly
@@ -1065,7 +1066,7 @@ class EngineArgs:
self.speculative_config = {}
self.speculative_config[
"num_speculative_tokens"] = hf_config.num_lookahead_tokens
-                self.speculative_config["model"] = self.model
+                self.speculative_config["model"] = target_model_config.model
self.speculative_config["method"] = hf_config.method
else:
return None