[CPU][Bugfix] Fix _to_list in CPU model runner (#28824)

Signed-off-by: jiang1.li <jiang1.li@intel.com>
2025-11-17 15:47:24 +08:00
parent 3380ed5e11
commit 577bb34fff
2 changed files with 8 additions and 3 deletions
--- a/csrc/cpu/torch_bindings.cpp
+++ b/csrc/cpu/torch_bindings.cpp
@@ -100,6 +100,9 @@ void cpu_attention_with_kv_cache(
    const torch::Tensor& scheduler_metadata,
    const std::optional<torch::Tensor>& s_aux);

+// Note: exists only to avoid import errors; always fails when called.
+void placeholder_op() { TORCH_CHECK(false, "Unimplemented"); }
+
 TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, ops) {
  // vLLM custom ops

@@ -275,6 +278,11 @@ TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, ops) {
      "sliding_window_left, SymInt sliding_window_right, Tensor block_table, "
      "float softcap, Tensor sheduler_metadata, Tensor? s_aux) -> ()",
      &cpu_attention_with_kv_cache);
+
+  // Placeholder registrations: each op below is bound to placeholder_op.
+  ops.def("static_scaled_fp8_quant() -> ()", placeholder_op);
+  ops.def("dynamic_scaled_fp8_quant() -> ()", placeholder_op);
+  ops.def("dynamic_per_token_scaled_fp8_quant() -> ()", placeholder_op);
 }

 TORCH_LIBRARY_EXPAND(CONCAT(TORCH_EXTENSION_NAME, _utils), utils) {