[Refactor][TPU] Remove torch_xla path and use tpu-inference (#30808)

Signed-off-by: Wei-Yu Lin <weiyulin@google.com>
Signed-off-by: weiyu <62784299+weiyu0824@users.noreply.github.com>
Author: weiyu
Date: 2026-01-07 00:07:16 -08:00
Committed by: GitHub
Parent: 0dd5dee9b9
Commit: e7596371a4
46 changed files with 9 additions and 6785 deletions


@@ -186,20 +186,6 @@ class UsageMessage:
         except Exception:
             return False
 
-    def _report_torch_xla_usage(self) -> bool:
-        try:
-            import torch_xla
-
-            self.gpu_count = torch_xla.runtime.world_size()
-            self.gpu_type = torch_xla.tpu.get_tpu_type()
-            self.gpu_memory_per_device = torch_xla.core.xla_model.get_memory_info()[
-                "bytes_limit"
-            ]
-            self.cuda_runtime = "torch_xla"
-            return True
-        except Exception:
-            return False
-
     def _report_usage_once(
         self,
         model_architecture: str,
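
Both the removed torch_xla reporter and the surviving `_report_tpu_inference_usage` follow the same probe-and-report shape: attempt an optional import, fill in the usage fields, and return False on any failure rather than raising, so the caller decides whether to log. A minimal standalone sketch of that pattern, with illustrative names that are not from the vLLM codebase:

    import importlib

    def probe_optional_backend(stats: dict, module_name: str) -> bool:
        # Probe an optional accelerator backend; never raise, only signal failure.
        try:
            backend = importlib.import_module(module_name)
            # A real reporter would read device count/type/memory from `backend`;
            # which attributes exist depends on the package, so this sketch only
            # records that the probe succeeded.
            stats["runtime"] = backend.__name__
            return True
        except Exception:
            # Missing package or any runtime error both mean "not available".
            return False

For example, `probe_optional_backend({}, "torch_xla")` returns False on a machine without torch_xla installed, which is exactly how the reporters degrade gracefully.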
@@ -217,9 +203,7 @@ class UsageMessage:
         if current_platform.is_cuda():
             self.cuda_runtime = torch.version.cuda
         if current_platform.is_tpu():  # noqa: SIM102
-            if (not self._report_tpu_inference_usage()) and (
-                not self._report_torch_xla_usage()
-            ):
+            if not self._report_tpu_inference_usage():
                 logger.exception("Failed to collect TPU information")
         self.provider = _detect_cloud_provider()
         self.architecture = platform.machine()
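
With the torch_xla fallback removed, the compound condition collapses to a single probe. The resulting TPU branch, reconstructed directly from the hunk above:

    if current_platform.is_tpu():  # noqa: SIM102
        if not self._report_tpu_inference_usage():
            logger.exception("Failed to collect TPU information")

The `# noqa: SIM102` suppression stays because the nested ifs remain, even though they could now be merged into a single condition.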