[Hardware] Initial TPU integration (#5292)

This commit is contained in:
Woosuk Kwon
2024-06-12 11:53:03 -07:00
committed by GitHub
parent 847cdcca1c
commit 1a8bfd92d5
22 changed files with 1322 additions and 28 deletions

View File

@@ -146,6 +146,15 @@ def is_neuron() -> bool:
return transformers_neuronx is not None
@lru_cache(maxsize=None)
def is_tpu() -> bool:
    """Return ``True`` when the TPU runtime library (``libtpu``) is importable.

    The result is cached, so the import probe runs at most once per process.
    """
    try:
        import libtpu  # noqa: F401  # probe only; the module itself is unused here
    except ImportError:
        return False
    return True
@lru_cache(maxsize=None)
def get_max_shared_memory_bytes(gpu: int = 0) -> int:
"""Returns the maximum shared memory per thread block in bytes."""
@@ -546,6 +555,11 @@ def maybe_expand_dim(tensor: torch.Tensor,
return tensor
def get_dtype_size(dtype: torch.dtype) -> int:
    """Return the size in bytes of a single element of ``dtype``."""
    # An empty tensor is enough: element_size() depends only on the dtype,
    # not on the tensor's contents.
    probe = torch.empty(0, dtype=dtype)
    return probe.element_size()
def merge_dicts(dict1: Dict[Any, List[Any]],
dict2: Dict[Any, List[Any]]) -> Dict[Any, List[Any]]:
"""Merge 2 dicts that have key -> List of items.