Add docstrings to some modules and classes (#100)
This commit is contained in:
@@ -1,12 +1,20 @@
|
||||
"""CacheEngine class for managing the KV cache."""
|
||||
from typing import Dict, List, Tuple
|
||||
|
||||
import torch
|
||||
|
||||
from cacheflow import cache_ops
|
||||
|
||||
KVCache = Tuple[torch.Tensor, torch.Tensor]
|
||||
|
||||
|
||||
class CacheEngine:
|
||||
"""Manages the KV cache.
|
||||
|
||||
This class is responsible for initializing and managing the GPU and CPU KV
|
||||
caches. It also provides methods for performing KV cache operations, such
|
||||
as swapping and copying.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
"""A GPU worker class."""
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
|
||||
import torch
|
||||
@@ -14,6 +15,12 @@ from cacheflow.worker.cache_engine import CacheEngine
|
||||
|
||||
|
||||
class Worker:
|
||||
"""A worker class that executes (a partition of) the model on a GPU.
|
||||
|
||||
Each worker is associated with a single GPU. The worker is responsible for
|
||||
maintaining the KV cache and executing the model on the GPU. In case of
|
||||
distributed inference, each worker is assigned a partition of the model.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
|
||||
Reference in New Issue
Block a user