Add an option to use dummy model weights (#33)

2023-04-08 23:36:12 -07:00
parent c267b1a02c
commit ee88a7e5f3
9 changed files with 36 additions and 8 deletions
--- a/cacheflow/worker/controller.py
+++ b/cacheflow/worker/controller.py
@@ -27,6 +27,7 @@ class Controller:
        dtype: str,
        seed: int,
        model_path: str,
+        use_dummy_weights: bool,
        max_num_batched_tokens: int,
    ) -> None:
        self.stage_id = stage_id
@@ -58,6 +59,7 @@ class Controller:
                tensor_parallel_size=tensor_parallel_size,
                pipeline_parallel_size=pipeline_parallel_size,
                model_path=model_path,
+                use_dummy_weights=use_dummy_weights,
                max_num_batched_tokens=max_num_batched_tokens,
            )
            self.workers.append(worker)
--- a/cacheflow/worker/worker.py
+++ b/cacheflow/worker/worker.py
@@ -29,6 +29,7 @@ class Worker:
        rank: int,
        world_size: int,
        model_path: str,
+        use_dummy_weights: bool,
        max_num_batched_tokens: int,
        tensor_parallel_size: int = 1,
        pipeline_parallel_size: int = 1,
@@ -43,8 +44,8 @@ class Worker:
        set_random_seed(seed)

        # Initialize the model.
-        self.model, self.dtype = get_model(model_name, dtype=dtype, path=model_path)
-        self.model = self.model.cuda()
+        self.model, self.dtype = get_model(
+            model_name, dtype=dtype, path=model_path, use_dummy_weights=use_dummy_weights)
        tensor_model_parallel_world_size = (
            get_tensor_model_parallel_world_size())
        initialize_all_reduce_launcher(