From 230d28e56268e4e5346e221921cb409581bbc323 Mon Sep 17 00:00:00 2001 From: biondizzle Date: Mon, 1 Jun 2026 21:11:01 +0000 Subject: [PATCH] =?UTF-8?q?Fix=20KVCache=20constructor=20call=20=E2=80=94?= =?UTF-8?q?=20device=20as=20keyword=20arg,=20not=20positional?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The KVCache signature declares max_comp before device, so passing dev positionally bound it to the max_comp parameter instead of device. --- single_shot_inference.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/single_shot_inference.py b/single_shot_inference.py index 1ad13283..c4aafb06 100644 --- a/single_shot_inference.py +++ b/single_shot_inference.py @@ -861,7 +861,7 @@ def main(): n_ih = cfg.get("index_n_heads", 64); ihd = cfg.get("index_head_dim", 128); itk = cfg.get("index_topk", 1024) for li in range(n_layers): dev = f"cuda:{li % NUM_GPUS}"; ratio = cr[li] if li < len(cr) else 128 - kv_caches[li] = KVCache(hd, cfg.get("sliding_window", 128), dev) + kv_caches[li] = KVCache(hd, cfg.get("sliding_window", 128), device=dev) if ratio > 0: compressors[li] = Compressor(ratio, hd, H, dev) if ratio == 4: indexers[li] = Indexer(n_ih, ihd, itk, dev)