Remove hard-dependencies of Speculative decode to CUDA workers (#10587)

Signed-off-by: Chendi Xue <chendi.xue@intel.com>
This commit is contained in:
Chendi.Xue
2024-11-26 19:57:11 -06:00
committed by GitHub
parent 2f0a0a17a4
commit 0a71900bc9
19 changed files with 219 additions and 77 deletions

View File

@@ -595,8 +595,8 @@ def test_init_device(acceptance_sampler_method: str):
target_worker.init_device.assert_called_once()
-metrics_collector.init_gpu_tensors.assert_called_once()
-spec_decode_sampler.init_gpu_tensors.assert_called_once()
+metrics_collector.init_tensors.assert_called_once()
+spec_decode_sampler.init_tensors.assert_called_once()
@pytest.mark.parametrize("acceptance_sampler_method",