[Model] RowParallelLinear: pass bias to quant_method.apply (#6327)

Signed-off-by: Thomas Parnell <tpa@zurich.ibm.com>
This commit is contained in:
Thomas Parnell
2024-07-19 15:15:22 +02:00
committed by GitHub
parent a921e86392
commit a5314e8698
2 changed files with 14 additions and 9 deletions

View File

@@ -83,6 +83,9 @@ def test_target_model_tp_gt_1(baseline_llm_generator, test_llm_generator,
# cleaned up properly, and its server host thread leaks, causing the
# second run of the test to fail with internal NCCL error.
"use_async": True,
# precision
"dtype": "float32",
}])
@pytest.mark.parametrize("baseline_llm_kwargs", [{}])
@pytest.mark.parametrize(