TP/quantization/weight loading refactor part 1 - Simplify parallel linear logic (#1181)

2023-10-02 15:36:09 -07:00
parent 84e4e37d14
commit ba0bfd40e2
42 changed files with 819 additions and 1547 deletions
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -8,6 +8,7 @@ from vllm import LLM, SamplingParams
 from vllm.transformers_utils.tokenizer import get_tokenizer

 _TEST_PROMPTS = [
+    # pylint: disable=line-too-long
    "vLLM is a high-throughput and memory-efficient inference and serving engine for LLMs.",
    "Briefly describe the major milestones in the development of artificial intelligence from 1950 to 2020.",
    "Compare and contrast artificial intelligence with human intelligence in terms of processing information.",