[Misc] Fused MoE Marlin support for GPTQ (#8217)

This commit is contained in:
Dipika Sikka
2024-09-09 23:02:52 -04:00
committed by GitHub
parent c7cb5c3335
commit 6cd5e5b07e
19 changed files with 912 additions and 204 deletions

View File

@@ -386,7 +386,18 @@ steps:
- vllm/
- tests/weight_loading
commands:
- bash weight_loading/run_model_weight_loading_test.sh
- bash weight_loading/run_model_weight_loading_test.sh -c weight_loading/models.txt
# Pipeline step: weight-loading test for large models on multiple GPUs.
# Declared with num_gpus: 2, gpu: a100, and optional: true.
# NOTE(review): indentation appears to have been flattened in this view;
# confirm the nesting of the keys below against the original pipeline file.
- label: Weight Loading Multiple GPU Test - Large Models # optional
# Directory the commands below are written relative to.
working_dir: "/vllm-workspace/tests"
num_gpus: 2
gpu: a100
optional: true
# Paths tied to this step; presumably changes under them trigger it (TODO confirm).
source_file_dependencies:
- vllm/
- tests/weight_loading
commands:
# Invokes the weight-loading test script, passing weight_loading/models-large.txt
# through its -c option (presumably the list of models to load; confirm in the script).
- bash weight_loading/run_model_weight_loading_test.sh -c weight_loading/models-large.txt
##### multi gpus test #####