[CPU] Refactor CPU attention backend (#27954)
Signed-off-by: jiang1.li <jiang1.li@intel.com>
This commit is contained in:
@@ -38,7 +38,11 @@ AITER_MODEL_LIST = [
     [
         pytest.param(
             "bigscience/bloom-560m",  # bloom - testing alibi slopes
-            marks=[pytest.mark.core_model, pytest.mark.slow_test],
+            marks=[
+                pytest.mark.core_model,
+                pytest.mark.slow_test,
+                pytest.mark.cpu_model,
+            ],
         ),
         pytest.param(
             "openai-community/gpt2",  # gpt2
@@ -55,6 +59,10 @@ AITER_MODEL_LIST = [
                 pytest.mark.slow_test,
             ],
         ),
+        pytest.param(
+            "google/gemma-2-2b-it",  # test hybrid attention
+            marks=[pytest.mark.cpu_model],
+        ),
         pytest.param(
             "zai-org/chatglm3-6b",  # chatglm (text-only)
         ),
@@ -64,7 +72,6 @@ AITER_MODEL_LIST = [
         ),
         pytest.param(
             "openbmb/MiniCPM3-4B",
-            # fused_moe not supported on CPU
             marks=[pytest.mark.core_model, large_gpu_mark(min_gb=32)],
         ),
         pytest.param(
@@ -93,11 +100,7 @@ AITER_MODEL_LIST = [
         pytest.param("bigcode/starcoder2-3b"),  # starcoder2
         pytest.param(
             "TitanML/tiny-mixtral",  # mixtral
-            marks=[pytest.mark.core_model],
-        ),
-        pytest.param(
-            "allenai/OLMoE-1B-7B-0924-Instruct",
-            marks=[pytest.mark.cpu_model],
+            marks=[pytest.mark.core_model, pytest.mark.cpu_model],
         ),
         pytest.param("swiss-ai/Apertus-8B-Instruct-2509"),  # apertus
     ],
Reference in New Issue
Block a user