Compare commits
9 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
966f933ee1 | ||
|
|
1a504aff6c | ||
|
|
01ca85bbd8 | ||
|
|
d82b9487ea | ||
|
|
be13281d4b | ||
|
|
54e084f7fb | ||
|
|
9e8f089d08 | ||
|
|
16e9064f84 | ||
|
|
5ac1a8e6e4 |
25
.buildkite/run-tpu-test.sh
Executable file
25
.buildkite/run-tpu-test.sh
Executable file
@@ -0,0 +1,25 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
set -e
|
||||||
|
|
||||||
|
# Build the docker image.
|
||||||
|
docker build -f Dockerfile.tpu -t vllm-tpu .
|
||||||
|
|
||||||
|
# Set up cleanup.
|
||||||
|
remove_docker_container() { docker rm -f tpu-test || true; }
|
||||||
|
trap remove_docker_container EXIT
|
||||||
|
# Remove the container that might not be cleaned up in the previous run.
|
||||||
|
remove_docker_container
|
||||||
|
|
||||||
|
# For HF_TOKEN.
|
||||||
|
source /etc/environment
|
||||||
|
# Run a simple end-to-end example.
|
||||||
|
docker run --privileged --net host --shm-size=16G -it \
|
||||||
|
-e "HF_TOKEN=$HF_TOKEN" --name tpu-test \
|
||||||
|
vllm-tpu /bin/bash -c "python3 -m pip install git+https://github.com/thuml/depyf.git \
|
||||||
|
&& python3 -m pip install pytest \
|
||||||
|
&& python3 -m pip install lm_eval[api]==0.4.4 \
|
||||||
|
&& pytest -v -s /workspace/vllm/tests/tpu/test_custom_dispatcher.py \
|
||||||
|
&& python3 /workspace/vllm/tests/tpu/test_compilation.py \
|
||||||
|
&& python3 /workspace/vllm/tests/tpu/test_quantization_accuracy.py \
|
||||||
|
&& python3 /workspace/vllm/examples/offline_inference/tpu.py"
|
||||||
@@ -15,22 +15,13 @@ remove_docker_container
|
|||||||
source /etc/environment
|
source /etc/environment
|
||||||
# Run a simple end-to-end example.
|
# Run a simple end-to-end example.
|
||||||
docker run --privileged --net host --shm-size=16G -it \
|
docker run --privileged --net host --shm-size=16G -it \
|
||||||
-e "HF_TOKEN=$HF_TOKEN" --name tpu-test \
|
-e "HF_TOKEN=$HF_TOKEN" -e "VLLM_USE_V1=1" --name tpu-test \
|
||||||
vllm-tpu /bin/bash -c "python3 -m pip install git+https://github.com/thuml/depyf.git \
|
vllm-tpu /bin/bash -c "python3 -m pip install git+https://github.com/thuml/depyf.git \
|
||||||
&& python3 -m pip install pytest \
|
&& python3 -m pip install pytest \
|
||||||
&& python3 -m pip install lm_eval[api]==0.4.4 \
|
&& python3 -m pip install lm_eval[api]==0.4.4 \
|
||||||
&& echo TEST_1 \
|
&& pytest -v -s /workspace/vllm/tests/tpu/test_custom_dispatcher.py \
|
||||||
&& VLLM_USE_V1=1 python3 /workspace/vllm/tests/tpu/test_compilation.py \
|
&& pytest -v -s /workspace/vllm/tests/v1/tpu/test_basic.py \
|
||||||
&& echo TEST_2 \
|
&& pytest -v -s /workspace/vllm/tests/entrypoints/llm/test_accuracy.py::test_lm_eval_accuracy_v1_engine \
|
||||||
&& VLLM_USE_V1=1 pytest -v -s /workspace/vllm/tests/v1/tpu/test_basic.py \
|
&& python3 /workspace/vllm/tests/tpu/test_compilation.py \
|
||||||
&& echo TEST_3 \
|
&& python3 /workspace/vllm/tests/tpu/test_quantization_accuracy.py \
|
||||||
&& VLLM_USE_V1=1 pytest -v -s /workspace/vllm/tests/entrypoints/llm/test_accuracy.py::test_lm_eval_accuracy_v1_engine \
|
&& python3 /workspace/vllm/examples/offline_inference/tpu.py"
|
||||||
&& echo TEST_4 \
|
|
||||||
&& VLLM_USE_V1=1 pytest -s -v /workspace/vllm/tests/tpu/test_quantization_accuracy.py \
|
|
||||||
&& echo TEST_5 \
|
|
||||||
&& VLLM_USE_V1=1 python3 /workspace/vllm/examples/offline_inference/tpu.py" \
|
|
||||||
|
|
||||||
|
|
||||||
# TODO: This test fails because it uses RANDOM_SEED sampling
|
|
||||||
# && VLLM_USE_V1=1 pytest -v -s /workspace/vllm/tests/tpu/test_custom_dispatcher.py \
|
|
||||||
|
|
||||||
|
|||||||
@@ -12,11 +12,10 @@ docker build -t ${image_name} -f Dockerfile.xpu .
|
|||||||
|
|
||||||
# Setup cleanup
|
# Setup cleanup
|
||||||
remove_docker_container() {
|
remove_docker_container() {
|
||||||
docker rm -f "${container_name}" || true;
|
docker rm -f "${container_name}" || docker image rm -f "${image_name}" || true;
|
||||||
docker image rm -f "${image_name}" || true;
|
|
||||||
docker system prune -f || true;
|
|
||||||
}
|
}
|
||||||
trap remove_docker_container EXIT
|
trap remove_docker_container EXIT
|
||||||
|
remove_docker_container
|
||||||
|
|
||||||
# Run the image and test offline inference/tensor parallel
|
# Run the image and test offline inference/tensor parallel
|
||||||
docker run \
|
docker run \
|
||||||
@@ -26,6 +25,6 @@ docker run \
|
|||||||
--name "${container_name}" \
|
--name "${container_name}" \
|
||||||
"${image_name}" \
|
"${image_name}" \
|
||||||
sh -c '
|
sh -c '
|
||||||
VLLM_USE_V1=0 python3 examples/offline_inference/basic/generate.py --model facebook/opt-125m
|
python3 examples/offline_inference/basic/generate.py --model facebook/opt-125m
|
||||||
VLLM_USE_V1=0 python3 examples/offline_inference/basic/generate.py --model facebook/opt-125m -tp 2
|
python3 examples/offline_inference/basic/generate.py --model facebook/opt-125m -tp 2
|
||||||
'
|
'
|
||||||
|
|||||||
@@ -1,7 +1,11 @@
|
|||||||
# oneapi 2025.0.2 docker base image use rolling 2448 package. https://dgpu-docs.intel.com/releases/packages.html?release=Rolling+2448.13&os=Ubuntu+22.04, and we don't need install driver manually.
|
FROM intel/deep-learning-essentials:2025.0.1-0-devel-ubuntu22.04 AS vllm-base
|
||||||
FROM intel/deep-learning-essentials:2025.0.2-0-devel-ubuntu22.04 AS vllm-base
|
|
||||||
|
|
||||||
RUN rm /etc/apt/sources.list.d/intel-graphics.list
|
RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | tee /usr/share/keyrings/intel-oneapi-archive-keyring.gpg > /dev/null && \
|
||||||
|
echo "deb [signed-by=/usr/share/keyrings/intel-oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main " | tee /etc/apt/sources.list.d/oneAPI.list && \
|
||||||
|
chmod 644 /usr/share/keyrings/intel-oneapi-archive-keyring.gpg && \
|
||||||
|
wget -O- https://repositories.intel.com/graphics/intel-graphics.key | gpg --dearmor | tee /usr/share/keyrings/intel-graphics.gpg > /dev/null && \
|
||||||
|
echo "deb [arch=amd64,i386 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/graphics/ubuntu jammy arc" | tee /etc/apt/sources.list.d/intel.gpu.jammy.list && \
|
||||||
|
chmod 644 /usr/share/keyrings/intel-graphics.gpg
|
||||||
|
|
||||||
RUN apt-get update -y && \
|
RUN apt-get update -y && \
|
||||||
apt-get install -y --no-install-recommends --fix-missing \
|
apt-get install -y --no-install-recommends --fix-missing \
|
||||||
@@ -17,6 +21,8 @@ RUN apt-get update -y && \
|
|||||||
python3 \
|
python3 \
|
||||||
python3-dev \
|
python3-dev \
|
||||||
python3-pip \
|
python3-pip \
|
||||||
|
libze-intel-gpu-dev \
|
||||||
|
libze-intel-gpu1 \
|
||||||
wget
|
wget
|
||||||
|
|
||||||
WORKDIR /workspace/vllm
|
WORKDIR /workspace/vllm
|
||||||
|
|||||||
@@ -15,7 +15,7 @@ Easy, fast, and cheap LLM serving for everyone
|
|||||||
|
|
||||||
*Latest News* 🔥
|
*Latest News* 🔥
|
||||||
|
|
||||||
- [2025/03] We hosted [the first vLLM China Meetup](https://mp.weixin.qq.com/s/n77GibL2corAtQHtVEAzfg)! Please find the meetup slides from vLLM team [here](https://docs.google.com/presentation/d/1REHvfQMKGnvz6p3Fd23HhSO4c8j5WPGZV0bKYLwnHyQ/edit?usp=sharing).
|
- [2025/03] We hosted [the first vLLM China Meetup](https://mp.weixin.qq.com/s/n77GibL2corAtQHtVEAzfg)! Please find the meetup slides from vLLM team [here](https://docs.google.com/presentation/d/1REHvfQMKGnvz6p3Fd23HhSO4c8j5WPGZV0bKYLwnHyQ/edit#slide=id.g33fb1ff286e_0_29).
|
||||||
- [2025/03] We hosted [the East Coast vLLM Meetup](https://lu.ma/7mu4k4xx)! Please find the meetup slides [here](https://docs.google.com/presentation/d/1NHiv8EUFF1NLd3fEYODm56nDmL26lEeXCaDgyDlTsRs/edit#slide=id.g31441846c39_0_0).
|
- [2025/03] We hosted [the East Coast vLLM Meetup](https://lu.ma/7mu4k4xx)! Please find the meetup slides [here](https://docs.google.com/presentation/d/1NHiv8EUFF1NLd3fEYODm56nDmL26lEeXCaDgyDlTsRs/edit#slide=id.g31441846c39_0_0).
|
||||||
- [2025/02] We hosted [the ninth vLLM meetup](https://lu.ma/h7g3kuj9) with Meta! Please find the meetup slides from vLLM team [here](https://docs.google.com/presentation/d/1jzC_PZVXrVNSFVCW-V4cFXb6pn7zZ2CyP_Flwo05aqg/edit?usp=sharing) and AMD [here](https://drive.google.com/file/d/1Zk5qEJIkTmlQ2eQcXQZlljAx3m9s7nwn/view?usp=sharing). The slides from Meta will not be posted.
|
- [2025/02] We hosted [the ninth vLLM meetup](https://lu.ma/h7g3kuj9) with Meta! Please find the meetup slides from vLLM team [here](https://docs.google.com/presentation/d/1jzC_PZVXrVNSFVCW-V4cFXb6pn7zZ2CyP_Flwo05aqg/edit?usp=sharing) and AMD [here](https://drive.google.com/file/d/1Zk5qEJIkTmlQ2eQcXQZlljAx3m9s7nwn/view?usp=sharing). The slides from Meta will not be posted.
|
||||||
- [2025/01] We are excited to announce the alpha release of vLLM V1: A major architectural upgrade with 1.7x speedup! Clean code, optimized execution loop, zero-overhead prefix caching, enhanced multimodal support, and more. Please check out our blog post [here](https://blog.vllm.ai/2025/01/27/v1-alpha-release.html).
|
- [2025/01] We are excited to announce the alpha release of vLLM V1: A major architectural upgrade with 1.7x speedup! Clean code, optimized execution loop, zero-overhead prefix caching, enhanced multimodal support, and more. Please check out our blog post [here](https://blog.vllm.ai/2025/01/27/v1-alpha-release.html).
|
||||||
|
|||||||
@@ -732,11 +732,8 @@ def main(args: argparse.Namespace):
|
|||||||
api_url = f"http://{args.host}:{args.port}{args.endpoint}"
|
api_url = f"http://{args.host}:{args.port}{args.endpoint}"
|
||||||
base_url = f"http://{args.host}:{args.port}"
|
base_url = f"http://{args.host}:{args.port}"
|
||||||
|
|
||||||
tokenizer = get_tokenizer(
|
tokenizer = get_tokenizer(tokenizer_id,
|
||||||
tokenizer_id,
|
trust_remote_code=args.trust_remote_code)
|
||||||
trust_remote_code=args.trust_remote_code,
|
|
||||||
tokenizer_mode=args.tokenizer_mode,
|
|
||||||
)
|
|
||||||
|
|
||||||
if args.dataset == 'grammar':
|
if args.dataset == 'grammar':
|
||||||
args.structure_type = 'guided_grammar'
|
args.structure_type = 'guided_grammar'
|
||||||
@@ -879,13 +876,6 @@ if __name__ == "__main__":
|
|||||||
help=
|
help=
|
||||||
"Name or path of the tokenizer, if not using the default tokenizer.", # noqa: E501
|
"Name or path of the tokenizer, if not using the default tokenizer.", # noqa: E501
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
|
||||||
"--tokenizer-mode",
|
|
||||||
type=str,
|
|
||||||
default="auto",
|
|
||||||
help=
|
|
||||||
"Name or path of the tokenizer, if not using the default tokenizer.", # noqa: E501
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--num-prompts",
|
"--num-prompts",
|
||||||
type=int,
|
type=int,
|
||||||
|
|||||||
@@ -54,7 +54,6 @@ for qps in "${QPS_VALUES[@]}"; do
|
|||||||
python "$SCRIPT_DIR/benchmark_serving_structured_output.py" $COMMON_PARAMS \
|
python "$SCRIPT_DIR/benchmark_serving_structured_output.py" $COMMON_PARAMS \
|
||||||
--request-rate $qps \
|
--request-rate $qps \
|
||||||
--result-filename "$FILENAME" \
|
--result-filename "$FILENAME" \
|
||||||
--tokenizer-mode ${TOKENIZER_MODE:-"auto"} \
|
|
||||||
--port ${PORT:-8000}
|
--port ${PORT:-8000}
|
||||||
|
|
||||||
echo "Completed benchmark with QPS: $qps"
|
echo "Completed benchmark with QPS: $qps"
|
||||||
|
|||||||
@@ -350,8 +350,8 @@ __global__ void concat_and_cache_mla_kernel(
|
|||||||
|
|
||||||
} // namespace vllm
|
} // namespace vllm
|
||||||
|
|
||||||
// KV_T is the data type of key and value tensors.
|
// KV_T is the stored data type of kv-cache.
|
||||||
// CACHE_T is the stored data type of kv-cache.
|
// CACHE_T is the data type of key and value tensors.
|
||||||
// KV_DTYPE is the real data type of kv-cache.
|
// KV_DTYPE is the real data type of kv-cache.
|
||||||
#define CALL_RESHAPE_AND_CACHE(KV_T, CACHE_T, KV_DTYPE) \
|
#define CALL_RESHAPE_AND_CACHE(KV_T, CACHE_T, KV_DTYPE) \
|
||||||
vllm::reshape_and_cache_kernel<KV_T, CACHE_T, KV_DTYPE> \
|
vllm::reshape_and_cache_kernel<KV_T, CACHE_T, KV_DTYPE> \
|
||||||
@@ -393,8 +393,8 @@ void reshape_and_cache(
|
|||||||
CALL_RESHAPE_AND_CACHE)
|
CALL_RESHAPE_AND_CACHE)
|
||||||
}
|
}
|
||||||
|
|
||||||
// KV_T is the data type of key and value tensors.
|
// KV_T is the stored data type of kv-cache.
|
||||||
// CACHE_T is the stored data type of kv-cache.
|
// CACHE_T is the data type of key and value tensors.
|
||||||
// KV_DTYPE is the real data type of kv-cache.
|
// KV_DTYPE is the real data type of kv-cache.
|
||||||
#define CALL_RESHAPE_AND_CACHE_FLASH(KV_T, CACHE_T, KV_DTYPE) \
|
#define CALL_RESHAPE_AND_CACHE_FLASH(KV_T, CACHE_T, KV_DTYPE) \
|
||||||
vllm::reshape_and_cache_flash_kernel<KV_T, CACHE_T, KV_DTYPE> \
|
vllm::reshape_and_cache_flash_kernel<KV_T, CACHE_T, KV_DTYPE> \
|
||||||
@@ -446,8 +446,8 @@ void reshape_and_cache_flash(
|
|||||||
CALL_RESHAPE_AND_CACHE_FLASH);
|
CALL_RESHAPE_AND_CACHE_FLASH);
|
||||||
}
|
}
|
||||||
|
|
||||||
// KV_T is the data type of key and value tensors.
|
// KV_T is the stored data type of kv-cache.
|
||||||
// CACHE_T is the stored data type of kv-cache.
|
// CACHE_T is the data type of key and value tensors.
|
||||||
// KV_DTYPE is the real data type of kv-cache.
|
// KV_DTYPE is the real data type of kv-cache.
|
||||||
#define CALL_CONCAT_AND_CACHE_MLA(KV_T, CACHE_T, KV_DTYPE) \
|
#define CALL_CONCAT_AND_CACHE_MLA(KV_T, CACHE_T, KV_DTYPE) \
|
||||||
vllm::concat_and_cache_mla_kernel<KV_T, CACHE_T, KV_DTYPE> \
|
vllm::concat_and_cache_mla_kernel<KV_T, CACHE_T, KV_DTYPE> \
|
||||||
|
|||||||
@@ -34,11 +34,11 @@ If you need to use those dependencies (having accepted the license terms),
|
|||||||
create a custom Dockerfile on top of the base image with an extra layer that installs them:
|
create a custom Dockerfile on top of the base image with an extra layer that installs them:
|
||||||
|
|
||||||
```Dockerfile
|
```Dockerfile
|
||||||
FROM vllm/vllm-openai:v0.8.0
|
FROM vllm/vllm-openai:v0.7.3
|
||||||
|
|
||||||
# e.g. install the `audio` and `video` optional dependencies
|
# e.g. install the `audio` and `video` optional dependencies
|
||||||
# NOTE: Make sure the version of vLLM matches the base image!
|
# NOTE: Make sure the version of vLLM matches the base image!
|
||||||
RUN uv pip install vllm[audio,video]==0.8.0
|
RUN uv pip install --system vllm[audio,video]==0.7.3
|
||||||
```
|
```
|
||||||
|
|
||||||
:::
|
:::
|
||||||
@@ -52,7 +52,7 @@ with an extra layer that installs their code from source:
|
|||||||
```Dockerfile
|
```Dockerfile
|
||||||
FROM vllm/vllm-openai:latest
|
FROM vllm/vllm-openai:latest
|
||||||
|
|
||||||
RUN uv pip install git+https://github.com/huggingface/transformers.git
|
RUN uv pip install --system git+https://github.com/huggingface/transformers.git
|
||||||
```
|
```
|
||||||
|
|
||||||
:::
|
:::
|
||||||
|
|||||||
@@ -191,7 +191,7 @@ When the head block (least recently used block) of the free queue is cached, we
|
|||||||
|
|
||||||
In this example, we assume the block size is 4 (each block can cache 4 tokens), and we have 10 blocks in the KV-cache manager in total.
|
In this example, we assume the block size is 4 (each block can cache 4 tokens), and we have 10 blocks in the KV-cache manager in total.
|
||||||
|
|
||||||
**Time 1: The cache is empty and a new request comes in.** We allocate 4 blocks. 3 of them are already full and cached. The fourth block is partially full with 3 of 4 tokens.
|
**Time 1: The cache is empty and a new request comes in.** We allocate 4 blocks. 3 of them are already full and cached. The fourth block is partially full with 2 of 4 tokens.
|
||||||
|
|
||||||
:::{image} /assets/design/v1/prefix_caching/example-time-1.png
|
:::{image} /assets/design/v1/prefix_caching/example-time-1.png
|
||||||
:alt: Example Time 1
|
:alt: Example Time 1
|
||||||
@@ -203,7 +203,7 @@ In this example, we assume the block size is 4 (each block can cache 4 tokens),
|
|||||||
:alt: Example Time 3
|
:alt: Example Time 3
|
||||||
:::
|
:::
|
||||||
|
|
||||||
**Time 4: Request 1 comes in with the 14 prompt tokens, where the first 10 tokens are the same as request 0.** We can see that only the first 2 blocks (8 tokens) hit the cache, because the 3rd block only matches 2 of 4 tokens.
|
**Time 4: Request 1 comes in with the 14 prompt tokens, where the first 11 tokens are the same as request 0.** We can see that only 2 blocks (11 tokens) hit the cache, because the 3rd block only matches 3 of 4 tokens.
|
||||||
|
|
||||||
:::{image} /assets/design/v1/prefix_caching/example-time-4.png
|
:::{image} /assets/design/v1/prefix_caching/example-time-4.png
|
||||||
:alt: Example Time 4
|
:alt: Example Time 4
|
||||||
|
|||||||
@@ -2,8 +2,6 @@
|
|||||||
|
|
||||||
V1 is now enabled by default for all supported use cases, and we will gradually enable it for every use case we plan to support. Please share any feedback on [GitHub](https://github.com/vllm-project/vllm) or in the [vLLM Slack](https://inviter.co/vllm-slack).
|
V1 is now enabled by default for all supported use cases, and we will gradually enable it for every use case we plan to support. Please share any feedback on [GitHub](https://github.com/vllm-project/vllm) or in the [vLLM Slack](https://inviter.co/vllm-slack).
|
||||||
|
|
||||||
To disable V1, please set the environment variable as: `VLLM_USE_V1=0`, and send us a GitHub issue sharing the reason!
|
|
||||||
|
|
||||||
## Why vLLM V1?
|
## Why vLLM V1?
|
||||||
|
|
||||||
vLLM V0 successfully supported a wide range of models and hardware, but as new features were developed independently, the system grew increasingly complex. This complexity made it harder to integrate new capabilities and introduced technical debt, revealing the need for a more streamlined and unified design.
|
vLLM V0 successfully supported a wide range of models and hardware, but as new features were developed independently, the system grew increasingly complex. This complexity made it harder to integrate new capabilities and introduced technical debt, revealing the need for a more streamlined and unified design.
|
||||||
|
|||||||
@@ -768,7 +768,7 @@ See [this page](#generative-models) for more information on how to use generativ
|
|||||||
* `google/gemma-3-4b-it`, `google/gemma-3-27b-it`, etc.
|
* `google/gemma-3-4b-it`, `google/gemma-3-27b-it`, etc.
|
||||||
* ✅︎
|
* ✅︎
|
||||||
* ✅︎
|
* ✅︎
|
||||||
* ⚠️
|
*
|
||||||
- * `GLM4VForCausalLM`<sup>^</sup>
|
- * `GLM4VForCausalLM`<sup>^</sup>
|
||||||
* GLM-4V
|
* GLM-4V
|
||||||
* T + I
|
* T + I
|
||||||
@@ -884,7 +884,7 @@ See [this page](#generative-models) for more information on how to use generativ
|
|||||||
- * `PixtralForConditionalGeneration`
|
- * `PixtralForConditionalGeneration`
|
||||||
* Pixtral
|
* Pixtral
|
||||||
* T + I<sup>+</sup>
|
* T + I<sup>+</sup>
|
||||||
* `mistralai/Mistral-Small-3.1-24B-Instruct-2503`, `mistral-community/pixtral-12b`, etc.
|
* `mistralai/Pixtral-12B-2409`, `mistral-community/pixtral-12b`, etc.
|
||||||
*
|
*
|
||||||
* ✅︎
|
* ✅︎
|
||||||
* ✅︎
|
* ✅︎
|
||||||
@@ -951,10 +951,13 @@ V0 correctly implements the model's attention pattern:
|
|||||||
|
|
||||||
V1 currently uses a simplified attention pattern:
|
V1 currently uses a simplified attention pattern:
|
||||||
- Uses causal attention for all tokens, including image tokens
|
- Uses causal attention for all tokens, including image tokens
|
||||||
- Generates reasonable outputs but does not match the original model's attention for text + image inputs, especially when `{"do_pan_and_scan": True}`
|
- Generates reasonable outputs but does not match the original model's attention for text + image inputs
|
||||||
- Will be updated in the future to support the correct behavior
|
- Will be updated in the future to support the correct behavior
|
||||||
|
- Does not support `"do_pan_and_scan": True`
|
||||||
|
|
||||||
This limitation exists because the model's mixed attention pattern (bidirectional for images, causal otherwise) is not yet supported by vLLM's attention backends.
|
This limitation exists because the model's mixed attention pattern (bidirectional for images, causal otherwise) is not yet supported by vLLM's attention backends.
|
||||||
|
|
||||||
|
For these reasons, `Gemma3ForConditionalGeneration` is supported only on V0 at the moment.
|
||||||
:::
|
:::
|
||||||
|
|
||||||
:::{note}
|
:::{note}
|
||||||
|
|||||||
@@ -6,14 +6,14 @@ import argparse
|
|||||||
from vllm import LLM
|
from vllm import LLM
|
||||||
from vllm.sampling_params import SamplingParams
|
from vllm.sampling_params import SamplingParams
|
||||||
|
|
||||||
# This script is an offline demo for running Mistral-Small-3
|
# This script is an offline demo for running Pixtral.
|
||||||
#
|
#
|
||||||
# If you want to run a server/client setup, please follow this code:
|
# If you want to run a server/client setup, please follow this code:
|
||||||
#
|
#
|
||||||
# - Server:
|
# - Server:
|
||||||
#
|
#
|
||||||
# ```bash
|
# ```bash
|
||||||
# vllm serve mistralai/Mistral-Small-3.1-24B-Instruct-2503 --tokenizer-mode mistral --limit-mm-per-prompt 'image=4' --max-model-len 16384
|
# vllm serve mistralai/Pixtral-12B-2409 --tokenizer-mode mistral --limit-mm-per-prompt 'image=4' --max-model-len 16384
|
||||||
# ```
|
# ```
|
||||||
#
|
#
|
||||||
# - Client:
|
# - Client:
|
||||||
@@ -23,7 +23,7 @@ from vllm.sampling_params import SamplingParams
|
|||||||
# --header 'Content-Type: application/json' \
|
# --header 'Content-Type: application/json' \
|
||||||
# --header 'Authorization: Bearer token' \
|
# --header 'Authorization: Bearer token' \
|
||||||
# --data '{
|
# --data '{
|
||||||
# "model": "mistralai/Mistral-Small-3.1-24B-Instruct-2503",
|
# "model": "mistralai/Pixtral-12B-2409",
|
||||||
# "messages": [
|
# "messages": [
|
||||||
# {
|
# {
|
||||||
# "role": "user",
|
# "role": "user",
|
||||||
@@ -44,7 +44,7 @@ from vllm.sampling_params import SamplingParams
|
|||||||
|
|
||||||
|
|
||||||
def run_simple_demo(args: argparse.Namespace):
|
def run_simple_demo(args: argparse.Namespace):
|
||||||
model_name = "mistralai/Mistral-Small-3.1-24B-Instruct-2503"
|
model_name = "mistralai/Pixtral-12B-2409"
|
||||||
sampling_params = SamplingParams(max_tokens=8192)
|
sampling_params = SamplingParams(max_tokens=8192)
|
||||||
|
|
||||||
# Lower max_model_len and/or max_num_seqs on low-VRAM GPUs.
|
# Lower max_model_len and/or max_num_seqs on low-VRAM GPUs.
|
||||||
@@ -83,7 +83,7 @@ def run_simple_demo(args: argparse.Namespace):
|
|||||||
|
|
||||||
|
|
||||||
def run_advanced_demo(args: argparse.Namespace):
|
def run_advanced_demo(args: argparse.Namespace):
|
||||||
model_name = "mistralai/Mistral-Small-3.1-24B-Instruct-2503"
|
model_name = "mistralai/Pixtral-12B-2409"
|
||||||
max_img_per_msg = 5
|
max_img_per_msg = 5
|
||||||
max_tokens_per_img = 4096
|
max_tokens_per_img = 4096
|
||||||
|
|
||||||
|
|||||||
@@ -60,7 +60,7 @@ class TestSetting:
|
|||||||
# embedding model
|
# embedding model
|
||||||
TestSetting(
|
TestSetting(
|
||||||
model="BAAI/bge-multilingual-gemma2",
|
model="BAAI/bge-multilingual-gemma2",
|
||||||
model_args=["--task", "embed", "--dtype", "bfloat16"],
|
model_args=["--task", "embed"],
|
||||||
pp_size=1,
|
pp_size=1,
|
||||||
tp_size=1,
|
tp_size=1,
|
||||||
attn_backend="FLASH_ATTN",
|
attn_backend="FLASH_ATTN",
|
||||||
|
|||||||
@@ -14,8 +14,8 @@ import torch.nn as nn
|
|||||||
import torch.nn.functional as F
|
import torch.nn.functional as F
|
||||||
from huggingface_hub import snapshot_download
|
from huggingface_hub import snapshot_download
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
from transformers import (AutoConfig, AutoModelForCausalLM, AutoTokenizer,
|
from transformers import (AutoModelForCausalLM, AutoTokenizer, BatchEncoding,
|
||||||
BatchEncoding, BatchFeature)
|
BatchFeature)
|
||||||
from transformers.models.auto.auto_factory import _BaseAutoModelClass
|
from transformers.models.auto.auto_factory import _BaseAutoModelClass
|
||||||
|
|
||||||
from tests.models.utils import (TokensTextLogprobs,
|
from tests.models.utils import (TokensTextLogprobs,
|
||||||
@@ -23,7 +23,7 @@ from tests.models.utils import (TokensTextLogprobs,
|
|||||||
from vllm import LLM, SamplingParams
|
from vllm import LLM, SamplingParams
|
||||||
from vllm.assets.image import ImageAsset
|
from vllm.assets.image import ImageAsset
|
||||||
from vllm.assets.video import VideoAsset
|
from vllm.assets.video import VideoAsset
|
||||||
from vllm.config import TaskOption, TokenizerPoolConfig, _get_and_verify_dtype
|
from vllm.config import TaskOption, TokenizerPoolConfig
|
||||||
from vllm.connections import global_http_connection
|
from vllm.connections import global_http_connection
|
||||||
from vllm.distributed import (cleanup_dist_env_and_memory,
|
from vllm.distributed import (cleanup_dist_env_and_memory,
|
||||||
init_distributed_environment,
|
init_distributed_environment,
|
||||||
@@ -34,7 +34,8 @@ from vllm.inputs import (ExplicitEncoderDecoderPrompt, TextPrompt,
|
|||||||
from vllm.logger import init_logger
|
from vllm.logger import init_logger
|
||||||
from vllm.outputs import RequestOutput
|
from vllm.outputs import RequestOutput
|
||||||
from vllm.sampling_params import BeamSearchParams
|
from vllm.sampling_params import BeamSearchParams
|
||||||
from vllm.utils import cuda_device_count_stateless, is_list_of
|
from vllm.utils import (STR_DTYPE_TO_TORCH_DTYPE, cuda_device_count_stateless,
|
||||||
|
identity, is_list_of)
|
||||||
|
|
||||||
logger = init_logger(__name__)
|
logger = init_logger(__name__)
|
||||||
|
|
||||||
@@ -270,18 +271,14 @@ _R = TypeVar("_R")
|
|||||||
|
|
||||||
class HfRunner:
|
class HfRunner:
|
||||||
|
|
||||||
def get_default_device(self):
|
|
||||||
from vllm.platforms import current_platform
|
|
||||||
|
|
||||||
return ("cpu" if current_platform.is_cpu()
|
|
||||||
or current_platform.is_openvino() else "cuda")
|
|
||||||
|
|
||||||
def wrap_device(self, x: _T, device: Optional[str] = None) -> _T:
|
def wrap_device(self, x: _T, device: Optional[str] = None) -> _T:
|
||||||
|
from vllm.platforms import current_platform
|
||||||
if x is None or isinstance(x, (bool, )):
|
if x is None or isinstance(x, (bool, )):
|
||||||
return x
|
return x
|
||||||
|
|
||||||
if device is None:
|
if device is None:
|
||||||
device = self.device
|
device = "cpu" if current_platform.is_cpu(
|
||||||
|
) or current_platform.is_openvino() else "cuda"
|
||||||
|
|
||||||
if isinstance(x, dict):
|
if isinstance(x, dict):
|
||||||
return {k: self.wrap_device(v, device) for k, v in x.items()}
|
return {k: self.wrap_device(v, device) for k, v in x.items()}
|
||||||
@@ -294,59 +291,45 @@ class HfRunner:
|
|||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
model_name: str,
|
model_name: str,
|
||||||
dtype: str = "auto",
|
dtype: str = "half",
|
||||||
*,
|
*,
|
||||||
model_kwargs: Optional[dict[str, Any]] = None,
|
model_kwargs: Optional[dict[str, Any]] = None,
|
||||||
is_sentence_transformer: bool = False,
|
is_sentence_transformer: bool = False,
|
||||||
is_cross_encoder: bool = False,
|
is_cross_encoder: bool = False,
|
||||||
skip_tokenizer_init: bool = False,
|
skip_tokenizer_init: bool = False,
|
||||||
auto_cls: type[_BaseAutoModelClass] = AutoModelForCausalLM,
|
auto_cls: type[_BaseAutoModelClass] = AutoModelForCausalLM,
|
||||||
|
postprocess_inputs: Callable[..., BatchEncoding] = identity,
|
||||||
) -> None:
|
) -> None:
|
||||||
|
torch_dtype = STR_DTYPE_TO_TORCH_DTYPE[dtype]
|
||||||
|
|
||||||
self.model_name = model_name
|
self.model_name = model_name
|
||||||
|
|
||||||
self.config = AutoConfig.from_pretrained(
|
|
||||||
model_name,
|
|
||||||
trust_remote_code=True,
|
|
||||||
)
|
|
||||||
self.device = self.get_default_device()
|
|
||||||
self.dtype = torch_dtype = _get_and_verify_dtype(self.config, dtype)
|
|
||||||
|
|
||||||
model_kwargs = model_kwargs if model_kwargs is not None else {}
|
|
||||||
model_kwargs.setdefault("torch_dtype", torch_dtype)
|
|
||||||
|
|
||||||
if is_sentence_transformer:
|
if is_sentence_transformer:
|
||||||
# Lazy init required for AMD CI
|
# Lazy init required for AMD CI
|
||||||
from sentence_transformers import SentenceTransformer
|
from sentence_transformers import SentenceTransformer
|
||||||
|
self.model = self.wrap_device(
|
||||||
self.model = SentenceTransformer(
|
SentenceTransformer(
|
||||||
model_name,
|
model_name,
|
||||||
device=self.device,
|
device="cpu",
|
||||||
model_kwargs=model_kwargs,
|
trust_remote_code=True,
|
||||||
trust_remote_code=True,
|
).to(dtype=torch_dtype))
|
||||||
)
|
|
||||||
elif is_cross_encoder:
|
elif is_cross_encoder:
|
||||||
# Lazy init required for AMD CI
|
# Lazy init required for AMD CI
|
||||||
from sentence_transformers import CrossEncoder
|
from sentence_transformers import CrossEncoder
|
||||||
|
self.model = CrossEncoder(model_name,
|
||||||
self.model = CrossEncoder(
|
device="cpu",
|
||||||
model_name,
|
trust_remote_code=True)
|
||||||
device=self.device,
|
self.model.model = self.wrap_device(self.model.model)\
|
||||||
automodel_args=model_kwargs,
|
.to(dtype=torch_dtype)
|
||||||
trust_remote_code=True,
|
|
||||||
)
|
|
||||||
else:
|
else:
|
||||||
model = auto_cls.from_pretrained(
|
model_kwargs = model_kwargs if model_kwargs is not None else {}
|
||||||
model_name,
|
self.model = self.wrap_device(
|
||||||
trust_remote_code=True,
|
auto_cls.from_pretrained(
|
||||||
**model_kwargs,
|
model_name,
|
||||||
)
|
torch_dtype=torch_dtype,
|
||||||
|
trust_remote_code=True,
|
||||||
if (getattr(model, "quantization_method", None) != "bitsandbytes"
|
**model_kwargs,
|
||||||
and len({p.device
|
))
|
||||||
for p in model.parameters()}) < 2):
|
|
||||||
model = model.to(self.device)
|
|
||||||
|
|
||||||
self.model = model
|
|
||||||
|
|
||||||
if not skip_tokenizer_init:
|
if not skip_tokenizer_init:
|
||||||
self.tokenizer = AutoTokenizer.from_pretrained(
|
self.tokenizer = AutoTokenizer.from_pretrained(
|
||||||
@@ -366,13 +349,16 @@ class HfRunner:
|
|||||||
if skip_tokenizer_init:
|
if skip_tokenizer_init:
|
||||||
self.tokenizer = self.processor.tokenizer
|
self.tokenizer = self.processor.tokenizer
|
||||||
|
|
||||||
|
self.dtype = dtype
|
||||||
|
self.postprocess_inputs = postprocess_inputs
|
||||||
|
|
||||||
def get_inputs(
|
def get_inputs(
|
||||||
self,
|
self,
|
||||||
prompts: list[str],
|
prompts: list[str],
|
||||||
images: Optional[PromptImageInput] = None,
|
images: Optional[PromptImageInput] = None,
|
||||||
videos: Optional[PromptVideoInput] = None,
|
videos: Optional[PromptVideoInput] = None,
|
||||||
audios: Optional[PromptAudioInput] = None,
|
audios: Optional[PromptAudioInput] = None,
|
||||||
) -> list[Union[BatchFeature, BatchEncoding]]:
|
) -> list[BatchEncoding]:
|
||||||
if images is not None:
|
if images is not None:
|
||||||
assert len(prompts) == len(images)
|
assert len(prompts) == len(images)
|
||||||
|
|
||||||
@@ -382,7 +368,7 @@ class HfRunner:
|
|||||||
if audios is not None:
|
if audios is not None:
|
||||||
assert len(prompts) == len(audios)
|
assert len(prompts) == len(audios)
|
||||||
|
|
||||||
all_inputs: list[Union[BatchFeature, BatchEncoding]] = []
|
all_inputs: list[BatchEncoding] = []
|
||||||
for i, prompt in enumerate(prompts):
|
for i, prompt in enumerate(prompts):
|
||||||
processor_kwargs: dict[str, Any] = {
|
processor_kwargs: dict[str, Any] = {
|
||||||
"text": prompt,
|
"text": prompt,
|
||||||
@@ -398,8 +384,7 @@ class HfRunner:
|
|||||||
processor_kwargs["sampling_rate"] = sr
|
processor_kwargs["sampling_rate"] = sr
|
||||||
|
|
||||||
inputs = self.processor(**processor_kwargs)
|
inputs = self.processor(**processor_kwargs)
|
||||||
if isinstance(inputs, BatchFeature):
|
inputs = self.postprocess_inputs(inputs, dtype=self.dtype)
|
||||||
inputs = inputs.to(dtype=self.dtype)
|
|
||||||
|
|
||||||
all_inputs.append(inputs)
|
all_inputs.append(inputs)
|
||||||
|
|
||||||
@@ -432,7 +417,7 @@ class HfRunner:
|
|||||||
outputs: list[tuple[list[list[int]], list[str]]] = []
|
outputs: list[tuple[list[list[int]], list[str]]] = []
|
||||||
for inputs in all_inputs:
|
for inputs in all_inputs:
|
||||||
output_ids = self.model.generate(
|
output_ids = self.model.generate(
|
||||||
**self.wrap_device(inputs),
|
**self.wrap_device(inputs, device=self.model.device.type),
|
||||||
use_cache=True,
|
use_cache=True,
|
||||||
**kwargs,
|
**kwargs,
|
||||||
)
|
)
|
||||||
@@ -503,7 +488,7 @@ class HfRunner:
|
|||||||
all_logprobs: list[list[torch.Tensor]] = []
|
all_logprobs: list[list[torch.Tensor]] = []
|
||||||
for inputs in all_inputs:
|
for inputs in all_inputs:
|
||||||
output = self.model.generate(
|
output = self.model.generate(
|
||||||
**self.wrap_device(inputs),
|
**self.wrap_device(inputs, device=self.model.device.type),
|
||||||
use_cache=True,
|
use_cache=True,
|
||||||
do_sample=False,
|
do_sample=False,
|
||||||
max_new_tokens=max_tokens,
|
max_new_tokens=max_tokens,
|
||||||
@@ -584,7 +569,7 @@ class HfRunner:
|
|||||||
|
|
||||||
for inputs in all_inputs:
|
for inputs in all_inputs:
|
||||||
output = self.model.generate(
|
output = self.model.generate(
|
||||||
**self.wrap_device(inputs),
|
**self.wrap_device(inputs, device=self.model.device.type),
|
||||||
use_cache=True,
|
use_cache=True,
|
||||||
do_sample=False,
|
do_sample=False,
|
||||||
max_new_tokens=max_tokens,
|
max_new_tokens=max_tokens,
|
||||||
@@ -635,15 +620,19 @@ class HfRunner:
|
|||||||
if images is not None and images[i] is not None:
|
if images is not None and images[i] is not None:
|
||||||
processor_kwargs["images"] = images[i]
|
processor_kwargs["images"] = images[i]
|
||||||
|
|
||||||
encoder_inputs = self.processor(**processor_kwargs)
|
encoder_inputs = self.wrap_device(
|
||||||
encoder_inputs = self.wrap_device(encoder_inputs)
|
self.processor(**processor_kwargs),
|
||||||
|
device=self.model.device.type,
|
||||||
|
)
|
||||||
|
|
||||||
if decoder_prompt is None:
|
if decoder_prompt is None:
|
||||||
decoder_input_ids = None
|
decoder_input_ids = None
|
||||||
else:
|
else:
|
||||||
decoder_inputs = self.tokenizer(decoder_prompt,
|
decoder_input_ids = self.wrap_device(
|
||||||
return_tensors="pt")
|
self.tokenizer(decoder_prompt,
|
||||||
decoder_input_ids = self.wrap_device(decoder_inputs.input_ids)
|
return_tensors="pt").input_ids,
|
||||||
|
device=self.model.device.type,
|
||||||
|
)
|
||||||
|
|
||||||
output = self.model.generate(
|
output = self.model.generate(
|
||||||
decoder_input_ids=decoder_input_ids,
|
decoder_input_ids=decoder_input_ids,
|
||||||
@@ -695,7 +684,6 @@ class VllmRunner:
|
|||||||
"""
|
"""
|
||||||
The default value of some arguments have been modified from
|
The default value of some arguments have been modified from
|
||||||
:class:`~vllm.LLM` as follows:
|
:class:`~vllm.LLM` as follows:
|
||||||
|
|
||||||
- `trust_remote_code`: Set to `True` instead of `False` for convenience.
|
- `trust_remote_code`: Set to `True` instead of `False` for convenience.
|
||||||
- `seed`: Set to `0` instead of `None` for test reproducibility.
|
- `seed`: Set to `0` instead of `None` for test reproducibility.
|
||||||
- `max_model_len`: Set to `1024` instead of `None` to reduce memory usage.
|
- `max_model_len`: Set to `1024` instead of `None` to reduce memory usage.
|
||||||
@@ -713,8 +701,10 @@ class VllmRunner:
|
|||||||
tokenizer_mode: str = "auto",
|
tokenizer_mode: str = "auto",
|
||||||
trust_remote_code: bool = True,
|
trust_remote_code: bool = True,
|
||||||
seed: Optional[int] = 0,
|
seed: Optional[int] = 0,
|
||||||
|
# Use smaller max model length, otherwise bigger model cannot run due
|
||||||
|
# to kv cache size limit.
|
||||||
max_model_len: int = 1024,
|
max_model_len: int = 1024,
|
||||||
dtype: str = "auto",
|
dtype: str = "half",
|
||||||
disable_log_stats: bool = True,
|
disable_log_stats: bool = True,
|
||||||
tensor_parallel_size: int = 1,
|
tensor_parallel_size: int = 1,
|
||||||
block_size: int = 16,
|
block_size: int = 16,
|
||||||
@@ -1120,4 +1110,4 @@ def pytest_collection_modifyitems(config, items):
|
|||||||
skip_optional = pytest.mark.skip(reason="need --optional option to run")
|
skip_optional = pytest.mark.skip(reason="need --optional option to run")
|
||||||
for item in items:
|
for item in items:
|
||||||
if "optional" in item.keywords:
|
if "optional" in item.keywords:
|
||||||
item.add_marker(skip_optional)
|
item.add_marker(skip_optional)
|
||||||
@@ -64,6 +64,7 @@ def test_multi_chat():
|
|||||||
def test_chat_multi_image(image_urls: list[str]):
|
def test_chat_multi_image(image_urls: list[str]):
|
||||||
llm = LLM(
|
llm = LLM(
|
||||||
model="microsoft/Phi-3.5-vision-instruct",
|
model="microsoft/Phi-3.5-vision-instruct",
|
||||||
|
dtype="bfloat16",
|
||||||
max_model_len=4096,
|
max_model_len=4096,
|
||||||
max_num_seqs=5,
|
max_num_seqs=5,
|
||||||
enforce_eager=True,
|
enforce_eager=True,
|
||||||
|
|||||||
@@ -18,6 +18,8 @@ TEST_AUDIO_URLS = [
|
|||||||
@pytest.fixture(scope="module")
|
@pytest.fixture(scope="module")
|
||||||
def server():
|
def server():
|
||||||
args = [
|
args = [
|
||||||
|
"--dtype",
|
||||||
|
"bfloat16",
|
||||||
"--max-model-len",
|
"--max-model-len",
|
||||||
"2048",
|
"2048",
|
||||||
"--max-num-seqs",
|
"--max-num-seqs",
|
||||||
|
|||||||
@@ -24,6 +24,8 @@ def server():
|
|||||||
args = [
|
args = [
|
||||||
"--task",
|
"--task",
|
||||||
"generate",
|
"generate",
|
||||||
|
"--dtype",
|
||||||
|
"bfloat16",
|
||||||
"--max-model-len",
|
"--max-model-len",
|
||||||
"32768",
|
"32768",
|
||||||
"--max-num-seqs",
|
"--max-num-seqs",
|
||||||
|
|||||||
@@ -25,6 +25,8 @@ def server():
|
|||||||
args = [
|
args = [
|
||||||
"--task",
|
"--task",
|
||||||
"generate",
|
"generate",
|
||||||
|
"--dtype",
|
||||||
|
"bfloat16",
|
||||||
"--max-model-len",
|
"--max-model-len",
|
||||||
"2048",
|
"2048",
|
||||||
"--max-num-seqs",
|
"--max-num-seqs",
|
||||||
|
|||||||
@@ -28,6 +28,8 @@ def server():
|
|||||||
args = [
|
args = [
|
||||||
"--task",
|
"--task",
|
||||||
"embed",
|
"embed",
|
||||||
|
"--dtype",
|
||||||
|
"bfloat16",
|
||||||
"--max-model-len",
|
"--max-model-len",
|
||||||
"2048",
|
"2048",
|
||||||
"--max-num-seqs",
|
"--max-num-seqs",
|
||||||
|
|||||||
@@ -34,7 +34,7 @@ def phi3v_model_config():
|
|||||||
tokenizer=PHI3V_MODEL_ID,
|
tokenizer=PHI3V_MODEL_ID,
|
||||||
tokenizer_mode="auto",
|
tokenizer_mode="auto",
|
||||||
trust_remote_code=True,
|
trust_remote_code=True,
|
||||||
dtype="auto",
|
dtype="bfloat16",
|
||||||
seed=0,
|
seed=0,
|
||||||
limit_mm_per_prompt={
|
limit_mm_per_prompt={
|
||||||
"image": 2,
|
"image": 2,
|
||||||
@@ -58,7 +58,7 @@ def mllama_model_config():
|
|||||||
tokenizer=MLLAMA_MODEL_ID,
|
tokenizer=MLLAMA_MODEL_ID,
|
||||||
tokenizer_mode="auto",
|
tokenizer_mode="auto",
|
||||||
trust_remote_code=True,
|
trust_remote_code=True,
|
||||||
dtype="auto",
|
dtype="bfloat16",
|
||||||
seed=0,
|
seed=0,
|
||||||
limit_mm_per_prompt={
|
limit_mm_per_prompt={
|
||||||
"image": 2,
|
"image": 2,
|
||||||
@@ -669,7 +669,7 @@ def test_multimodal_image_parsing_matches_hf(model, image_url):
|
|||||||
tokenizer=MLLAMA_MODEL_ID,
|
tokenizer=MLLAMA_MODEL_ID,
|
||||||
tokenizer_mode="auto",
|
tokenizer_mode="auto",
|
||||||
trust_remote_code=True,
|
trust_remote_code=True,
|
||||||
dtype="auto",
|
dtype="bfloat16",
|
||||||
seed=0,
|
seed=0,
|
||||||
limit_mm_per_prompt={
|
limit_mm_per_prompt={
|
||||||
"image": 2,
|
"image": 2,
|
||||||
|
|||||||
@@ -5,10 +5,11 @@ from typing import Optional
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
import pytest
|
import pytest
|
||||||
import pytest_asyncio
|
import pytest_asyncio
|
||||||
from transformers import AutoModel, AutoTokenizer
|
from transformers import AutoModel, AutoTokenizer, BatchEncoding
|
||||||
|
|
||||||
from vllm.multimodal.audio import resample_audio
|
from vllm.multimodal.audio import resample_audio
|
||||||
from vllm.sequence import SampleLogprobs
|
from vllm.sequence import SampleLogprobs
|
||||||
|
from vllm.utils import STR_DTYPE_TO_TORCH_DTYPE
|
||||||
|
|
||||||
from ....conftest import HfRunner, VllmRunner
|
from ....conftest import HfRunner, VllmRunner
|
||||||
from ....utils import RemoteOpenAIServer
|
from ....utils import RemoteOpenAIServer
|
||||||
@@ -106,6 +107,8 @@ def run_test(
|
|||||||
**kwargs,
|
**kwargs,
|
||||||
):
|
):
|
||||||
"""Inference result should be the same between hf and vllm."""
|
"""Inference result should be the same between hf and vllm."""
|
||||||
|
torch_dtype = STR_DTYPE_TO_TORCH_DTYPE[dtype]
|
||||||
|
|
||||||
# NOTE: take care of the order. run vLLM first, and then run HF.
|
# NOTE: take care of the order. run vLLM first, and then run HF.
|
||||||
# vLLM needs a fresh new process without cuda initialization.
|
# vLLM needs a fresh new process without cuda initialization.
|
||||||
# if we run HF first, the cuda initialization will be done and it
|
# if we run HF first, the cuda initialization will be done and it
|
||||||
@@ -121,7 +124,15 @@ def run_test(
|
|||||||
for vllm_prompt, _, audio in prompts_and_audios
|
for vllm_prompt, _, audio in prompts_and_audios
|
||||||
]
|
]
|
||||||
|
|
||||||
with hf_runner(model, dtype=dtype, auto_cls=AutoModel) as hf_model:
|
def process(hf_inputs: BatchEncoding, **kwargs):
|
||||||
|
hf_inputs["audio_values"] = hf_inputs["audio_values"] \
|
||||||
|
.to(torch_dtype) # type: ignore
|
||||||
|
return hf_inputs
|
||||||
|
|
||||||
|
with hf_runner(model,
|
||||||
|
dtype=dtype,
|
||||||
|
postprocess_inputs=process,
|
||||||
|
auto_cls=AutoModel) as hf_model:
|
||||||
hf_outputs_per_audio = [
|
hf_outputs_per_audio = [
|
||||||
hf_model.generate_greedy_logprobs_limit(
|
hf_model.generate_greedy_logprobs_limit(
|
||||||
[hf_prompt],
|
[hf_prompt],
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ from pathlib import PosixPath
|
|||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from packaging.version import Version
|
from packaging.version import Version
|
||||||
from transformers import AutoModelForImageTextToText, AutoModelForVision2Seq
|
from transformers import AutoModelForPreTraining, AutoModelForVision2Seq
|
||||||
from transformers import __version__ as TRANSFORMERS_VERSION
|
from transformers import __version__ as TRANSFORMERS_VERSION
|
||||||
|
|
||||||
from vllm.platforms import current_platform
|
from vllm.platforms import current_platform
|
||||||
@@ -101,7 +101,7 @@ VLM_TEST_SETTINGS = {
|
|||||||
prompt_formatter=lambda img_prompt: f"USER: {img_prompt}\nASSISTANT:",
|
prompt_formatter=lambda img_prompt: f"USER: {img_prompt}\nASSISTANT:",
|
||||||
convert_assets_to_embeddings=model_utils.get_llava_embeddings,
|
convert_assets_to_embeddings=model_utils.get_llava_embeddings,
|
||||||
max_model_len=4096,
|
max_model_len=4096,
|
||||||
auto_cls=AutoModelForImageTextToText,
|
auto_cls=AutoModelForVision2Seq,
|
||||||
vllm_output_post_proc=model_utils.llava_image_vllm_to_hf_output,
|
vllm_output_post_proc=model_utils.llava_image_vllm_to_hf_output,
|
||||||
custom_test_opts=[CustomTestOptions(
|
custom_test_opts=[CustomTestOptions(
|
||||||
inputs=custom_inputs.multi_image_multi_aspect_ratio_inputs(
|
inputs=custom_inputs.multi_image_multi_aspect_ratio_inputs(
|
||||||
@@ -121,7 +121,10 @@ VLM_TEST_SETTINGS = {
|
|||||||
"stop_sign": "caption es",
|
"stop_sign": "caption es",
|
||||||
"cherry_blossom": "What is in the picture?",
|
"cherry_blossom": "What is in the picture?",
|
||||||
}),
|
}),
|
||||||
auto_cls=AutoModelForImageTextToText,
|
auto_cls=AutoModelForVision2Seq,
|
||||||
|
postprocess_inputs=model_utils.cast_dtype_post_processor(
|
||||||
|
"pixel_values"
|
||||||
|
),
|
||||||
vllm_output_post_proc=model_utils.paligemma_vllm_to_hf_output,
|
vllm_output_post_proc=model_utils.paligemma_vllm_to_hf_output,
|
||||||
dtype="bfloat16",
|
dtype="bfloat16",
|
||||||
marks=[pytest.mark.skip(reason="vLLM does not support PrefixLM attention mask")], # noqa: E501
|
marks=[pytest.mark.skip(reason="vLLM does not support PrefixLM attention mask")], # noqa: E501
|
||||||
@@ -176,6 +179,7 @@ VLM_TEST_SETTINGS = {
|
|||||||
# "cherry_blossom": "<vlm_image>Please infer the season with reason.", # noqa: E501
|
# "cherry_blossom": "<vlm_image>Please infer the season with reason.", # noqa: E501
|
||||||
# }),
|
# }),
|
||||||
# multi_image_prompt="<vlm_image><vlm_image>Describe the two images shortly.", # noqa: E501
|
# multi_image_prompt="<vlm_image><vlm_image>Describe the two images shortly.", # noqa: E501
|
||||||
|
# postprocess_inputs=model_utils.cast_dtype_post_processor("pixel_values"), # noqa: E501
|
||||||
# stop_str=["<|im_end|>"],
|
# stop_str=["<|im_end|>"],
|
||||||
# image_size_factors=[(0.10, 0.15)],
|
# image_size_factors=[(0.10, 0.15)],
|
||||||
# max_tokens=64,
|
# max_tokens=64,
|
||||||
@@ -186,7 +190,7 @@ VLM_TEST_SETTINGS = {
|
|||||||
test_type=VLMTestType.IMAGE,
|
test_type=VLMTestType.IMAGE,
|
||||||
prompt_formatter=lambda img_prompt: f"Question: {img_prompt} Answer:",
|
prompt_formatter=lambda img_prompt: f"Question: {img_prompt} Answer:",
|
||||||
img_idx_to_prompt=lambda idx: "",
|
img_idx_to_prompt=lambda idx: "",
|
||||||
auto_cls=AutoModelForImageTextToText,
|
auto_cls=AutoModelForVision2Seq,
|
||||||
vllm_output_post_proc=model_utils.blip2_vllm_to_hf_output,
|
vllm_output_post_proc=model_utils.blip2_vllm_to_hf_output,
|
||||||
),
|
),
|
||||||
"chameleon": VLMTestInfo(
|
"chameleon": VLMTestInfo(
|
||||||
@@ -195,7 +199,10 @@ VLM_TEST_SETTINGS = {
|
|||||||
prompt_formatter=lambda img_prompt: f"USER: {img_prompt}\nASSISTANT:",
|
prompt_formatter=lambda img_prompt: f"USER: {img_prompt}\nASSISTANT:",
|
||||||
max_model_len=4096,
|
max_model_len=4096,
|
||||||
max_num_seqs=2,
|
max_num_seqs=2,
|
||||||
auto_cls=AutoModelForImageTextToText,
|
auto_cls=AutoModelForVision2Seq,
|
||||||
|
postprocess_inputs=model_utils.cast_dtype_post_processor(
|
||||||
|
"pixel_values"
|
||||||
|
),
|
||||||
# For chameleon, we only compare the sequences
|
# For chameleon, we only compare the sequences
|
||||||
vllm_output_post_proc = lambda vllm_output, model: vllm_output[:2],
|
vllm_output_post_proc = lambda vllm_output, model: vllm_output[:2],
|
||||||
hf_output_post_proc = lambda hf_output, model: hf_output[:2],
|
hf_output_post_proc = lambda hf_output, model: hf_output[:2],
|
||||||
@@ -215,6 +222,7 @@ VLM_TEST_SETTINGS = {
|
|||||||
}),
|
}),
|
||||||
multi_image_prompt="image_1:<image>\nimage_2:<image>\nWhich image can we see the car and the tower?", # noqa: E501
|
multi_image_prompt="image_1:<image>\nimage_2:<image>\nWhich image can we see the car and the tower?", # noqa: E501
|
||||||
patch_hf_runner=model_utils.deepseekvl2_patch_hf_runner,
|
patch_hf_runner=model_utils.deepseekvl2_patch_hf_runner,
|
||||||
|
postprocess_inputs=model_utils.cast_dtype_post_processor("images"),
|
||||||
hf_output_post_proc=model_utils.deepseekvl2_trunc_hf_output,
|
hf_output_post_proc=model_utils.deepseekvl2_trunc_hf_output,
|
||||||
stop_str=["<|end▁of▁sentence|>", "<|begin▁of▁sentence|>"], # noqa: E501
|
stop_str=["<|end▁of▁sentence|>", "<|begin▁of▁sentence|>"], # noqa: E501
|
||||||
image_size_factors=[(), (1.0, ), (1.0, 1.0, 1.0), (0.1, 0.5, 1.0)],
|
image_size_factors=[(), (1.0, ), (1.0, 1.0, 1.0), (0.1, 0.5, 1.0)],
|
||||||
@@ -232,7 +240,6 @@ VLM_TEST_SETTINGS = {
|
|||||||
img_idx_to_prompt=lambda idx: "",
|
img_idx_to_prompt=lambda idx: "",
|
||||||
max_model_len=2048,
|
max_model_len=2048,
|
||||||
max_num_seqs=2,
|
max_num_seqs=2,
|
||||||
auto_cls=AutoModelForImageTextToText,
|
|
||||||
use_tokenizer_eos=True,
|
use_tokenizer_eos=True,
|
||||||
vllm_output_post_proc=model_utils.fuyu_vllm_to_hf_output,
|
vllm_output_post_proc=model_utils.fuyu_vllm_to_hf_output,
|
||||||
num_logprobs=10,
|
num_logprobs=10,
|
||||||
@@ -249,7 +256,9 @@ VLM_TEST_SETTINGS = {
|
|||||||
multi_image_prompt="<start_of_image><start_of_image>Describe the two images in detail.", # noqa: E501
|
multi_image_prompt="<start_of_image><start_of_image>Describe the two images in detail.", # noqa: E501
|
||||||
max_model_len=4096,
|
max_model_len=4096,
|
||||||
max_num_seqs=2,
|
max_num_seqs=2,
|
||||||
auto_cls=AutoModelForImageTextToText,
|
# TODO: Use AutoModelForVision2Seq once transformers supports this
|
||||||
|
auto_cls=AutoModelForPreTraining,
|
||||||
|
dtype="bfloat16",
|
||||||
vllm_runner_kwargs={"mm_processor_kwargs": {"do_pan_and_scan": True}},
|
vllm_runner_kwargs={"mm_processor_kwargs": {"do_pan_and_scan": True}},
|
||||||
patch_hf_runner=model_utils.gemma3_patch_hf_runner,
|
patch_hf_runner=model_utils.gemma3_patch_hf_runner,
|
||||||
),
|
),
|
||||||
@@ -263,6 +272,7 @@ VLM_TEST_SETTINGS = {
|
|||||||
}),
|
}),
|
||||||
max_model_len=2048,
|
max_model_len=2048,
|
||||||
max_num_seqs=2,
|
max_num_seqs=2,
|
||||||
|
dtype="bfloat16",
|
||||||
get_stop_token_ids=lambda tok: [151329, 151336, 151338],
|
get_stop_token_ids=lambda tok: [151329, 151336, 151338],
|
||||||
patch_hf_runner=model_utils.glm4v_patch_hf_runner,
|
patch_hf_runner=model_utils.glm4v_patch_hf_runner,
|
||||||
# The image embeddings match with HF but the outputs of the language
|
# The image embeddings match with HF but the outputs of the language
|
||||||
@@ -285,6 +295,7 @@ VLM_TEST_SETTINGS = {
|
|||||||
}),
|
}),
|
||||||
multi_image_prompt="Image-1: <image>\nImage-2: <image>\nDescribe the two images in short.", # noqa: E501
|
multi_image_prompt="Image-1: <image>\nImage-2: <image>\nDescribe the two images in short.", # noqa: E501
|
||||||
max_model_len=8192,
|
max_model_len=8192,
|
||||||
|
dtype="bfloat16",
|
||||||
use_tokenizer_eos=True,
|
use_tokenizer_eos=True,
|
||||||
num_logprobs=10,
|
num_logprobs=10,
|
||||||
patch_hf_runner=model_utils.h2ovl_patch_hf_runner,
|
patch_hf_runner=model_utils.h2ovl_patch_hf_runner,
|
||||||
@@ -296,7 +307,7 @@ VLM_TEST_SETTINGS = {
|
|||||||
img_idx_to_prompt=lambda idx: "<image>",
|
img_idx_to_prompt=lambda idx: "<image>",
|
||||||
max_model_len=8192,
|
max_model_len=8192,
|
||||||
max_num_seqs=2,
|
max_num_seqs=2,
|
||||||
auto_cls=AutoModelForImageTextToText,
|
auto_cls=AutoModelForVision2Seq,
|
||||||
hf_output_post_proc=model_utils.idefics3_trunc_hf_output,
|
hf_output_post_proc=model_utils.idefics3_trunc_hf_output,
|
||||||
),
|
),
|
||||||
"intern_vl": VLMTestInfo(
|
"intern_vl": VLMTestInfo(
|
||||||
@@ -313,6 +324,10 @@ VLM_TEST_SETTINGS = {
|
|||||||
}),
|
}),
|
||||||
multi_image_prompt="Image-1: <image>\nImage-2: <image>\nDescribe the two images in short.", # noqa: E501
|
multi_image_prompt="Image-1: <image>\nImage-2: <image>\nDescribe the two images in short.", # noqa: E501
|
||||||
max_model_len=4096,
|
max_model_len=4096,
|
||||||
|
# NOTE: Mono-InternVL-2B doesn't work with fp16,
|
||||||
|
# it will result NaN during inference.
|
||||||
|
# See: https://huggingface.co/OpenGVLab/Mono-InternVL-2B/discussions/9
|
||||||
|
dtype="bfloat16",
|
||||||
use_tokenizer_eos=True,
|
use_tokenizer_eos=True,
|
||||||
patch_hf_runner=model_utils.internvl_patch_hf_runner,
|
patch_hf_runner=model_utils.internvl_patch_hf_runner,
|
||||||
),
|
),
|
||||||
@@ -321,7 +336,7 @@ VLM_TEST_SETTINGS = {
|
|||||||
test_type=(VLMTestType.IMAGE, VLMTestType.CUSTOM_INPUTS),
|
test_type=(VLMTestType.IMAGE, VLMTestType.CUSTOM_INPUTS),
|
||||||
prompt_formatter=lambda img_prompt: f"[INST] {img_prompt} [/INST]",
|
prompt_formatter=lambda img_prompt: f"[INST] {img_prompt} [/INST]",
|
||||||
max_model_len=10240,
|
max_model_len=10240,
|
||||||
auto_cls=AutoModelForImageTextToText,
|
auto_cls=AutoModelForVision2Seq,
|
||||||
vllm_output_post_proc=model_utils.llava_image_vllm_to_hf_output,
|
vllm_output_post_proc=model_utils.llava_image_vllm_to_hf_output,
|
||||||
custom_test_opts=[CustomTestOptions(
|
custom_test_opts=[CustomTestOptions(
|
||||||
inputs=custom_inputs.multi_image_multi_aspect_ratio_inputs(
|
inputs=custom_inputs.multi_image_multi_aspect_ratio_inputs(
|
||||||
@@ -336,6 +351,9 @@ VLM_TEST_SETTINGS = {
|
|||||||
prompt_formatter=lambda vid_prompt: f"<|im_start|>user\n{vid_prompt}<|im_end|>\n<|im_start|>assistant\n", # noqa: E501
|
prompt_formatter=lambda vid_prompt: f"<|im_start|>user\n{vid_prompt}<|im_end|>\n<|im_start|>assistant\n", # noqa: E501
|
||||||
num_video_frames=16,
|
num_video_frames=16,
|
||||||
max_model_len=16384,
|
max_model_len=16384,
|
||||||
|
postprocess_inputs=model_utils.cast_dtype_post_processor(
|
||||||
|
"pixel_values_videos"
|
||||||
|
),
|
||||||
auto_cls=AutoModelForVision2Seq,
|
auto_cls=AutoModelForVision2Seq,
|
||||||
vllm_output_post_proc=model_utils.llava_onevision_vllm_to_hf_output,
|
vllm_output_post_proc=model_utils.llava_onevision_vllm_to_hf_output,
|
||||||
custom_test_opts=[CustomTestOptions(
|
custom_test_opts=[CustomTestOptions(
|
||||||
@@ -360,8 +378,11 @@ VLM_TEST_SETTINGS = {
|
|||||||
test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
|
test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
|
||||||
prompt_formatter=lambda img_prompt: f"<|start_header_id|>user<|end_header_id|>\n\n{img_prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n", # noqa: E501
|
prompt_formatter=lambda img_prompt: f"<|start_header_id|>user<|end_header_id|>\n\n{img_prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n", # noqa: E501
|
||||||
max_model_len=4096,
|
max_model_len=4096,
|
||||||
|
postprocess_inputs=model_utils.cast_dtype_post_processor(
|
||||||
|
"pixel_values"
|
||||||
|
),
|
||||||
get_stop_token_ids=lambda tok: [128009],
|
get_stop_token_ids=lambda tok: [128009],
|
||||||
auto_cls=AutoModelForImageTextToText,
|
auto_cls=AutoModelForVision2Seq,
|
||||||
vllm_output_post_proc=model_utils.mantis_vllm_to_hf_output,
|
vllm_output_post_proc=model_utils.mantis_vllm_to_hf_output,
|
||||||
patch_hf_runner=model_utils.mantis_patch_hf_runner,
|
patch_hf_runner=model_utils.mantis_patch_hf_runner,
|
||||||
marks=[
|
marks=[
|
||||||
@@ -379,8 +400,8 @@ VLM_TEST_SETTINGS = {
|
|||||||
max_model_len=4096,
|
max_model_len=4096,
|
||||||
max_num_seqs=2,
|
max_num_seqs=2,
|
||||||
get_stop_token_ids=lambda tok: [tok.eos_id, tok.eot_id],
|
get_stop_token_ids=lambda tok: [tok.eos_id, tok.eot_id],
|
||||||
|
postprocess_inputs=model_utils.wrap_inputs_post_processor,
|
||||||
hf_output_post_proc=model_utils.minicpmv_trunc_hf_output,
|
hf_output_post_proc=model_utils.minicpmv_trunc_hf_output,
|
||||||
patch_hf_runner=model_utils.minicpmv_25_patch_hf_runner,
|
|
||||||
),
|
),
|
||||||
"minicpmo_26": VLMTestInfo(
|
"minicpmo_26": VLMTestInfo(
|
||||||
models=["openbmb/MiniCPM-o-2_6"],
|
models=["openbmb/MiniCPM-o-2_6"],
|
||||||
@@ -390,8 +411,11 @@ VLM_TEST_SETTINGS = {
|
|||||||
max_model_len=4096,
|
max_model_len=4096,
|
||||||
max_num_seqs=2,
|
max_num_seqs=2,
|
||||||
get_stop_token_ids=lambda tok: tok.convert_tokens_to_ids(['<|im_end|>', '<|endoftext|>']), # noqa: E501
|
get_stop_token_ids=lambda tok: tok.convert_tokens_to_ids(['<|im_end|>', '<|endoftext|>']), # noqa: E501
|
||||||
|
postprocess_inputs=model_utils.ignore_inputs_post_processor(
|
||||||
|
"image_sizes"
|
||||||
|
),
|
||||||
hf_output_post_proc=model_utils.minicpmv_trunc_hf_output,
|
hf_output_post_proc=model_utils.minicpmv_trunc_hf_output,
|
||||||
patch_hf_runner=model_utils.minicpmo_26_patch_hf_runner,
|
patch_hf_runner=model_utils.minicpmo_patch_hf_runner
|
||||||
),
|
),
|
||||||
"minicpmv_26": VLMTestInfo(
|
"minicpmv_26": VLMTestInfo(
|
||||||
models=["openbmb/MiniCPM-V-2_6"],
|
models=["openbmb/MiniCPM-V-2_6"],
|
||||||
@@ -401,8 +425,10 @@ VLM_TEST_SETTINGS = {
|
|||||||
max_model_len=4096,
|
max_model_len=4096,
|
||||||
max_num_seqs=2,
|
max_num_seqs=2,
|
||||||
get_stop_token_ids=lambda tok: tok.convert_tokens_to_ids(['<|im_end|>', '<|endoftext|>']), # noqa: E501
|
get_stop_token_ids=lambda tok: tok.convert_tokens_to_ids(['<|im_end|>', '<|endoftext|>']), # noqa: E501
|
||||||
|
postprocess_inputs=model_utils.ignore_inputs_post_processor(
|
||||||
|
"image_sizes"
|
||||||
|
),
|
||||||
hf_output_post_proc=model_utils.minicpmv_trunc_hf_output,
|
hf_output_post_proc=model_utils.minicpmv_trunc_hf_output,
|
||||||
patch_hf_runner=model_utils.minicpmv_26_patch_hf_runner,
|
|
||||||
),
|
),
|
||||||
"molmo": VLMTestInfo(
|
"molmo": VLMTestInfo(
|
||||||
models=["allenai/Molmo-7B-D-0924"],
|
models=["allenai/Molmo-7B-D-0924"],
|
||||||
@@ -411,6 +437,7 @@ VLM_TEST_SETTINGS = {
|
|||||||
max_model_len=4096,
|
max_model_len=4096,
|
||||||
max_num_seqs=2,
|
max_num_seqs=2,
|
||||||
patch_hf_runner=model_utils.molmo_patch_hf_runner,
|
patch_hf_runner=model_utils.molmo_patch_hf_runner,
|
||||||
|
postprocess_inputs=model_utils.molmo_post_processor,
|
||||||
),
|
),
|
||||||
# Tests for phi3v currently live in another file because of a bug in
|
# Tests for phi3v currently live in another file because of a bug in
|
||||||
# transformers. Once this issue is fixed, we can enable them here instead.
|
# transformers. Once this issue is fixed, we can enable them here instead.
|
||||||
@@ -436,7 +463,7 @@ VLM_TEST_SETTINGS = {
|
|||||||
img_idx_to_prompt=lambda idx: "[IMG]",
|
img_idx_to_prompt=lambda idx: "[IMG]",
|
||||||
max_model_len=8192,
|
max_model_len=8192,
|
||||||
max_num_seqs=2,
|
max_num_seqs=2,
|
||||||
auto_cls=AutoModelForImageTextToText,
|
auto_cls=AutoModelForVision2Seq,
|
||||||
marks=[large_gpu_mark(min_gb=48)],
|
marks=[large_gpu_mark(min_gb=48)],
|
||||||
),
|
),
|
||||||
"qwen_vl": VLMTestInfo(
|
"qwen_vl": VLMTestInfo(
|
||||||
@@ -454,7 +481,10 @@ VLM_TEST_SETTINGS = {
|
|||||||
models=["facebook/chameleon-7b"],
|
models=["facebook/chameleon-7b"],
|
||||||
prompt_formatter=lambda img_prompt: f"USER: {img_prompt}\nASSISTANT:",
|
prompt_formatter=lambda img_prompt: f"USER: {img_prompt}\nASSISTANT:",
|
||||||
max_model_len=4096,
|
max_model_len=4096,
|
||||||
auto_cls=AutoModelForImageTextToText,
|
auto_cls=AutoModelForVision2Seq,
|
||||||
|
postprocess_inputs=model_utils.cast_dtype_post_processor(
|
||||||
|
"pixel_values"
|
||||||
|
),
|
||||||
vllm_output_post_proc = lambda vllm_output, model: vllm_output[:2],
|
vllm_output_post_proc = lambda vllm_output, model: vllm_output[:2],
|
||||||
hf_output_post_proc = lambda hf_output, model: hf_output[:2],
|
hf_output_post_proc = lambda hf_output, model: hf_output[:2],
|
||||||
comparator=check_outputs_equal,
|
comparator=check_outputs_equal,
|
||||||
@@ -465,7 +495,7 @@ VLM_TEST_SETTINGS = {
|
|||||||
models=["llava-hf/llava-1.5-7b-hf"],
|
models=["llava-hf/llava-1.5-7b-hf"],
|
||||||
prompt_formatter=lambda img_prompt: f"USER: {img_prompt}\nASSISTANT:",
|
prompt_formatter=lambda img_prompt: f"USER: {img_prompt}\nASSISTANT:",
|
||||||
max_model_len=4096,
|
max_model_len=4096,
|
||||||
auto_cls=AutoModelForImageTextToText,
|
auto_cls=AutoModelForVision2Seq,
|
||||||
vllm_output_post_proc=model_utils.llava_image_vllm_to_hf_output,
|
vllm_output_post_proc=model_utils.llava_image_vllm_to_hf_output,
|
||||||
marks=multi_gpu_marks(num_gpus=2),
|
marks=multi_gpu_marks(num_gpus=2),
|
||||||
**COMMON_BROADCAST_SETTINGS # type: ignore
|
**COMMON_BROADCAST_SETTINGS # type: ignore
|
||||||
@@ -474,7 +504,7 @@ VLM_TEST_SETTINGS = {
|
|||||||
models=["llava-hf/llava-v1.6-mistral-7b-hf"],
|
models=["llava-hf/llava-v1.6-mistral-7b-hf"],
|
||||||
prompt_formatter=lambda img_prompt: f"[INST] {img_prompt} [/INST]",
|
prompt_formatter=lambda img_prompt: f"[INST] {img_prompt} [/INST]",
|
||||||
max_model_len=10240,
|
max_model_len=10240,
|
||||||
auto_cls=AutoModelForImageTextToText,
|
auto_cls=AutoModelForVision2Seq,
|
||||||
vllm_output_post_proc=model_utils.llava_image_vllm_to_hf_output,
|
vllm_output_post_proc=model_utils.llava_image_vllm_to_hf_output,
|
||||||
marks=multi_gpu_marks(num_gpus=2),
|
marks=multi_gpu_marks(num_gpus=2),
|
||||||
**COMMON_BROADCAST_SETTINGS # type: ignore
|
**COMMON_BROADCAST_SETTINGS # type: ignore
|
||||||
@@ -499,6 +529,9 @@ VLM_TEST_SETTINGS = {
|
|||||||
test_type=VLMTestType.CUSTOM_INPUTS,
|
test_type=VLMTestType.CUSTOM_INPUTS,
|
||||||
max_model_len=16384,
|
max_model_len=16384,
|
||||||
max_num_seqs=2,
|
max_num_seqs=2,
|
||||||
|
postprocess_inputs=model_utils.cast_dtype_post_processor(
|
||||||
|
"pixel_values"
|
||||||
|
),
|
||||||
auto_cls=AutoModelForVision2Seq,
|
auto_cls=AutoModelForVision2Seq,
|
||||||
vllm_output_post_proc=model_utils.llava_onevision_vllm_to_hf_output,
|
vllm_output_post_proc=model_utils.llava_onevision_vllm_to_hf_output,
|
||||||
custom_test_opts=[CustomTestOptions(
|
custom_test_opts=[CustomTestOptions(
|
||||||
|
|||||||
@@ -4,6 +4,7 @@
|
|||||||
Run `pytest tests/models/test_mistral.py`.
|
Run `pytest tests/models/test_mistral.py`.
|
||||||
"""
|
"""
|
||||||
import json
|
import json
|
||||||
|
import uuid
|
||||||
from dataclasses import asdict
|
from dataclasses import asdict
|
||||||
from typing import TYPE_CHECKING, Any, Optional
|
from typing import TYPE_CHECKING, Any, Optional
|
||||||
|
|
||||||
@@ -15,7 +16,8 @@ from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
|
|||||||
from mistral_common.tokens.tokenizers.multimodal import image_from_chunk
|
from mistral_common.tokens.tokenizers.multimodal import image_from_chunk
|
||||||
from transformers import AutoProcessor
|
from transformers import AutoProcessor
|
||||||
|
|
||||||
from vllm import RequestOutput, SamplingParams, TextPrompt, TokensPrompt
|
from vllm import (EngineArgs, LLMEngine, RequestOutput, SamplingParams,
|
||||||
|
TextPrompt, TokensPrompt)
|
||||||
from vllm.multimodal import MultiModalDataBuiltins
|
from vllm.multimodal import MultiModalDataBuiltins
|
||||||
from vllm.multimodal.inputs import PlaceholderRange
|
from vllm.multimodal.inputs import PlaceholderRange
|
||||||
from vllm.sequence import Logprob, SampleLogprobs
|
from vllm.sequence import Logprob, SampleLogprobs
|
||||||
@@ -26,11 +28,7 @@ from ...utils import check_logprobs_close
|
|||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from _typeshed import StrPath
|
from _typeshed import StrPath
|
||||||
|
|
||||||
PIXTRAL_ID = "mistralai/Pixtral-12B-2409"
|
MODELS = ["mistralai/Pixtral-12B-2409"]
|
||||||
MISTRAL_SMALL_3_1_ID = "mistralai/Mistral-Small-3.1-24B-Instruct-2503"
|
|
||||||
|
|
||||||
MODELS = [PIXTRAL_ID, MISTRAL_SMALL_3_1_ID]
|
|
||||||
|
|
||||||
IMG_URLS = [
|
IMG_URLS = [
|
||||||
"https://picsum.photos/id/237/400/300",
|
"https://picsum.photos/id/237/400/300",
|
||||||
"https://picsum.photos/id/231/200/300",
|
"https://picsum.photos/id/231/200/300",
|
||||||
@@ -127,10 +125,8 @@ MAX_MODEL_LEN = [8192, 65536]
|
|||||||
FIXTURES_PATH = VLLM_PATH / "tests/models/fixtures"
|
FIXTURES_PATH = VLLM_PATH / "tests/models/fixtures"
|
||||||
assert FIXTURES_PATH.exists()
|
assert FIXTURES_PATH.exists()
|
||||||
|
|
||||||
FIXTURE_LOGPROBS_CHAT = {
|
FIXTURE_LOGPROBS_CHAT = FIXTURES_PATH / "pixtral_chat.json"
|
||||||
PIXTRAL_ID: FIXTURES_PATH / "pixtral_chat.json",
|
FIXTURE_LOGPROBS_ENGINE = FIXTURES_PATH / "pixtral_chat_engine.json"
|
||||||
MISTRAL_SMALL_3_1_ID: FIXTURES_PATH / "mistral_small_3_chat.json",
|
|
||||||
}
|
|
||||||
|
|
||||||
OutputsLogprobs = list[tuple[list[int], str, Optional[SampleLogprobs]]]
|
OutputsLogprobs = list[tuple[list[int], str, Optional[SampleLogprobs]]]
|
||||||
|
|
||||||
@@ -170,12 +166,12 @@ def test_chat(
|
|||||||
model: str,
|
model: str,
|
||||||
dtype: str,
|
dtype: str,
|
||||||
) -> None:
|
) -> None:
|
||||||
EXPECTED_CHAT_LOGPROBS = load_outputs_w_logprobs(
|
EXPECTED_CHAT_LOGPROBS = load_outputs_w_logprobs(FIXTURE_LOGPROBS_CHAT)
|
||||||
FIXTURE_LOGPROBS_CHAT[model])
|
|
||||||
with vllm_runner(
|
with vllm_runner(
|
||||||
model,
|
model,
|
||||||
dtype=dtype,
|
dtype=dtype,
|
||||||
tokenizer_mode="mistral",
|
tokenizer_mode="mistral",
|
||||||
|
enable_chunked_prefill=False,
|
||||||
max_model_len=max_model_len,
|
max_model_len=max_model_len,
|
||||||
limit_mm_per_prompt=LIMIT_MM_PER_PROMPT,
|
limit_mm_per_prompt=LIMIT_MM_PER_PROMPT,
|
||||||
) as vllm_model:
|
) as vllm_model:
|
||||||
@@ -187,40 +183,70 @@ def test_chat(
|
|||||||
outputs.extend(output)
|
outputs.extend(output)
|
||||||
|
|
||||||
logprobs = vllm_runner._final_steps_generate_w_logprobs(outputs)
|
logprobs = vllm_runner._final_steps_generate_w_logprobs(outputs)
|
||||||
# Remove last `None` prompt_logprobs to compare with fixture
|
|
||||||
for i in range(len(logprobs)):
|
|
||||||
assert logprobs[i][-1] is None
|
|
||||||
logprobs[i] = logprobs[i][:-1]
|
|
||||||
check_logprobs_close(outputs_0_lst=EXPECTED_CHAT_LOGPROBS,
|
check_logprobs_close(outputs_0_lst=EXPECTED_CHAT_LOGPROBS,
|
||||||
outputs_1_lst=logprobs,
|
outputs_1_lst=logprobs,
|
||||||
name_0="h100_ref",
|
name_0="h100_ref",
|
||||||
name_1="output")
|
name_1="output")
|
||||||
|
|
||||||
|
|
||||||
|
@large_gpu_test(min_gb=80)
|
||||||
|
@pytest.mark.parametrize("model", MODELS)
|
||||||
|
@pytest.mark.parametrize("dtype", ["bfloat16"])
|
||||||
|
def test_model_engine(vllm_runner, model: str, dtype: str) -> None:
|
||||||
|
EXPECTED_ENGINE_LOGPROBS = load_outputs_w_logprobs(FIXTURE_LOGPROBS_ENGINE)
|
||||||
|
args = EngineArgs(
|
||||||
|
model=model,
|
||||||
|
tokenizer_mode="mistral",
|
||||||
|
enable_chunked_prefill=False,
|
||||||
|
limit_mm_per_prompt=LIMIT_MM_PER_PROMPT,
|
||||||
|
dtype=dtype,
|
||||||
|
)
|
||||||
|
engine = LLMEngine.from_engine_args(args)
|
||||||
|
|
||||||
|
engine.add_request(uuid.uuid4().hex, ENGINE_INPUTS[0], SAMPLING_PARAMS)
|
||||||
|
engine.add_request(uuid.uuid4().hex, ENGINE_INPUTS[1], SAMPLING_PARAMS)
|
||||||
|
|
||||||
|
outputs = []
|
||||||
|
count = 0
|
||||||
|
while True:
|
||||||
|
out = engine.step()
|
||||||
|
count += 1
|
||||||
|
for request_output in out:
|
||||||
|
if request_output.finished:
|
||||||
|
outputs.append(request_output)
|
||||||
|
|
||||||
|
if count == 2:
|
||||||
|
engine.add_request(uuid.uuid4().hex, ENGINE_INPUTS[2],
|
||||||
|
SAMPLING_PARAMS)
|
||||||
|
if not engine.has_unfinished_requests():
|
||||||
|
break
|
||||||
|
|
||||||
|
logprobs = vllm_runner._final_steps_generate_w_logprobs(outputs)
|
||||||
|
check_logprobs_close(outputs_0_lst=EXPECTED_ENGINE_LOGPROBS,
|
||||||
|
outputs_1_lst=logprobs,
|
||||||
|
name_0="h100_ref",
|
||||||
|
name_1="output")
|
||||||
|
|
||||||
|
|
||||||
@large_gpu_test(min_gb=48)
|
@large_gpu_test(min_gb=48)
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
"prompt,expected_ranges",
|
"prompt,expected_ranges",
|
||||||
[(_create_engine_inputs_hf(IMG_URLS[:1]), [{
|
[(_create_engine_inputs_hf(IMG_URLS[:1]), [{
|
||||||
"offset": 11,
|
"offset": 10,
|
||||||
"length": 494
|
"length": 494
|
||||||
}]),
|
}]),
|
||||||
(_create_engine_inputs_hf(IMG_URLS[1:4]), [{
|
(_create_engine_inputs_hf(IMG_URLS[1:4]), [{
|
||||||
"offset": 11,
|
"offset": 10,
|
||||||
"length": 266
|
"length": 266
|
||||||
}, {
|
}, {
|
||||||
"offset": 277,
|
"offset": 276,
|
||||||
"length": 1056
|
"length": 1056
|
||||||
}, {
|
}, {
|
||||||
"offset": 1333,
|
"offset": 1332,
|
||||||
"length": 418
|
"length": 418
|
||||||
}])])
|
}])])
|
||||||
def test_multi_modal_placeholders(vllm_runner, prompt,
|
def test_multi_modal_placeholders(
|
||||||
expected_ranges: list[PlaceholderRange],
|
vllm_runner, prompt, expected_ranges: list[PlaceholderRange]) -> None:
|
||||||
monkeypatch) -> None:
|
|
||||||
|
|
||||||
# This placeholder checking test only works with V0 engine
|
|
||||||
# where `multi_modal_placeholders` is returned with `RequestOutput`
|
|
||||||
monkeypatch.setenv("VLLM_USE_V1", "0")
|
|
||||||
with vllm_runner(
|
with vllm_runner(
|
||||||
"mistral-community/pixtral-12b",
|
"mistral-community/pixtral-12b",
|
||||||
max_model_len=8192,
|
max_model_len=8192,
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ from typing import Any, Callable, Optional, Union
|
|||||||
|
|
||||||
import torch
|
import torch
|
||||||
from PIL.Image import Image
|
from PIL.Image import Image
|
||||||
|
from transformers import BatchEncoding
|
||||||
from transformers.models.auto.auto_factory import _BaseAutoModelClass
|
from transformers.models.auto.auto_factory import _BaseAutoModelClass
|
||||||
|
|
||||||
from vllm.config import TaskOption
|
from vllm.config import TaskOption
|
||||||
@@ -30,6 +31,7 @@ def run_test(
|
|||||||
vllm_output_post_proc: Optional[Callable[[RunnerOutput, str], Any]],
|
vllm_output_post_proc: Optional[Callable[[RunnerOutput, str], Any]],
|
||||||
auto_cls: type[_BaseAutoModelClass],
|
auto_cls: type[_BaseAutoModelClass],
|
||||||
use_tokenizer_eos: bool,
|
use_tokenizer_eos: bool,
|
||||||
|
postprocess_inputs: Callable[[BatchEncoding], BatchEncoding],
|
||||||
comparator: Callable[..., None],
|
comparator: Callable[..., None],
|
||||||
get_stop_token_ids: Optional[Callable[[AnyTokenizer], list[int]]],
|
get_stop_token_ids: Optional[Callable[[AnyTokenizer], list[int]]],
|
||||||
stop_str: Optional[list[str]],
|
stop_str: Optional[list[str]],
|
||||||
@@ -99,6 +101,7 @@ def run_test(
|
|||||||
hf_model = hf_runner(model,
|
hf_model = hf_runner(model,
|
||||||
dtype=dtype,
|
dtype=dtype,
|
||||||
auto_cls=auto_cls,
|
auto_cls=auto_cls,
|
||||||
|
postprocess_inputs=postprocess_inputs,
|
||||||
model_kwargs=hf_model_kwargs)
|
model_kwargs=hf_model_kwargs)
|
||||||
|
|
||||||
# Some models need to patch things like the model processor, e.g., internvl
|
# Some models need to patch things like the model processor, e.g., internvl
|
||||||
|
|||||||
@@ -6,15 +6,16 @@ typically specific to a small subset of models.
|
|||||||
import re
|
import re
|
||||||
import types
|
import types
|
||||||
from pathlib import PosixPath
|
from pathlib import PosixPath
|
||||||
from typing import Optional, Union
|
from typing import Callable, Optional, Union
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
from PIL.Image import Image
|
from PIL.Image import Image
|
||||||
from transformers import (AutoConfig, AutoTokenizer, BatchFeature,
|
from transformers import (AutoConfig, AutoTokenizer, BatchEncoding,
|
||||||
GenerationConfig)
|
GenerationConfig)
|
||||||
|
|
||||||
from vllm.sequence import SampleLogprobs
|
from vllm.sequence import SampleLogprobs
|
||||||
from vllm.transformers_utils.tokenizer import patch_padding_side
|
from vllm.transformers_utils.tokenizer import patch_padding_side
|
||||||
|
from vllm.utils import STR_DTYPE_TO_TORCH_DTYPE
|
||||||
|
|
||||||
from .....conftest import HfRunner, ImageAsset, _ImageAssets
|
from .....conftest import HfRunner, ImageAsset, _ImageAssets
|
||||||
from .types import RunnerOutput
|
from .types import RunnerOutput
|
||||||
@@ -210,6 +211,40 @@ def get_llava_embeddings(image_assets: _ImageAssets):
|
|||||||
return [asset.image_embeds for asset in image_assets]
|
return [asset.image_embeds for asset in image_assets]
|
||||||
|
|
||||||
|
|
||||||
|
####### postprocessors to run on HF BatchEncoding
|
||||||
|
def cast_dtype_post_processor(
|
||||||
|
hf_inp_key: str) -> Callable[[BatchEncoding, str], BatchEncoding]:
|
||||||
|
"""Gets a handle to a post processor which converts a given key into a
|
||||||
|
target data type."""
|
||||||
|
|
||||||
|
def process(hf_inputs: BatchEncoding, dtype: str):
|
||||||
|
torch_dtype = STR_DTYPE_TO_TORCH_DTYPE[dtype]
|
||||||
|
hf_inputs[hf_inp_key] = hf_inputs[hf_inp_key].to(torch_dtype)
|
||||||
|
return hf_inputs
|
||||||
|
|
||||||
|
return process
|
||||||
|
|
||||||
|
|
||||||
|
def ignore_inputs_post_processor(
|
||||||
|
hf_inp_key: str) -> Callable[[BatchEncoding, str], BatchEncoding]:
|
||||||
|
"""Gets a handle to a post processor which ignores a given key."""
|
||||||
|
|
||||||
|
def process(hf_inputs: BatchEncoding, dtype: str):
|
||||||
|
del hf_inputs[hf_inp_key]
|
||||||
|
return hf_inputs
|
||||||
|
|
||||||
|
return process
|
||||||
|
|
||||||
|
|
||||||
|
def wrap_inputs_post_processor(hf_inputs: BatchEncoding, dtype: str):
|
||||||
|
return {"model_inputs": hf_inputs}
|
||||||
|
|
||||||
|
|
||||||
|
def molmo_post_processor(hf_inputs: BatchEncoding, dtype: str):
|
||||||
|
hf_inputs = cast_dtype_post_processor("images")(hf_inputs, dtype)
|
||||||
|
return {k: v.unsqueeze(0) for k, v in hf_inputs.items()}
|
||||||
|
|
||||||
|
|
||||||
####### Prompt path encoders for models that need models on disk
|
####### Prompt path encoders for models that need models on disk
|
||||||
def qwen_prompt_path_encoder(
|
def qwen_prompt_path_encoder(
|
||||||
tmp_path: PosixPath, prompt: str, assets: Union[list[ImageAsset],
|
tmp_path: PosixPath, prompt: str, assets: Union[list[ImageAsset],
|
||||||
@@ -260,7 +295,8 @@ def deepseekvl2_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
|
|||||||
for k in inputs.keys() # noqa
|
for k in inputs.keys() # noqa
|
||||||
if k not in ("seq_lens", "sft_format")
|
if k not in ("seq_lens", "sft_format")
|
||||||
}
|
}
|
||||||
return BatchFeature(data=inputs, tensor_type="pt")
|
inputs = BatchEncoding(data=inputs, tensor_type="pt")
|
||||||
|
return inputs
|
||||||
|
|
||||||
hf_model.processor = processor
|
hf_model.processor = processor
|
||||||
hf_model.model.get_output_embeddings = lambda: \
|
hf_model.model.get_output_embeddings = lambda: \
|
||||||
@@ -493,52 +529,10 @@ def mantis_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
|
|||||||
return hf_model
|
return hf_model
|
||||||
|
|
||||||
|
|
||||||
def minicpmv_25_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
|
def minicpmo_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
|
||||||
orig_generate = hf_model.model.generate
|
orig_generate = hf_model.model.generate
|
||||||
|
|
||||||
def _generate(
|
def _generate(self, *args, **kwargs):
|
||||||
self,
|
|
||||||
*args,
|
|
||||||
input_ids=None,
|
|
||||||
pixel_values=None,
|
|
||||||
image_sizes=None,
|
|
||||||
image_bound=None,
|
|
||||||
tgt_sizes=None,
|
|
||||||
**kwargs,
|
|
||||||
):
|
|
||||||
model_inputs = {
|
|
||||||
"input_ids": input_ids,
|
|
||||||
"pixel_values": pixel_values,
|
|
||||||
"image_sizes": image_sizes,
|
|
||||||
"image_bound": image_bound,
|
|
||||||
"tgt_sizes": tgt_sizes,
|
|
||||||
}
|
|
||||||
for k in list(model_inputs.keys()):
|
|
||||||
if model_inputs[k] is None:
|
|
||||||
model_inputs.pop(k)
|
|
||||||
|
|
||||||
return orig_generate(model_inputs, *args, decode_text=False, **kwargs)
|
|
||||||
|
|
||||||
hf_model.model.generate = types.MethodType(_generate, hf_model.model)
|
|
||||||
|
|
||||||
return hf_model
|
|
||||||
|
|
||||||
|
|
||||||
def minicpmo_26_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
|
|
||||||
orig_generate = hf_model.model.generate
|
|
||||||
|
|
||||||
def _generate(self, *args, image_sizes=None, **kwargs):
|
|
||||||
return orig_generate(*args, decode_text=False, **kwargs)
|
|
||||||
|
|
||||||
hf_model.model.generate = types.MethodType(_generate, hf_model.model)
|
|
||||||
|
|
||||||
return hf_model
|
|
||||||
|
|
||||||
|
|
||||||
def minicpmv_26_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
|
|
||||||
orig_generate = hf_model.model.generate
|
|
||||||
|
|
||||||
def _generate(self, *args, image_sizes=None, **kwargs):
|
|
||||||
return orig_generate(*args, decode_text=False, **kwargs)
|
return orig_generate(*args, decode_text=False, **kwargs)
|
||||||
|
|
||||||
hf_model.model.generate = types.MethodType(_generate, hf_model.model)
|
hf_model.model.generate = types.MethodType(_generate, hf_model.model)
|
||||||
@@ -557,11 +551,10 @@ def molmo_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
|
|||||||
|
|
||||||
def _generate(self, max_new_tokens=None, do_sample=None, **kwargs):
|
def _generate(self, max_new_tokens=None, do_sample=None, **kwargs):
|
||||||
batch = {
|
batch = {
|
||||||
k: kwargs.pop(k).unsqueeze(0)
|
k: kwargs.pop(k)
|
||||||
for k in ("input_ids", "images", "image_input_idx", "image_masks")
|
for k in ("input_ids", "images", "image_input_idx", "image_masks")
|
||||||
if k in kwargs
|
if k in kwargs
|
||||||
}
|
}
|
||||||
batch = BatchFeature(batch).to(dtype=self.dtype)
|
|
||||||
|
|
||||||
return self.generate_from_batch(
|
return self.generate_from_batch(
|
||||||
batch,
|
batch,
|
||||||
|
|||||||
@@ -8,12 +8,13 @@ from typing import Any, Callable, NamedTuple, Optional, Union
|
|||||||
import torch
|
import torch
|
||||||
from PIL.Image import Image
|
from PIL.Image import Image
|
||||||
from pytest import MarkDecorator
|
from pytest import MarkDecorator
|
||||||
from transformers import AutoModelForCausalLM
|
from transformers import AutoModelForCausalLM, BatchEncoding
|
||||||
from transformers.models.auto.auto_factory import _BaseAutoModelClass
|
from transformers.models.auto.auto_factory import _BaseAutoModelClass
|
||||||
|
|
||||||
from vllm.config import TaskOption
|
from vllm.config import TaskOption
|
||||||
from vllm.sequence import SampleLogprobs
|
from vllm.sequence import SampleLogprobs
|
||||||
from vllm.transformers_utils.tokenizer import AnyTokenizer
|
from vllm.transformers_utils.tokenizer import AnyTokenizer
|
||||||
|
from vllm.utils import identity
|
||||||
|
|
||||||
from .....conftest import IMAGE_ASSETS, HfRunner, ImageAsset, _ImageAssets
|
from .....conftest import IMAGE_ASSETS, HfRunner, ImageAsset, _ImageAssets
|
||||||
from ....utils import check_logprobs_close
|
from ....utils import check_logprobs_close
|
||||||
@@ -109,6 +110,11 @@ class VLMTestInfo(NamedTuple):
|
|||||||
# Indicates we should explicitly pass the EOS from the tokenizer
|
# Indicates we should explicitly pass the EOS from the tokenizer
|
||||||
use_tokenizer_eos: bool = False
|
use_tokenizer_eos: bool = False
|
||||||
auto_cls: type[_BaseAutoModelClass] = AutoModelForCausalLM
|
auto_cls: type[_BaseAutoModelClass] = AutoModelForCausalLM
|
||||||
|
# Callable to pass to the HF runner to run on inputs; for now, we also pass
|
||||||
|
# the data type to input post processing, because almost all of the uses of
|
||||||
|
# postprocess_inputs are to fix the data types of BatchEncoding values.
|
||||||
|
postprocess_inputs: Callable[[BatchEncoding, str],
|
||||||
|
BatchEncoding] = identity
|
||||||
patch_hf_runner: Optional[Callable[[HfRunner], HfRunner]] = None
|
patch_hf_runner: Optional[Callable[[HfRunner], HfRunner]] = None
|
||||||
|
|
||||||
# Post processors that if defined, will run oun the outputs of the
|
# Post processors that if defined, will run oun the outputs of the
|
||||||
@@ -124,7 +130,7 @@ class VLMTestInfo(NamedTuple):
|
|||||||
# is all combinations of .models + all fields below
|
# is all combinations of .models + all fields below
|
||||||
max_tokens: Union[int, tuple[int]] = 128
|
max_tokens: Union[int, tuple[int]] = 128
|
||||||
num_logprobs: Union[int, tuple[int]] = 5
|
num_logprobs: Union[int, tuple[int]] = 5
|
||||||
dtype: Union[str, Union[list[str], tuple[str, ...]]] = "auto"
|
dtype: Union[str, Iterable[str]] = "half"
|
||||||
distributed_executor_backend: Optional[Union[str, Iterable[str]]] = None
|
distributed_executor_backend: Optional[Union[str, Iterable[str]]] = None
|
||||||
# Only expanded in video tests
|
# Only expanded in video tests
|
||||||
num_video_frames: Union[int, tuple[int]] = 16
|
num_video_frames: Union[int, tuple[int]] = 16
|
||||||
@@ -165,6 +171,7 @@ class VLMTestInfo(NamedTuple):
|
|||||||
"vllm_output_post_proc": self.vllm_output_post_proc,
|
"vllm_output_post_proc": self.vllm_output_post_proc,
|
||||||
"auto_cls": self.auto_cls,
|
"auto_cls": self.auto_cls,
|
||||||
"use_tokenizer_eos": self.use_tokenizer_eos,
|
"use_tokenizer_eos": self.use_tokenizer_eos,
|
||||||
|
"postprocess_inputs": self.postprocess_inputs,
|
||||||
"comparator": self.comparator,
|
"comparator": self.comparator,
|
||||||
"get_stop_token_ids": self.get_stop_token_ids,
|
"get_stop_token_ids": self.get_stop_token_ids,
|
||||||
"hf_model_kwargs": self.hf_model_kwargs,
|
"hf_model_kwargs": self.hf_model_kwargs,
|
||||||
|
|||||||
@@ -1,12 +1,12 @@
|
|||||||
# SPDX-License-Identifier: Apache-2.0
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
from functools import partial
|
||||||
from typing import Callable
|
from typing import Callable
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
import torch
|
import torch
|
||||||
import torch.nn.functional as F
|
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
from transformers import Qwen2VLForConditionalGeneration
|
from transformers import BatchEncoding, Qwen2VLForConditionalGeneration
|
||||||
|
|
||||||
from ....conftest import IMAGE_ASSETS, HfRunner, PromptImageInput, VllmRunner
|
from ....conftest import IMAGE_ASSETS, HfRunner, PromptImageInput, VllmRunner
|
||||||
from ....utils import large_gpu_test
|
from ....utils import large_gpu_test
|
||||||
@@ -75,6 +75,10 @@ def apply_chat_template_and_add_eos(
|
|||||||
return prompt
|
return prompt
|
||||||
|
|
||||||
|
|
||||||
|
def postprocess_inputs(hf_model: HfRunner, inputs: BatchEncoding, **kwargs):
|
||||||
|
return hf_model.model.prepare_inputs_for_generation(**inputs, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
def _run_test(
|
def _run_test(
|
||||||
hf_runner: type[HfRunner],
|
hf_runner: type[HfRunner],
|
||||||
vllm_runner: type[VllmRunner],
|
vllm_runner: type[VllmRunner],
|
||||||
@@ -114,8 +118,14 @@ def _run_test(
|
|||||||
with hf_runner(model,
|
with hf_runner(model,
|
||||||
dtype=dtype,
|
dtype=dtype,
|
||||||
auto_cls=Qwen2VLForConditionalGeneration) as hf_model:
|
auto_cls=Qwen2VLForConditionalGeneration) as hf_model:
|
||||||
|
hf_model.postprocess_inputs = partial(
|
||||||
prompts = []
|
postprocess_inputs,
|
||||||
|
hf_model,
|
||||||
|
cache_position=torch.arange(
|
||||||
|
0,
|
||||||
|
1, # 1 for batch size
|
||||||
|
requires_grad=False),
|
||||||
|
use_cache=False)
|
||||||
for text, image, embed_text in zip(input_texts, input_images,
|
for text, image, embed_text in zip(input_texts, input_images,
|
||||||
embed_texts):
|
embed_texts):
|
||||||
# dse requires non-standard input processing
|
# dse requires non-standard input processing
|
||||||
@@ -123,34 +133,20 @@ def _run_test(
|
|||||||
messages = get_messages(image, text, embed_text)
|
messages = get_messages(image, text, embed_text)
|
||||||
prompt = apply_chat_template_and_add_eos(
|
prompt = apply_chat_template_and_add_eos(
|
||||||
messages, hf_model.processor.apply_chat_template)
|
messages, hf_model.processor.apply_chat_template)
|
||||||
|
inputs = hf_model.get_inputs(
|
||||||
prompts.append(prompt)
|
prompts=[[prompt]],
|
||||||
|
images=[[image]],
|
||||||
all_inputs = hf_model.get_inputs(
|
)
|
||||||
prompts=prompts,
|
with torch.no_grad():
|
||||||
images=input_images,
|
|
||||||
)
|
|
||||||
|
|
||||||
with torch.no_grad():
|
|
||||||
all_outputs = []
|
|
||||||
for inputs in all_inputs:
|
|
||||||
inputs = hf_model.model.prepare_inputs_for_generation(
|
|
||||||
**inputs,
|
|
||||||
cache_position=torch.arange(1), # 1 for batch size
|
|
||||||
use_cache=False,
|
|
||||||
)
|
|
||||||
outputs = hf_model.model(
|
outputs = hf_model.model(
|
||||||
**hf_model.wrap_device(inputs),
|
**hf_model.wrap_device(inputs[0],
|
||||||
|
device=hf_model.model.device.type),
|
||||||
return_dict=True,
|
return_dict=True,
|
||||||
output_hidden_states=True,
|
output_hidden_states=True,
|
||||||
)
|
)
|
||||||
pooled_output = F.normalize(outputs.hidden_states[-1][0, -1],
|
pooled_output = torch.nn.functional.normalize(
|
||||||
p=2,
|
outputs.hidden_states[-1][0, -1], p=2, dim=-1)
|
||||||
dim=-1)
|
hf_outputs.append(pooled_output.tolist())
|
||||||
|
|
||||||
all_outputs.append(pooled_output.tolist())
|
|
||||||
|
|
||||||
hf_outputs = all_outputs
|
|
||||||
|
|
||||||
check_embeddings_close(
|
check_embeddings_close(
|
||||||
embeddings_0_lst=hf_outputs,
|
embeddings_0_lst=hf_outputs,
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
import torch.nn.functional as F
|
import torch.nn.functional as F
|
||||||
from transformers import AutoModelForImageTextToText
|
from transformers import AutoModelForVision2Seq
|
||||||
|
|
||||||
from vllm.platforms import current_platform
|
from vllm.platforms import current_platform
|
||||||
|
|
||||||
@@ -70,7 +70,7 @@ def _run_test(
|
|||||||
vllm_outputs = vllm_model.encode(input_texts, images=input_images)
|
vllm_outputs = vllm_model.encode(input_texts, images=input_images)
|
||||||
|
|
||||||
with hf_runner(model, dtype=dtype,
|
with hf_runner(model, dtype=dtype,
|
||||||
auto_cls=AutoModelForImageTextToText) as hf_model:
|
auto_cls=AutoModelForVision2Seq) as hf_model:
|
||||||
# Patch the issue where generation_config.json is missing
|
# Patch the issue where generation_config.json is missing
|
||||||
hf_model.processor.patch_size = \
|
hf_model.processor.patch_size = \
|
||||||
hf_model.model.config.vision_config.patch_size
|
hf_model.model.config.vision_config.patch_size
|
||||||
@@ -86,7 +86,8 @@ def _run_test(
|
|||||||
for inputs in all_inputs:
|
for inputs in all_inputs:
|
||||||
# Based on: https://huggingface.co/royokong/e5-v
|
# Based on: https://huggingface.co/royokong/e5-v
|
||||||
outputs = hf_model.model(
|
outputs = hf_model.model(
|
||||||
**hf_model.wrap_device(inputs),
|
**hf_model.wrap_device(inputs,
|
||||||
|
device=hf_model.model.device.type),
|
||||||
return_dict=True,
|
return_dict=True,
|
||||||
output_hidden_states=True,
|
output_hidden_states=True,
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -53,7 +53,8 @@ def _run_test(
|
|||||||
for inputs in all_inputs:
|
for inputs in all_inputs:
|
||||||
# Based on: https://github.com/TIGER-AI-Lab/VLM2Vec/blob/db3b951bccabba220c1f53ab46a734e50dd2fc08/src/model.py
|
# Based on: https://github.com/TIGER-AI-Lab/VLM2Vec/blob/db3b951bccabba220c1f53ab46a734e50dd2fc08/src/model.py
|
||||||
outputs = hf_model.model(
|
outputs = hf_model.model(
|
||||||
**hf_model.wrap_device(inputs),
|
**hf_model.wrap_device(inputs,
|
||||||
|
device=hf_model.model.device.type),
|
||||||
return_dict=True,
|
return_dict=True,
|
||||||
output_hidden_states=True,
|
output_hidden_states=True,
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -4,7 +4,8 @@ from typing import Optional, overload
|
|||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
import torch
|
import torch
|
||||||
from transformers import AutoConfig, AutoModelForImageTextToText, AutoTokenizer
|
from transformers import (AutoConfig, AutoModelForVision2Seq, AutoTokenizer,
|
||||||
|
BatchEncoding)
|
||||||
|
|
||||||
from vllm import LLM, SamplingParams
|
from vllm import LLM, SamplingParams
|
||||||
from vllm.attention.backends.flash_attn import FlashAttentionMetadata
|
from vllm.attention.backends.flash_attn import FlashAttentionMetadata
|
||||||
@@ -226,10 +227,14 @@ def _run_test(
|
|||||||
for prompts, images in inputs
|
for prompts, images in inputs
|
||||||
]
|
]
|
||||||
|
|
||||||
|
def process(hf_inputs: BatchEncoding, **kwargs):
|
||||||
|
return hf_inputs
|
||||||
|
|
||||||
with hf_runner(model,
|
with hf_runner(model,
|
||||||
dtype=dtype,
|
dtype=dtype,
|
||||||
model_kwargs={"device_map": "auto"},
|
model_kwargs={"device_map": "auto"},
|
||||||
auto_cls=AutoModelForImageTextToText) as hf_model:
|
postprocess_inputs=process,
|
||||||
|
auto_cls=AutoModelForVision2Seq) as hf_model:
|
||||||
hf_outputs_per_image = [
|
hf_outputs_per_image = [
|
||||||
hf_model.generate_greedy_logprobs_limit(prompts,
|
hf_model.generate_greedy_logprobs_limit(prompts,
|
||||||
max_tokens,
|
max_tokens,
|
||||||
|
|||||||
File diff suppressed because one or more lines are too long
1
tests/models/fixtures/pixtral_chat_engine.json
Normal file
1
tests/models/fixtures/pixtral_chat_engine.json
Normal file
File diff suppressed because one or more lines are too long
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
import warnings
|
import warnings
|
||||||
from collections.abc import Sequence
|
from collections.abc import Sequence
|
||||||
from typing import Any, Optional, Union
|
from typing import Optional, Union
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
@@ -254,9 +254,9 @@ def check_logprobs_close(
|
|||||||
def build_model_context(
|
def build_model_context(
|
||||||
model_id: str,
|
model_id: str,
|
||||||
task: TaskOption = "auto",
|
task: TaskOption = "auto",
|
||||||
dtype: Union[str, torch.dtype] = "auto",
|
dtype: Optional[Union[str, torch.dtype]] = None,
|
||||||
mm_processor_kwargs: Optional[dict[str, Any]] = None,
|
mm_processor_kwargs: Optional[dict] = None,
|
||||||
limit_mm_per_prompt: Optional[dict[str, int]] = None,
|
limit_mm_per_prompt: Optional[dict] = None,
|
||||||
disable_mm_preprocessor_cache: bool = True,
|
disable_mm_preprocessor_cache: bool = True,
|
||||||
):
|
):
|
||||||
"""Creates an InputContext for a given model.
|
"""Creates an InputContext for a given model.
|
||||||
@@ -274,6 +274,9 @@ def build_model_context(
|
|||||||
model_info.check_available_online(on_fail="skip")
|
model_info.check_available_online(on_fail="skip")
|
||||||
model_info.check_transformers_version(on_fail="skip")
|
model_info.check_transformers_version(on_fail="skip")
|
||||||
|
|
||||||
|
if dtype is None:
|
||||||
|
dtype = "half"
|
||||||
|
|
||||||
model_config = ModelConfig(
|
model_config = ModelConfig(
|
||||||
model_id,
|
model_id,
|
||||||
task=task,
|
task=task,
|
||||||
|
|||||||
@@ -7,25 +7,19 @@ from unittest.mock import MagicMock
|
|||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pytest
|
import pytest
|
||||||
import torch
|
|
||||||
from transformers import ProcessorMixin
|
from transformers import ProcessorMixin
|
||||||
|
|
||||||
from vllm.config import ModelConfig
|
from vllm.config import ModelConfig
|
||||||
from vllm.multimodal import MULTIMODAL_REGISTRY
|
from vllm.multimodal import MULTIMODAL_REGISTRY
|
||||||
from vllm.multimodal.inputs import (MultiModalFieldElem, MultiModalKwargs,
|
|
||||||
MultiModalKwargsItem,
|
|
||||||
MultiModalSharedField)
|
|
||||||
# yapf conflicts with isort for this block
|
# yapf conflicts with isort for this block
|
||||||
# yapf: disable
|
# yapf: disable
|
||||||
from vllm.multimodal.processing import (PlaceholderFeaturesInfo,
|
from vllm.multimodal.processing import (PlaceholderFeaturesInfo,
|
||||||
ProcessingCache, PromptIndexTargets,
|
PromptIndexTargets, PromptInsertion,
|
||||||
PromptInsertion, PromptReplacement,
|
PromptReplacement, apply_text_matches,
|
||||||
apply_text_matches,
|
|
||||||
apply_token_matches,
|
apply_token_matches,
|
||||||
find_mm_placeholders,
|
find_mm_placeholders,
|
||||||
find_text_matches, find_token_matches,
|
find_text_matches, find_token_matches,
|
||||||
iter_token_matches,
|
iter_token_matches)
|
||||||
replace_token_matches)
|
|
||||||
# yapf: enable
|
# yapf: enable
|
||||||
from vllm.multimodal.profiling import MultiModalProfiler
|
from vllm.multimodal.profiling import MultiModalProfiler
|
||||||
from vllm.transformers_utils.tokenizer import (AnyTokenizer,
|
from vllm.transformers_utils.tokenizer import (AnyTokenizer,
|
||||||
@@ -95,58 +89,6 @@ def test_iter_token_matches(token_ids, match_ids, expected):
|
|||||||
assert all(match_len == len(match_ids) for match_len in match_lens)
|
assert all(match_len == len(match_ids) for match_len in match_lens)
|
||||||
|
|
||||||
|
|
||||||
# yapf: disable
|
|
||||||
@pytest.mark.parametrize(
|
|
||||||
("token_ids", "match_ids", "new_ids", "expected"),
|
|
||||||
[
|
|
||||||
([], [], [-1], []),
|
|
||||||
([], [32000], [-1], []),
|
|
||||||
(
|
|
||||||
[32000, 32000, 32000],
|
|
||||||
[32000],
|
|
||||||
[-1],
|
|
||||||
[-1, -1, -1],
|
|
||||||
),
|
|
||||||
(
|
|
||||||
[32000, 32000, 32000],
|
|
||||||
[32000, 32000],
|
|
||||||
[-1],
|
|
||||||
[-1, 32000],
|
|
||||||
),
|
|
||||||
(
|
|
||||||
[32000, 32000, 32000],
|
|
||||||
[32000, 32000, 32000],
|
|
||||||
[-1],
|
|
||||||
[-1],
|
|
||||||
),
|
|
||||||
(
|
|
||||||
[9833, 28747, 32000, 32000, 32000, 9833, 28747, 32000, 32000, 918],
|
|
||||||
[28747, 32000],
|
|
||||||
[-1],
|
|
||||||
[9833, -1, 32000, 32000, 9833, -1, 32000, 918],
|
|
||||||
),
|
|
||||||
(
|
|
||||||
[9833, 28747, 32000, 32000, 32000, 9833, 28747, 32000, 32000, 918],
|
|
||||||
[28747, 32000, 32000, 32000],
|
|
||||||
[-1],
|
|
||||||
[9833, -1, 9833, 28747, 32000, 32000, 918],
|
|
||||||
),
|
|
||||||
(
|
|
||||||
[9833, 28747, 32000, 32000, 32000, 9833, 28747, 32000, 32000, 918],
|
|
||||||
[28747, 0, 32000],
|
|
||||||
[-1],
|
|
||||||
[9833, 28747, 32000, 32000, 32000, 9833, 28747, 32000, 32000, 918],
|
|
||||||
),
|
|
||||||
],
|
|
||||||
)
|
|
||||||
# yapf: enable
|
|
||||||
def test_replace_token_matches(token_ids, match_ids, new_ids, expected):
|
|
||||||
result = replace_token_matches(token_ids, match_ids, new_ids)
|
|
||||||
|
|
||||||
# Manually constructed results
|
|
||||||
assert result == expected
|
|
||||||
|
|
||||||
|
|
||||||
# yapf: disable
|
# yapf: disable
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
("prompt", "target_by_key", "expected_by_key"),
|
("prompt", "target_by_key", "expected_by_key"),
|
||||||
@@ -895,45 +837,6 @@ def test_find_mm_placeholders(
|
|||||||
assert result == expected
|
assert result == expected
|
||||||
|
|
||||||
|
|
||||||
def _dummy_elem(modality: str, key: str, size: int):
|
|
||||||
return MultiModalFieldElem(
|
|
||||||
modality=modality,
|
|
||||||
key=key,
|
|
||||||
data=torch.empty((size, ), dtype=torch.int8),
|
|
||||||
field=MultiModalSharedField(1),
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def _dummy_item(modality: str, size_by_key: dict[str, int]):
|
|
||||||
return MultiModalKwargsItem.from_elems([
|
|
||||||
_dummy_elem(modality, key, size) for key, size in size_by_key.items()
|
|
||||||
])
|
|
||||||
|
|
||||||
|
|
||||||
def _dummy_kw(size_by_key_modality: dict[str, dict[str, int]]):
|
|
||||||
return MultiModalKwargs.from_items([
|
|
||||||
_dummy_item(modality, size_by_key)
|
|
||||||
for modality, size_by_key in size_by_key_modality.items()
|
|
||||||
])
|
|
||||||
|
|
||||||
|
|
||||||
# yapf: disable
|
|
||||||
@pytest.mark.parametrize(
|
|
||||||
("item", "expected_size"),
|
|
||||||
[
|
|
||||||
(_dummy_item("a", {"a1": 100}), 100),
|
|
||||||
(_dummy_item("a", {"a1": 100, "a2": 110}), 210),
|
|
||||||
(_dummy_kw({"a": {"a1": 100, "a2": 110}, "b": {"b1": 120, "b2": 130}}), 460), # noqa: E501
|
|
||||||
],
|
|
||||||
)
|
|
||||||
# yapf: enable
|
|
||||||
def test_cache_item_size(item, expected_size):
|
|
||||||
cache = ProcessingCache.get_lru_cache(2048, type(item))
|
|
||||||
cache[""] = item
|
|
||||||
|
|
||||||
assert cache.currsize == expected_size
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("model_id", ["llava-hf/llava-v1.6-mistral-7b-hf"])
|
@pytest.mark.parametrize("model_id", ["llava-hf/llava-v1.6-mistral-7b-hf"])
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
("limit", "num_supported", "is_valid"),
|
("limit", "num_supported", "is_valid"),
|
||||||
@@ -950,7 +853,7 @@ def test_limit_mm_per_prompt_dummy(model_id, limit, num_supported, is_valid):
|
|||||||
tokenizer_mode="auto",
|
tokenizer_mode="auto",
|
||||||
trust_remote_code=False,
|
trust_remote_code=False,
|
||||||
seed=0,
|
seed=0,
|
||||||
dtype="auto",
|
dtype="half",
|
||||||
revision=None,
|
revision=None,
|
||||||
limit_mm_per_prompt=limit_mm_per_prompt,
|
limit_mm_per_prompt=limit_mm_per_prompt,
|
||||||
)
|
)
|
||||||
@@ -989,7 +892,7 @@ def test_limit_mm_per_prompt_apply(model_id, num_images, limit, is_valid):
|
|||||||
tokenizer_mode="auto",
|
tokenizer_mode="auto",
|
||||||
trust_remote_code=False,
|
trust_remote_code=False,
|
||||||
seed=0,
|
seed=0,
|
||||||
dtype="auto",
|
dtype="half",
|
||||||
revision=None,
|
revision=None,
|
||||||
limit_mm_per_prompt=limit_mm_per_prompt,
|
limit_mm_per_prompt=limit_mm_per_prompt,
|
||||||
)
|
)
|
||||||
@@ -1062,7 +965,7 @@ def test_hf_processor_kwargs(model_id, call_kwargs, expected_kwargs):
|
|||||||
tokenizer_mode="auto",
|
tokenizer_mode="auto",
|
||||||
trust_remote_code=False,
|
trust_remote_code=False,
|
||||||
seed=0,
|
seed=0,
|
||||||
dtype="auto",
|
dtype="half",
|
||||||
revision=None,
|
revision=None,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -314,7 +314,7 @@ def get_active_block_tables(block_tables, query_lens, seq_lens, block_size,
|
|||||||
|
|
||||||
# Test edge cases
|
# Test edge cases
|
||||||
(1, 128, 16, 1024, 4, 2, 16, False), # large decode batch
|
(1, 128, 16, 1024, 4, 2, 16, False), # large decode batch
|
||||||
(16, 4, 8, 1024, 4, 2, 128, True), # large prefill batch
|
(16, 4, 8, 8192, 48, 1, 128, True), # large prefill batch
|
||||||
(4, 12, 32, 2048, 16, 1, 32, True), # multi-head attention (MHA)
|
(4, 12, 32, 2048, 16, 1, 32, True), # multi-head attention (MHA)
|
||||||
(4, 12, 32, 2048, 16, 16, 32, True), # multi-query attention (MQA)
|
(4, 12, 32, 2048, 16, 16, 32, True), # multi-query attention (MQA)
|
||||||
])
|
])
|
||||||
|
|||||||
@@ -15,8 +15,6 @@ from ..utils import compare_two_settings, create_new_process_for_each_test
|
|||||||
|
|
||||||
models_4bit_to_test = [
|
models_4bit_to_test = [
|
||||||
("facebook/opt-125m", "quantize opt model inflight"),
|
("facebook/opt-125m", "quantize opt model inflight"),
|
||||||
("mistralai/Mistral-7B-Instruct-v0.3",
|
|
||||||
"quantize inflight model with both HF and Mistral format weights")
|
|
||||||
]
|
]
|
||||||
|
|
||||||
models_pre_qaunt_4bit_to_test = [
|
models_pre_qaunt_4bit_to_test = [
|
||||||
|
|||||||
@@ -166,7 +166,7 @@ def test_vllm_model_can_load_with_lora(vllm_runner, tmp_path):
|
|||||||
test_prompts = multilora_inference.create_test_prompts(lora_path)
|
test_prompts = multilora_inference.create_test_prompts(lora_path)
|
||||||
|
|
||||||
# Serialize model before deserializing and binding LoRA adapters
|
# Serialize model before deserializing and binding LoRA adapters
|
||||||
with vllm_runner(model_ref) as vllm_model:
|
with vllm_runner(model_ref, ) as vllm_model:
|
||||||
model_path = tmp_path / (model_ref + ".tensors")
|
model_path = tmp_path / (model_ref + ".tensors")
|
||||||
|
|
||||||
vllm_model.apply_model(
|
vllm_model.apply_model(
|
||||||
@@ -208,7 +208,7 @@ def test_load_without_tensorizer_load_format(vllm_runner):
|
|||||||
@pytest.mark.skipif(not is_curl_installed(), reason="cURL is not installed")
|
@pytest.mark.skipif(not is_curl_installed(), reason="cURL is not installed")
|
||||||
def test_openai_apiserver_with_tensorizer(vllm_runner, tmp_path):
|
def test_openai_apiserver_with_tensorizer(vllm_runner, tmp_path):
|
||||||
## Serialize model
|
## Serialize model
|
||||||
with vllm_runner(model_ref) as vllm_model:
|
with vllm_runner(model_ref, ) as vllm_model:
|
||||||
model_path = tmp_path / (model_ref + ".tensors")
|
model_path = tmp_path / (model_ref + ".tensors")
|
||||||
|
|
||||||
vllm_model.apply_model(
|
vllm_model.apply_model(
|
||||||
|
|||||||
@@ -34,9 +34,7 @@ with depyf.prepare_debug(temp_dir):
|
|||||||
|
|
||||||
# disable custom dispatcher, let Dynamo take over
|
# disable custom dispatcher, let Dynamo take over
|
||||||
# all the control
|
# all the control
|
||||||
llm = LLM(model="Qwen/Qwen2.5-1.5B-Instruct",
|
llm = LLM(model="google/gemma-2b",
|
||||||
max_model_len=512,
|
|
||||||
max_num_seqs=64,
|
|
||||||
enforce_eager=True,
|
enforce_eager=True,
|
||||||
compilation_config={"level": CompilationLevel.DYNAMO_AS_IS})
|
compilation_config={"level": CompilationLevel.DYNAMO_AS_IS})
|
||||||
outputs = llm.generate(prompts, sampling_params)
|
outputs = llm.generate(prompts, sampling_params)
|
||||||
@@ -46,51 +44,38 @@ with depyf.prepare_debug(temp_dir):
|
|||||||
print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
|
print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
|
||||||
assert generated_text.startswith(answer)
|
assert generated_text.startswith(answer)
|
||||||
|
|
||||||
compiled_codes = sorted(
|
compiled_code = sorted(
|
||||||
glob.glob(os.path.join(temp_dir, "__transformed_code*.py")))
|
glob.glob(os.path.join(temp_dir, "__transformed_code*.py")))
|
||||||
|
|
||||||
for i, compiled_code in enumerate(compiled_codes):
|
# we should only trigger Dynamo compilation three times:
|
||||||
print("{} file: {}".format(i + 1, compiled_code))
|
# one for the profiling phase without kv cache
|
||||||
|
# one for the prefill phase with symbolic shapes
|
||||||
# We should only trigger Dynamo compilation 4 times:
|
# one for the decode phase with symbolic shapes
|
||||||
# 1. forward pass (symbolic)
|
|
||||||
# 2. compute_logits (symbolic)
|
|
||||||
# 3. forward pass (shape 16)
|
|
||||||
# 4. forward pass (shape 32)
|
|
||||||
# and later calls should not trigger Dynamo compilation again.
|
# and later calls should not trigger Dynamo compilation again.
|
||||||
# NOTE: It might still trigger XLA compilation.
|
# NOTE: it might still trigger XLA compilation.
|
||||||
|
|
||||||
# Check we have 4 compiled codes
|
# check we have three compiled codes
|
||||||
assert len(compiled_codes) == 4
|
# this is the assumption when we use the custom dispatcher
|
||||||
|
assert len(compiled_code) == 3
|
||||||
|
|
||||||
kv_cache_prefix = "kv_cache"
|
# check all the compilations are as expected
|
||||||
attn_prefix = "ragged_paged_attention"
|
compiled_fn = sorted(
|
||||||
|
|
||||||
# Check all the compilations are as expected
|
|
||||||
compiled_fns = sorted(
|
|
||||||
glob.glob(os.path.join(temp_dir, "__compiled_fn*Captured*.py")))
|
glob.glob(os.path.join(temp_dir, "__compiled_fn*Captured*.py")))
|
||||||
|
|
||||||
for i, compiled_fn in enumerate(compiled_fns):
|
# the first compilation is the profiling phase,
|
||||||
print("{} file: {}".format(i + 1, compiled_fn))
|
# it should not have any kv cache
|
||||||
|
with open(compiled_fn[0]) as f:
|
||||||
# The first compilation is symbolic, so it should not have any kv_caches
|
|
||||||
with open(compiled_fns[0]) as f:
|
|
||||||
content = f.read()
|
content = f.read()
|
||||||
assert kv_cache_prefix not in content
|
assert "kv_caches" not in content
|
||||||
|
|
||||||
# The second compilation is symbolic, so it should not have any kv_caches
|
# the second compilation is the prefill phase,
|
||||||
with open(compiled_fns[1]) as f:
|
# it should have kv cache and the flash_attention op
|
||||||
|
with open(compiled_fn[1]) as f:
|
||||||
content = f.read()
|
content = f.read()
|
||||||
assert kv_cache_prefix not in content
|
assert "kv_caches" in content and "torch.ops.xla.flash_attention" in content
|
||||||
|
|
||||||
# The third compilation is shape 16, so it should have kv_caches and the
|
# the third compilation is the decode phase,
|
||||||
# ragged_paged_attention
|
# it should have kv cache and the paged_attention op
|
||||||
with open(compiled_fns[2]) as f:
|
with open(compiled_fn[2]) as f:
|
||||||
content = f.read()
|
content = f.read()
|
||||||
assert (kv_cache_prefix in content and attn_prefix in content)
|
assert "kv_caches" in content and "torch.ops.xla.paged_attention" in content
|
||||||
|
|
||||||
# The fourth compilation is shape 32, so it should have kv_caches and the
|
|
||||||
# ragged_paged_attention
|
|
||||||
with open(compiled_fns[3]) as f:
|
|
||||||
content = f.read()
|
|
||||||
assert (kv_cache_prefix in content and attn_prefix in content)
|
|
||||||
|
|||||||
@@ -14,17 +14,12 @@ from ..utils import compare_two_settings
|
|||||||
def test_custom_dispatcher(monkeypatch: pytest.MonkeyPatch):
|
def test_custom_dispatcher(monkeypatch: pytest.MonkeyPatch):
|
||||||
with monkeypatch.context() as m:
|
with monkeypatch.context() as m:
|
||||||
m.setenv("VLLM_RPC_TIMEOUT", "30000")
|
m.setenv("VLLM_RPC_TIMEOUT", "30000")
|
||||||
compare_two_settings("Qwen/Qwen2.5-1.5B-Instruct",
|
compare_two_settings(
|
||||||
arg1=[
|
"google/gemma-2b",
|
||||||
"--max-model-len=256",
|
arg1=[
|
||||||
"--max-num-seqs=32",
|
"--enforce-eager",
|
||||||
"--enforce-eager",
|
f"-O{CompilationLevel.DYNAMO_ONCE}",
|
||||||
f"-O{CompilationLevel.DYNAMO_ONCE}",
|
],
|
||||||
],
|
arg2=["--enforce-eager", f"-O{CompilationLevel.DYNAMO_AS_IS}"],
|
||||||
arg2=[
|
env1={},
|
||||||
"--max-model-len=256", "--max-num-seqs=32",
|
env2={})
|
||||||
"--enforce-eager",
|
|
||||||
f"-O{CompilationLevel.DYNAMO_AS_IS}"
|
|
||||||
],
|
|
||||||
env1={},
|
|
||||||
env2={})
|
|
||||||
|
|||||||
@@ -76,18 +76,21 @@ async def generate(engine: AsyncLLM,
|
|||||||
|
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
"output_kind", [RequestOutputKind.DELTA, RequestOutputKind.FINAL_ONLY])
|
"output_kind", [RequestOutputKind.DELTA, RequestOutputKind.FINAL_ONLY])
|
||||||
@pytest.mark.parametrize("engine_args,prompt",
|
@pytest.mark.parametrize("engine_args_and_prompt",
|
||||||
[(TEXT_ENGINE_ARGS, TEXT_PROMPT),
|
[(TEXT_ENGINE_ARGS, TEXT_PROMPT),
|
||||||
(VISION_ENGINE_ARGS, VISION_PROMPT)])
|
(VISION_ENGINE_ARGS, VISION_PROMPT)])
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_load(monkeypatch: pytest.MonkeyPatch,
|
async def test_load(
|
||||||
output_kind: RequestOutputKind,
|
monkeypatch: pytest.MonkeyPatch,
|
||||||
engine_args: AsyncEngineArgs, prompt: PromptType):
|
output_kind: RequestOutputKind,
|
||||||
|
engine_args_and_prompt: tuple[AsyncEngineArgs, PromptType],
|
||||||
|
):
|
||||||
# TODO(rickyx): Remove monkeypatch once we have a better way to test V1
|
# TODO(rickyx): Remove monkeypatch once we have a better way to test V1
|
||||||
# so that in the future when we switch, we don't have to change all the
|
# so that in the future when we switch, we don't have to change all the
|
||||||
# tests.
|
# tests.
|
||||||
with monkeypatch.context() as m, ExitStack() as after:
|
with monkeypatch.context() as m, ExitStack() as after:
|
||||||
m.setenv("VLLM_USE_V1", "1")
|
m.setenv("VLLM_USE_V1", "1")
|
||||||
|
engine_args, prompt = engine_args_and_prompt
|
||||||
|
|
||||||
engine = AsyncLLM.from_engine_args(engine_args)
|
engine = AsyncLLM.from_engine_args(engine_args)
|
||||||
after.callback(engine.shutdown)
|
after.callback(engine.shutdown)
|
||||||
@@ -121,16 +124,18 @@ async def test_load(monkeypatch: pytest.MonkeyPatch,
|
|||||||
|
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
"output_kind", [RequestOutputKind.DELTA, RequestOutputKind.FINAL_ONLY])
|
"output_kind", [RequestOutputKind.DELTA, RequestOutputKind.FINAL_ONLY])
|
||||||
@pytest.mark.parametrize("engine_args,prompt",
|
@pytest.mark.parametrize("engine_args_and_prompt",
|
||||||
[(TEXT_ENGINE_ARGS, TEXT_PROMPT),
|
[(TEXT_ENGINE_ARGS, TEXT_PROMPT),
|
||||||
(VISION_ENGINE_ARGS, VISION_PROMPT)])
|
(VISION_ENGINE_ARGS, VISION_PROMPT)])
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_abort(monkeypatch: pytest.MonkeyPatch,
|
async def test_abort(monkeypatch: pytest.MonkeyPatch,
|
||||||
output_kind: RequestOutputKind,
|
output_kind: RequestOutputKind,
|
||||||
engine_args: AsyncEngineArgs, prompt: PromptType):
|
engine_args_and_prompt: tuple[AsyncEngineArgs,
|
||||||
|
PromptType]):
|
||||||
|
|
||||||
with monkeypatch.context() as m, ExitStack() as after:
|
with monkeypatch.context() as m, ExitStack() as after:
|
||||||
m.setenv("VLLM_USE_V1", "1")
|
m.setenv("VLLM_USE_V1", "1")
|
||||||
|
engine_args, prompt = engine_args_and_prompt
|
||||||
|
|
||||||
engine = AsyncLLM.from_engine_args(engine_args)
|
engine = AsyncLLM.from_engine_args(engine_args)
|
||||||
after.callback(engine.shutdown)
|
after.callback(engine.shutdown)
|
||||||
@@ -188,15 +193,17 @@ async def test_abort(monkeypatch: pytest.MonkeyPatch,
|
|||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("n", [1, 3])
|
@pytest.mark.parametrize("n", [1, 3])
|
||||||
@pytest.mark.parametrize("engine_args,prompt",
|
@pytest.mark.parametrize("engine_args_and_prompt",
|
||||||
[(TEXT_ENGINE_ARGS, TEXT_PROMPT),
|
[(TEXT_ENGINE_ARGS, TEXT_PROMPT),
|
||||||
(VISION_ENGINE_ARGS, VISION_PROMPT)])
|
(VISION_ENGINE_ARGS, VISION_PROMPT)])
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_finished_flag(monkeypatch: pytest.MonkeyPatch, n: int,
|
async def test_finished_flag(monkeypatch, n: int,
|
||||||
engine_args: AsyncEngineArgs, prompt: PromptType):
|
engine_args_and_prompt: tuple[AsyncEngineArgs,
|
||||||
|
PromptType]):
|
||||||
|
|
||||||
with monkeypatch.context() as m, ExitStack() as after:
|
with monkeypatch.context() as m, ExitStack() as after:
|
||||||
m.setenv("VLLM_USE_V1", "1")
|
m.setenv("VLLM_USE_V1", "1")
|
||||||
|
engine_args, prompt = engine_args_and_prompt
|
||||||
|
|
||||||
engine = AsyncLLM.from_engine_args(engine_args)
|
engine = AsyncLLM.from_engine_args(engine_args)
|
||||||
after.callback(engine.shutdown)
|
after.callback(engine.shutdown)
|
||||||
|
|||||||
@@ -50,7 +50,7 @@ def _get_test_sampling_params(
|
|||||||
"""Generate random sampling params for a batch."""
|
"""Generate random sampling params for a batch."""
|
||||||
|
|
||||||
def get_mostly_n_gt1() -> int:
|
def get_mostly_n_gt1() -> int:
|
||||||
r"""Mostly n \in [2,20], ~1/3 n=1"""
|
"""Mostly n \in [2,20], ~1/3 n=1"""
|
||||||
x = random.randint(0, 28)
|
x = random.randint(0, 28)
|
||||||
if x < 10:
|
if x < 10:
|
||||||
return 1
|
return 1
|
||||||
|
|||||||
@@ -6,23 +6,20 @@ import torch
|
|||||||
import torch.nn.functional as F
|
import torch.nn.functional as F
|
||||||
|
|
||||||
from vllm.v1.sample.metadata import SamplingMetadata
|
from vllm.v1.sample.metadata import SamplingMetadata
|
||||||
from vllm.v1.sample.rejection_sampler import (PLACEHOLDER_TOKEN_ID,
|
from vllm.v1.sample.rejection_sampler import INVALID_TOKEN_ID, RejectionSampler
|
||||||
RejectionSampler)
|
|
||||||
from vllm.v1.spec_decode.metadata import SpecDecodeMetadata
|
|
||||||
|
|
||||||
DEVICE = "cuda"
|
DEVICE = "cpu"
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def rejection_sampler():
|
def sampler():
|
||||||
return RejectionSampler()
|
return RejectionSampler()
|
||||||
|
|
||||||
|
|
||||||
def create_logits_tensor(output_token_ids: list[list[int]],
|
def create_logits_tensor(token_ids: list[list[int]],
|
||||||
vocab_size: int = 100) -> torch.Tensor:
|
vocab_size: int = 100) -> torch.Tensor:
|
||||||
"""Helper function to create logits tensor that
|
"""Helper function to create logits tensor that
|
||||||
will produce desired token ids on argmax"""
|
will produce desired token ids on argmax"""
|
||||||
token_ids = [tokens[:-1] for tokens in output_token_ids]
|
|
||||||
num_total_tokens = sum(len(tokens) for tokens in token_ids)
|
num_total_tokens = sum(len(tokens) for tokens in token_ids)
|
||||||
logits = torch.full((num_total_tokens, vocab_size), -100.0, device=DEVICE)
|
logits = torch.full((num_total_tokens, vocab_size), -100.0, device=DEVICE)
|
||||||
start_loc = 0
|
start_loc = 0
|
||||||
@@ -34,22 +31,15 @@ def create_logits_tensor(output_token_ids: list[list[int]],
|
|||||||
|
|
||||||
|
|
||||||
def create_sampling_metadata(
|
def create_sampling_metadata(
|
||||||
all_greedy: bool,
|
all_greedy: bool,
|
||||||
temperature: Optional[torch.Tensor] = None,
|
generators: Optional[dict[int, Any]] = None) -> SamplingMetadata:
|
||||||
generators: Optional[dict[int, Any]] = None,
|
|
||||||
) -> SamplingMetadata:
|
|
||||||
"""Create a v1 sampling metadata object with all_greedy set
|
"""Create a v1 sampling metadata object with all_greedy set
|
||||||
to the given value. Either all greedy or all random sampling
|
to the given value. Either all greedy or all random sampling
|
||||||
is used.
|
is used.
|
||||||
"""
|
"""
|
||||||
generators = generators or {}
|
generators = generators or {}
|
||||||
if all_greedy:
|
|
||||||
temperature = None
|
|
||||||
else:
|
|
||||||
assert temperature is not None
|
|
||||||
|
|
||||||
return SamplingMetadata(
|
return SamplingMetadata(
|
||||||
temperature=temperature,
|
temperature=torch.tensor([]),
|
||||||
all_greedy=all_greedy,
|
all_greedy=all_greedy,
|
||||||
all_random=not all_greedy,
|
all_random=not all_greedy,
|
||||||
top_p=None,
|
top_p=None,
|
||||||
@@ -71,7 +61,7 @@ def create_sampling_metadata(
|
|||||||
|
|
||||||
|
|
||||||
########################### Tests for Greedy Sampling ###################
|
########################### Tests for Greedy Sampling ###################
|
||||||
def test_perfect_match(rejection_sampler):
|
def test_perfect_match(sampler):
|
||||||
"""Test when output tokens perfectly match speculated tokens"""
|
"""Test when output tokens perfectly match speculated tokens"""
|
||||||
spec_tokens = [[1, 2, 3]]
|
spec_tokens = [[1, 2, 3]]
|
||||||
output_tokens = [[1, 2, 3, 4]] # 4 is the bonus token
|
output_tokens = [[1, 2, 3, 4]] # 4 is the bonus token
|
||||||
@@ -80,23 +70,15 @@ def test_perfect_match(rejection_sampler):
|
|||||||
logits = create_logits_tensor(output_tokens)
|
logits = create_logits_tensor(output_tokens)
|
||||||
bonus_token_tensor = torch.tensor([output_tokens[0][-1]],
|
bonus_token_tensor = torch.tensor([output_tokens[0][-1]],
|
||||||
device=logits.device)
|
device=logits.device)
|
||||||
spec_decode_metadata = SpecDecodeMetadata.make_dummy(spec_tokens,
|
|
||||||
device=logits.device)
|
|
||||||
|
|
||||||
output = rejection_sampler(
|
output = sampler(spec_tokens, None, bonus_token_tensor, logits, metadata)
|
||||||
spec_decode_metadata,
|
|
||||||
draft_probs=None,
|
|
||||||
target_logits=logits,
|
|
||||||
bonus_token_ids=bonus_token_tensor,
|
|
||||||
sampling_metadata=metadata,
|
|
||||||
)
|
|
||||||
expected = torch.tensor([[1, 2, 3, 4]],
|
expected = torch.tensor([[1, 2, 3, 4]],
|
||||||
dtype=torch.int,
|
dtype=torch.int,
|
||||||
device=logits.device)
|
device=logits.device)
|
||||||
assert torch.equal(output, expected)
|
assert torch.equal(output, expected)
|
||||||
|
|
||||||
|
|
||||||
def test_early_mismatch(rejection_sampler):
|
def test_early_mismatch(sampler):
|
||||||
"""Test when there's an early mismatch in tokens"""
|
"""Test when there's an early mismatch in tokens"""
|
||||||
spec_tokens = [[1, 2, 3]]
|
spec_tokens = [[1, 2, 3]]
|
||||||
output_tokens = [[1, 5, 3, 4]] # Mismatch at position 1
|
output_tokens = [[1, 5, 3, 4]] # Mismatch at position 1
|
||||||
@@ -105,25 +87,15 @@ def test_early_mismatch(rejection_sampler):
|
|||||||
logits = create_logits_tensor(output_tokens)
|
logits = create_logits_tensor(output_tokens)
|
||||||
bonus_token_tensor = torch.tensor([output_tokens[0][-1]],
|
bonus_token_tensor = torch.tensor([output_tokens[0][-1]],
|
||||||
device=logits.device)
|
device=logits.device)
|
||||||
spec_decode_metadata = SpecDecodeMetadata.make_dummy(spec_tokens,
|
|
||||||
device=logits.device)
|
|
||||||
|
|
||||||
output = rejection_sampler(
|
output = sampler(spec_tokens, None, bonus_token_tensor, logits, metadata)
|
||||||
spec_decode_metadata,
|
expected = torch.tensor([[1, 5, INVALID_TOKEN_ID, INVALID_TOKEN_ID]],
|
||||||
draft_probs=None,
|
dtype=torch.int,
|
||||||
target_logits=logits,
|
device=logits.device)
|
||||||
bonus_token_ids=bonus_token_tensor,
|
|
||||||
sampling_metadata=metadata,
|
|
||||||
)
|
|
||||||
expected = torch.tensor(
|
|
||||||
[[1, 5, PLACEHOLDER_TOKEN_ID, PLACEHOLDER_TOKEN_ID]],
|
|
||||||
dtype=torch.int,
|
|
||||||
device=logits.device,
|
|
||||||
)
|
|
||||||
assert torch.equal(output, expected)
|
assert torch.equal(output, expected)
|
||||||
|
|
||||||
|
|
||||||
def test_multiple_sequences(rejection_sampler):
|
def test_multiple_sequences(sampler):
|
||||||
"""Test handling multiple sequences of speculated tokens"""
|
"""Test handling multiple sequences of speculated tokens"""
|
||||||
spec_tokens = [[1, 2], [3]]
|
spec_tokens = [[1, 2], [3]]
|
||||||
output_tokens = [[1, 2, 5], [3,
|
output_tokens = [[1, 2, 5], [3,
|
||||||
@@ -133,23 +105,15 @@ def test_multiple_sequences(rejection_sampler):
|
|||||||
logits = create_logits_tensor(output_tokens)
|
logits = create_logits_tensor(output_tokens)
|
||||||
bonus_token_tensor = torch.tensor(
|
bonus_token_tensor = torch.tensor(
|
||||||
[output_tokens[0][-1], output_tokens[1][-1]], device=logits.device)
|
[output_tokens[0][-1], output_tokens[1][-1]], device=logits.device)
|
||||||
spec_decode_metadata = SpecDecodeMetadata.make_dummy(spec_tokens,
|
|
||||||
device=logits.device)
|
|
||||||
|
|
||||||
output = rejection_sampler(
|
output = sampler(spec_tokens, None, bonus_token_tensor, logits, metadata)
|
||||||
spec_decode_metadata,
|
expected = torch.tensor([[1, 2, 5], [3, 4, INVALID_TOKEN_ID]],
|
||||||
draft_probs=None,
|
|
||||||
target_logits=logits,
|
|
||||||
bonus_token_ids=bonus_token_tensor,
|
|
||||||
sampling_metadata=metadata,
|
|
||||||
)
|
|
||||||
expected = torch.tensor([[1, 2, 5], [3, 4, PLACEHOLDER_TOKEN_ID]],
|
|
||||||
dtype=torch.int,
|
dtype=torch.int,
|
||||||
device=logits.device)
|
device=logits.device)
|
||||||
assert torch.equal(output, expected)
|
assert torch.equal(output, expected)
|
||||||
|
|
||||||
|
|
||||||
def test_single_token_sequence(rejection_sampler):
|
def test_single_token_sequence(sampler):
|
||||||
"""Test handling sequences with single token"""
|
"""Test handling sequences with single token"""
|
||||||
spec_tokens = [[1]]
|
spec_tokens = [[1]]
|
||||||
output_tokens = [[1, 2]] # Single token with bonus token 2
|
output_tokens = [[1, 2]] # Single token with bonus token 2
|
||||||
@@ -158,21 +122,13 @@ def test_single_token_sequence(rejection_sampler):
|
|||||||
logits = create_logits_tensor(output_tokens)
|
logits = create_logits_tensor(output_tokens)
|
||||||
bonus_token_tensor = torch.tensor([output_tokens[0][-1]],
|
bonus_token_tensor = torch.tensor([output_tokens[0][-1]],
|
||||||
device=logits.device)
|
device=logits.device)
|
||||||
spec_decode_metadata = SpecDecodeMetadata.make_dummy(spec_tokens,
|
|
||||||
device=logits.device)
|
|
||||||
|
|
||||||
output = rejection_sampler(
|
output = sampler(spec_tokens, None, bonus_token_tensor, logits, metadata)
|
||||||
spec_decode_metadata,
|
|
||||||
draft_probs=None,
|
|
||||||
target_logits=logits,
|
|
||||||
bonus_token_ids=bonus_token_tensor,
|
|
||||||
sampling_metadata=metadata,
|
|
||||||
)
|
|
||||||
expected = torch.tensor([[1, 2]], dtype=torch.int, device=logits.device)
|
expected = torch.tensor([[1, 2]], dtype=torch.int, device=logits.device)
|
||||||
assert torch.equal(output, expected)
|
assert torch.equal(output, expected)
|
||||||
|
|
||||||
|
|
||||||
def test_empty_sequence(rejection_sampler):
|
def test_empty_sequence(sampler):
|
||||||
"""Test handling empty sequence of speculated tokens"""
|
"""Test handling empty sequence of speculated tokens"""
|
||||||
spec_tokens: list[list[int]] = [[]]
|
spec_tokens: list[list[int]] = [[]]
|
||||||
output_tokens = [[5]] # Just the bonus token
|
output_tokens = [[5]] # Just the bonus token
|
||||||
@@ -181,21 +137,13 @@ def test_empty_sequence(rejection_sampler):
|
|||||||
logits = create_logits_tensor(output_tokens)
|
logits = create_logits_tensor(output_tokens)
|
||||||
bonus_token_tensor = torch.tensor([output_tokens[0][-1]],
|
bonus_token_tensor = torch.tensor([output_tokens[0][-1]],
|
||||||
device=logits.device)
|
device=logits.device)
|
||||||
spec_decode_metadata = SpecDecodeMetadata.make_dummy(spec_tokens,
|
|
||||||
device=logits.device)
|
|
||||||
|
|
||||||
output = rejection_sampler(
|
output = sampler(spec_tokens, None, bonus_token_tensor, logits, metadata)
|
||||||
spec_decode_metadata,
|
|
||||||
draft_probs=None,
|
|
||||||
target_logits=logits,
|
|
||||||
bonus_token_ids=bonus_token_tensor,
|
|
||||||
sampling_metadata=metadata,
|
|
||||||
)
|
|
||||||
expected = torch.tensor([[5]], dtype=torch.int, device=logits.device)
|
expected = torch.tensor([[5]], dtype=torch.int, device=logits.device)
|
||||||
assert torch.equal(output, expected)
|
assert torch.equal(output, expected)
|
||||||
|
|
||||||
|
|
||||||
def test_multiple_mismatches(rejection_sampler):
|
def test_multiple_mismatches(sampler):
|
||||||
"""Test handling multiple sequences with mismatches"""
|
"""Test handling multiple sequences with mismatches"""
|
||||||
spec_tokens = [[1, 2, 3], [4, 5, 6]]
|
spec_tokens = [[1, 2, 3], [4, 5, 6]]
|
||||||
output_tokens = [[1, 2, 7, 6], [4, 8, 6,
|
output_tokens = [[1, 2, 7, 6], [4, 8, 6,
|
||||||
@@ -205,22 +153,12 @@ def test_multiple_mismatches(rejection_sampler):
|
|||||||
logits = create_logits_tensor(output_tokens)
|
logits = create_logits_tensor(output_tokens)
|
||||||
bonus_token_tensor = torch.tensor(
|
bonus_token_tensor = torch.tensor(
|
||||||
[output_tokens[0][-1], output_tokens[1][-1]], device=logits.device)
|
[output_tokens[0][-1], output_tokens[1][-1]], device=logits.device)
|
||||||
spec_decode_metadata = SpecDecodeMetadata.make_dummy(spec_tokens,
|
|
||||||
device=logits.device)
|
|
||||||
|
|
||||||
output = rejection_sampler(
|
output = sampler(spec_tokens, None, bonus_token_tensor, logits, metadata)
|
||||||
spec_decode_metadata,
|
expected = torch.tensor([[1, 2, 7, INVALID_TOKEN_ID],
|
||||||
draft_probs=None,
|
[4, 8, INVALID_TOKEN_ID, INVALID_TOKEN_ID]],
|
||||||
target_logits=logits,
|
dtype=torch.int,
|
||||||
bonus_token_ids=bonus_token_tensor,
|
device=logits.device)
|
||||||
sampling_metadata=metadata,
|
|
||||||
)
|
|
||||||
expected = torch.tensor(
|
|
||||||
[[1, 2, 7, PLACEHOLDER_TOKEN_ID],
|
|
||||||
[4, 8, PLACEHOLDER_TOKEN_ID, PLACEHOLDER_TOKEN_ID]],
|
|
||||||
dtype=torch.int,
|
|
||||||
device=logits.device,
|
|
||||||
)
|
|
||||||
assert torch.equal(output, expected)
|
assert torch.equal(output, expected)
|
||||||
|
|
||||||
|
|
||||||
@@ -228,27 +166,18 @@ def test_multiple_mismatches(rejection_sampler):
|
|||||||
"spec_tokens,output_tokens,expected",
|
"spec_tokens,output_tokens,expected",
|
||||||
[
|
[
|
||||||
([[1, 2]], [[1, 2, 3]], [[1, 2, 3]]), # Perfect match with bonus
|
([[1, 2]], [[1, 2, 3]], [[1, 2, 3]]), # Perfect match with bonus
|
||||||
([[1]], [[2, 3]], [[2, PLACEHOLDER_TOKEN_ID]]), # First mismatch
|
([[1]], [[2, 3]], [[2, INVALID_TOKEN_ID]]), # First mismatch
|
||||||
([[1, 2], [3, 4]], [[1, 5, 6], [3, 4, 7]],
|
([[1, 2], [3, 4]], [[1, 5, 6], [3, 4, 7]],
|
||||||
[[1, 5, PLACEHOLDER_TOKEN_ID], [3, 4, 7]]), # Mixed matches
|
[[1, 5, INVALID_TOKEN_ID], [3, 4, 7]]), # Mixed matches
|
||||||
])
|
])
|
||||||
def test_parametrized_cases(rejection_sampler, spec_tokens, output_tokens,
|
def test_parametrized_cases(sampler, spec_tokens, output_tokens, expected):
|
||||||
expected):
|
|
||||||
"""Parametrized test for various matching scenarios"""
|
"""Parametrized test for various matching scenarios"""
|
||||||
metadata = create_sampling_metadata(all_greedy=True)
|
metadata = create_sampling_metadata(all_greedy=True)
|
||||||
logits = create_logits_tensor(output_tokens)
|
logits = create_logits_tensor(output_tokens)
|
||||||
bonus_token_tensor = torch.tensor([tokens[-1] for tokens in output_tokens],
|
bonus_token_tensor = torch.tensor([tokens[-1] for tokens in output_tokens],
|
||||||
device=logits.device)
|
device=logits.device)
|
||||||
spec_decode_metadata = SpecDecodeMetadata.make_dummy(spec_tokens,
|
|
||||||
device=logits.device)
|
|
||||||
|
|
||||||
output = rejection_sampler(
|
output = sampler(spec_tokens, None, bonus_token_tensor, logits, metadata)
|
||||||
spec_decode_metadata,
|
|
||||||
draft_probs=None,
|
|
||||||
target_logits=logits,
|
|
||||||
bonus_token_ids=bonus_token_tensor,
|
|
||||||
sampling_metadata=metadata,
|
|
||||||
)
|
|
||||||
expected_tensor = torch.tensor(expected,
|
expected_tensor = torch.tensor(expected,
|
||||||
dtype=torch.int,
|
dtype=torch.int,
|
||||||
device=logits.device)
|
device=logits.device)
|
||||||
@@ -261,31 +190,21 @@ def test_parametrized_cases(rejection_sampler, spec_tokens, output_tokens,
|
|||||||
@pytest.mark.parametrize("batch_size", [1, 4, 8])
|
@pytest.mark.parametrize("batch_size", [1, 4, 8])
|
||||||
@pytest.mark.parametrize("frac_seeded", [0.0, 0.5])
|
@pytest.mark.parametrize("frac_seeded", [0.0, 0.5])
|
||||||
@pytest.mark.parametrize("n_rep", [20])
|
@pytest.mark.parametrize("n_rep", [20])
|
||||||
def test_deterministic_when_seeded(
|
def test_deterministic_when_seeded(sampler, k: int, vocab_size: int,
|
||||||
rejection_sampler,
|
batch_size: int, frac_seeded: float,
|
||||||
k: int,
|
n_rep: int):
|
||||||
vocab_size: int,
|
draft_probs = torch.rand(batch_size, k, vocab_size, dtype=torch.float32)
|
||||||
batch_size: int,
|
target_probs = torch.rand(batch_size * (k + 1),
|
||||||
frac_seeded: float,
|
vocab_size,
|
||||||
n_rep: int,
|
dtype=torch.float32)
|
||||||
):
|
|
||||||
num_tokens = batch_size * k
|
|
||||||
draft_probs = torch.rand(num_tokens,
|
|
||||||
vocab_size,
|
|
||||||
dtype=torch.float32,
|
|
||||||
device=DEVICE)
|
|
||||||
draft_probs = F.softmax(draft_probs, dim=-1)
|
|
||||||
target_logits = torch.rand_like(draft_probs)
|
|
||||||
bonus_token_ids = torch.randint(low=0,
|
bonus_token_ids = torch.randint(low=0,
|
||||||
high=vocab_size,
|
high=vocab_size,
|
||||||
size=(batch_size, 1),
|
size=(batch_size, 1),
|
||||||
dtype=torch.int64,
|
dtype=torch.int64)
|
||||||
device=DEVICE)
|
|
||||||
draft_token_ids = torch.randint(low=0,
|
draft_token_ids = torch.randint(low=0,
|
||||||
high=vocab_size,
|
high=vocab_size,
|
||||||
size=(batch_size, k),
|
size=(batch_size, k),
|
||||||
dtype=torch.int64,
|
dtype=torch.int64)
|
||||||
device=DEVICE)
|
|
||||||
|
|
||||||
seeded_mask = torch.rand(batch_size, dtype=torch.float32) <= frac_seeded
|
seeded_mask = torch.rand(batch_size, dtype=torch.float32) <= frac_seeded
|
||||||
|
|
||||||
@@ -296,21 +215,10 @@ def test_deterministic_when_seeded(
|
|||||||
for i in range(batch_size) if seeded_mask[i]
|
for i in range(batch_size) if seeded_mask[i]
|
||||||
}
|
}
|
||||||
|
|
||||||
temperature = torch.ones(batch_size,
|
|
||||||
dtype=torch.float32,
|
|
||||||
device=DEVICE)
|
|
||||||
sampling_metadata = create_sampling_metadata(all_greedy=False,
|
sampling_metadata = create_sampling_metadata(all_greedy=False,
|
||||||
temperature=temperature,
|
|
||||||
generators=seeded_seqs)
|
generators=seeded_seqs)
|
||||||
spec_decode_metadata = SpecDecodeMetadata.make_dummy(
|
rep_result = sampler(draft_token_ids.tolist(), draft_probs,
|
||||||
draft_token_ids.tolist(), device=DEVICE)
|
bonus_token_ids, target_probs, sampling_metadata)
|
||||||
rep_result = rejection_sampler(
|
|
||||||
spec_decode_metadata,
|
|
||||||
draft_probs=draft_probs,
|
|
||||||
target_logits=target_logits,
|
|
||||||
bonus_token_ids=bonus_token_ids,
|
|
||||||
sampling_metadata=sampling_metadata,
|
|
||||||
)
|
|
||||||
|
|
||||||
results.append(rep_result)
|
results.append(rep_result)
|
||||||
|
|
||||||
@@ -349,10 +257,10 @@ def test_rejection_sampling_approximates_target_distribution():
|
|||||||
num_reference_probs = 100
|
num_reference_probs = 100
|
||||||
|
|
||||||
# Prepare draft, target, and reference probability distributions
|
# Prepare draft, target, and reference probability distributions
|
||||||
draft_probs = F.softmax(torch.rand(vocab_size, dtype=torch.float32),
|
draft_probs, target_probs = (F.softmax(
|
||||||
dim=-1)
|
torch.rand(vocab_size, dtype=torch.float32),
|
||||||
target_logits = torch.rand(vocab_size, dtype=torch.float32)
|
dim=-1,
|
||||||
target_probs = F.softmax(target_logits, dim=-1)
|
) for _ in range(2))
|
||||||
reference_probs = F.softmax(
|
reference_probs = F.softmax(
|
||||||
torch.rand(num_reference_probs, vocab_size, dtype=torch.float32),
|
torch.rand(num_reference_probs, vocab_size, dtype=torch.float32),
|
||||||
dim=-1,
|
dim=-1,
|
||||||
@@ -365,7 +273,7 @@ def test_rejection_sampling_approximates_target_distribution():
|
|||||||
for num_samples in sample_sizes:
|
for num_samples in sample_sizes:
|
||||||
# Sample using rejection sampling.
|
# Sample using rejection sampling.
|
||||||
rej_sample_probs = estimate_rejection_sampling_pdf(
|
rej_sample_probs = estimate_rejection_sampling_pdf(
|
||||||
draft_probs, target_logits, k, vocab_size, num_samples)
|
draft_probs, target_probs, k, vocab_size, num_samples)
|
||||||
rej_sample_probs = rej_sample_probs.to(DEVICE)
|
rej_sample_probs = rej_sample_probs.to(DEVICE)
|
||||||
|
|
||||||
# Average distance from reference probs.
|
# Average distance from reference probs.
|
||||||
@@ -405,7 +313,7 @@ def get_ratio_first_to_last(elements: list[float]) -> float:
|
|||||||
|
|
||||||
def estimate_rejection_sampling_pdf(
|
def estimate_rejection_sampling_pdf(
|
||||||
draft_probs: torch.Tensor,
|
draft_probs: torch.Tensor,
|
||||||
target_logits: torch.Tensor,
|
target_probs: torch.Tensor,
|
||||||
k: int,
|
k: int,
|
||||||
vocab_size: int,
|
vocab_size: int,
|
||||||
num_samples: int,
|
num_samples: int,
|
||||||
@@ -415,44 +323,35 @@ def estimate_rejection_sampling_pdf(
|
|||||||
|
|
||||||
Args:
|
Args:
|
||||||
draft_probs: Draft probability distribution.
|
draft_probs: Draft probability distribution.
|
||||||
target_logits: Target logits.
|
target_probs: Target probability distribution.
|
||||||
num_samples: Number of samples to draw.
|
num_samples: Number of samples to draw.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Estimated probability distribution of the output tokens.
|
Estimated probability distribution of the output tokens.
|
||||||
"""
|
"""
|
||||||
rejection_sampler = RejectionSampler()
|
sampler = RejectionSampler()
|
||||||
num_tokens = num_samples * k
|
# Repeat draft probs num_samples times.
|
||||||
# Repeat draft probs num_samples * k times.
|
|
||||||
draft_probs = draft_probs.reshape(1, 1,
|
draft_probs = draft_probs.reshape(1, 1,
|
||||||
vocab_size).repeat(num_samples, k, 1)
|
vocab_size).repeat(num_samples, k, 1)
|
||||||
|
|
||||||
# Repeat target probs num_tokens times.
|
# Repeat target probs num_samples * (k + 1) times.
|
||||||
target_logits = target_logits.reshape(1, vocab_size).repeat(num_tokens, 1)
|
target_probs = target_probs.reshape(1, 1, vocab_size).repeat(
|
||||||
|
num_samples, k + 1, 1).reshape(num_samples * (k + 1), vocab_size)
|
||||||
|
|
||||||
# Randomly sample draft token ids from draft probs.
|
# Randomly sample draft token ids from draft probs.
|
||||||
draft_token_ids = torch.multinomial(draft_probs[:, 0, :],
|
draft_token_ids = torch.multinomial(draft_probs[:, 0, :],
|
||||||
num_samples=k,
|
num_samples=k,
|
||||||
replacement=True).reshape(
|
replacement=True).reshape(
|
||||||
num_samples, k)
|
num_samples, k)
|
||||||
draft_probs = draft_probs.view(num_tokens, vocab_size)
|
|
||||||
|
|
||||||
# Bonus tokens not used but required.
|
# Bonus tokens not used but required.
|
||||||
bonus_token_ids = torch.zeros((1, 1), dtype=torch.int64,
|
bonus_token_ids = torch.zeros((1, 1), dtype=torch.int64,
|
||||||
device=DEVICE).repeat(num_samples, 1)
|
device=DEVICE).repeat(num_samples, 1)
|
||||||
|
|
||||||
temperature = torch.ones(num_samples, dtype=torch.float32, device=DEVICE)
|
sampling_metadata = create_sampling_metadata(all_greedy=False)
|
||||||
sampling_metadata = create_sampling_metadata(all_greedy=False,
|
output_token_ids = sampler(draft_token_ids.tolist(), draft_probs,
|
||||||
temperature=temperature)
|
bonus_token_ids, target_probs,
|
||||||
spec_decode_metadata = SpecDecodeMetadata.make_dummy(
|
sampling_metadata)
|
||||||
draft_token_ids.tolist(), device=bonus_token_ids.device)
|
|
||||||
output_token_ids = rejection_sampler(
|
|
||||||
spec_decode_metadata,
|
|
||||||
draft_probs=draft_probs,
|
|
||||||
target_logits=target_logits,
|
|
||||||
bonus_token_ids=bonus_token_ids,
|
|
||||||
sampling_metadata=sampling_metadata,
|
|
||||||
)
|
|
||||||
output_token_ids = output_token_ids[:, :-1].flatten()
|
output_token_ids = output_token_ids[:, :-1].flatten()
|
||||||
|
|
||||||
hist = torch.histogram(output_token_ids.to(dtype=torch.float,
|
hist = torch.histogram(output_token_ids.to(dtype=torch.float,
|
||||||
|
|||||||
@@ -15,10 +15,9 @@ if TYPE_CHECKING:
|
|||||||
from tests.conftest import VllmRunner
|
from tests.conftest import VllmRunner
|
||||||
|
|
||||||
MODELS = [
|
MODELS = [
|
||||||
"Qwen/Qwen2.5-1.5B-Instruct",
|
|
||||||
# TODO: Enable this models with v6e
|
|
||||||
# "Qwen/Qwen2-7B-Instruct",
|
# "Qwen/Qwen2-7B-Instruct",
|
||||||
# "meta-llama/Llama-3.1-8B",
|
"meta-llama/Llama-3.1-8B",
|
||||||
|
# TODO: Add models here as necessary
|
||||||
]
|
]
|
||||||
|
|
||||||
TENSOR_PARALLEL_SIZES = [1]
|
TENSOR_PARALLEL_SIZES = [1]
|
||||||
|
|||||||
@@ -347,7 +347,7 @@ class ModelConfig:
|
|||||||
self.encoder_config = self._get_encoder_config()
|
self.encoder_config = self._get_encoder_config()
|
||||||
self.hf_image_processor_config = get_hf_image_processor_config(
|
self.hf_image_processor_config = get_hf_image_processor_config(
|
||||||
self.model, revision)
|
self.model, revision)
|
||||||
self.dtype = _get_and_verify_dtype(self.hf_config, dtype)
|
self.dtype = _get_and_verify_dtype(self.hf_text_config, dtype)
|
||||||
self.use_async_output_proc = use_async_output_proc
|
self.use_async_output_proc = use_async_output_proc
|
||||||
self.mm_processor_kwargs = mm_processor_kwargs
|
self.mm_processor_kwargs = mm_processor_kwargs
|
||||||
self.disable_mm_preprocessor_cache = disable_mm_preprocessor_cache
|
self.disable_mm_preprocessor_cache = disable_mm_preprocessor_cache
|
||||||
@@ -2526,14 +2526,6 @@ def _get_and_verify_dtype(
|
|||||||
# NOTE: getattr(config, "torch_dtype", torch.float32) is not correct
|
# NOTE: getattr(config, "torch_dtype", torch.float32) is not correct
|
||||||
# because config.torch_dtype can be None.
|
# because config.torch_dtype can be None.
|
||||||
config_dtype = getattr(config, "torch_dtype", None)
|
config_dtype = getattr(config, "torch_dtype", None)
|
||||||
|
|
||||||
# Fallbacks for multi-modal models if the root config
|
|
||||||
# does not define torch_dtype
|
|
||||||
if config_dtype is None and hasattr(config, "text_config"):
|
|
||||||
config_dtype = getattr(config.text_config, "torch_dtype", None)
|
|
||||||
if config_dtype is None and hasattr(config, "vision_config"):
|
|
||||||
config_dtype = getattr(config.vision_config, "torch_dtype", None)
|
|
||||||
|
|
||||||
if config_dtype is None:
|
if config_dtype is None:
|
||||||
config_dtype = torch.float32
|
config_dtype = torch.float32
|
||||||
|
|
||||||
@@ -2541,8 +2533,16 @@ def _get_and_verify_dtype(
|
|||||||
dtype = dtype.lower()
|
dtype = dtype.lower()
|
||||||
if dtype == "auto":
|
if dtype == "auto":
|
||||||
if config_dtype == torch.float32:
|
if config_dtype == torch.float32:
|
||||||
# Following common practice, we use float16 for float32 models
|
if config.model_type in ("gemma2", "gemma3", "gemma3_text"):
|
||||||
torch_dtype = torch.float16
|
logger.info(
|
||||||
|
"For Gemma 2 and 3, we downcast float32 to bfloat16 "
|
||||||
|
"instead of float16 by default. Please specify `dtype` "
|
||||||
|
"if you want to use float16.")
|
||||||
|
torch_dtype = torch.bfloat16
|
||||||
|
else:
|
||||||
|
# Following the common practice, we use float16 for float32
|
||||||
|
# models.
|
||||||
|
torch_dtype = torch.float16
|
||||||
else:
|
else:
|
||||||
torch_dtype = config_dtype
|
torch_dtype = config_dtype
|
||||||
|
|
||||||
|
|||||||
@@ -1469,12 +1469,8 @@ class EngineArgs:
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
# Need at least Ampere for now (FA support required).
|
# Need at least Ampere for now (FA support required).
|
||||||
# Skip this check if we are running on a non-GPU platform,
|
|
||||||
# or if the device capability is not available
|
|
||||||
# (e.g. in a Ray actor without GPUs).
|
|
||||||
from vllm.platforms import current_platform
|
from vllm.platforms import current_platform
|
||||||
if (current_platform.is_cuda()
|
if (current_platform.is_cuda()
|
||||||
and current_platform.get_device_capability()
|
|
||||||
and current_platform.get_device_capability().major < 8):
|
and current_platform.get_device_capability().major < 8):
|
||||||
_raise_or_fallback(feature_name="Compute Capability < 8.0",
|
_raise_or_fallback(feature_name="Compute Capability < 8.0",
|
||||||
recommend_to_remove=False)
|
recommend_to_remove=False)
|
||||||
@@ -1578,13 +1574,6 @@ class EngineArgs:
|
|||||||
_raise_or_fallback(feature_name=name, recommend_to_remove=True)
|
_raise_or_fallback(feature_name=name, recommend_to_remove=True)
|
||||||
return False
|
return False
|
||||||
|
|
||||||
# No support for device type other than CUDA, AMD (experiemntal) or
|
|
||||||
# TPU (experimental) so far.
|
|
||||||
if not (current_platform.is_cuda_alike() or current_platform.is_tpu()):
|
|
||||||
_raise_or_fallback(
|
|
||||||
feature_name=f"device type={current_platform.device_type}",
|
|
||||||
recommend_to_remove=False)
|
|
||||||
return False
|
|
||||||
#############################################################
|
#############################################################
|
||||||
# Experimental Features - allow users to opt in.
|
# Experimental Features - allow users to opt in.
|
||||||
|
|
||||||
|
|||||||
@@ -548,7 +548,7 @@ class ChatCompletionRequest(OpenAIBaseModel):
|
|||||||
if top_logprobs < 0:
|
if top_logprobs < 0:
|
||||||
raise ValueError("`top_logprobs` must be a positive value.")
|
raise ValueError("`top_logprobs` must be a positive value.")
|
||||||
|
|
||||||
if top_logprobs > 0 and not data.get("logprobs"):
|
if not data.get("logprobs"):
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
"when using `top_logprobs`, `logprobs` must be set to true."
|
"when using `top_logprobs`, `logprobs` must be set to true."
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -35,6 +35,7 @@ if TYPE_CHECKING:
|
|||||||
VLLM_TRACE_FUNCTION: int = 0
|
VLLM_TRACE_FUNCTION: int = 0
|
||||||
VLLM_ATTENTION_BACKEND: Optional[str] = None
|
VLLM_ATTENTION_BACKEND: Optional[str] = None
|
||||||
VLLM_USE_FLASHINFER_SAMPLER: Optional[bool] = None
|
VLLM_USE_FLASHINFER_SAMPLER: Optional[bool] = None
|
||||||
|
VLLM_USE_FLASHINFER_REJECTION_SAMPLER: bool = False
|
||||||
VLLM_FLASHINFER_FORCE_TENSOR_CORES: bool = False
|
VLLM_FLASHINFER_FORCE_TENSOR_CORES: bool = False
|
||||||
VLLM_PP_LAYER_PARTITION: Optional[str] = None
|
VLLM_PP_LAYER_PARTITION: Optional[str] = None
|
||||||
VLLM_CPU_KVCACHE_SPACE: int = 0
|
VLLM_CPU_KVCACHE_SPACE: int = 0
|
||||||
|
|||||||
@@ -16,8 +16,12 @@ import torch
|
|||||||
|
|
||||||
from vllm.config import VllmConfig
|
from vllm.config import VllmConfig
|
||||||
from vllm.logger import init_logger
|
from vllm.logger import init_logger
|
||||||
|
from vllm.triton_utils.importing import HAS_TRITON
|
||||||
from vllm.utils import _check_multiproc_method, get_mp_context, run_method
|
from vllm.utils import _check_multiproc_method, get_mp_context, run_method
|
||||||
|
|
||||||
|
if HAS_TRITON:
|
||||||
|
from vllm.triton_utils import maybe_set_triton_cache_manager
|
||||||
|
|
||||||
logger = init_logger(__name__)
|
logger = init_logger(__name__)
|
||||||
|
|
||||||
T = TypeVar('T')
|
T = TypeVar('T')
|
||||||
@@ -310,3 +314,7 @@ def set_multiprocessing_worker_envs(parallel_config):
|
|||||||
current_parallelism, default_omp_num_threads)
|
current_parallelism, default_omp_num_threads)
|
||||||
os.environ["OMP_NUM_THREADS"] = str(default_omp_num_threads)
|
os.environ["OMP_NUM_THREADS"] = str(default_omp_num_threads)
|
||||||
torch.set_num_threads(default_omp_num_threads)
|
torch.set_num_threads(default_omp_num_threads)
|
||||||
|
|
||||||
|
# workaround for https://github.com/vllm-project/vllm/issues/6103
|
||||||
|
if HAS_TRITON and parallel_config.world_size > 1:
|
||||||
|
maybe_set_triton_cache_manager()
|
||||||
|
|||||||
@@ -30,7 +30,6 @@ from vllm.lora.utils import (from_layer, from_layer_logits_processor,
|
|||||||
is_regex_target_modules,
|
is_regex_target_modules,
|
||||||
parse_fine_tuned_lora_name, replace_submodule)
|
parse_fine_tuned_lora_name, replace_submodule)
|
||||||
from vllm.model_executor.models import SupportsLoRA, supports_multimodal
|
from vllm.model_executor.models import SupportsLoRA, supports_multimodal
|
||||||
from vllm.model_executor.models.interfaces import is_pooling_model
|
|
||||||
from vllm.model_executor.models.module_mapping import MultiModelKeys
|
from vllm.model_executor.models.module_mapping import MultiModelKeys
|
||||||
from vllm.model_executor.models.utils import PPMissingLayer, WeightsMapper
|
from vllm.model_executor.models.utils import PPMissingLayer, WeightsMapper
|
||||||
from vllm.utils import is_pin_memory_available
|
from vllm.utils import is_pin_memory_available
|
||||||
@@ -105,9 +104,6 @@ class LoRAModel(AdapterModel):
|
|||||||
"""Get LoRA for a given module by name"""
|
"""Get LoRA for a given module by name"""
|
||||||
return self.loras.get(module_name, None)
|
return self.loras.get(module_name, None)
|
||||||
|
|
||||||
def check_lora_name(self, lora_name: str) -> bool:
|
|
||||||
return lora_name in self.loras
|
|
||||||
|
|
||||||
# (yard1): TODO see if we can derive target_embedding_padding automatically
|
# (yard1): TODO see if we can derive target_embedding_padding automatically
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_lora_tensors(
|
def from_lora_tensors(
|
||||||
@@ -339,7 +335,6 @@ class LoRAModelManager(AdapterModelManager):
|
|||||||
# Used for long context lora.
|
# Used for long context lora.
|
||||||
self.scaling_factor_to_offset: Dict[float, int] = {}
|
self.scaling_factor_to_offset: Dict[float, int] = {}
|
||||||
super().__init__(model)
|
super().__init__(model)
|
||||||
|
|
||||||
self.supported_lora_modules = get_supported_lora_modules(self.model)
|
self.supported_lora_modules = get_supported_lora_modules(self.model)
|
||||||
assert self.supported_lora_modules, "No supported LoRA modules found in"
|
assert self.supported_lora_modules, "No supported LoRA modules found in"
|
||||||
f"{self.model.__class__.__name__}."
|
f"{self.model.__class__.__name__}."
|
||||||
@@ -355,7 +350,6 @@ class LoRAModelManager(AdapterModelManager):
|
|||||||
# In case the model only supports LoRA for
|
# In case the model only supports LoRA for
|
||||||
# text modules (e.g. ChatGLM)
|
# text modules (e.g. ChatGLM)
|
||||||
and hasattr(self.model, "get_mm_mapping"))
|
and hasattr(self.model, "get_mm_mapping"))
|
||||||
self.is_pooling_model = is_pooling_model(self.model)
|
|
||||||
self.packed_modules: Dict[str, List[str]] = {}
|
self.packed_modules: Dict[str, List[str]] = {}
|
||||||
self.modules: Dict[str, BaseLayerWithLoRA] = {}
|
self.modules: Dict[str, BaseLayerWithLoRA] = {}
|
||||||
# Dict instead of a Set for compatibility with LRUCache.
|
# Dict instead of a Set for compatibility with LRUCache.
|
||||||
@@ -395,7 +389,7 @@ class LoRAModelManager(AdapterModelManager):
|
|||||||
lora_model.id, index)
|
lora_model.id, index)
|
||||||
self.lora_index_to_id[index] = lora_model.id
|
self.lora_index_to_id[index] = lora_model.id
|
||||||
for module_name, module in self.modules.items():
|
for module_name, module in self.modules.items():
|
||||||
module_lora = self._get_lora_layer_weights(lora_model, module_name)
|
module_lora = lora_model.get_lora(module_name)
|
||||||
if module_lora:
|
if module_lora:
|
||||||
module_lora.optimize()
|
module_lora.optimize()
|
||||||
# Bias is not explicitly enabled with the flag enable_lora_bias.
|
# Bias is not explicitly enabled with the flag enable_lora_bias.
|
||||||
@@ -632,7 +626,7 @@ class LoRAModelManager(AdapterModelManager):
|
|||||||
replaced_module: Set[str] = set()
|
replaced_module: Set[str] = set()
|
||||||
has_replacement = False
|
has_replacement = False
|
||||||
for r in new_module_names:
|
for r in new_module_names:
|
||||||
lora = self._get_lora_layer_weights(lora_model, r)
|
lora = lora_model.get_lora(r)
|
||||||
replacement_loras.append(lora)
|
replacement_loras.append(lora)
|
||||||
if lora:
|
if lora:
|
||||||
has_replacement = True
|
has_replacement = True
|
||||||
@@ -643,34 +637,12 @@ class LoRAModelManager(AdapterModelManager):
|
|||||||
if replacement_loras[i]:
|
if replacement_loras[i]:
|
||||||
continue
|
continue
|
||||||
replacement_loras[i] = None
|
replacement_loras[i] = None
|
||||||
# HACK Temporary solution for the pool model.
|
|
||||||
if self.is_pooling_model and not lora_model.check_lora_name(
|
|
||||||
module_name):
|
|
||||||
replaced_module_name = module_name.replace("model.", "")
|
|
||||||
if lora_model.check_lora_name(module_name):
|
|
||||||
module_name = replaced_module_name
|
|
||||||
lora_model.loras[module_name] = PackedLoRALayerWeights.pack(
|
lora_model.loras[module_name] = PackedLoRALayerWeights.pack(
|
||||||
replacement_loras)
|
replacement_loras)
|
||||||
# Remove the modules that have been replaced.
|
# Remove the modules that have been replaced.
|
||||||
for module in replaced_module:
|
for module in replaced_module:
|
||||||
lora_model.loras.pop(module, None)
|
lora_model.loras.pop(module, None)
|
||||||
|
|
||||||
def _get_lora_layer_weights(
|
|
||||||
self, lora_model: LoRAModel,
|
|
||||||
module_name: str) -> Optional[LoRALayerWeights]:
|
|
||||||
org_module_name = module_name
|
|
||||||
if self.is_pooling_model and not lora_model.check_lora_name(
|
|
||||||
module_name):
|
|
||||||
# If it's a pool model, and the layer name is not found,
|
|
||||||
# remove the prefix 'model.' and search again.
|
|
||||||
module_name = module_name.replace("model.", "")
|
|
||||||
if lora_model.check_lora_name(module_name):
|
|
||||||
org_module_name = module_name
|
|
||||||
logger.info_once(
|
|
||||||
"For the pool model, successfully loaded the LoRA weights "
|
|
||||||
"after removing the prefix 'model.'.")
|
|
||||||
return lora_model.get_lora(org_module_name)
|
|
||||||
|
|
||||||
def deactivate_adapter(self, adapter_id: int) -> bool:
|
def deactivate_adapter(self, adapter_id: int) -> bool:
|
||||||
return deactivate_adapter(adapter_id, self._active_adapters,
|
return deactivate_adapter(adapter_id, self._active_adapters,
|
||||||
self._deactivate_adapter)
|
self._deactivate_adapter)
|
||||||
|
|||||||
@@ -1,200 +0,0 @@
|
|||||||
{
|
|
||||||
"1": {
|
|
||||||
"BLOCK_SIZE_M": 16,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"BLOCK_SIZE_K": 256,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"num_warps": 4,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 2
|
|
||||||
},
|
|
||||||
"2": {
|
|
||||||
"BLOCK_SIZE_M": 16,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"BLOCK_SIZE_K": 64,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"num_warps": 8,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 1
|
|
||||||
},
|
|
||||||
"4": {
|
|
||||||
"BLOCK_SIZE_M": 16,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"num_warps": 8,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 2
|
|
||||||
},
|
|
||||||
"8": {
|
|
||||||
"BLOCK_SIZE_M": 16,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"num_warps": 4,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 2
|
|
||||||
},
|
|
||||||
"16": {
|
|
||||||
"BLOCK_SIZE_M": 16,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"num_warps": 4,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 2
|
|
||||||
},
|
|
||||||
"24": {
|
|
||||||
"BLOCK_SIZE_M": 16,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"num_warps": 4,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 2
|
|
||||||
},
|
|
||||||
"32": {
|
|
||||||
"BLOCK_SIZE_M": 16,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"BLOCK_SIZE_K": 256,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"num_warps": 4,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 2
|
|
||||||
},
|
|
||||||
"48": {
|
|
||||||
"BLOCK_SIZE_M": 16,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"BLOCK_SIZE_K": 256,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"num_warps": 4,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 2
|
|
||||||
},
|
|
||||||
"64": {
|
|
||||||
"BLOCK_SIZE_M": 16,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"num_warps": 8,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 1
|
|
||||||
},
|
|
||||||
"96": {
|
|
||||||
"BLOCK_SIZE_M": 16,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"num_warps": 1,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 2
|
|
||||||
},
|
|
||||||
"128": {
|
|
||||||
"BLOCK_SIZE_M": 16,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"num_warps": 1,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 2
|
|
||||||
},
|
|
||||||
"256": {
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"num_warps": 8,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 1
|
|
||||||
},
|
|
||||||
"512": {
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"num_warps": 8,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 1
|
|
||||||
},
|
|
||||||
"1024": {
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"num_warps": 8,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 2
|
|
||||||
},
|
|
||||||
"1536": {
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"num_warps": 8,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 2
|
|
||||||
},
|
|
||||||
"2048": {
|
|
||||||
"BLOCK_SIZE_M": 128,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"BLOCK_SIZE_K": 64,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"num_warps": 8,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 2
|
|
||||||
},
|
|
||||||
"3072": {
|
|
||||||
"BLOCK_SIZE_M": 128,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"BLOCK_SIZE_K": 64,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"num_warps": 8,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 2
|
|
||||||
},
|
|
||||||
"4096": {
|
|
||||||
"BLOCK_SIZE_M": 128,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"BLOCK_SIZE_K": 64,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"num_warps": 8,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 2
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,200 +0,0 @@
|
|||||||
{
|
|
||||||
"1": {
|
|
||||||
"BLOCK_SIZE_M": 16,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"BLOCK_SIZE_K": 256,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"num_warps": 4,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 2
|
|
||||||
},
|
|
||||||
"2": {
|
|
||||||
"BLOCK_SIZE_M": 16,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"BLOCK_SIZE_K": 64,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"num_warps": 8,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 1
|
|
||||||
},
|
|
||||||
"4": {
|
|
||||||
"BLOCK_SIZE_M": 16,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"num_warps": 8,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 2
|
|
||||||
},
|
|
||||||
"8": {
|
|
||||||
"BLOCK_SIZE_M": 16,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"num_warps": 4,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 2
|
|
||||||
},
|
|
||||||
"16": {
|
|
||||||
"BLOCK_SIZE_M": 16,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"num_warps": 4,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 2
|
|
||||||
},
|
|
||||||
"24": {
|
|
||||||
"BLOCK_SIZE_M": 16,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"num_warps": 4,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 2
|
|
||||||
},
|
|
||||||
"32": {
|
|
||||||
"BLOCK_SIZE_M": 16,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"BLOCK_SIZE_K": 256,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"num_warps": 4,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 2
|
|
||||||
},
|
|
||||||
"48": {
|
|
||||||
"BLOCK_SIZE_M": 16,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"BLOCK_SIZE_K": 256,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"num_warps": 4,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 2
|
|
||||||
},
|
|
||||||
"64": {
|
|
||||||
"BLOCK_SIZE_M": 16,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"num_warps": 8,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 1
|
|
||||||
},
|
|
||||||
"96": {
|
|
||||||
"BLOCK_SIZE_M": 16,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"num_warps": 1,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 2
|
|
||||||
},
|
|
||||||
"128": {
|
|
||||||
"BLOCK_SIZE_M": 16,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"num_warps": 1,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 2
|
|
||||||
},
|
|
||||||
"256": {
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"num_warps": 8,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 1
|
|
||||||
},
|
|
||||||
"512": {
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"num_warps": 8,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 1
|
|
||||||
},
|
|
||||||
"1024": {
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"num_warps": 8,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 2
|
|
||||||
},
|
|
||||||
"1536": {
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"num_warps": 8,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 2
|
|
||||||
},
|
|
||||||
"2048": {
|
|
||||||
"BLOCK_SIZE_M": 128,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"BLOCK_SIZE_K": 64,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"num_warps": 8,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 2
|
|
||||||
},
|
|
||||||
"3072": {
|
|
||||||
"BLOCK_SIZE_M": 128,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"BLOCK_SIZE_K": 64,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"num_warps": 8,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 2
|
|
||||||
},
|
|
||||||
"4096": {
|
|
||||||
"BLOCK_SIZE_M": 128,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"BLOCK_SIZE_K": 64,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"num_warps": 8,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 2
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,200 +0,0 @@
|
|||||||
{
|
|
||||||
"1": {
|
|
||||||
"BLOCK_SIZE_M": 16,
|
|
||||||
"BLOCK_SIZE_N": 16,
|
|
||||||
"BLOCK_SIZE_K": 256,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"num_warps": 4,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 1
|
|
||||||
},
|
|
||||||
"2": {
|
|
||||||
"BLOCK_SIZE_M": 16,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"num_warps": 4,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 1
|
|
||||||
},
|
|
||||||
"4": {
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"num_warps": 8,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 1
|
|
||||||
},
|
|
||||||
"8": {
|
|
||||||
"BLOCK_SIZE_M": 16,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"num_warps": 4,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 1
|
|
||||||
},
|
|
||||||
"16": {
|
|
||||||
"BLOCK_SIZE_M": 16,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"num_warps": 4,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 1
|
|
||||||
},
|
|
||||||
"24": {
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"BLOCK_SIZE_K": 64,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"num_warps": 8,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 2
|
|
||||||
},
|
|
||||||
"32": {
|
|
||||||
"BLOCK_SIZE_M": 16,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"num_warps": 4,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 2
|
|
||||||
},
|
|
||||||
"48": {
|
|
||||||
"BLOCK_SIZE_M": 16,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"num_warps": 2,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 2
|
|
||||||
},
|
|
||||||
"64": {
|
|
||||||
"BLOCK_SIZE_M": 16,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"num_warps": 2,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 2
|
|
||||||
},
|
|
||||||
"96": {
|
|
||||||
"BLOCK_SIZE_M": 16,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"GROUP_SIZE_M": 4,
|
|
||||||
"num_warps": 2,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 2
|
|
||||||
},
|
|
||||||
"128": {
|
|
||||||
"BLOCK_SIZE_M": 16,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"GROUP_SIZE_M": 4,
|
|
||||||
"num_warps": 1,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 2
|
|
||||||
},
|
|
||||||
"256": {
|
|
||||||
"BLOCK_SIZE_M": 16,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"num_warps": 2,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 2
|
|
||||||
},
|
|
||||||
"512": {
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"num_warps": 8,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 2
|
|
||||||
},
|
|
||||||
"1024": {
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"num_warps": 8,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 2
|
|
||||||
},
|
|
||||||
"1536": {
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"num_warps": 8,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 2
|
|
||||||
},
|
|
||||||
"2048": {
|
|
||||||
"BLOCK_SIZE_M": 128,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"BLOCK_SIZE_K": 64,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"num_warps": 8,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 2
|
|
||||||
},
|
|
||||||
"3072": {
|
|
||||||
"BLOCK_SIZE_M": 128,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"BLOCK_SIZE_K": 64,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"num_warps": 8,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 2
|
|
||||||
},
|
|
||||||
"4096": {
|
|
||||||
"BLOCK_SIZE_M": 128,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"BLOCK_SIZE_K": 64,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"num_warps": 8,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 2
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,200 +0,0 @@
|
|||||||
{
|
|
||||||
"1": {
|
|
||||||
"BLOCK_SIZE_M": 16,
|
|
||||||
"BLOCK_SIZE_N": 16,
|
|
||||||
"BLOCK_SIZE_K": 256,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"num_warps": 4,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 1
|
|
||||||
},
|
|
||||||
"2": {
|
|
||||||
"BLOCK_SIZE_M": 16,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"num_warps": 4,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 1
|
|
||||||
},
|
|
||||||
"4": {
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"num_warps": 8,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 1
|
|
||||||
},
|
|
||||||
"8": {
|
|
||||||
"BLOCK_SIZE_M": 16,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"num_warps": 4,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 1
|
|
||||||
},
|
|
||||||
"16": {
|
|
||||||
"BLOCK_SIZE_M": 16,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"num_warps": 4,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 1
|
|
||||||
},
|
|
||||||
"24": {
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"BLOCK_SIZE_K": 64,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"num_warps": 8,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 2
|
|
||||||
},
|
|
||||||
"32": {
|
|
||||||
"BLOCK_SIZE_M": 16,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"num_warps": 4,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 2
|
|
||||||
},
|
|
||||||
"48": {
|
|
||||||
"BLOCK_SIZE_M": 16,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"num_warps": 2,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 2
|
|
||||||
},
|
|
||||||
"64": {
|
|
||||||
"BLOCK_SIZE_M": 16,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"num_warps": 2,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 2
|
|
||||||
},
|
|
||||||
"96": {
|
|
||||||
"BLOCK_SIZE_M": 16,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"GROUP_SIZE_M": 4,
|
|
||||||
"num_warps": 2,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 2
|
|
||||||
},
|
|
||||||
"128": {
|
|
||||||
"BLOCK_SIZE_M": 16,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"GROUP_SIZE_M": 4,
|
|
||||||
"num_warps": 1,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 2
|
|
||||||
},
|
|
||||||
"256": {
|
|
||||||
"BLOCK_SIZE_M": 16,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"num_warps": 2,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 2
|
|
||||||
},
|
|
||||||
"512": {
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"num_warps": 8,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 2
|
|
||||||
},
|
|
||||||
"1024": {
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"num_warps": 8,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 2
|
|
||||||
},
|
|
||||||
"1536": {
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"num_warps": 8,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 2
|
|
||||||
},
|
|
||||||
"2048": {
|
|
||||||
"BLOCK_SIZE_M": 128,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"BLOCK_SIZE_K": 64,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"num_warps": 8,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 2
|
|
||||||
},
|
|
||||||
"3072": {
|
|
||||||
"BLOCK_SIZE_M": 128,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"BLOCK_SIZE_K": 64,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"num_warps": 8,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 2
|
|
||||||
},
|
|
||||||
"4096": {
|
|
||||||
"BLOCK_SIZE_M": 128,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"BLOCK_SIZE_K": 64,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"num_warps": 8,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 2
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,200 +0,0 @@
|
|||||||
{
|
|
||||||
"1": {
|
|
||||||
"BLOCK_SIZE_M": 16,
|
|
||||||
"BLOCK_SIZE_N": 16,
|
|
||||||
"BLOCK_SIZE_K": 256,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"num_warps": 4,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 1
|
|
||||||
},
|
|
||||||
"2": {
|
|
||||||
"BLOCK_SIZE_M": 16,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"num_warps": 4,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 1
|
|
||||||
},
|
|
||||||
"4": {
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"num_warps": 8,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 1
|
|
||||||
},
|
|
||||||
"8": {
|
|
||||||
"BLOCK_SIZE_M": 16,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"num_warps": 4,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 1
|
|
||||||
},
|
|
||||||
"16": {
|
|
||||||
"BLOCK_SIZE_M": 16,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"num_warps": 4,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 1
|
|
||||||
},
|
|
||||||
"24": {
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"BLOCK_SIZE_K": 64,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"num_warps": 8,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 2
|
|
||||||
},
|
|
||||||
"32": {
|
|
||||||
"BLOCK_SIZE_M": 16,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"num_warps": 4,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 2
|
|
||||||
},
|
|
||||||
"48": {
|
|
||||||
"BLOCK_SIZE_M": 16,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"num_warps": 2,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 2
|
|
||||||
},
|
|
||||||
"64": {
|
|
||||||
"BLOCK_SIZE_M": 16,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"num_warps": 2,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 2
|
|
||||||
},
|
|
||||||
"96": {
|
|
||||||
"BLOCK_SIZE_M": 16,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"GROUP_SIZE_M": 4,
|
|
||||||
"num_warps": 2,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 2
|
|
||||||
},
|
|
||||||
"128": {
|
|
||||||
"BLOCK_SIZE_M": 16,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"GROUP_SIZE_M": 4,
|
|
||||||
"num_warps": 1,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 2
|
|
||||||
},
|
|
||||||
"256": {
|
|
||||||
"BLOCK_SIZE_M": 16,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"num_warps": 2,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 2
|
|
||||||
},
|
|
||||||
"512": {
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"num_warps": 8,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 2
|
|
||||||
},
|
|
||||||
"1024": {
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"num_warps": 8,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 2
|
|
||||||
},
|
|
||||||
"1536": {
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"num_warps": 8,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 2
|
|
||||||
},
|
|
||||||
"2048": {
|
|
||||||
"BLOCK_SIZE_M": 128,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"BLOCK_SIZE_K": 64,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"num_warps": 8,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 2
|
|
||||||
},
|
|
||||||
"3072": {
|
|
||||||
"BLOCK_SIZE_M": 128,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"BLOCK_SIZE_K": 64,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"num_warps": 8,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 2
|
|
||||||
},
|
|
||||||
"4096": {
|
|
||||||
"BLOCK_SIZE_M": 128,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"BLOCK_SIZE_K": 64,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"num_warps": 8,
|
|
||||||
"num_stages": 2,
|
|
||||||
"waves_per_eu": 0,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"kpack": 2
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -783,12 +783,8 @@ def invoke_fused_moe_kernel(A: torch.Tensor,
|
|||||||
use_int8_w8a16=use_int8_w8a16,
|
use_int8_w8a16=use_int8_w8a16,
|
||||||
**config,
|
**config,
|
||||||
)
|
)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
config = config.copy()
|
|
||||||
BLOCK_SIZE_K = config.pop("BLOCK_SIZE_K")
|
|
||||||
if block_shape is not None:
|
|
||||||
BLOCK_SIZE_K = min(BLOCK_SIZE_K, min(block_shape[0],
|
|
||||||
block_shape[1]))
|
|
||||||
fused_moe_kernel[grid](
|
fused_moe_kernel[grid](
|
||||||
A,
|
A,
|
||||||
B,
|
B,
|
||||||
@@ -827,7 +823,6 @@ def invoke_fused_moe_kernel(A: torch.Tensor,
|
|||||||
compute_type=compute_type,
|
compute_type=compute_type,
|
||||||
use_fp8_w8a8=use_fp8_w8a8,
|
use_fp8_w8a8=use_fp8_w8a8,
|
||||||
use_int8_w8a16=use_int8_w8a16,
|
use_int8_w8a16=use_int8_w8a16,
|
||||||
BLOCK_SIZE_K=BLOCK_SIZE_K,
|
|
||||||
**config,
|
**config,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -1,164 +0,0 @@
|
|||||||
{
|
|
||||||
"1": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"8": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"16": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"24": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"32": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"48": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"64": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"96": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"128": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"256": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"512": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1024": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1536": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2048": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"3072": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4096": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,164 +0,0 @@
|
|||||||
{
|
|
||||||
"1": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"8": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"16": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"24": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"32": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"48": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"64": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"96": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"128": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"256": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"512": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1024": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1536": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2048": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"3072": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4096": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,164 +0,0 @@
|
|||||||
{
|
|
||||||
"1": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"8": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"16": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"24": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"32": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"48": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"64": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"96": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"128": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"256": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"512": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1024": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1536": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2048": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"3072": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4096": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,164 +0,0 @@
|
|||||||
{
|
|
||||||
"1": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"8": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"16": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"24": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"32": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"48": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"64": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"96": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"128": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"256": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"512": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1024": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1536": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2048": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"3072": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4096": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,164 +0,0 @@
|
|||||||
{
|
|
||||||
"1": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"8": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"16": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"24": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"32": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"48": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"64": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"96": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"128": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"256": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"512": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1024": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1536": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2048": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"3072": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4096": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,164 +0,0 @@
|
|||||||
{
|
|
||||||
"1": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"8": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"16": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"24": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"32": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"48": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"64": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"96": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"128": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"256": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"512": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1024": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1536": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2048": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"3072": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4096": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,164 +0,0 @@
|
|||||||
{
|
|
||||||
"1": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"8": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"16": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"24": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"32": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"48": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"64": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"96": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"128": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"256": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"512": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1024": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1536": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2048": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"3072": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4096": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,164 +0,0 @@
|
|||||||
{
|
|
||||||
"1": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"8": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"16": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"24": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"32": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"48": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"64": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"96": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"128": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"256": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"512": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1024": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1536": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2048": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"3072": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4096": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,164 +0,0 @@
|
|||||||
{
|
|
||||||
"1": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"8": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"16": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"24": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"32": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"48": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"64": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"96": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"128": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"256": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"512": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1024": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1536": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2048": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"3072": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4096": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,164 +0,0 @@
|
|||||||
{
|
|
||||||
"1": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"8": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"16": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"24": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"32": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"48": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"64": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"96": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"128": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"256": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"512": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1024": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1536": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2048": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"3072": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4096": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,164 +0,0 @@
|
|||||||
{
|
|
||||||
"1": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"8": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"16": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"24": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"32": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"48": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"64": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"96": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"128": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"256": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"512": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1024": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1536": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2048": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"3072": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4096": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,164 +0,0 @@
|
|||||||
{
|
|
||||||
"1": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"8": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"16": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"24": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"32": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"48": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"64": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"96": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"128": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"256": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"512": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1024": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1536": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2048": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"3072": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4096": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,164 +0,0 @@
|
|||||||
{
|
|
||||||
"1": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"8": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"16": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"24": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"32": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"48": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"64": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"96": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"128": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"256": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"512": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1024": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1536": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2048": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"3072": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4096": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,164 +0,0 @@
|
|||||||
{
|
|
||||||
"1": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"8": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"16": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"24": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"32": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"48": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"64": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"96": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"128": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"256": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"512": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1024": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1536": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2048": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"3072": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4096": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,164 +0,0 @@
|
|||||||
{
|
|
||||||
"1": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"8": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"16": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"24": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"32": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"48": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"64": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"96": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"128": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"256": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"512": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1024": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1536": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2048": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"3072": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4096": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,164 +0,0 @@
|
|||||||
{
|
|
||||||
"1": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"8": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"16": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"24": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"32": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"48": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"64": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"96": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"128": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"256": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"512": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1024": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1536": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2048": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"3072": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4096": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,164 +0,0 @@
|
|||||||
{
|
|
||||||
"1": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"8": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"16": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"24": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"32": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"48": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"64": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"96": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"128": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"256": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"512": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1024": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1536": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2048": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"3072": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4096": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,164 +0,0 @@
|
|||||||
{
|
|
||||||
"1": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"8": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"16": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"24": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"32": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"48": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"64": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"96": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"128": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"256": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"512": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1024": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1536": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2048": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"3072": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4096": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,164 +0,0 @@
|
|||||||
{
|
|
||||||
"1": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"8": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"16": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"24": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"32": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"48": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"64": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"96": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"128": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"256": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"512": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1024": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1536": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2048": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"3072": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4096": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,164 +0,0 @@
|
|||||||
{
|
|
||||||
"1": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"8": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"16": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"24": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"32": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"48": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"64": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"96": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"128": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"256": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"512": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1024": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1536": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2048": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"3072": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4096": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,164 +0,0 @@
|
|||||||
{
|
|
||||||
"1": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"8": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"16": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"24": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"32": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"48": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"64": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"96": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"128": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"256": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"512": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1024": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1536": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2048": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"3072": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4096": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,164 +0,0 @@
|
|||||||
{
|
|
||||||
"1": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"8": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"16": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"24": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"32": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"48": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"64": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"96": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"128": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"256": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"512": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1024": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1536": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2048": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"3072": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4096": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,164 +0,0 @@
|
|||||||
{
|
|
||||||
"1": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"8": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"16": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"24": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"32": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"48": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"64": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"96": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"128": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"256": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"512": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1024": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1536": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2048": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"3072": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4096": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,164 +0,0 @@
|
|||||||
{
|
|
||||||
"1": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"8": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"16": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"24": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"32": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"48": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"64": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"96": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"128": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"256": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"512": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1024": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1536": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2048": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"3072": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4096": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,164 +0,0 @@
|
|||||||
{
|
|
||||||
"1": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"8": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"16": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"24": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"32": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"48": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"64": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"96": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"128": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"256": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"512": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1024": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1536": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2048": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"3072": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4096": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,164 +0,0 @@
|
|||||||
{
|
|
||||||
"1": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"8": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"16": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"24": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"32": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"48": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"64": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"96": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"128": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"256": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"512": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1024": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1536": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2048": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"3072": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4096": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,164 +0,0 @@
|
|||||||
{
|
|
||||||
"1": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"8": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"16": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"24": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"32": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"48": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"64": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"96": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"128": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"256": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"512": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1024": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1536": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2048": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"3072": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4096": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,164 +0,0 @@
|
|||||||
{
|
|
||||||
"1": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"8": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"16": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"24": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"32": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"48": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"64": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"96": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"128": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"256": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"512": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1024": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1536": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2048": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"3072": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4096": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,164 +0,0 @@
|
|||||||
{
|
|
||||||
"1": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"8": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"16": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"24": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"32": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"48": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"64": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"96": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"128": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"256": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"512": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1024": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1536": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2048": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"3072": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4096": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,164 +0,0 @@
|
|||||||
{
|
|
||||||
"1": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"8": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"16": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"24": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"32": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"48": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"64": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"96": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"128": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"256": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"512": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1024": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1536": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2048": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"3072": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4096": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,164 +0,0 @@
|
|||||||
{
|
|
||||||
"1": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"8": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"16": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"24": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"32": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"48": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"64": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"96": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"128": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"256": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"512": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1024": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1536": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2048": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"3072": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4096": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,164 +0,0 @@
|
|||||||
{
|
|
||||||
"1": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"8": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"16": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"24": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"32": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"48": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"64": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"96": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"128": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"256": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"512": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1024": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1536": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2048": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"3072": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4096": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,164 +0,0 @@
|
|||||||
{
|
|
||||||
"1": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"8": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"16": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"24": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"32": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"48": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"64": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"96": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"128": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"256": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"512": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1024": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1536": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2048": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"3072": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4096": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,164 +0,0 @@
|
|||||||
{
|
|
||||||
"1": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"8": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"16": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"24": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"32": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"48": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"64": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"96": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"128": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"256": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"512": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1024": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1536": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2048": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"3072": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4096": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,164 +0,0 @@
|
|||||||
{
|
|
||||||
"1": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"8": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"16": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"24": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"32": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"48": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"64": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"96": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"128": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"256": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"512": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1024": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1536": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2048": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"3072": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4096": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,164 +0,0 @@
|
|||||||
{
|
|
||||||
"1": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"8": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"16": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"24": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"32": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"48": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"64": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"96": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"128": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"256": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"512": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1024": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1536": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2048": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"3072": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4096": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,164 +0,0 @@
|
|||||||
{
|
|
||||||
"1": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"8": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"16": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"24": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"32": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"48": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"64": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"96": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"128": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"256": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"512": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1024": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1536": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2048": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"3072": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4096": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,164 +0,0 @@
|
|||||||
{
|
|
||||||
"1": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"8": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"16": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"24": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"32": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"48": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"64": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"96": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"128": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"256": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"512": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1024": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1536": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2048": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"3072": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4096": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,164 +0,0 @@
|
|||||||
{
|
|
||||||
"1": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"8": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"16": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"24": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"32": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"48": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"64": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"96": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"128": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"256": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"512": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1024": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1536": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2048": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"3072": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4096": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,164 +0,0 @@
|
|||||||
{
|
|
||||||
"1": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"8": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"16": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"24": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"32": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"48": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"64": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"96": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"128": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"256": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"512": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1024": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1536": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2048": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"3072": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4096": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,164 +0,0 @@
|
|||||||
{
|
|
||||||
"1": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"8": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"16": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"24": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"32": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"48": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"64": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"96": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"128": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"256": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"512": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1024": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1536": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2048": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"3072": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4096": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,164 +0,0 @@
|
|||||||
{
|
|
||||||
"1": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"8": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"16": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"24": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"32": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"48": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"64": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"96": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"128": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"256": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"512": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1024": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1536": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2048": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"3072": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4096": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,164 +0,0 @@
|
|||||||
{
|
|
||||||
"1": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"8": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"16": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"24": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"32": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"48": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"64": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"96": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"128": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"256": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"512": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1024": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1536": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2048": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"3072": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4096": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,164 +0,0 @@
|
|||||||
{
|
|
||||||
"1": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"8": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"16": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"24": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"32": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"48": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"64": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 32,
|
|
||||||
"GROUP_SIZE_M": 8,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"96": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"128": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"256": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 64,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"512": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 1,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1024": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"1536": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 32,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 16,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"2048": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"3072": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
},
|
|
||||||
"4096": {
|
|
||||||
"BLOCK_SIZE_K": 128,
|
|
||||||
"BLOCK_SIZE_M": 64,
|
|
||||||
"BLOCK_SIZE_N": 128,
|
|
||||||
"GROUP_SIZE_M": 32,
|
|
||||||
"kpack": 1,
|
|
||||||
"matrix_instr_nonkdim": 16,
|
|
||||||
"num_warps": 4
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user