[Bugfix][CPU] Skip set_num_threads after thread binding (#38535)
Signed-off-by: jiang1.li <jiang1.li@intel.com>
(cherry picked from commit 6557f4937f)
This commit is contained in:
@@ -1,9 +1,10 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
set -euox pipefail
|
set -euox pipefail
|
||||||
export VLLM_CPU_CI_ENV=0
|
export VLLM_CPU_CI_ENV=0
|
||||||
|
export VLLM_CPU_KVCACHE_SPACE=1 # avoid OOM
|
||||||
|
|
||||||
echo "--- PP+TP"
|
echo "--- PP+TP"
|
||||||
vllm serve meta-llama/Llama-3.2-3B-Instruct -tp=2 -pp=2 &
|
vllm serve meta-llama/Llama-3.2-3B-Instruct -tp=2 -pp=2 --max-model-len=4096 &
|
||||||
server_pid=$!
|
server_pid=$!
|
||||||
timeout 600 bash -c "until curl localhost:8000/v1/models > /dev/null 2>&1; do sleep 1; done" || exit 1
|
timeout 600 bash -c "until curl localhost:8000/v1/models > /dev/null 2>&1; do sleep 1; done" || exit 1
|
||||||
vllm bench serve \
|
vllm bench serve \
|
||||||
@@ -23,7 +24,7 @@ if [ "$failed_req" -ne 0 ]; then
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
echo "--- DP+TP"
|
echo "--- DP+TP"
|
||||||
vllm serve meta-llama/Llama-3.2-3B-Instruct -tp=2 -dp=2 &
|
vllm serve meta-llama/Llama-3.2-3B-Instruct -tp=2 -dp=2 --max-model-len=4096 &
|
||||||
server_pid=$!
|
server_pid=$!
|
||||||
timeout 600 bash -c "until curl localhost:8000/v1/models > /dev/null 2>&1; do sleep 1; done" || exit 1
|
timeout 600 bash -c "until curl localhost:8000/v1/models > /dev/null 2>&1; do sleep 1; done" || exit 1
|
||||||
vllm bench serve \
|
vllm bench serve \
|
||||||
|
|||||||
@@ -108,6 +108,15 @@ class CPUWorker(Worker):
|
|||||||
if ret:
|
if ret:
|
||||||
logger.info(ret)
|
logger.info(ret)
|
||||||
|
|
||||||
|
# After thread binding, changing the number of threads is not allowed
|
||||||
|
def skip_set_num_threads(x: int):
|
||||||
|
logger.warning(
|
||||||
|
"CPU backend doesn't allow to use "
|
||||||
|
"`torch.set_num_threads` after the thread binding, skip it."
|
||||||
|
)
|
||||||
|
|
||||||
|
torch.set_num_threads = skip_set_num_threads
|
||||||
|
|
||||||
# Note: unique identifier for creating allreduce shared memory
|
# Note: unique identifier for creating allreduce shared memory
|
||||||
os.environ["VLLM_DIST_IDENT"] = self.distributed_init_method.split(":")[-1]
|
os.environ["VLLM_DIST_IDENT"] = self.distributed_init_method.split(":")[-1]
|
||||||
# Initialize the distributed environment.
|
# Initialize the distributed environment.
|
||||||
|
|||||||
Reference in New Issue
Block a user