[ROCm][CI] Handle pytest status code 5 when a shard isn't allocated any tests (#32040)

Signed-off-by: Divakar Verma <divakar.verma@amd.com>
This commit is contained in:
Divakar Verma
2026-01-12 16:35:49 -06:00
committed by GitHub
parent 629584bfc9
commit a28d9f4470
2 changed files with 11 additions and 2 deletions

View File

@@ -209,12 +209,21 @@ if [[ $commands == *"--shard-id="* ]]; then
wait "${pid}"
STATUS+=($?)
done
at_least_one_shard_with_tests=0
for st in "${STATUS[@]}"; do
if [[ ${st} -ne 0 ]]; then
if [[ ${st} -ne 0 ]] && [[ ${st} -ne 5 ]]; then
echo "One of the processes failed with $st"
exit "${st}"
elif [[ ${st} -eq 5 ]]; then
echo "Shard exited with status 5 (no tests collected) - treating as success"
else # This means st is 0
at_least_one_shard_with_tests=1
fi
done
if [[ ${#STATUS[@]} -gt 0 && ${at_least_one_shard_with_tests} -eq 0 ]]; then
echo "All shards reported no tests collected. Failing the build."
exit 1
fi
else
echo "Render devices: $BUILDKITE_AGENT_META_DATA_RENDER_DEVICES"
docker run \

View File

@@ -870,7 +870,7 @@ steps:
- label: Language Models Tests (Extra Standard) %N
timeout_in_minutes: 45
mirror_hardwares: [amdexperimental]
agent_pool: mi325_2
agent_pool: mi325_8
# grade: Blocking
torch_nightly: true
source_file_dependencies: