Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/build_cc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ jobs:
python-version: '3.11'
- uses: lukka/get-cmake@latest
- run: python -m pip install uv
- run: python -m uv pip install --system tensorflow
- run: source/install/uv_with_retry.sh pip install --system tensorflow
- name: Download libtorch
run: |
wget https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-2.1.2%2Bcpu.zip -O libtorch.zip
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/test_cc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ jobs:
mpi: mpich
- uses: lukka/get-cmake@latest
- run: python -m pip install uv
- run: python -m uv pip install --system tensorflow
- run: source/install/uv_with_retry.sh pip install --system tensorflow
- name: Download libtorch
run: |
wget https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-2.1.2%2Bcpu.zip -O libtorch.zip
Expand All @@ -49,7 +49,7 @@ jobs:
# test lammps
- run: |
export TENSORFLOW_ROOT=$(python -c 'import importlib,pathlib;print(pathlib.Path(importlib.util.find_spec("tensorflow").origin).parent)')
python -m uv pip install --system -e .[cpu,test,lmp] mpi4py
source/install/uv_with_retry.sh pip install --system -e .[cpu,test,lmp] mpi4py
env:
DP_BUILD_TESTING: 1
if: ${{ !matrix.check_memleak }}
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/test_cuda.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,10 +47,10 @@ jobs:
&& sudo apt-get -y install cuda-12-3 libcudnn8=8.9.5.*-1+cuda12.3
if: false # skip as we use nvidia image
- run: python -m pip install -U uv
- run: python -m uv pip install --system "tensorflow>=2.15.0rc0" "torch>=2.2.0"
- run: source/install/uv_with_retry.sh pip install --system "tensorflow>=2.15.0rc0" "torch>=2.2.0"
- run: |
export TENSORFLOW_ROOT=$(python -c 'import importlib,pathlib;print(pathlib.Path(importlib.util.find_spec("tensorflow").origin).parent)')
python -m uv pip install --system -v -e .[gpu,test,lmp,cu12,torch] mpi4py
source/install/uv_with_retry.sh pip install --system -v -e .[gpu,test,lmp,cu12,torch] mpi4py
env:
DP_VARIANT: cuda
DP_ENABLE_NATIVE_OPTIMIZATION: 1
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/test_python.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,10 @@ jobs:
python-version: ${{ matrix.python }}
- run: python -m pip install -U uv
- run: |
uv pip install --system mpich
uv pip install --system "torch==2.3.0+cpu.cxx11.abi" -i https://download.pytorch.org/whl/
source/install/uv_with_retry.sh pip install --system mpich
source/install/uv_with_retry.sh pip install --system "torch==2.3.0+cpu.cxx11.abi" -i https://download.pytorch.org/whl/
export PYTORCH_ROOT=$(python -c 'import torch;print(torch.__path__[0])')
uv pip install --system --only-binary=horovod -e .[cpu,test] horovod[tensorflow-cpu] mpi4py
source/install/uv_with_retry.sh pip install --system --only-binary=horovod -e .[cpu,test] horovod[tensorflow-cpu] mpi4py
env:
# Please note that uv has some issues with finding
# existing TensorFlow package. Currently, it uses
Expand Down
32 changes: 32 additions & 0 deletions source/install/uv_with_retry.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#!/bin/bash
# Run `uv` with the given arguments, retrying when it fails with the transient
# "error decoding response body" error.
#
# Usage: uv_with_retry.sh <uv arguments...>
#
# Behavior:
#   - uv's stdout and stderr are passed through unchanged; stderr is
#     additionally captured in a temporary file so it can be inspected after
#     a failure.
#   - Exits 0 as soon as one attempt succeeds.
#   - If an attempt fails and its stderr contains "error decoding response
#     body", uv is run again after 1 s, for at most 3 attempts in total.
#     Exits 1 when every attempt failed that way.
#   - Any other failure is not retried; the script exits with uv's exit code.
#
# See also:
# https://github.com/astral-sh/uv/issues/2586
# https://github.com/astral-sh/uv/issues/3456
# https://github.com/astral-sh/uv/issues/3514
# https://github.com/astral-sh/uv/issues/4402
tmpstderr=$(mktemp) || exit 1
# Remove the capture file on every exit path instead of repeating `rm` before
# each `exit`.
trap 'rm -f "${tmpstderr}"' EXIT
max_retry=3
while true; do
	# fd 3 routes uv's stdout around the pipe so that only stderr flows
	# through tee. A pipeline is used instead of `2> >(tee ...)` because the
	# shell waits for every pipeline member, so the capture file is complete
	# before grep reads it. tee runs without -a so the file is truncated on
	# each attempt and a failure is judged only on its own stderr.
	{
		uv "$@" 2>&1 1>&3 3>&- | tee "${tmpstderr}" >&2 3>&-
		# PIPESTATUS[0] is uv's status; $? would be tee's.
		exit_code=${PIPESTATUS[0]}
	} 3>&1
	# exit if ok
	if [ "${exit_code}" -eq 0 ]; then
		exit 0
	fi
	# Failures other than "error decoding response body" are not retryable:
	# propagate uv's own exit code.
	if ! grep -q "error decoding response body" "${tmpstderr}"; then
		exit "${exit_code}"
	fi
	# Check the remaining budget before announcing a retry, so the message
	# is only printed when a retry actually follows.
	max_retry=$((max_retry - 1))
	if [ "${max_retry}" -eq 0 ]; then
		echo "Max retry reached, exiting..."
		exit 1
	fi
	echo "Retrying uv in 1 s..."
	sleep 1
done