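# NOTE: the lines below are web-page residue captured with this file; they are
# not part of the workflow definition.
# Skip to content
#
# Optimize lookup
#
# Optimize lookup #798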

name: Optimum Neuron - Test training on Trainium

# Triggers:
#   - pushes to main and pull requests targeting main, limited to changes in
#     the packaging file, the training models, the training tests, the shared
#     utils, or this workflow file itself;
#   - a weekday schedule.
# The `push` and `pull_request` path filters are intentionally identical; keep
# them in sync when editing either list.
on:
  push:
    branches: [main]
    paths:
      - "pyproject.toml"
      - "optimum/neuron/models/training/**.py"
      - "tests/training/**.py"
      - ".github/workflows/test_trainium_training.yml"
      - "optimum/neuron/utils/**.py"
  pull_request:
    branches: [main]
    paths:
      - "pyproject.toml"
      - "optimum/neuron/models/training/**.py"
      - "tests/training/**.py"
      - ".github/workflows/test_trainium_training.yml"
      - "optimum/neuron/utils/**.py"
  schedule:
    # 04:00 UTC (i.e. 05:00 in UTC+1), Monday through Friday.
    - cron: "0 4 * * 1-5"
# Runs are grouped by workflow name plus the pull request's head branch, and a
# newer run in the same group cancels the one in progress. `github.head_ref` is
# only populated for pull request events, so every other event falls back to
# the unique `github.run_id` and ends up alone in its group.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true
jobs:
  # Single job: sets up the Neuron toolchain on a runner from the
  # `aws-trn1-32xlarge` runner group, installs the package with its training
  # extra, then collects and runs the tests marked `is_trainium_test`.
  optimum-neuron-tests:
    name: Run distributed tests on Trainium 1
    runs-on:
      group: aws-trn1-32xlarge
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      # The three steps below are composite actions local to this repository.
      - name: Install Neuronx runtime
        uses: ./.github/actions/install_neuronx_runtime
      - name: Prepare virtual environment
        uses: ./.github/actions/prepare_venv
      - name: Install optimum-neuron
        uses: ./.github/actions/install_optimum_neuron
      - name: Setup PATH
        # Appending to the file named by GITHUB_PATH extends PATH for all
        # subsequent steps; quoted so an unusual path cannot be word-split.
        run: echo "/home/ubuntu/.local/bin" >> "$GITHUB_PATH"
      - name: Install training dependencies
        run: |
          source aws_neuron_venv_pytorch/bin/activate
          # Quoted so the shell never treats `[training]` as a glob pattern.
          pip install ".[training]"
      - name: Collect all tests on Neuron Cores
        # The secret is passed through `env` instead of being interpolated
        # into the script text, so its value is never parsed by the shell.
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN_OPTIMUM_NEURON_CI }}
        run: |
          source aws_neuron_venv_pytorch/bin/activate
          # Collection only: surfaces import/collection errors before the
          # real test run starts.
          pytest -m "is_trainium_test" tests/training/ --collect-only
      - name: All training tests except overfit tests on Neuron Cores
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN_OPTIMUM_NEURON_CI }}
          # Read by the test suite; its exact effect is not visible in this
          # file -- TODO confirm against tests/training.
          EARLY_EXIT: "1"
        run: |
          source aws_neuron_venv_pytorch/bin/activate
          pytest \
            -m "is_trainium_test" \
            --ignore tests/training/test_overfit.py \
            tests/training/ \
            -v
      # TODO: fix the overfit tests recompilation issue ASAP.
      # The step below is disabled until then. It is kept in the same shape as
      # the active steps (token and flags passed through `env`).
      # - name: Overfit tests on Neuron Cores
      #   env:
      #     IS_CRON: ${{ github.event_name == 'schedule' }}
      #     HF_TOKEN: ${{ secrets.HF_TOKEN_OPTIMUM_NEURON_CI }}
      #     EARLY_EXIT: "1"
      #   run: |
      #     # There are two scenarios for running the overfit tests:
      #     #   - Run all the overfit tests if the workflow is triggered by a
      #     #     cron job.
      #     #   - Or run the overfit tests only on the flagship model if the
      #     #     workflow is triggered by a push or pull request.
      #     source aws_neuron_venv_pytorch/bin/activate
      #     if [ "$IS_CRON" = "true" ]; then
      #       echo "Running all overfit tests"
      #       pytest \
      #         -m "is_trainium_test" \
      #         tests/training/test_overfit.py \
      #         -v
      #     else
      #       echo "Running overfit tests only on the flagship model"
      #       pytest \
      #         -m "is_trainium_test and flagship_model" \
      #         tests/training/test_overfit.py \
      #         -vs
      #     fi