Optimize lookup #798
Workflow file for this run
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: Optimum Neuron - Test training on Trainium

# Triggers: pushes and pull requests against `main` that touch the training
# code, its tests, the shared utils, the packaging metadata or this workflow
# itself, plus a scheduled run on weekdays.
on:
  push:
    branches:
      - main
    paths:
      - 'pyproject.toml'
      - 'optimum/neuron/models/training/**.py'
      - 'tests/training/**.py'
      - '.github/workflows/test_trainium_training.yml'
      - 'optimum/neuron/utils/**.py'
  pull_request:
    branches:
      - main
    paths:
      - 'pyproject.toml'
      - 'optimum/neuron/models/training/**.py'
      - 'tests/training/**.py'
      - '.github/workflows/test_trainium_training.yml'
      - 'optimum/neuron/utils/**.py'
  schedule:
    # Cron is evaluated in UTC: 04:00 UTC (05:00 in UTC+1), Monday-Friday.
    - cron: '0 4 * * 1-5'
# One concurrency group per workflow and PR head branch. `github.head_ref` is
# only set for pull_request events, so other events fall back to the unique
# run id and therefore never share a group with (or cancel) another run.
concurrency:
  group: '${{ github.workflow }}-${{ github.head_ref || github.run_id }}'
  cancel-in-progress: true
jobs:
  # Runs the Trainium training test suite on a runner from the
  # `aws-trn1-32xlarge` runner group.
  optimum-neuron-tests:
    name: Run distributed tests on Trainium 1
    runs-on:
      group: aws-trn1-32xlarge
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      # Local composite actions: they live in this repository, so they can
      # only run after the checkout step above.
      - name: Install Neuronx runtime
        uses: ./.github/actions/install_neuronx_runtime

      - name: Prepare virtual environment
        uses: ./.github/actions/prepare_venv

      - name: Install optimum-neuron
        uses: ./.github/actions/install_optimum_neuron

      - name: Setup PATH
        # Quote the target so the redirect is safe whatever the path contains.
        run: echo "/home/ubuntu/.local/bin" >> "$GITHUB_PATH"

      - name: Install training dependencies
        run: |
          source aws_neuron_venv_pytorch/bin/activate
          # Quoted so the shell never treats `[training]` as a glob pattern.
          pip install ".[training]"

      - name: Collect all tests on Neuron Cores
        # The token is handed over through the step environment instead of
        # being interpolated into the script text, so the secret value is
        # never spliced into the shell command line.
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN_OPTIMUM_NEURON_CI }}
        run: |
          source aws_neuron_venv_pytorch/bin/activate
          # Collection only: checks that every test can be discovered before
          # the real run below.
          pytest -m "is_trainium_test" tests/training/ --collect-only

      - name: All training tests except overfit tests on Neuron Cores
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN_OPTIMUM_NEURON_CI }}
          EARLY_EXIT: "1"
        run: |
          source aws_neuron_venv_pytorch/bin/activate
          pytest \
            -m "is_trainium_test" \
            --ignore tests/training/test_overfit.py \
            tests/training/ \
            -v

      # TODO: fix the overfit tests recompilation issue ASAP, then re-enable
      # the step below.
      # - name: Overfit tests on Neuron Cores
      #   env:
      #     IS_CRON: ${{ github.event_name == 'schedule' }}
      #     HF_TOKEN: ${{ secrets.HF_TOKEN_OPTIMUM_NEURON_CI }}
      #     EARLY_EXIT: "1"
      #   run: |
      #     # There are two scenarios for running the overfit tests:
      #     #   - Run all the overfit tests if the workflow is triggered by a
      #     #     cron job.
      #     #   - Or run the overfit tests only on the flagship model if the
      #     #     workflow is triggered by a push or pull request.
      #     source aws_neuron_venv_pytorch/bin/activate
      #     if [ "$IS_CRON" = "true" ]; then
      #       echo "Running all overfit tests"
      #       pytest \
      #         -m "is_trainium_test" \
      #         tests/training/test_overfit.py \
      #         -v
      #     else
      #       echo "Running overfit tests only on the flagship model"
      #       pytest \
      #         -m "is_trainium_test and flagship_model" \
      #         tests/training/test_overfit.py \
      #         -vs
      #     fi