# Optimum neuron LLM inference cache builder — run #183
# (header text from the GitHub "Workflow file for this run" page view,
# preserved as YAML comments so the file remains parseable)
name: Optimum neuron LLM inference cache builder

on:
  # Manual trigger from the Actions UI
  workflow_dispatch:
  # Release tag pushes only (vX.Y.Z) — plain branch pushes do not trigger
  push:
    tags:
      - 'v[0-9]+.[0-9]+.[0-9]+'
  # PRs to main that touch this workflow or the cache-fill script
  pull_request:
    branches: [main]
    paths:
      - '.github/workflows/cache_llm.yml'
      - 'tools/cache/auto_fill_llm_cache.py'
  # Daily scheduled run at midnight UTC
  schedule:
    - cron: '0 0 * * *'

# One run per ref: a newer run for the same branch/tag cancels the older one.
# github.run_id fallback keeps scheduled/dispatch runs from cancelling each other.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.ref_name || github.run_id }}
  cancel-in-progress: true
jobs:
  # Gate: verify required secrets/setup before spending Neuron runner time.
  sanity:
    name: Sanity
    runs-on: ubuntu-22.04
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Run sanity check
        uses: ./.github/actions/sanity-check
        with:
          hf_token: ${{ secrets.HF_TOKEN_OPTIMUM_NEURON_CACHE }}

  # Build the LLM inference cache for each model config on AWS Inferentia2.
  cache:
    name: Create optimum-neuron LLM inference cache
    needs: sanity
    runs-on:
      group: aws-inf2-8xlarge
    strategy:
      # Let the remaining model configs finish even if one fails.
      fail-fast: false
      matrix:
        # Each entry names a JSON config file under inference-cache-config/
        # in the aws-neuron/optimum-neuron-cache Hub repository.
        config:
          - trn1/llama3
          - trn1/llama4
          - trn1/mixtral
          - trn1/qwen3
          - trn1/qwen3-moe
          - trn1/granite
          - trn1/phi4
          - trn1/smollm3
          - trn2/llama3
          - trn2/llama4
          - trn2/qwen3-moe
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Install Neuronx runtime
        uses: ./.github/actions/install_neuronx_runtime
      - name: Setup virtual environment
        uses: ./.github/actions/setup_venv
      - name: Create cache for ${{ matrix.config }} models
        run: |
          source aws_neuron_venv_pytorch/bin/activate
          config_prefix_url=https://huggingface.co/aws-neuron/optimum-neuron-cache/raw/main/inference-cache-config
          HF_TOKEN=${{ secrets.HF_TOKEN_OPTIMUM_NEURON_CACHE }} \
          python tools/cache/auto_fill_llm_cache.py --config_file ${config_prefix_url}/${{ matrix.config }}.json