# Reusable workflow (triggered via `workflow_call`) that runs the push CI on
# self-hosted AMD GPU runners.
#
# Job order: check_runner_status -> check_runners -> setup_gpu ->
# run_models_gpu -> send_results (the last one always runs).
#
# Input:
#   gpu_flavor  Runner label appended to `runs-on` to select the GPU type.
name: Self-hosted runner AMD GPU (push)

on:
  workflow_call:
    inputs:
      gpu_flavor:
        required: true
        type: string

env:
  HF_HOME: /mnt/cache
  # Quoted so the value stays the literal string whichever YAML loader reads it.
  TRANSFORMERS_IS_CI: 'yes'
  OMP_NUM_THREADS: 8
  MKL_NUM_THREADS: 8
  PYTEST_TIMEOUT: 60
  TF_FORCE_GPU_ALLOW_GROWTH: 'true'
  HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}

jobs:
  # Runs utils/check_self_hosted_runner.py from a GitHub-hosted machine before
  # any self-hosted job is scheduled.
  # NOTE(review): the target runner name is hard-coded and does not depend on
  # `inputs.gpu_flavor` - confirm this is intended.
  check_runner_status:
    name: Check Runner Status
    runs-on: ubuntu-22.04
    steps:
      - name: Checkout transformers
        uses: actions/checkout@v4
        with:
          fetch-depth: 2

      - name: Check Runner Status
        env:
          # Passed through the environment so the secret is never written into
          # the generated script text.
          ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
        run: python utils/check_self_hosted_runner.py --target_runners amd-mi210-single-gpu-ci-runner-docker --token "$ACCESS_REPO_INFO_TOKEN"

  # Prints ROCm device information inside the CI container on each machine type.
  check_runners:
    name: Check Runners
    needs: check_runner_status
    strategy:
      matrix:
        machine_type: [single-gpu, multi-gpu]
    runs-on: [self-hosted, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
    container:
      image: huggingface/transformers-pytorch-amd-gpu-push-ci
      options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
    steps:
      - name: ROCM-SMI
        run: |
          rocm-smi
      - name: ROCM-INFO
        run: |
          rocminfo | grep "Agent" -A 14
      - name: Show ROCR environment
        run: |
          echo "ROCR: $ROCR_VISIBLE_DEVICES"

  # Checks out the commit under test in /transformers, runs the test fetcher and
  # publishes the `matrix` / `test_map` outputs consumed by run_models_gpu.
  setup_gpu:
    name: Setup
    needs: check_runners
    strategy:
      matrix:
        machine_type: [single-gpu, multi-gpu]
    runs-on: [self-hosted, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
    container:
      image: huggingface/transformers-pytorch-amd-gpu-push-ci
      options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
      test_map: ${{ steps.set-matrix.outputs.test_map }}
    env:
      # Branch / SHA as reported by a `push` event and by a `workflow_run`
      # event; whichever pair is non-empty is used below.
      CI_BRANCH_PUSH: ${{ github.event.ref }}
      CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }}
      CI_SHA_PUSH: ${{ github.event.head_commit.id }}
      CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }}
    steps:
      # Exports CI_BRANCH / CI_SHA for the following steps: the push value when
      # it is non-empty, otherwise the workflow_run value.
      - name: Prepare custom environment variables
        shell: bash
        run: |
          CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''}
          echo "$CI_BRANCH_PUSH"
          echo "$CI_BRANCH_WORKFLOW_RUN"
          echo "$CI_SHA_PUSH"
          echo "$CI_SHA_WORKFLOW_RUN"
          echo "CI_BRANCH=${CI_BRANCH_PUSH:-$CI_BRANCH_WORKFLOW_RUN}" >> "$GITHUB_ENV"
          echo "CI_SHA=${CI_SHA_PUSH:-$CI_SHA_WORKFLOW_RUN}" >> "$GITHUB_ENV"

      - name: print environment variables
        run: |
          echo "env.CI_BRANCH = $CI_BRANCH"
          echo "env.CI_SHA = $CI_SHA"

      # The branch and SHA are read as shell variables rather than expanded
      # with `${{ }}`, so their content is never parsed as shell syntax. They
      # are left unquoted on purpose: an empty value then still yields a bare
      # `git checkout`, exactly as before.
      - name: Update clone using environment variables
        working-directory: /transformers
        run: |
          echo "original branch = $(git branch --show-current)"
          git fetch && git checkout $CI_BRANCH
          echo "updated branch = $(git branch --show-current)"
          git checkout $CI_SHA
          echo "log = $(git log -n 1)"

      - name: Cleanup
        working-directory: /transformers
        run: |
          rm -rf tests/__pycache__
          rm -rf tests/models/__pycache__
          rm -rf reports

      - name: Show installed libraries and their versions
        working-directory: /transformers
        run: pip freeze

      # NOTE(review): no `shell:` is set here, so `pipefail` is presumably not
      # active and the step's status is that of `tee` - confirm whether a
      # failing tests_fetcher.py should fail the job.
      - name: Fetch the tests to run
        working-directory: /transformers
        run: |
          pip install --upgrade git-python
          python3 utils/tests_fetcher.py --diff_with_last_commit | tee test_preparation.txt

      - name: Report fetched tests
        uses: actions/upload-artifact@v4
        with:
          name: test_fetched
          path: /transformers/test_preparation.txt

      # Publishes the keys of test_map.json as `matrix` and the whole mapping as
      # `test_map`. When the file is absent a single "dummy" entry is emitted,
      # which run_models_gpu's `if:` uses to skip itself.
      # NOTE(review): both values are Python reprs (single-quoted), not strict
      # JSON; they are consumed by `fromJson(...)` and passed verbatim to
      # notification_service.py, so confirm every consumer before changing the
      # format.
      - id: set-matrix
        name: Organize tests into models
        working-directory: /transformers
        run: |
          if [ -f test_map.json ]; then
              keys=$(python3 -c 'import json; fp = open("test_map.json"); test_map = json.load(fp); fp.close(); d = list(test_map.keys()); print(d)')
              test_map=$(python3 -c 'import json; fp = open("test_map.json"); test_map = json.load(fp); fp.close(); print(test_map)')
          else
              keys=$(python3 -c 'keys = ["dummy"]; print(keys)')
              test_map=$(python3 -c 'test_map = {"dummy": []}; print(test_map)')
          fi
          echo "$keys"
          echo "$test_map"
          echo "matrix=$keys" >> "$GITHUB_OUTPUT"
          echo "test_map=$test_map" >> "$GITHUB_OUTPUT"

  # Runs the selected tests once per (folder, machine_type) pair and uploads the
  # report directory. Skipped when the matrix is the "dummy" placeholder.
  run_models_gpu:
    name: Model tests
    needs: setup_gpu
    if: contains(fromJson(needs.setup_gpu.outputs.matrix), 'dummy') != true
    strategy:
      fail-fast: false
      matrix:
        folders: ${{ fromJson(needs.setup_gpu.outputs.matrix) }}
        machine_type: [single-gpu, multi-gpu]
    runs-on: [self-hosted, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
    container:
      image: huggingface/transformers-pytorch-amd-gpu-push-ci
      options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
    env:
      CI_BRANCH_PUSH: ${{ github.event.ref }}
      CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }}
      CI_SHA_PUSH: ${{ github.event.head_commit.id }}
      CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }}
    steps:
      # Exports CI_BRANCH / CI_SHA for the following steps: the push value when
      # it is non-empty, otherwise the workflow_run value.
      - name: Prepare custom environment variables
        shell: bash
        run: |
          CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''}
          echo "$CI_BRANCH_PUSH"
          echo "$CI_BRANCH_WORKFLOW_RUN"
          echo "$CI_SHA_PUSH"
          echo "$CI_SHA_WORKFLOW_RUN"
          echo "CI_BRANCH=${CI_BRANCH_PUSH:-$CI_BRANCH_WORKFLOW_RUN}" >> "$GITHUB_ENV"
          echo "CI_SHA=${CI_SHA_PUSH:-$CI_SHA_WORKFLOW_RUN}" >> "$GITHUB_ENV"

      - name: print environment variables
        run: |
          echo "env.CI_BRANCH = $CI_BRANCH"
          echo "env.CI_SHA = $CI_SHA"

      # Shell variables instead of `${{ }}` expansion keep the branch name out
      # of the script text; unquoted on purpose so an empty value still yields
      # a bare `git checkout`.
      - name: Update clone using environment variables
        working-directory: /transformers
        run: |
          echo "original branch = $(git branch --show-current)"
          git fetch && git checkout $CI_BRANCH
          echo "updated branch = $(git branch --show-current)"
          git checkout $CI_SHA
          echo "log = $(git log -n 1)"

      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
        working-directory: /transformers
        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .

      # Exports `matrix_folders`: the folder name with its first "models/"
      # replaced by "models_". It is used for the artifact name below, while the
      # report directory keeps the original folder name.
      - name: Echo folder ${{ matrix.folders }}
        shell: bash
        run: |
          echo "${{ matrix.folders }}"
          echo "${{ fromJson(needs.setup_gpu.outputs.test_map)[matrix.folders] }}"
          matrix_folders=${{ matrix.folders }}
          matrix_folders=${matrix_folders/'models/'/'models_'}
          echo "$matrix_folders"
          echo "matrix_folders=$matrix_folders" >> "$GITHUB_ENV"

      - name: ROCM-SMI
        run: |
          rocm-smi
      - name: ROCM-INFO
        run: |
          rocminfo | grep "Agent" -A 14
      - name: Show ROCR environment
        run: |
          echo "ROCR: $ROCR_VISIBLE_DEVICES"

      - name: Environment
        working-directory: /transformers
        run: |
          python3 utils/print_env.py

      - name: Show installed libraries and their versions
        working-directory: /transformers
        run: pip freeze

      - name: Run all non-slow selected tests on GPU
        working-directory: /transformers
        run: |
          python3 -m pytest -n 2 --dist=loadfile -v --make-reports=${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports ${{ fromJson(needs.setup_gpu.outputs.test_map)[matrix.folders] }} -m "not not_device_test"

      - name: Failure short reports
        if: ${{ failure() }}
        continue-on-error: true
        run: cat /transformers/reports/${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/failures_short.txt

      - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports"
        if: ${{ always() }}
        uses: actions/upload-artifact@v4
        with:
          name: ${{ matrix.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports
          path: /transformers/reports/${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports

  # Always runs: downloads the uploaded artifacts and calls
  # utils/notification_service.py with the statuses of the previous jobs.
  send_results:
    name: Send results to webhook
    runs-on: ubuntu-22.04
    if: always()
    needs: [check_runner_status, check_runners, setup_gpu, run_models_gpu]
    env:
      CI_BRANCH_PUSH: ${{ github.event.ref }}
      CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }}
      CI_SHA_PUSH: ${{ github.event.head_commit.id }}
      CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }}
    steps:
      - name: Preliminary job status
        shell: bash
        run: |
          echo "Runner availability: ${{ needs.check_runner_status.result }}"
          echo "Setup status: ${{ needs.setup_gpu.result }}"
          echo "Runner status: ${{ needs.check_runners.result }}"

      # Exports CI_BRANCH / CI_SHA for the following steps: the push value when
      # it is non-empty, otherwise the workflow_run value.
      - name: Prepare custom environment variables
        shell: bash
        run: |
          CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''}
          echo "$CI_BRANCH_PUSH"
          echo "$CI_BRANCH_WORKFLOW_RUN"
          echo "$CI_SHA_PUSH"
          echo "$CI_SHA_WORKFLOW_RUN"
          echo "CI_BRANCH=${CI_BRANCH_PUSH:-$CI_BRANCH_WORKFLOW_RUN}" >> "$GITHUB_ENV"
          echo "CI_SHA=${CI_SHA_PUSH:-$CI_SHA_WORKFLOW_RUN}" >> "$GITHUB_ENV"

      - name: print environment variables
        run: |
          echo "env.CI_BRANCH = $CI_BRANCH"
          echo "env.CI_SHA = $CI_SHA"

      - uses: actions/checkout@v4
        with:
          fetch-depth: 20

      # Shell variables instead of `${{ }}` expansion keep the branch name out
      # of the script text; unquoted on purpose so an empty value still yields
      # a bare `git checkout`.
      - name: Update clone using environment variables
        run: |
          echo "original branch = $(git branch --show-current)"
          git fetch && git checkout $CI_BRANCH
          echo "updated branch = $(git branch --show-current)"
          git checkout $CI_SHA
          echo "log = $(git log -n 1)"

      - uses: actions/download-artifact@v4
      - name: Send message to Slack
        env:
          CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
          CI_SLACK_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID }}
          CI_SLACK_CHANNEL_ID_DAILY: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY }}
          CI_SLACK_CHANNEL_ID_AMD: ${{ secrets.CI_SLACK_CHANNEL_ID_AMD }}
          CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }}
          CI_SLACK_REPORT_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID_AMD }}
          ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
          CI_EVENT: Push CI (AMD) - ${{ inputs.gpu_flavor }}
          CI_TITLE_PUSH: ${{ github.event.head_commit.message }}
          CI_TITLE_WORKFLOW_RUN: ${{ github.event.workflow_run.head_commit.message }}
          CI_SHA: ${{ env.CI_SHA }}
          RUNNER_STATUS: ${{ needs.check_runner_status.result }}
          RUNNER_ENV_STATUS: ${{ needs.check_runners.result }}
          SETUP_STATUS: ${{ needs.setup_gpu.result }}
        run: |
          pip install huggingface_hub
          pip install slack_sdk
          pip show slack_sdk
          python utils/notification_service.py "${{ needs.setup_gpu.outputs.matrix }}"