name: SSH into our runners

# Manually triggered workflow: the person dispatching it picks the runner
# family, the Docker image to start, and whether a single- or multi-GPU
# machine is needed.
on:
  workflow_dispatch:
    inputs:
      # Only the values listed under `options` are mapped to a runner group by
      # the `get_runner` job, so they are offered as a fixed choice instead of
      # free text.
      runner_type:
        description: 'Type of runner to test (a10 or t4)'
        required: true
        type: choice
        options:
          - a10
          - t4
      # Image used as the job container of the SSH job.
      docker_image:
        description: 'Name of the Docker image'
        required: true
        type: string
      num_gpus:
        description: 'Type of the number of gpus to use (`single` or `multi`)'
        required: true
        type: choice
        options:
          - single
          - multi

# Environment variables shared by every job of this workflow.
# Scalar values that look like booleans or numbers are quoted so that they are
# unambiguously strings, whatever YAML flavour parses this file.
env:
  HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
  # Same path as the in-container target of the cache volume mounted by the
  # SSH job.
  HF_HOME: /mnt/cache
  TRANSFORMERS_IS_CI: "yes"
  OMP_NUM_THREADS: "8"
  MKL_NUM_THREADS: "8"
  RUN_SLOW: "yes"
  SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }}
  TF_FORCE_GPU_ALLOW_GROWTH: "true"
  # NOTE(review): applied to every runner type, including the single-GPU
  # ones - confirm this is intended.
  CUDA_VISIBLE_DEVICES: "0,1"

jobs:
  # Translates the (runner_type, num_gpus) inputs into the name of the runner
  # group the SSH job must run on, and exposes it as the `RUNNER` job output.
  get_runner:
    name: "Get runner to use"
    runs-on: ubuntu-22.04
    outputs:
      RUNNER: ${{ steps.set_runner.outputs.RUNNER }}
    steps:
      - name: Get runner to use
        shell: bash
        # Inputs are handed to the script through the environment rather than
        # being spliced into the script text, so their content can never be
        # interpreted as shell code.
        env:
          RUNNER_TYPE: ${{ github.event.inputs.runner_type }}
          NUM_GPUS: ${{ github.event.inputs.num_gpus }}
        run: |
          case "${RUNNER_TYPE}/${NUM_GPUS}" in
            t4/single)  runner="aws-g4dn-2xlarge-cache" ;;
            t4/multi)   runner="aws-g4dn-12xlarge-cache" ;;
            a10/single) runner="aws-g5-4xlarge-cache" ;;
            a10/multi)  runner="aws-g5-12xlarge-cache" ;;
            *)
              # Fail here with a readable message instead of handing an empty
              # runner group to the SSH job.
              echo "::error::Unsupported inputs: runner_type='${RUNNER_TYPE}', num_gpus='${NUM_GPUS}' (expected a10|t4 and single|multi)"
              exit 1
              ;;
          esac
          echo "RUNNER=${runner}" >> "$GITHUB_ENV"

      - name: Set runner to use
        id: set_runner
        # RUNNER was exported through GITHUB_ENV by the previous step; publish
        # it as a step output so it can be used as a job output.
        run: |
          echo "$RUNNER"
          echo "RUNNER=$RUNNER" >> "$GITHUB_OUTPUT"

  # Starts the requested image on the selected runner group, prepares the
  # checkout inside it and then opens an SSH session through Tailscale.
  ssh_runner:
    name: "SSH"
    needs: get_runner
    runs-on:
      group: ${{ needs.get_runner.outputs.RUNNER }}
    container:
      image: ${{ github.event.inputs.docker_image }}
      options: --gpus all --privileged --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/

    steps:
      # The steps below work in /transformers inside the container; presumably
      # the image ships a clone of the repository there - TODO confirm.
      - name: Update clone
        working-directory: /transformers
        run: |
          git fetch && git checkout ${{ github.sha }}

      - name: Cleanup
        working-directory: /transformers
        run: |
          rm -rf tests/__pycache__
          rm -rf tests/models/__pycache__
          rm -rf reports

      - name: Show installed libraries and their versions
        working-directory: /transformers
        run: pip freeze

      - name: NVIDIA-SMI
        run: |
          nvidia-smi

      - name: Store Slack infos
        # Exports `github_actor`: the login of the user who triggered the run,
        # with hyphens replaced by underscores so that it can be used to build
        # the secret name looked up in the next step.
        shell: bash
        env:
          ACTOR: ${{ github.actor }}
        run: |
          echo "$ACTOR"
          # `//` replaces every hyphen, not only the first one.
          github_actor="${ACTOR//-/_}"
          echo "$github_actor"
          echo "github_actor=$github_actor" >> "$GITHUB_ENV"

      - name: Store Slack infos
        # Exports `SLACKCHANNEL`: the secret `<github_actor>_SLACK_ID` when it
        # is defined and non-empty, otherwise the shared CI feedback channel.
        shell: bash
        env:
          USER_SLACK_ID: ${{ secrets[format('{0}_{1}', env.github_actor, 'SLACK_ID')] }}
          DEFAULT_SLACK_CHANNEL: ${{ secrets.SLACK_CIFEEDBACK_CHANNEL }}
        run: |
          echo "$github_actor"
          if [ -n "$USER_SLACK_ID" ]; then
            echo "SLACKCHANNEL=$USER_SLACK_ID" >> "$GITHUB_ENV"
          else
            echo "SLACKCHANNEL=$DEFAULT_SLACK_CHANNEL" >> "$GITHUB_ENV"
          fi

      - name: Tailscale
        # NOTE(review): the action is referenced by the mutable `main` branch;
        # consider pinning it to a commit SHA.
        uses: huggingface/tailscale-action@main
        with:
          authkey: ${{ secrets.TAILSCALE_SSH_AUTHKEY }}
          slackChannel: ${{ env.SLACKCHANNEL }}
          slackToken: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
          waitForSSH: true
          sshTimeout: 15m