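# NOTE: page residue from a repository file viewer, kept as a comment (not workflow content):
# soutrik · added env in GHA · 4c0bcea · raw · history blame · 6.39 kB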
# Workflow display name shown in the Actions UI.
name: Deploy PyTorch Training with Hugging Face Sync

# Triggers: a push to either default-branch name, or a manual run.
on:
  push:
    branches: [master, main]
  workflow_dispatch:

# Job definitions follow.
jobs:
# Job: launch a self-hosted runner on EC2 and publish its runner label and
# instance id as job outputs for the jobs that depend on it.
start-runner:
  name: Start self-hosted EC2 runner
  runs-on: ubuntu-latest
  outputs:
    label: ${{ steps.start-ec2-runner.outputs.label }}
    ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }}
  steps:
    # AWS credentials and region all come from repository secrets.
    - name: Configure AWS credentials
      uses: aws-actions/configure-aws-credentials@v4
      with:
        aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
        aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
        aws-region: ${{ secrets.AWS_REGION }}

    # The step id is what the job-level `outputs` above refer to.
    - id: start-ec2-runner
      name: Start EC2 runner
      uses: machulav/ec2-github-runner@v2
      with:
        mode: start
        github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
        ec2-image-id: 'ami-044b0717aadbc9dfa'
        ec2-instance-type: 't2.xlarge'
        subnet-id: 'subnet-024811dee81325f1c'
        security-group-id: 'sg-0646c2a337a355a31'
# Job: on the runner started by `start-runner`, run the docker-compose
# train/eval services, then build the image and push it to Amazon ECR.
deploy:
  name: Deploy PyTorch Training Pipeline
  needs: start-runner
  runs-on: ${{ needs.start-runner.outputs.label }}
  outputs:
    ecr-registry: ${{ steps.login-ecr.outputs.registry }}
    image-tag: ${{ github.sha }}
  steps:
    - name: Checkout repository
      uses: actions/checkout@v4

    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v3

    - name: Configure AWS credentials
      uses: aws-actions/configure-aws-credentials@v4
      with:
        aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
        aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
        aws-region: ${{ secrets.AWS_REGION }}

    # NOTE(review): no step in this job passes --cache-from/--cache-to for
    # /tmp/.buildx-cache, so this cache looks unused -- confirm or remove.
    - name: Cache Docker layers
      uses: actions/cache@v3
      with:
        path: /tmp/.buildx-cache
        key: ${{ runner.os }}-docker-${{ github.sha }}
        restore-keys: |
          ${{ runner.os }}-docker-

    - name: Log in to Amazon ECR
      id: login-ecr
      uses: aws-actions/amazon-ecr-login@v2

    # Secrets reach the script through `env:` instead of being expanded into
    # the script text, so special characters in a value cannot alter the
    # command. Values sourced from `secrets.*` are already masked in the logs,
    # so no explicit `::add-mask::` is needed.
    - name: Create .env file
      env:
        AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
        AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
        AWS_REGION: ${{ secrets.AWS_REGION }}
      run: |
        {
          echo "HYDRA_FULL_ERROR=1"
          echo "AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}"
          echo "AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}"
          echo "AWS_REGION=${AWS_REGION}"
        } >> .env

    - name: Run Docker Compose for all services
      run: |
        # Always tear the stack down, even when a command below fails.
        trap 'docker-compose down --remove-orphans' EXIT

        docker-compose --env-file .env build --no-cache
        docker-compose --env-file .env up -d

        # Blocks until the train and eval containers have exited.
        docker-compose logs --follow train eval

        # `logs --follow` succeeds regardless of how the containers ended, so
        # check their exit codes and fail the step if either one failed.
        failed=0
        for cid in $(docker-compose ps -a -q train eval); do
          code="$(docker inspect --format '{{.State.ExitCode}}' "$cid")"
          if [ "$code" -ne 0 ]; then
            echo "::error::Container $cid (train/eval) exited with code $code"
            failed=1
          fi
        done
        if [ "$failed" -ne 0 ]; then
          exit 1
        fi

    # NOTE(review): .env (with AWS credentials) is still in the build context
    # here -- confirm .dockerignore excludes it so it cannot end up in the image.
    - name: Build, tag, and push Docker image to Amazon ECR
      env:
        REGISTRY: ${{ steps.login-ecr.outputs.registry }}
        REPOSITORY: soutrik71/mnist
        IMAGE_TAG: ${{ github.sha }}
      run: |
        docker build -t "$REGISTRY/$REPOSITORY:$IMAGE_TAG" .
        docker push "$REGISTRY/$REPOSITORY:$IMAGE_TAG"
        docker tag "$REGISTRY/$REPOSITORY:$IMAGE_TAG" "$REGISTRY/$REPOSITORY:latest"
        docker push "$REGISTRY/$REPOSITORY:latest"

    - name: Pull Docker image from ECR and verify
      env:
        REGISTRY: ${{ steps.login-ecr.outputs.registry }}
        REPOSITORY: soutrik71/mnist
        IMAGE_TAG: ${{ github.sha }}
      run: |
        docker pull "$REGISTRY/$REPOSITORY:$IMAGE_TAG"
        docker images | grep "$REGISTRY/$REPOSITORY"

    # Runs even after a failed step so the credentials file and Docker
    # artifacts are not left behind on the runner.
    - name: Clean up environment
      if: ${{ always() }}
      run: |
        rm -f .env
        docker system prune -af --volumes
# Job: after `deploy` succeeds, force-push the checked-out commit (plus a
# generated README.md) to the `main` branch of the Hugging Face Space.
sync-to-hub:
  name: Sync to Hugging Face Hub
  needs: deploy
  runs-on: ubuntu-latest
  env:
    # Space coordinates, shared by the steps below. Named HF_* so they do not
    # shadow the shell's own USER variable.
    HF_USER: soutrik
    HF_SPACE: gradio_demo_MNIST_Classifier
  steps:
    - uses: actions/checkout@v4
      with:
        fetch-depth: 0
        lfs: true

    # The checkout above (lfs: true) already needs git-lfs on the runner, so
    # only the user-level hook/filter setup is required here.
    - name: Install Git LFS
      run: git lfs install

    - name: Configure Git identity
      run: |
        git config --global user.name "soutrik"
        git config --global user.email "soutrik.chowdhury@ab-inbev.com"

    - name: Add remote
      env:
        HF_TOKEN: ${{ secrets.HF_TOKEN }}
      run: |
        git remote add space "https://$HF_USER:$HF_TOKEN@huggingface.co/spaces/$HF_USER/$HF_SPACE"

    - name: Ensure LFS objects are present
      run: git lfs checkout

    # Replaces README.md with the front matter written below.
    - name: Add README.md
      run: |
        cat <<EOF > README.md
        ---
        title: My Gradio App MNIST Classifier
        emoji: 🚀
        colorFrom: blue
        colorTo: green
        sdk: gradio
        sdk_version: "5.7.1"
        app_file: app.py
        pinned: false
        ---
        EOF
        git add README.md
        git commit -m "Add README.md" || echo "Skip commit if no changes"

    # Push HEAD rather than a local `main`: the workflow also runs for
    # `master`, where no local `main` branch exists and the push would fail.
    - name: Push to hub
      run: |
        git push --force space HEAD:main
# Job: terminate the EC2 runner created by `start-runner`. `always()` makes it
# run even when an upstream job failed or was cancelled.
stop-runner:
  name: Stop self-hosted EC2 runner
  needs:
    - start-runner
    - deploy
    - sync-to-hub
  runs-on: ubuntu-latest
  if: ${{ always() }}
  steps:
    - name: Configure AWS credentials
      uses: aws-actions/configure-aws-credentials@v4
      with:
        aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
        aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
        aws-region: ${{ secrets.AWS_REGION }}

    - name: Stop EC2 runner
      uses: machulav/ec2-github-runner@v2
      with:
        mode: stop
        github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
        label: ${{ needs.start-runner.outputs.label }}
        ec2-instance-id: ${{ needs.start-runner.outputs.ec2-instance-id }}

    # Best-effort check: it reports the outcome but never fails the job.
    # An instance normally passes through `shutting-down` after a terminate
    # request, so wait for the `terminated` state before inspecting it;
    # checking immediately would almost always report "not terminated".
    - name: Validate EC2 termination
      env:
        INSTANCE_ID: ${{ needs.start-runner.outputs.ec2-instance-id }}
      run: |
        aws ec2 wait instance-terminated --instance-ids "$INSTANCE_ID" || true
        aws ec2 describe-instances --instance-ids "$INSTANCE_ID" \
          --query "Reservations[].Instances[].State.Name" --output text | grep "terminated" || echo "Runner not terminated."