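# NOTE(review): the line below appears to be file-viewer page residue, not
# workflow content; it is kept as a comment so the YAML document can parse.
# soutrik | orphan branch | c3d82b0 | raw | history blame | 4.64 kB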
# Workflow display name. Folded with `>-` only to respect line-length limits;
# the resulting string is a single line identical to the original name.
name: >-
  Deploy PyTorch Training with all advanced features like self-hosted EC2
  runner, Docker Buildx, Amazon ECR, Hugging Face Spaces

# Triggers: every push to `master`, plus manual runs from the Actions tab.
on:
  push:
    branches:
      - master
  workflow_dispatch:
# Three jobs run in sequence:
#   start-runner -> deploy (on the runner started by start-runner) -> stop-runner
jobs:
  # Starts the self-hosted EC2 runner and exposes its label and instance id
  # as job outputs for the jobs that follow.
  start-runner:
    name: Start self-hosted EC2 runner
    runs-on: ubuntu-latest
    outputs:
      label: ${{ steps.start-ec2-runner.outputs.label }}
      ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }}
    steps:
      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: ${{ secrets.AWS_REGION }}

      - name: Start EC2 runner
        id: start-ec2-runner
        uses: machulav/ec2-github-runner@v2
        with:
          mode: start
          github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
          # NOTE(review): hard-coded AMI / subnet / security-group ids;
          # presumably tied to one AWS account and region - confirm they
          # match the region stored in secrets.AWS_REGION.
          ec2-image-id: ami-044b0717aadbc9dfa
          ec2-instance-type: t2.xlarge
          subnet-id: subnet-024811dee81325f1c
          security-group-id: sg-0646c2a337a355a31

  # Runs the Docker Compose services, then builds and pushes the image to ECR.
  # Executes on the runner whose label was published by start-runner.
  deploy:
    name: Deploy PyTorch Training Pipeline
    needs: start-runner
    runs-on: ${{ needs.start-runner.outputs.label }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: ${{ secrets.AWS_REGION }}

      # NOTE(review): none of the build commands in this job read from or
      # write to /tmp/.buildx-cache, so this cache looks unused - confirm.
      - name: Cache Docker layers
        uses: actions/cache@v4
        with:
          path: /tmp/.buildx-cache
          key: ${{ runner.os }}-docker-${{ github.sha }}
          restore-keys: |
            ${{ runner.os }}-docker-

      - name: Log in to Amazon ECR
        id: login-ecr
        uses: aws-actions/amazon-ecr-login@v2

      # Secrets are handed to the shell through `env:` instead of being
      # spliced into the script text, so characters such as `$`, `"` or
      # backticks inside a secret are written verbatim rather than being
      # interpreted by the shell. Values defined as secrets are redacted from
      # logs by GitHub, so no explicit `::add-mask::` command is needed.
      - name: Create .env file
        env:
          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          AWS_REGION: ${{ secrets.AWS_REGION }}
        run: |
          # Append (not overwrite) to preserve any existing .env content.
          {
            printf 'AWS_ACCESS_KEY_ID=%s\n' "$AWS_ACCESS_KEY_ID"
            printf 'AWS_SECRET_ACCESS_KEY=%s\n' "$AWS_SECRET_ACCESS_KEY"
            printf 'AWS_REGION=%s\n' "$AWS_REGION"
          } >> .env

      - name: Run Docker Compose for all services
        run: |
          docker-compose build --no-cache
          docker-compose up -d
          # Blocks until the followed containers stop producing output.
          docker-compose logs --follow train eval

          # `up -d` and `logs` do not propagate container exit codes, so
          # inspect them explicitly; otherwise a failed run would still be
          # followed by an image push.
          failed=0
          for service in train eval; do
            for container in $(docker-compose ps -a -q "$service"); do
              code="$(docker inspect --format '{{.State.ExitCode}}' "$container")"
              if [ "$code" != "0" ]; then
                echo "::error::Service '$service' exited with code $code"
                failed=1
              fi
            done
          done

          # Tear down before reporting the result so containers never linger.
          docker-compose down --remove-orphans
          exit "$failed"

      - name: Build, tag, and push Docker image to Amazon ECR
        env:
          REGISTRY: ${{ steps.login-ecr.outputs.registry }}
          REPOSITORY: soutrik71/mnist
          IMAGE_TAG: ${{ github.sha }}
        run: |
          docker build -t "$REGISTRY/$REPOSITORY:$IMAGE_TAG" .
          docker push "$REGISTRY/$REPOSITORY:$IMAGE_TAG"
          docker tag "$REGISTRY/$REPOSITORY:$IMAGE_TAG" "$REGISTRY/$REPOSITORY:latest"
          docker push "$REGISTRY/$REPOSITORY:latest"

      - name: Pull Docker image from ECR and verify
        env:
          REGISTRY: ${{ steps.login-ecr.outputs.registry }}
          REPOSITORY: soutrik71/mnist
          IMAGE_TAG: ${{ github.sha }}
        run: |
          docker pull "$REGISTRY/$REPOSITORY:$IMAGE_TAG"
          docker images | grep "$REGISTRY/$REPOSITORY"

      # Runs even when an earlier step failed so the credentials written to
      # .env are never left behind on the runner's disk.
      - name: Clean up environment
        if: always()
        run: |
          rm -f .env
          docker system prune -af --volumes

  # Stops the EC2 runner. `always()` keeps this job running even when the
  # deploy job failed or was cancelled.
  stop-runner:
    name: Stop self-hosted EC2 runner
    needs:
      - start-runner
      - deploy
    runs-on: ubuntu-latest
    if: ${{ always() }}
    steps:
      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: ${{ secrets.AWS_REGION }}

      - name: Stop EC2 runner
        uses: machulav/ec2-github-runner@v2
        with:
          mode: stop
          github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
          label: ${{ needs.start-runner.outputs.label }}
          ec2-instance-id: ${{ needs.start-runner.outputs.ec2-instance-id }}

      # Best-effort check: a one-shot state query right after the stop call
      # usually still reports `shutting-down`, so poll with the AWS CLI
      # waiter instead. Skipped when no instance id was produced. A timeout
      # is reported but, as before, does not fail the job.
      - name: Validate EC2 termination
        if: ${{ needs.start-runner.outputs.ec2-instance-id != '' }}
        env:
          INSTANCE_ID: ${{ needs.start-runner.outputs.ec2-instance-id }}
        run: |
          if aws ec2 wait instance-terminated --instance-ids "$INSTANCE_ID"; then
            echo "terminated"
          else
            echo "Runner not terminated."
          fi