---
# Workflow header: display name and triggers.
# NOTE(review): the name mentions Hugging Face Spaces, but no step in this
# workflow references it — confirm the name is still accurate.
name: Deploy PyTorch Training with all advanced features like self-hosted EC2 runner, Docker Buildx, Amazon ECR, Hugging Face Spaces

# Runs on every push to master and on manual dispatch from the Actions UI.
on:
  push:
    branches:
      - master
  workflow_dispatch:

jobs:
  # Starts an EC2 instance registered as a self-hosted runner and exposes its
  # runner label and instance id to the downstream jobs via job outputs.
  start-runner:
    name: Start self-hosted EC2 runner
    runs-on: ubuntu-latest
    outputs:
      label: ${{ steps.start-ec2-runner.outputs.label }}
      ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }}
    steps:
      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: ${{ secrets.AWS_REGION }}

      # The step id must stay `start-ec2-runner`: the job outputs above read
      # `steps.start-ec2-runner.outputs.*`.
      - name: Start EC2 runner
        id: start-ec2-runner
        uses: machulav/ec2-github-runner@v2
        with:
          mode: start
          github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
          ec2-image-id: ami-044b0717aadbc9dfa
          ec2-instance-type: t2.xlarge
          subnet-id: subnet-024811dee81325f1c
          security-group-id: sg-0646c2a337a355a31

# Job `deploy` (nested under `jobs:`): runs on the runner started by `start-runner`.
  # Builds and runs the compose services, then builds the image, pushes it to
  # Amazon ECR under the commit SHA and `latest`, and pulls it back to verify.
  deploy:
    name: Deploy PyTorch Training Pipeline
    needs: start-runner
    runs-on: ${{ needs.start-runner.outputs.label }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: ${{ secrets.AWS_REGION }}

      # NOTE(review): no build command in this job reads or writes
      # /tmp/.buildx-cache (compose builds with --no-cache and the ECR image
      # uses plain `docker build`), so this cache appears unused — confirm
      # whether it should be wired into the builds or dropped.
      - name: Cache Docker layers
        uses: actions/cache@v3
        with:
          path: /tmp/.buildx-cache
          key: ${{ runner.os }}-docker-${{ github.sha }}
          restore-keys: |
            ${{ runner.os }}-docker-

      # The step id must stay `login-ecr`: later steps read
      # `steps.login-ecr.outputs.registry`.
      - name: Log in to Amazon ECR
        id: login-ecr
        uses: aws-actions/amazon-ecr-login@v2

      # Secrets are passed through the step environment instead of being
      # interpolated into the script text, so special characters in a value
      # cannot alter the shell command. Masks are registered before any write.
      - name: Create .env file
        env:
          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          AWS_REGION: ${{ secrets.AWS_REGION }}
        run: |
          echo "::add-mask::$AWS_ACCESS_KEY_ID"
          echo "::add-mask::$AWS_SECRET_ACCESS_KEY"
          {
            echo "AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID"
            echo "AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY"
            echo "AWS_REGION=$AWS_REGION"
          } >> .env

      # `logs --follow` keeps the step attached to the train and eval
      # services' output before the stack is torn down.
      - name: Run Docker Compose for all services
        run: |
          docker-compose build --no-cache
          docker-compose up -d
          docker-compose logs --follow train eval
          docker-compose down --remove-orphans

      - name: Build, tag, and push Docker image to Amazon ECR
        env:
          REGISTRY: ${{ steps.login-ecr.outputs.registry }}
          REPOSITORY: soutrik71/mnist
          IMAGE_TAG: ${{ github.sha }}
        run: |
          docker build -t "$REGISTRY/$REPOSITORY:$IMAGE_TAG" .
          docker push "$REGISTRY/$REPOSITORY:$IMAGE_TAG"
          docker tag "$REGISTRY/$REPOSITORY:$IMAGE_TAG" "$REGISTRY/$REPOSITORY:latest"
          docker push "$REGISTRY/$REPOSITORY:latest"

      - name: Pull Docker image from ECR and verify
        env:
          REGISTRY: ${{ steps.login-ecr.outputs.registry }}
          REPOSITORY: soutrik71/mnist
          IMAGE_TAG: ${{ github.sha }}
        run: |
          docker pull "$REGISTRY/$REPOSITORY:$IMAGE_TAG"
          docker images | grep "$REGISTRY/$REPOSITORY"

      # Always runs so the credentials file and images are removed even when
      # an earlier step fails; the job result still reflects that failure.
      - name: Clean up environment
        if: always()
        run: |
          rm -f .env
          docker system prune -af --volumes

# Job `stop-runner` (nested under `jobs:`): stops the runner started by `start-runner`.
  stop-runner:
    name: Stop self-hosted EC2 runner
    needs:
      - start-runner
      - deploy
    runs-on: ubuntu-latest
    # always(): run even if `deploy` failed or was cancelled, so the EC2
    # instance is not left running.
    if: ${{ always() }}
    steps:
      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: ${{ secrets.AWS_REGION }}

      - name: Stop EC2 runner
        uses: machulav/ec2-github-runner@v2
        with:
          mode: stop
          github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
          label: ${{ needs.start-runner.outputs.label }}
          ec2-instance-id: ${{ needs.start-runner.outputs.ec2-instance-id }}

      # Informational only: the trailing `|| echo` means this step reports a
      # non-terminated state but never fails the job. The instance id is
      # passed via env and quoted rather than interpolated into the script.
      - name: Validate EC2 termination
        env:
          EC2_INSTANCE_ID: ${{ needs.start-runner.outputs.ec2-instance-id }}
        run: |
          aws ec2 describe-instances --instance-ids "$EC2_INSTANCE_ID" \
            --query "Reservations[].Instances[].State.Name" --output text | grep "terminated" || echo "Runner not terminated."