---
# Workflow identity and triggers.
# Display name shown for this workflow's runs.
name: Deploy PyTorch Training with Hugging Face Sync

# Runs on every push to either default-branch name, and can also be started
# manually (workflow_dispatch takes no inputs here).
on:
  push:
    branches: [master, main]
  workflow_dispatch:

jobs:
  # ---------------------------------------------------------------------------
  # start-runner: launches an EC2 instance through machulav/ec2-github-runner
  # and exports the runner label and instance id for the later jobs.
  # ---------------------------------------------------------------------------
  start-runner:
    name: Start self-hosted EC2 runner
    runs-on: ubuntu-latest
    outputs:
      label: ${{ steps.start-ec2-runner.outputs.label }}
      ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }}
    steps:
      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: ${{ secrets.AWS_REGION }}

      - name: Start EC2 runner
        id: start-ec2-runner
        uses: machulav/ec2-github-runner@v2
        with:
          mode: start
          github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
          ec2-image-id: ami-044b0717aadbc9dfa
          ec2-instance-type: t2.xlarge
          subnet-id: subnet-024811dee81325f1c
          security-group-id: sg-0646c2a337a355a31

  # ---------------------------------------------------------------------------
  # deploy: runs on the runner started above. Builds and runs the compose
  # services, then builds the image and pushes it to Amazon ECR under both the
  # commit SHA tag and `latest`.
  # ---------------------------------------------------------------------------
  deploy:
    name: Deploy PyTorch Training Pipeline
    needs: start-runner
    runs-on: ${{ needs.start-runner.outputs.label }}
    outputs:
      ecr-registry: ${{ steps.login-ecr.outputs.registry }}
      image-tag: ${{ github.sha }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: ${{ secrets.AWS_REGION }}

      # NOTE(review): no build command in this job reads from or writes to
      # /tmp/.buildx-cache (the compose build even passes --no-cache), so this
      # cache looks unused -- confirm, then wire it up or remove it.
      - name: Cache Docker layers
        uses: actions/cache@v3
        with:
          path: /tmp/.buildx-cache
          key: ${{ runner.os }}-docker-${{ github.sha }}
          restore-keys: |
            ${{ runner.os }}-docker-

      - name: Log in to Amazon ECR
        id: login-ecr
        uses: aws-actions/amazon-ecr-login@v2

      # Secrets reach the script through environment variables rather than
      # being templated into the script text, so their contents can never be
      # interpreted as shell syntax. GitHub redacts secret values in logs
      # automatically, so no explicit ::add-mask:: commands are required.
      - name: Create .env file
        env:
          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          AWS_REGION: ${{ secrets.AWS_REGION }}
        run: |
          # Appends (>>) so any existing .env content is kept.
          {
            printf '%s\n' "HYDRA_FULL_ERROR=1"
            printf '%s\n' "AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}"
            printf '%s\n' "AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}"
            printf '%s\n' "AWS_REGION=${AWS_REGION}"
          } >> .env

      # NOTE(review): `logs --follow` streams output from the train and eval
      # services, but nothing here inspects the containers' exit codes, so a
      # failed training run presumably does not fail this step -- confirm.
      - name: Run Docker Compose for all services
        run: |
          docker-compose --env-file .env build --no-cache
          docker-compose --env-file .env up -d
          docker-compose logs --follow train eval
          docker-compose down --remove-orphans

      - name: Build, tag, and push Docker image to Amazon ECR
        env:
          REGISTRY: ${{ steps.login-ecr.outputs.registry }}
          REPOSITORY: soutrik71/mnist
          IMAGE_TAG: ${{ github.sha }}
        run: |
          docker build -t "$REGISTRY/$REPOSITORY:$IMAGE_TAG" .
          docker push "$REGISTRY/$REPOSITORY:$IMAGE_TAG"
          docker tag "$REGISTRY/$REPOSITORY:$IMAGE_TAG" "$REGISTRY/$REPOSITORY:latest"
          docker push "$REGISTRY/$REPOSITORY:latest"

      # Pulls the SHA-tagged image back and lists it; grep exits non-zero
      # (failing the step) if no matching image is present locally.
      - name: Pull Docker image from ECR and verify
        env:
          REGISTRY: ${{ steps.login-ecr.outputs.registry }}
          REPOSITORY: soutrik71/mnist
          IMAGE_TAG: ${{ github.sha }}
        run: |
          docker pull "$REGISTRY/$REPOSITORY:$IMAGE_TAG"
          docker images | grep "$REGISTRY/$REPOSITORY"

      - name: Clean up environment
        run: |
          docker system prune -af --volumes

  # ---------------------------------------------------------------------------
  # sync-to-hub: after a successful deploy, force-pushes the checked-out
  # commit (plus a generated README.md) to the Hugging Face Space.
  # ---------------------------------------------------------------------------
  sync-to-hub:
    name: Sync to Hugging Face Hub
    needs: deploy
    runs-on: ubuntu-latest
    steps:
      # Full history and LFS objects are fetched because the whole branch is
      # pushed to another remote below.
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
          lfs: true

      - name: Install Git LFS
        run: |
          curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | sudo bash
          # -y: never wait on a confirmation prompt in non-interactive CI.
          sudo apt-get install -y git-lfs
          git lfs install

      - name: Configure Git identity
        run: |
          git config --global user.name "soutrik"
          git config --global user.email "soutrik.chowdhury@ab-inbev.com"

      # Registers the Space as remote `space` with the token embedded in the
      # URL; the push step below reuses this remote. The account name is kept
      # in HF_USER so the shell's own USER variable is not overridden.
      - name: Add remote
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
          HF_USER: soutrik
          SPACE: gradio_demo_MNIST_Classifier
        run: |
          git remote add space "https://${HF_USER}:${HF_TOKEN}@huggingface.co/spaces/${HF_USER}/${SPACE}"

      - name: Ensure LFS objects are present
        run: git lfs checkout

      # Writes the Space's front-matter README and commits it; the commit is
      # skipped when the file is unchanged.
      # NOTE(review): the `emoji: π` value may be an encoding casualty of an
      # emoji character -- confirm the intended value.
      - name: Add README.md
        run: |
          cat <<EOF > README.md
          ---
          title: My Gradio App MNIST Classifier
          emoji: π
          colorFrom: blue
          colorTo: green
          sdk: gradio
          sdk_version: "5.7.1"
          app_file: app.py
          pinned: false
          ---
          EOF
          git add README.md
          git commit -m "Add README.md" || echo "Skip commit if no changes"

      # Pushes the current commit to the Space's `main` branch. Using HEAD as
      # the source makes this work whether the workflow ran on `master`,
      # `main`, or a manually dispatched ref; a bare `main` refspec would
      # require a local branch of that name.
      - name: Push to hub
        run: |
          git push --force space HEAD:refs/heads/main

  # ---------------------------------------------------------------------------
  # stop-runner: always runs (even when earlier jobs fail or are cancelled) so
  # the EC2 runner started by start-runner is stopped.
  # ---------------------------------------------------------------------------
  stop-runner:
    name: Stop self-hosted EC2 runner
    needs:
      - start-runner
      - deploy
      - sync-to-hub
    runs-on: ubuntu-latest
    if: ${{ always() }}
    steps:
      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: ${{ secrets.AWS_REGION }}

      - name: Stop EC2 runner
        uses: machulav/ec2-github-runner@v2
        with:
          mode: stop
          github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
          label: ${{ needs.start-runner.outputs.label }}
          ec2-instance-id: ${{ needs.start-runner.outputs.ec2-instance-id }}

      # Informational only: when the reported state is not "terminated" the
      # step prints a message and still succeeds.
      # NOTE(review): this runs right after the stop request, so the instance
      # may not have reached "terminated" yet -- confirm whether a wait is
      # wanted here.
      - name: Validate EC2 termination
        env:
          INSTANCE_ID: ${{ needs.start-runner.outputs.ec2-instance-id }}
        run: |
          aws ec2 describe-instances --instance-ids "$INSTANCE_ID" \
            --query "Reservations[].Instances[].State.Name" --output text | grep "terminated" || echo "Runner not terminated."