Rename ci-cd to ci-cd.yml #28
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# name: CI/CD Pipeline
# on:
#   push:
#     branches: [ main, develop ]
#   pull_request:
#     branches: [ main, develop ]
#   release:
#     types: [ created ]
# env:
#   PYTHON_VERSION: '3.10'
#   PYTORCH_VERSION: '2.0.0'
# jobs:
#   code-quality:
#     name: Code Quality Checks
#     runs-on: ubuntu-latest
#     steps:
#       - uses: actions/checkout@v3
#       - name: Set up Python
#         uses: actions/setup-python@v4
#         with:
#           python-version: ${{ env.PYTHON_VERSION }}
#       - name: Install dependencies
#         run: |
#           pip install black isort flake8 mypy pylint bandit
#       - name: Black formatting check
#         run: black --check .
#       - name: isort import sorting check
#         run: isort --check-only .
#       - name: Flake8 linting
#         run: flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
#       - name: Security check with bandit
#         run: bandit -r . -f json -o bandit-report.json
#       - name: Type checking with mypy
#         run: mypy . --ignore-missing-imports || true
#   unit-tests:
#     name: Unit Tests
#     runs-on: ubuntu-latest
#     strategy:
#       matrix:
#         python-version: ['3.8', '3.9', '3.10']
#     steps:
#       - uses: actions/checkout@v3
#       - name: Set up Python ${{ matrix.python-version }}
#         uses: actions/setup-python@v4
#         with:
#           python-version: ${{ matrix.python-version }}
#       - name: Install dependencies
#         run: |
#           pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu
#           pip install -r requirements.txt
#           pip install pytest pytest-cov pytest-xdist
#           pip install -e .
#       - name: Run unit tests
#         run: |
#           pytest test_distributed.py -v --cov=. --cov-report=xml --cov-report=html
#       - name: Upload coverage to Codecov
#         uses: codecov/codecov-action@v3
#         with:
#           file: ./coverage.xml
#   integration-tests:
#     name: Integration Tests (GPU)
#     runs-on: [self-hosted, gpu]
#     if: github.event_name == 'push'
#     steps:
#       - uses: actions/checkout@v3
#       - name: Set up Python
#         uses: actions/setup-python@v4
#         with:
#           python-version: ${{ env.PYTHON_VERSION }}
#       - name: Install dependencies
#         run: |
#           pip install torch torchvision --index-url https://download.pytorch.org/whl/cu118
#           pip install -r requirements.txt
#           pip install -e .
#       - name: Run single GPU tests
#         run: |
#           python production_train.py --batch-size 16 --epochs 1
#       - name: Run multi-GPU tests
#         run: |
#           torchrun --nproc_per_node=2 production_train.py --batch-size 16 --epochs 1 --strategy ddp
#       - name: Run benchmarks
#         run: |
#           python run_benchmark.py --gpus 1 2 --strategies ddp --batch-sizes 32
#   docker-build:
#     name: Build Docker Image
#     runs-on: ubuntu-latest
#     needs: [code-quality, unit-tests]
#     steps:
#       - uses: actions/checkout@v3
#       - name: Set up Docker Buildx
#         uses: docker/setup-buildx-action@v2
#       - name: Login to DockerHub
#         uses: docker/login-action@v2
#         with:
#           username: ${{ secrets.DOCKERHUB_USERNAME }}
#           password: ${{ secrets.DOCKERHUB_TOKEN }}
#       - name: Build and push
#         uses: docker/build-push-action@v4
#         with:
#           context: .
#           push: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }}
#           tags: |
#             ${{ secrets.DOCKERHUB_USERNAME }}/distributed-training:latest
#             ${{ secrets.DOCKERHUB_USERNAME }}/distributed-training:${{ github.sha }}
#           cache-from: type=registry,ref=${{ secrets.DOCKERHUB_USERNAME }}/distributed-training:buildcache
#           cache-to: type=registry,ref=${{ secrets.DOCKERHUB_USERNAME }}/distributed-training:buildcache,mode=max
#   performance-benchmarks:
#     name: Performance Benchmarks
#     runs-on: [self-hosted, gpu]
#     if: github.event_name == 'pull_request'
#     steps:
#       - uses: actions/checkout@v3
#       - name: Install dependencies
#         run: |
#           pip install torch torchvision
#           pip install -r requirements.txt
#           pip install -e .
#       - name: Run performance benchmarks
#         run: |
#           python run_benchmark.py --gpus 1 2 4 --strategies ddp fsdp --output-dir benchmark-results
#       - name: Upload benchmark results
#         uses: actions/upload-artifact@v3
#         with:
#           name: benchmark-results
#           path: benchmark-results/
#   security-scan:
#     name: Security Scanning
#     runs-on: ubuntu-latest
#     steps:
#       - uses: actions/checkout@v3
#       - name: Run Trivy vulnerability scanner
#         uses: aquasecurity/trivy-action@master
#         with:
#           scan-type: 'fs'
#           scan-ref: '.'
#           format: 'sarif'
#           output: 'trivy-results.sarif'
#       - name: Upload Trivy results to GitHub Security tab
#         uses: github/codeql-action/upload-sarif@v2
#         with:
#           sarif_file: 'trivy-results.sarif'
#   deploy-staging:
#     name: Deploy to Staging
#     runs-on: ubuntu-latest
#     needs: [docker-build, integration-tests]
#     if: github.ref == 'refs/heads/develop'
#     steps:
#       - uses: actions/checkout@v3
#       - name: Configure kubectl
#         uses: azure/setup-kubectl@v3
#       - name: Deploy to staging
#         run: |
#           kubectl apply -f k8s-deployment.yaml --namespace=staging
#           kubectl rollout status statefulset/distributed-training --namespace=staging
#   deploy-production:
#     name: Deploy to Production
#     runs-on: ubuntu-latest
#     needs: [docker-build, integration-tests, performance-benchmarks]
#     if: github.event_name == 'release'
#     steps:
#       - uses: actions/checkout@v3
#       - name: Configure kubectl
#         uses: azure/setup-kubectl@v3
#       - name: Deploy to production
#         run: |
#           kubectl apply -f k8s-deployment.yaml --namespace=production
#           kubectl rollout status statefulset/distributed-training --namespace=production
#       - name: Run smoke tests
#         run: |
#           kubectl exec -n production distributed-training-0 -- python -c "import torch; print(f'PyTorch {torch.__version__}')"
#   notification:
#     name: Send Notifications
#     runs-on: ubuntu-latest
#     needs: [code-quality, unit-tests, integration-tests]
#     if: always()
#     steps:
#       - name: Send Slack notification
#         uses: 8398a7/action-slack@v3
#         with:
#           status: ${{ job.status }}
#           text: 'CI/CD Pipeline Status: ${{ job.status }}'
#           webhook_url: ${{ secrets.SLACK_WEBHOOK }}