# Workflow: run the data update script, then update the model and commit the
# resulting files back to the repository.
#
# Jobs:
#   crawl        - runs ./main/main_update.py and uploads
#                  merge_data/movies_data.csv as an artifact.
#   update_model - downloads that artifact, runs update_model.py, then commits
#                  model_efa/*.pkl and merge_data/final_merged.csv to main.
name: Update data and Update model with Hugging Face

on:
  # schedule:
  #   - cron: '0 0 * * *' # This cron expression runs the job daily
  # workflow_dispatch:
  push:
    branches:
      - main

jobs:
  crawl:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4.1.6
        with:
          ref: main

      - name: Setup Python
        uses: actions/setup-python@v5.1.0
        with:
          # Quoted so YAML keeps it a string (an unquoted 3.10 would become 3.1).
          python-version: '3.11'

      - name: Install Dependencies
        run: |
          python -m pip install --upgrade pip
          if [ -f requirements.txt ]; then pip install -r requirements.txt; fi

      - name: Run update
        run: python ./main/main_update.py

      # Hands the CSV to the update_model job, which runs on a fresh runner.
      - name: Upload output
        uses: actions/upload-artifact@v4
        with:
          name: movies_data.csv
          path: merge_data/movies_data.csv

  update_model:
    runs-on: ubuntu-latest
    needs: crawl
    steps:
      - name: Checkout
        uses: actions/checkout@v4.1.6
        with:
          ref: main

      - name: Setup Python
        uses: actions/setup-python@v5.1.0
        with:
          # Quoted so YAML keeps it a string (an unquoted 3.10 would become 3.1).
          python-version: '3.11'

      - name: Install Dependencies
        run: |
          python -m pip install --upgrade pip
          pip install transformers datasets huggingface_hub
          if [ -f requirements.txt ]; then pip install -r requirements.txt; fi

      # Restores the CSV uploaded by the crawl job into merge_data/.
      - name: Download artifact
        uses: actions/download-artifact@v4
        with:
          name: movies_data.csv
          path: merge_data/

      # The default shell for `run` is bash, so the Python fragments are fed to
      # the interpreter through heredocs instead of being written as bare shell
      # lines (which bash would reject as unknown commands).
      - name: Update model using Hugging Face
        run: |
          # Download a pre-trained model from Hugging Face
          # NOTE(review): AutoModelForSequenceClassification needs a backend
          # such as PyTorch; presumably provided by requirements.txt - confirm.
          python - <<'PY'
          from transformers import AutoModelForSequenceClassification, AutoTokenizer

          model_name = "distilbert-base-uncased"
          tokenizer = AutoTokenizer.from_pretrained(model_name)
          model = AutoModelForSequenceClassification.from_pretrained(model_name)
          PY

          # Your custom model training or updating logic
          python update_model.py

          # Save updated model to Hugging Face Hub (Optional)
          # NOTE(review): "your-hf-username/your-model-name" is a placeholder;
          # replace it with a real repo id and provide Hub credentials, or this
          # push will fail. `Repository` is deprecated in recent
          # huggingface_hub releases - confirm against the installed version.
          python - <<'PY'
          from huggingface_hub import HfApi, Repository

          repo = Repository(local_dir="./model_output", clone_from="your-hf-username/your-model-name")
          repo.push_to_hub(commit_message="Updated model")
          PY

      # NOTE(review): this pushes to main, which is also this workflow's
      # trigger; verify the push cannot re-trigger the workflow in a loop.
      - name: Commit and push artifacts
        env:
          GH_PAT: ${{ secrets.GH_PAT }}
        # The token is read from the GH_PAT environment variable rather than
        # being interpolated into the script text.
        run: |
          git config --global user.name 'github-actions[bot]'
          git config --global user.email '41898282+github-actions[bot]@users.noreply.github.com'
          git add model_efa/*.pkl
          git add merge_data/final_merged.csv
          git commit -m 'Update .pkl files in model_efa directory'
          git push "https://x-access-token:${GH_PAT}@github.com/QuanTH02/2023.2-Data-Science.git" main
        # Presumably so a run with nothing new to commit does not fail the job.
        continue-on-error: true

  # pull_main:
  #   runs-on: [self-hosted, quan_win]
  #   needs: [crawl, update_model]
  #   steps:
  #     - name: Checkout code
  #       uses: actions/checkout@v4.1.6
  #       with:
  #         ref: main
  #     - name: Pull latest changes
  #       run: |
  #         git pull origin main