# Training pipeline workflow: collects data, trains the tokenizer and the
# model, evaluates the result, and uploads the contents of outputs/models/
# as a build artifact.
name: Train Bengali-Code LLM Model

on:
  schedule:
    # Daily at 00:00 (GitHub evaluates cron schedules in UTC).
    - cron: '0 0 * * *'
  # Also allow the workflow to be started manually.
  workflow_dispatch:

jobs:
  train:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          # Kept quoted: an unquoted 3.10 would be parsed as the float 3.1.
          python-version: '3.10'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install transformers datasets sentencepiece accelerate torch wandb

      - name: Data Collection
        run: python scripts/data_collector.py
        env:
          # Exposed only to this step, from the repository secret of the
          # same name.
          HUGGINGFACE_TOKEN: ${{ secrets.HUGGINGFACE_TOKEN }}

      - name: Train Tokenizer
        run: python scripts/tokenizer_trainer.py

      - name: Train Model
        run: python scripts/model_trainer.py
        env:
          # Exposed only to this step, from the repository secret of the
          # same name.
          WANDB_API_KEY: ${{ secrets.WANDB_API_KEY }}

      - name: Evaluate Model
        run: python scripts/model_evaluator.py

      - name: Upload Model Artifacts
        # v3 of upload-artifact has been retired on github.com; v4 accepts
        # the same `name` and `path` inputs used here.
        uses: actions/upload-artifact@v4
        with:
          name: model-weights
          path: outputs/models/