LIBRE / scripts /build_kaggle_deploy.py
RyZ
feat: git clone private repository inside Kaggle notebook with GH_TOKEN secret support
3bb4e96
Raw
History Blame Contribute Delete
6.8 kB
import os
import shutil
import json
# Bootstrap script executed inside the Kaggle notebook. It is kept as one raw
# string (backslashes are literal, so "\n" below is notebook code, not a newline
# in this file) and split into nbformat "source" lines by _build_notebook().
_NOTEBOOK_SCRIPT = r'''
# 1. GPU & CUDA Check
import torch
import sys
import os
import subprocess
import shutil

print("=== Kaggle GPU & System Check ===")
print(f"Python Version: {sys.version}")
print(f"PyTorch Version: {torch.__version__}")
print(f"CUDA Available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU Device: {torch.cuda.get_device_name(0)}")
else:
    print("WARNING: GPU is not active. Please enable GPU in the Kaggle settings for optimal performance.")

# 2. Load Kaggle User Secrets
print("\n=== Kaggle Secrets Validation ===")
secrets = {"DATABASE_URL": None, "RABBITMQ_URL": None, "GH_TOKEN": None}
try:
    from kaggle_secrets import UserSecretsClient
    user_secrets = UserSecretsClient()
except Exception as e:
    print(f"Warning: Could not check secrets: {e}")
else:
    for secret_name in secrets:
        # Read each secret on its own so one missing secret cannot hide the others.
        try:
            secrets[secret_name] = user_secrets.get_secret(secret_name)
        except Exception as e:
            print(f"Warning: Could not read {secret_name}: {e}")
        print(f"{secret_name} secret: {'FOUND' if secrets[secret_name] else 'MISSING'}")
gh_token = secrets["GH_TOKEN"]

# 3. Clone Repository (Since Kaggle CLI only uploads the notebook itself)
print("\n=== Cloning Repository ===")
repo_dir = "/kaggle/working/ETL-Ingestion"
public_url = "https://github.com/FP-KKA/ETL-Ingestion.git"
if os.path.exists(repo_dir):
    print(f"Removing existing repository directory at {repo_dir}...")
    shutil.rmtree(repo_dir)

if gh_token:
    print("Cloning private repository using GH_TOKEN...")
    clone_url = f"https://x-access-token:{gh_token}@github.com/FP-KKA/ETL-Ingestion.git"
else:
    print("WARNING: GH_TOKEN secret not found. Attempting public clone...")
    clone_url = public_url

try:
    subprocess.check_call(["git", "clone", clone_url, repo_dir])
    print("Repository cloned successfully.")
except Exception as e:
    # CalledProcessError embeds the full command line (token included): redact before printing.
    error_text = str(e).replace(gh_token, "***") if gh_token else str(e)
    print(f"Error cloning repository: {error_text}")
    sys.exit(1)

if gh_token:
    # Drop the token from the stored remote URL so it is not left in .git/config.
    subprocess.call(["git", "-C", repo_dir, "remote", "set-url", "origin", public_url])

# 4. Change Directory & Install Dependencies
os.chdir(repo_dir)
print(f"Changed working directory to: {os.getcwd()}")

print("\n=== Installing Clean Architecture Package ===")
try:
    subprocess.check_call([sys.executable, "-m", "pip", "install", "-e", ".[all]"])
    print("Package & dependencies installed successfully.")
except Exception as e:
    print(f"Error installing package dependencies: {e}")
    sys.exit(1)

# 5. Launch Consumer Daemon
print("\n=== Launching Consumer Daemon ===")
try:
    subprocess.check_call([sys.executable, "-m", "src.interface.consumer.run_consumer"])
except KeyboardInterrupt:
    print("Consumer stopped.")
except Exception as e:
    print(f"Consumer failed to execute: {e}")
    sys.exit(1)
'''.strip("\n")


def _build_notebook():
    """Return the nbformat-4 document holding the bootstrap script as one code cell."""
    return {
        "cells": [
            {
                "cell_type": "code",
                "execution_count": None,
                "metadata": {},
                "outputs": [],
                # nbformat stores a cell as a list of lines that keep their
                # trailing newline (the last line has none).
                "source": _NOTEBOOK_SCRIPT.splitlines(keepends=True),
            }
        ],
        "metadata": {"language_info": {"name": "python"}},
        "nbformat": 4,
        "nbformat_minor": 2,
    }


def _build_kernel_metadata(username, slug):
    """Return the kernel-metadata.json payload for the kernel ``username/slug``."""
    return {
        "id": f"{username}/{slug}",
        "title": slug,
        "code_file": "run_consumer_kaggle.ipynb",
        "language": "python",
        "kernel_type": "notebook",
        # Flags are written as the strings "true"/"false", as in the original file.
        "is_private": "true",
        "enable_gpu": "true",
        "enable_tpu": "false",
        "enable_internet": "true",
        "dataset_sources": [],
        "competition_sources": [],
        "kernel_sources": [],
    }


def _write_json(path, payload):
    """Serialize ``payload`` to ``path`` as UTF-8 JSON indented by four spaces."""
    with open(path, "w", encoding="utf-8") as f:
        json.dump(payload, f, indent=4)


def build_deploy_dir(base_dir=None):
    """Create the ``kaggle_deploy`` bundle inside the project root.

    The bundle contains a copy of ``src``, ``pyproject.toml``, ``README.md``
    (when present), the generated ``run_consumer_kaggle.ipynb`` notebook and
    ``kernel-metadata.json``. Any existing ``kaggle_deploy`` directory is
    replaced.

    Args:
        base_dir: Project root to read from and write into. Defaults to the
            parent of the directory containing this script.

    Raises:
        FileNotFoundError: If ``src`` or ``pyproject.toml`` is missing from
            ``base_dir``. Raised before anything is deleted.

    Environment:
        KAGGLE_USERNAME: Kernel owner; falls back to "benedictusryugunawan".
        KAGGLE_SLUG: Kernel slug and title; falls back to "libre".
    """
    # Paths
    if base_dir is None:
        base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    deploy_dir = os.path.join(base_dir, "kaggle_deploy")
    src_dir = os.path.join(base_dir, "src")
    pyproject_path = os.path.join(base_dir, "pyproject.toml")
    readme_path = os.path.join(base_dir, "README.md")

    # Validate the required inputs first so a failed build never wipes the
    # previous bundle.
    if not os.path.isdir(src_dir):
        raise FileNotFoundError(f"Source directory not found: {src_dir}")
    if not os.path.isfile(pyproject_path):
        raise FileNotFoundError(f"pyproject.toml not found: {pyproject_path}")

    # Clean and recreate deploy_dir
    if os.path.exists(deploy_dir):
        shutil.rmtree(deploy_dir)
    os.makedirs(deploy_dir, exist_ok=True)

    # Copy src, pyproject.toml and the optional README
    shutil.copytree(src_dir, os.path.join(deploy_dir, "src"))
    shutil.copy(pyproject_path, os.path.join(deploy_dir, "pyproject.toml"))
    if os.path.exists(readme_path):
        shutil.copy(readme_path, os.path.join(deploy_dir, "README.md"))

    # Generate run_consumer_kaggle.ipynb (Jupyter Notebook format)
    _write_json(os.path.join(deploy_dir, "run_consumer_kaggle.ipynb"), _build_notebook())

    # Generate kernel-metadata.json. `or` (rather than a .get default) also
    # covers variables that are set but empty, which would otherwise produce
    # an invalid id such as "/libre".
    username = os.environ.get("KAGGLE_USERNAME") or "benedictusryugunawan"
    slug = os.environ.get("KAGGLE_SLUG") or "libre"
    _write_json(
        os.path.join(deploy_dir, "kernel-metadata.json"),
        _build_kernel_metadata(username, slug),
    )

    print(f"Kaggle deployment bundle successfully created in: {deploy_dir}")
# Build the bundle only when this file is executed as a script, not on import.
if __name__ == "__main__":
    build_deploy_dir()