# Inference provider selection
# CI trigger marker: keep this file touchable to force backend deploy workflow runs when needed.
INFERENCE_PROVIDER=hf_inference
INFERENCE_PRO_ENABLED=true
INFERENCE_PRO_PROVIDER=hf_inference
INFERENCE_GPU_PROVIDER=hf_inference
INFERENCE_CPU_PROVIDER=hf_inference
INFERENCE_ENABLE_PROVIDER_FALLBACK=true
INFERENCE_PRO_PRIORITY_TASKS=chat,verify_solution
INFERENCE_PRO_ROUTE_HEADER_NAME=
INFERENCE_PRO_ROUTE_HEADER_VALUE=true

# task policy sets, comma-separated
INFERENCE_GPU_REQUIRED_TASKS=chat
INFERENCE_CPU_ONLY_TASKS=risk_classification,analytics_aggregation,file_parsing,auth,default_cpu
INFERENCE_INTERACTIVE_TASKS=chat,verify_solution,daily_insight
ENABLE_LLM_RISK_RECOMMENDATIONS=true

# local_space provider settings
# Accepts either a runtime host (https://<owner>-<space>.hf.space) or a
# Space page URL (https://huggingface.co/spaces/<owner>/<space>).
# Example: https://huggingface.co/spaces/Deign86/mathpulse-ai
INFERENCE_LOCAL_SPACE_URL=http://127.0.0.1:7860
INFERENCE_LOCAL_SPACE_GENERATE_PATH=/gradio_api/call/generate
INFERENCE_LOCAL_SPACE_TIMEOUT_SEC=180

# hf_inference provider settings
# Alternative env names accepted by runtime/startup checks: HUGGING_FACE_API_TOKEN, HUGGINGFACE_API_TOKEN
HF_TOKEN=your_hf_token

FIREBASE_AUTH_PROJECT_ID=mathpulse-ai-2026
# Prefer one of the options below for backend Firestore/Admin access in deployment:
# FIREBASE_SERVICE_ACCOUNT_JSON={"type":"service_account",...}
# FIREBASE_SERVICE_ACCOUNT_FILE=/path/to/service-account.json

INFERENCE_HF_BASE_URL=https://router.huggingface.co/hf-inference/models
INFERENCE_HF_CHAT_URL=https://router.huggingface.co/v1/chat/completions
INFERENCE_HF_TIMEOUT_SEC=90
INFERENCE_INTERACTIVE_TIMEOUT_SEC=55
INFERENCE_BACKGROUND_TIMEOUT_SEC=120

# Curriculum PDF storage
# Store the binary curriculum files in a Hugging Face dataset or Space repo,
# then point the backend at that repo so it downloads them at build/startup time.
CURRICULUM_SOURCE_REPO_ID=Deign86/mathpulse-curriculum
CURRICULUM_SOURCE_REPO_TYPE=dataset
CURRICULUM_SOURCE_REVISION=main

# Transactional email settings for admin-created accounts
# Primary provider: Brevo Transactional API
BREVO_API_KEY=
# Optional: Brevo MCP token (base64 JSON containing api_key) if BREVO_API_KEY is not set
BREVO_MCP_TOKEN=
# Optional SMTP fallback provider (Brevo SMTP relay)
BREVO_SMTP_LOGIN=
BREVO_SMTP_KEY=
BREVO_SMTP_HOST=smtp-relay.brevo.com
BREVO_SMTP_PORT=587
MAIL_FROM_ADDRESS=noreply@mathpulse.ai
MAIL_FROM_NAME=MathPulse AI
MAIL_SEND_TIMEOUT_SEC=15
APP_LOGIN_URL=https://mathpulse.ai
# Optional: absolute http(s) URL used as the email header avatar image.
# If unset, backend derives this from APP_LOGIN_URL + /avatar/avatar_icon.png.
APP_BRAND_AVATAR_URL=

# model defaults
# Global default model for all tasks.
INFERENCE_MODEL_ID=Qwen/Qwen3-32B
INFERENCE_ENFORCE_QWEN_ONLY=true
INFERENCE_QWEN_LOCK_MODEL=Qwen/Qwen3-32B
INFERENCE_MAX_NEW_TOKENS=8192
INFERENCE_TEMPERATURE=0.2
INFERENCE_TOP_P=0.9
INFERENCE_CHAT_MODEL_ID=Qwen/Qwen3-32B
# Temporary chat-only override for experiments (clear to roll back instantly).
# Example: Qwen/Qwen3-32B
INFERENCE_CHAT_MODEL_TEMP_OVERRIDE=
INFERENCE_CHAT_STRICT_MODEL_ONLY=true
INFERENCE_CHAT_HARD_MODEL_ID=meta-llama/Meta-Llama-3-70B-Instruct
INFERENCE_CHAT_HARD_TRIGGER_ENABLED=false
INFERENCE_CHAT_HARD_PROMPT_CHARS=650
INFERENCE_CHAT_HARD_HISTORY_CHARS=1500
INFERENCE_CHAT_HARD_KEYWORDS=step-by-step,show all steps,explain each step,justify each step,derive,derivation,proof,prove,rigorous,multi-step,word problem
CHAT_MAX_NEW_TOKENS=8192
CHAT_STREAM_NO_TOKEN_TIMEOUT_SEC=90
CHAT_STREAM_TOTAL_TIMEOUT_SEC=900
CHAT_STREAM_CONTINUATION_ENABLED=true
CHAT_STREAM_CONTINUATION_MAX_ROUNDS=2
CHAT_STREAM_CONTINUATION_MIN_NEW_CHARS=24
CHAT_STREAM_CONTINUATION_TAIL_CHARS=900
CHAT_STREAM_COMPLETION_MODE_DEFAULT=auto

# Optional: force quiz-generation model. Leave empty to use routing.task_model_map.quiz_generation.
HF_QUIZ_MODEL_ID=
HF_QUIZ_JSON_REPAIR_MODEL_ID=Qwen/Qwen3-32B

# retry behavior
INFERENCE_MAX_RETRIES=3
INFERENCE_BACKOFF_SEC=1.5
INFERENCE_INTERACTIVE_MAX_RETRIES=1
INFERENCE_BACKGROUND_MAX_RETRIES=3
INFERENCE_INTERACTIVE_BACKOFF_SEC=1.0
INFERENCE_BACKGROUND_BACKOFF_SEC=1.75
INFERENCE_INTERACTIVE_MAX_FALLBACK_DEPTH=1
# Max simultaneous blocking HF calls allowed from async endpoints.
HF_BLOCKING_CALL_CONCURRENCY=16
HF_ASYNC_MAX_CONNECTIONS=64
HF_ASYNC_MAX_KEEPALIVE_CONNECTIONS=32
HF_ASYNC_CONNECT_TIMEOUT_SEC=10.0
HF_ASYNC_WRITE_TIMEOUT_SEC=30.0
HF_ASYNC_POOL_TIMEOUT_SEC=10.0

# fallback model ids, comma-separated
INFERENCE_FALLBACK_MODELS=

# async generation controls
ENABLE_ASYNC_GENERATION=true
ASYNC_TASK_TTL_SECONDS=3600
ASYNC_TASK_MAX_ITEMS=400