| | |
| | """ |
| | Diagnostic script to validate Azure OpenAI embeddings deployment. |
| | |
| | This script helps diagnose 404 errors related to embedding deployments. |
| | Run this before deploying to HuggingFace Spaces to ensure configuration is correct. |
| | |
| | Usage: |
| | python scripts/validate_azure_embeddings.py |
| | """ |
| | import os |
| | import sys |
| | from pathlib import Path |
| | from openai import AzureOpenAI |
| | from dotenv import load_dotenv |
| |
|
| | |
# Load settings from a local .env file (when one exists) into the process
# environment at import time, so the os.getenv() lookups in
# validate_azure_config() can see them.
# NOTE(review): python-dotenv is expected to leave already-set variables
# untouched by default - confirm if an override is ever required.
load_dotenv()
| |
|
def _mask_secret(value: str) -> str:
    """Return a redacted form of *value* that is safe to print.

    Values longer than 14 characters keep their first 10 and last 4
    characters; anything shorter is replaced entirely by ``***`` so that a
    short secret is never mostly revealed.
    """
    if len(value) > 14:
        return f"{value[:10]}...{value[-4:]}"
    return "***"


def _classify_failure(error: Exception) -> str:
    """Map an exception from the embeddings call to a diagnosis category.

    Returns one of ``"not_found"``, ``"unauthorized"``, ``"bad_request"`` or
    ``"unknown"``.
    """
    # Prefer the HTTP status carried by the exception (read defensively via
    # getattr, since not every exception has one). Substring matching alone
    # can misfire when "404"/"401" merely appears inside an unrelated part
    # of the message, and never recognises a 400 response.
    by_status = {404: "not_found", 401: "unauthorized", 400: "bad_request"}
    status = getattr(error, "status_code", None)
    if status in by_status:
        return by_status[status]

    # Fallback for exceptions without a recognised status code.
    text = str(error)
    if "404" in text or "Resource not found" in text:
        return "not_found"
    if "401" in text or "Unauthorized" in text:
        return "unauthorized"
    if "InvalidRequestError" in text:
        return "bad_request"
    return "unknown"


def _print_failure_help(category: str, deployment_name: str) -> None:
    """Print the remediation hints that belong to a diagnosis *category*."""
    if category == "not_found":
        print("DIAGNOSIS: Deployment not found (404 error)")
        print()
        print("Possible causes:")
        print(" 1. Deployment name is incorrect")
        print(" 2. Deployment doesn't exist in your Azure OpenAI resource")
        print(" 3. Deployment is in a different Azure region/resource")
        print()
        print("How to fix:")
        print(" Option A: Create the deployment in Azure Portal")
        print(" 1. Go to https://portal.azure.com")
        print(" 2. Navigate to your Azure OpenAI resource")
        print(" 3. Go to 'Model deployments' → 'Manage Deployments'")
        print(" 4. Create a new deployment:")
        print(" - Model: text-embedding-3-small (or text-embedding-ada-002)")
        print(f" - Deployment name: {deployment_name}")
        print()
        print(" Option B: Use existing deployment")
        print(" 1. Check what embedding deployments you already have in Azure Portal")
        print(" 2. Update AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME to match existing deployment")
        print(" 3. Common deployment names:")
        print(" - text-embedding-3-small")
        print(" - text-embedding-ada-002")
        print(" - embedding")
        print()
    elif category == "unauthorized":
        print("DIAGNOSIS: Authentication failed (401 error)")
        print()
        print("How to fix:")
        print(" 1. Verify AZURE_OPENAI_API_KEY is correct")
        print(" 2. Check that the key hasn't expired")
        print(" 3. Ensure the key matches the Azure OpenAI resource")
        print()
    elif category == "bad_request":
        print("DIAGNOSIS: Invalid request to Azure OpenAI API")
        print()
        print("How to fix:")
        print(" 1. Check AZURE_OPENAI_API_VERSION (try '2024-02-01' or '2024-05-01-preview')")
        print(" 2. Verify AZURE_OPENAI_ENDPOINT format (should end with '/')")
        print()
    # "unknown": the raw error message has already been shown to the user.


def validate_azure_config() -> bool:
    """Validate the Azure OpenAI embeddings configuration end to end.

    Step 1 checks that the required environment variables are set (the API
    key is masked when echoed). Step 2 requests one test embedding from the
    configured deployment. Progress, results and remediation hints are
    printed to stdout.

    Returns:
        True when a test embedding was generated; False when a variable is
        missing or the embeddings request failed for any reason.
    """
    print("=" * 80)
    print("Azure OpenAI Embeddings Deployment Validator")
    print("=" * 80)
    print()

    required_vars = {
        "AZURE_OPENAI_ENDPOINT": os.getenv("AZURE_OPENAI_ENDPOINT"),
        "AZURE_OPENAI_API_KEY": os.getenv("AZURE_OPENAI_API_KEY"),
        "AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME": os.getenv("AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME"),
        # The API version has a default, so it can never be reported missing.
        "AZURE_OPENAI_API_VERSION": os.getenv("AZURE_OPENAI_API_VERSION", "2024-02-01"),
    }

    print("1. Checking environment variables...")
    print("-" * 80)
    missing_vars = []
    for var_name, var_value in required_vars.items():
        if not var_value:
            print(f"❌ {var_name}: NOT SET")
            missing_vars.append(var_name)
            continue
        # Never echo a credential in full.
        display_value = _mask_secret(var_value) if "KEY" in var_name else var_value
        print(f"✅ {var_name}: {display_value}")

    print()

    if missing_vars:
        print(f"ERROR: Missing required environment variables: {', '.join(missing_vars)}")
        print()
        print("Fix: Add these variables to your .env file or HuggingFace Spaces secrets")
        return False

    print("2. Testing embeddings deployment...")
    print("-" * 80)

    # Bound before the try block so the failure path can always reference it,
    # even when constructing the client itself raises.
    deployment_name = required_vars["AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME"]
    print(f"Testing deployment: {deployment_name}")
    print()

    try:
        client = AzureOpenAI(
            api_key=required_vars["AZURE_OPENAI_API_KEY"],
            api_version=required_vars["AZURE_OPENAI_API_VERSION"],
            azure_endpoint=required_vars["AZURE_OPENAI_ENDPOINT"],
        )
        response = client.embeddings.create(
            input="This is a test embedding.",
            model=deployment_name,
        )
        embedding_dim = len(response.data[0].embedding)
    except Exception as error:
        # Deliberately broad: this is the top-level boundary of a diagnostic
        # tool, and every failure must end in a report, not a traceback.
        print("❌ ERROR: Failed to generate embedding")
        print()
        print(f"Error message: {error}")
        print()
        _print_failure_help(_classify_failure(error), deployment_name)
        print("=" * 80)
        print("❌ Configuration validation FAILED")
        print("=" * 80)
        return False

    print("✅ SUCCESS: Embedding generated successfully!")
    print(f" Embedding dimension: {embedding_dim}")
    print(f" Model used: {deployment_name}")
    print()
    print("=" * 80)
    print("✅ All checks passed! Your Azure OpenAI embeddings configuration is correct.")
    print("=" * 80)
    return True
| |
|
| |
|
def list_common_deployment_names():
    """Print a short list of deployment names commonly used for embeddings.

    The list is framed by one blank line above and one below. Nothing is
    returned; the output goes to stdout.
    """
    suggestions = (
        "text-embedding-3-small (recommended, most cost-effective)",
        "text-embedding-3-large (higher quality, more expensive)",
        "text-embedding-ada-002 (legacy, widely supported)",
        "embedding (generic name, check your Azure portal)",
    )

    print()
    print("Common embedding deployment names to try:")
    for suggestion in suggestions:
        print(f" - {suggestion}")
    print()
| |
|
| |
|
if __name__ == "__main__":
    # Script entry point: run the validator, then exit 0 with follow-up
    # instructions on success, or exit 1 after listing deployment names
    # worth trying on failure.
    print()

    if validate_azure_config():
        next_steps = (
            "",
            "Next steps:",
            " 1. If deploying to HuggingFace Spaces:",
            " - Add all Azure OpenAI secrets to HuggingFace Spaces settings",
            " - Ensure AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME matches your Azure deployment",
            " 2. Run the application:",
            " python app.py",
            "",
        )
        for line in next_steps:
            print(line)
        sys.exit(0)

    list_common_deployment_names()
    sys.exit(1)
| |
|