geo-platform / test_analytics.py
3v324v23's picture
initial: geo-platform full stack
5c429d4
#!/usr/bin/env python3
"""Test comprehensive keyword analytics."""
import os
import sys
import json
# Set credentials
os.environ['DATAFORSEO_LOGIN'] = 'ai.seg01@seginvest.com'
os.environ['DATAFORSEO_PASSWORD'] = '712e269a1e24e50f'
sys.path.insert(0, os.path.dirname(__file__))
from server.keyword_engine import extract_keywords_from_audit
from server.keyword_analytics import format_analytics_report
def test_analytics():
"""Test full analytics with existing audit."""
print("=" * 80)
print("COMPREHENSIVE KEYWORD ANALYTICS TEST")
print("=" * 80)
audit_path = 'output/audit.json'
if not os.path.exists(audit_path):
print(f"โŒ {audit_path} not found")
return
with open(audit_path, 'r') as f:
audit = json.load(f)
# Define expected keywords for coverage analysis
expected_keywords = [
'seo', 'search engine optimization', 'ู…ุญุฑูƒ', 'ุจุญุซ',
'local seo', 'ai seo', 'youtube seo', 'aso',
'ranking', 'keywords', 'backlinks', 'content'
]
print("\n๐Ÿ” Extracting keywords with full analytics...")
print(f"๐Ÿ“‹ Expected keywords: {len(expected_keywords)}")
# Get full analytics
analytics = extract_keywords_from_audit(
audit,
top_n=50,
enrich=True,
analytics=True,
expected_keywords=expected_keywords
)
# Print formatted report
print("\n" + format_analytics_report(analytics))
# Print JSON structure
print("\n" + "=" * 80)
print("JSON STRUCTURE (for API response)")
print("=" * 80)
# Show structure without full data
structure = {
'summary': analytics['summary'],
'top_keywords': analytics['top_keywords'][:3],
'classification': {
'primary': f"{len(analytics['classification']['primary'])} keywords",
'secondary': f"{len(analytics['classification']['secondary'])} keywords",
'long_tail': f"{len(analytics['classification']['long_tail'])} keywords"
},
'clusters': {k: f"{len(v)} keywords" for k, v in analytics['clusters'].items()},
'coverage': analytics.get('coverage')
}
print(json.dumps(structure, indent=2, ensure_ascii=False))
# Show example API response
print("\n" + "=" * 80)
print("EXAMPLE API RESPONSE")
print("=" * 80)
api_response = {
'ok': True,
'analytics': {
'summary': analytics['summary'],
'top_keywords': analytics['top_keywords'][:5],
'classification': {
'primary': analytics['classification']['primary'][:3],
'secondary': analytics['classification']['secondary'][:3],
'long_tail': analytics['classification']['long_tail'][:3]
},
'clusters': {k: v[:2] for k, v in analytics['clusters'].items()},
'coverage': analytics.get('coverage')
}
}
print(json.dumps(api_response, indent=2, ensure_ascii=False))
def test_simple_vs_analytics():
"""Compare simple extraction vs analytics."""
print("\n" + "=" * 80)
print("COMPARISON: Simple vs Analytics")
print("=" * 80)
audit_path = 'output/audit.json'
if not os.path.exists(audit_path):
return
with open(audit_path, 'r') as f:
audit = json.load(f)
# Simple extraction
print("\n1๏ธโƒฃ SIMPLE EXTRACTION (analytics=False)")
print("-" * 80)
simple = extract_keywords_from_audit(audit, top_n=10, enrich=False, analytics=False)
print(f"{'Keyword':<30} {'Count':<10}")
print("-" * 80)
for kw in simple[:5]:
print(f"{kw['kw']:<30} {kw['count']:<10}")
# Analytics extraction
print("\n2๏ธโƒฃ ANALYTICS EXTRACTION (analytics=True)")
print("-" * 80)
analytics = extract_keywords_from_audit(audit, top_n=10, enrich=False, analytics=True)
print(f"Total Keywords: {analytics['summary']['total_keywords']}")
print(f"Primary: {analytics['summary']['primary_keywords']}")
print(f"Secondary: {analytics['summary']['secondary_keywords']}")
print(f"Long-tail: {analytics['summary']['long_tail_keywords']}")
print(f"\nTop 5 Keywords:")
for kw in analytics['top_keywords'][:5]:
density = kw.get('density', 0)
print(f" โ€ข {kw['kw']} (count: {kw['count']}, density: {density:.2f}%)")
if __name__ == '__main__':
test_analytics()
test_simple_vs_analytics()
print("\n" + "=" * 80)
print("โœ… ANALYTICS TEST COMPLETED!")
print("=" * 80)
print("\n๐Ÿ’ก API Usage:")
print(" Simple: POST /api/keywords?analytics=false")
print(" Analytics: POST /api/keywords?analytics=true")
print("\n๐Ÿ“Š Analytics includes:")
print(" โœ… Keyword classification (primary/secondary/long-tail)")
print(" โœ… Topic clustering (SEO, E-commerce, Content, etc.)")
print(" โœ… Keyword density calculation")
print(" โœ… Coverage score (vs expected keywords)")
print(" โœ… Clean, filtered keywords (no navigation/junk)")