"""
Clear all data from a Pinecone index.

One-time script for data cleanup before re-ingestion.
"""
| |
|
import os
import time

from dotenv import load_dotenv
from pinecone import Pinecone
| |
|
| | |
# Load variables from a local .env file into the process environment before
# clear_pinecone_index() reads PINECONE_API_KEY / PINECONE_INDEX_NAME.
load_dotenv()
| |
|
def clear_pinecone_index():
    """Delete every vector from the configured Pinecone index.

    Connection settings are read from the environment: ``PINECONE_API_KEY``
    for authentication and ``PINECONE_INDEX_NAME`` for the target index
    (falling back to ``'hackathon'``).

    The function prints the current index statistics, asks the operator to
    type ``DELETE`` on stdin, and only then issues the deletes. Afterwards
    it waits briefly, re-reads the statistics and reports how many vectors
    remain.

    ``delete(delete_all=True)`` empties a single namespace only, so every
    namespace listed in the index statistics is cleared in turn. Otherwise
    vectors outside the default namespace would survive even though they
    are included in the total shown in the warning.

    Returns:
        None. Progress is reported on stdout. Exceptions raised while
        deleting or verifying are caught and printed, not re-raised.
    """
    # Read the index name once so the banner and the client always agree.
    index_name = os.getenv('PINECONE_INDEX_NAME', 'hackathon')

    pc = Pinecone(api_key=os.getenv('PINECONE_API_KEY'))
    index = pc.Index(index_name)

    stats = index.describe_index_stats()
    total_vectors = stats['total_vector_count']

    print("=" * 80)
    print("PINECONE DATA CLEANUP")
    print("=" * 80)
    print(f"\nIndex: {index_name}")
    print(f"Current vectors: {total_vectors}")
    print(f"Dimensions: {stats.get('dimension', 'N/A')}")

    if total_vectors == 0:
        print("\n✅ Index is already empty. Nothing to delete.")
        return

    # Destructive operation: require an explicit, exact confirmation.
    print(f"\n⚠️ WARNING: This will delete ALL {total_vectors} vectors!")
    confirm = input("Type 'DELETE' to confirm: ")

    if confirm != 'DELETE':
        print("\n❌ Deletion cancelled. No data was removed.")
        return

    print("\n🗑️ Deleting all vectors...")

    # The total above is summed over all namespaces, so clear each one.
    # If the stats list no namespaces, fall back to the default namespace
    # (''), which is what a plain delete(delete_all=True) targets.
    namespaces = list(stats.get('namespaces', {}) or {}) or ['']

    try:
        for namespace in namespaces:
            index.delete(delete_all=True, namespace=namespace)

        print("✅ Deletion completed!")

        # Stats are eventually consistent; give the index a moment to
        # settle before re-reading the count.
        time.sleep(2)

        stats = index.describe_index_stats()
        remaining = stats['total_vector_count']

        print("\n📊 Final status:")
        print(f" Remaining vectors: {remaining}")

        if remaining == 0:
            print(" ✅ Index successfully cleared!")
        else:
            print(f" ⚠️ {remaining} vectors still remain (may need a moment to sync)")

    except Exception as e:
        # Top-level boundary of a one-off script: report the failure
        # instead of dumping a traceback at the operator.
        print(f"\n❌ Error during deletion: {e}")
| |
|
# Script entry point: run the cleanup only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    clear_pinecone_index()
| |
|