#!/usr/bin/env python3
"""Inspect a CSV dataset and look up one row by its original index.

The script prints the column names and a few fields of the first data row,
then scans the file for the row whose index column equals a target value and
prints that row's ``repo_name``, ``path`` and ``language`` fields.
"""
import csv
import json

DATASET_PATH = 'enhanced_dataset.csv'
TARGET_INDEX = '489788'

# Upper bound on the number of data rows scanned before giving up.
# Keep in sync with the "100k" wording of the message printed in main().
MAX_ROWS = 100000

# Candidate index columns, in priority order: the first one holding a
# non-empty value for a row is the one compared against the target.
INDEX_COLUMNS = ('Unnamed: 0.1', 'Unnamed: 0', '')


def _index_value(row):
    """Return the first non-empty candidate index value of *row*, or None."""
    for column in INDEX_COLUMNS:
        value = row.get(column)
        if value:
            return value
    return None


def main(path=DATASET_PATH, target=TARGET_INDEX):
    """Print a summary of the first row, then search for *target*.

    Args:
        path: CSV file to inspect.
        target: Index value to look for, compared as a string against the
            first non-empty column listed in ``INDEX_COLUMNS``.

    Returns:
        The matching row as a dict, or ``None`` when the file has no data
        rows, the target is absent, or the ``MAX_ROWS`` scan limit is hit.
    """
    print(f"Checking {path}...")

    # newline='' lets the csv module handle line endings itself, which is
    # required for quoted fields that contain embedded newlines.
    with open(path, 'r', encoding='utf-8', newline='') as f:
        first_row = next(csv.DictReader(f), None)
        if first_row is None:
            # Empty or header-only file: report it instead of crashing
            # with a bare StopIteration.
            print("No data rows found")
            return None

        print(f"Columns: {list(first_row.keys())}")
        print("\nFirst row values:")
        print(f" Unnamed: 0: {first_row.get('Unnamed: 0', 'N/A')}")
        print(f" Unnamed: 0.1: {first_row.get('Unnamed: 0.1', 'N/A')}")
        print(f" repo_name: {first_row.get('repo_name', 'N/A')}")
        print(f" path: {first_row.get('path', 'N/A')}")
        print(f" language: {first_row.get('language', 'N/A')}")

        print(f"\n\nSearching for original_index={target}...")

        # Rewind and build a fresh reader so the header line is parsed as
        # the header again rather than being consumed as a data row.
        f.seek(0)
        scanned = 0
        for scanned, row in enumerate(csv.DictReader(f), start=1):
            if _index_value(row) == target:
                print(f"Found at row {scanned}!")
                print(f" repo_name: '{row.get('repo_name', 'N/A')}'")
                print(f" path: '{row.get('path', 'N/A')}'")
                print(f" language: '{row.get('language', 'N/A')}'")
                return row
            if scanned >= MAX_ROWS:  # Don't search forever
                print("Not found in first 100k rows")
                return None

        # The file ended before the scan limit was reached; say so
        # explicitly instead of exiting silently.
        print(f"Not found in file ({scanned} rows searched)")
        return None


if __name__ == "__main__":
    main()