#!/usr/bin/env python3 import csv import json print("Checking enhanced_dataset.csv...") with open('enhanced_dataset.csv', 'r', encoding='utf-8') as f: reader = csv.DictReader(f) # Get first row row = next(reader) print(f"Columns: {list(row.keys())}") print(f"\nFirst row values:") print(f" Unnamed: 0: {row.get('Unnamed: 0', 'N/A')}") print(f" Unnamed: 0.1: {row.get('Unnamed: 0.1', 'N/A')}") print(f" repo_name: {row.get('repo_name', 'N/A')}") print(f" path: {row.get('path', 'N/A')}") print(f" language: {row.get('language', 'N/A')}") # Try to find the row matching original_index=489788 print("\n\nSearching for original_index=489788...") f.seek(0) next(reader) # Skip header for i, row in enumerate(reader): # Check different potential index columns idx_val = row.get('Unnamed: 0.1') or row.get('Unnamed: 0') or row.get('') if idx_val == '489788': print(f"Found at row {i+1}!") print(f" repo_name: '{row.get('repo_name', 'N/A')}'") print(f" path: '{row.get('path', 'N/A')}'") print(f" language: '{row.get('language', 'N/A')}'") break if i >= 100000: # Don't search forever print(f"Not found in first 100k rows") break