File size: 1,324 Bytes
b805898 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 | #!/usr/bin/env python3
import csv
import json
# Diagnostic script: show the columns and first data row of a CSV file, then
# look for the row whose index column equals a given original_index value.

CSV_PATH = 'enhanced_dataset.csv'
TARGET_INDEX = '489788'
# Upper bound on the number of data rows scanned, so the search stays bounded.
MAX_SEARCH_ROWS = 100_000
# Columns that may hold the original index, in order of preference.
INDEX_COLUMNS = ('Unnamed: 0.1', 'Unnamed: 0', '')


def read_first_row(csv_path):
    """Return the first data row of *csv_path* as a dict.

    Returns None when the file has no data rows (empty or header only).
    """
    # newline='' lets the csv module handle line endings itself, so quoted
    # fields that contain embedded newlines are parsed as a single field.
    with open(csv_path, 'r', encoding='utf-8', newline='') as f:
        return next(csv.DictReader(f), None)


def index_value(row):
    """Return the first non-empty value among INDEX_COLUMNS, or None."""
    for column in INDEX_COLUMNS:
        value = row.get(column)
        if value:
            return value
    return None


def find_row(csv_path, target, max_rows=MAX_SEARCH_ROWS):
    """Search the first *max_rows* data rows of *csv_path* for *target*.

    Returns a ``(row_number, row)`` tuple for the first row whose index value
    (see ``index_value``) equals *target*; ``row_number`` is 1-based and does
    not count the header. Returns None when no such row is found.
    """
    with open(csv_path, 'r', encoding='utf-8', newline='') as f:
        reader = csv.DictReader(f)
        # zip() stops once the range is exhausted, which caps the scan at
        # exactly max_rows rows without reading any further.
        for row_number, row in zip(range(1, max_rows + 1), reader):
            if index_value(row) == target:
                return row_number, row
    return None


def main(csv_path=CSV_PATH, target=TARGET_INDEX, max_rows=MAX_SEARCH_ROWS):
    """Print the first row of *csv_path*, then the row matching *target*."""
    print(f"Checking {csv_path}...")
    first_row = read_first_row(csv_path)
    if first_row is None:
        print("No data rows found.")
        return

    print(f"Columns: {list(first_row.keys())}")
    print("\nFirst row values:")
    for column in ('Unnamed: 0', 'Unnamed: 0.1', 'repo_name', 'path', 'language'):
        print(f" {column}: {first_row.get(column, 'N/A')}")

    # Try to find the row matching the requested original_index.
    print(f"\n\nSearching for original_index={target}...")
    match = find_row(csv_path, target, max_rows)
    if match is None:
        # Reported both when the cap is reached and when the file ends first.
        print(f"Not found in first {max_rows:,} rows")
        return

    row_number, row = match
    print(f"Found at row {row_number}!")
    for column in ('repo_name', 'path', 'language'):
        print(f" {column}: '{row.get(column, 'N/A')}'")


if __name__ == "__main__":
    main()
|