DouDou committed on
Commit
9001624
·
verified ·
1 Parent(s): 216c37b

Upload data3/check_match.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. data3/check_match.py +41 -0
data3/check_match.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env python3
"""Spot-check dataset metadata in a CSV file and a JSONL file.

Run as a script, this prints three things:

1. selected columns of row 57 of ``function_dataset_v2.csv``;
2. the ``metadata`` fields of the first entry of
   ``programming_problems.jsonl``;
3. how many CSV rows have non-empty ``repo_name``, ``path`` and
   ``language`` columns.
"""
import csv
import json

CSV_PATH = 'function_dataset_v2.csv'
JSONL_PATH = 'programming_problems.jsonl'
ROW_NUMBER = 57  # 1-indexed data row (the header line is not counted)
REQUIRED_FIELDS = ('repo_name', 'path', 'language')


def read_csv_row(csv_path, row_number):
    """Return data row *row_number* (1-indexed) of *csv_path* as a dict.

    Returns ``None`` when the file has fewer than *row_number* data rows.

    Raises:
        ValueError: if *row_number* is smaller than 1.
    """
    if row_number < 1:
        raise ValueError(f"row_number must be >= 1, got {row_number}")
    # newline='' is what the csv module requires so that line breaks inside
    # quoted fields reach the parser untouched.
    # NOTE(review): assumes the CSV is UTF-8 encoded -- confirm.
    with open(csv_path, 'r', newline='', encoding='utf-8') as f:
        for position, row in enumerate(csv.DictReader(f), start=1):
            if position == row_number:
                return row
    return None


def read_first_jsonl_entry(jsonl_path):
    """Return the parsed first line of *jsonl_path*.

    Returns ``None`` when the first line is missing or blank instead of
    letting ``json.loads`` fail on an empty string.
    """
    with open(jsonl_path, 'r', encoding='utf-8') as f:
        first_line = f.readline()
    if not first_line.strip():
        return None
    return json.loads(first_line)


def count_complete_rows(csv_path, required_fields=REQUIRED_FIELDS):
    """Count the data rows of *csv_path* and those with complete metadata.

    A row is complete when every column named in *required_fields* holds a
    truthy (non-empty) value.

    Returns:
        A ``(total, complete)`` tuple.
    """
    total = 0
    complete = 0
    with open(csv_path, 'r', newline='', encoding='utf-8') as f:
        for row in csv.DictReader(f):
            total += 1
            if all(row[field] for field in required_fields):
                complete += 1
    return total, complete


def main(csv_path=CSV_PATH, jsonl_path=JSONL_PATH, row_number=ROW_NUMBER):
    """Print the three checks described in the module docstring."""
    print(f"Reading row {row_number} from CSV...")
    row = read_csv_row(csv_path, row_number)
    if row is None:
        # Previously this case printed nothing, which was easy to misread.
        print(f"Row {row_number} not found in {csv_path}")
    else:
        print(f"Row {row_number}:")
        print(f" original_index: {row['original_index']}")
        print(f" repo_name: '{row['repo_name']}'")
        print(f" path: '{row['path']}'")
        print(f" language: '{row['language']}'")
        print(f" function_name: '{row['function_name']}'")

    # Also check a sample JSONL
    print("\n\nChecking first JSONL entry...")
    data = read_first_jsonl_entry(jsonl_path)
    if data is None:
        print(f"No entries found in {jsonl_path}")
    else:
        metadata = data['metadata']
        print(f"original_index: {metadata['original_index']}")
        print(f"function_name: {metadata['function_name']}")
        print(f"Current repo_name: '{metadata['repo_name']}'")
        print(f"Current path: '{metadata['path']}'")
        print(f"Current language: '{metadata['language']}'")

    # Count how many rows in CSV have complete info
    print("\n\nCounting CSV rows with complete metadata...")
    total, complete = count_complete_rows(csv_path)
    print(f"Total CSV rows: {total}")
    print(f"Rows with complete metadata: {complete}")
    print(f"Rows with missing metadata: {total - complete}")


if __name__ == "__main__":
    main()