Upload data2/step22/md_stat.py with huggingface_hub
Browse files- data2/step22/md_stat.py +21 -0
data2/step22/md_stat.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pathlib import Path
|
| 2 |
+
import json
|
| 3 |
+
|
| 4 |
+
def get_md_contents(src_dir, target_dir="/home/weifengsun/tangou1/step2/step22/dataset"):
    """Export each sub-directory's README_SUMMARY.md as a small JSON file.

    For every immediate sub-directory of ``src_dir`` that contains a regular
    file named ``README_SUMMARY.md``, the file is read as UTF-8 and its text
    is written to ``<target_dir>/<sub-directory name>/readme_summary.json``
    as ``{"readme_summary": <text>}``. Sub-directories without that file are
    skipped and no output directory is created for them.

    Args:
        src_dir: Directory whose immediate sub-directories are scanned.
        target_dir: Root under which one output directory per matching
            sub-directory is created (missing parents are created too).

    Raises:
        OSError: If ``src_dir`` cannot be listed (e.g. it does not exist or
            is not a directory) or an output file cannot be written.
        UnicodeDecodeError: If a summary file is not valid UTF-8.
    """
    src_path = Path(src_dir)
    target_path = Path(target_dir)

    # Sorted so that repeated runs visit the sub-directories in the same
    # order; iterdir() itself yields entries in arbitrary order.
    for subdir in sorted(src_path.iterdir()):
        if not subdir.is_dir():
            continue

        readme_file = subdir / "README_SUMMARY.md"
        # is_file() rather than exists(): a *directory* with this name would
        # pass exists() and then make read_text() raise, aborting the batch.
        if not readme_file.is_file():
            continue

        content = readme_file.read_text(encoding="utf-8")
        dest_dir = target_path / subdir.name
        dest_dir.mkdir(parents=True, exist_ok=True)
        with open(dest_dir / "readme_summary.json", "w", encoding="utf-8") as f:
            # ensure_ascii=False keeps non-ASCII text readable in the output.
            json.dump({"readme_summary": content}, f, ensure_ascii=False, indent=4)
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
if __name__ == "__main__":
    import sys

    # Usage: md_stat.py [SRC_DIR [TARGET_DIR]]
    # With no arguments the script scans the source directory below and
    # writes to get_md_contents' default target directory
    # (/home/weifengsun/tangou1/step2/step22/dataset).
    default_src_dir = "/home/weifengsun/tangou1/domain_code/src/workdir/repos_filtered"
    src_dir = sys.argv[1] if len(sys.argv) > 1 else default_src_dir

    if len(sys.argv) > 2:
        get_md_contents(src_dir, sys.argv[2])
    else:
        get_md_contents(src_dir)
|