| # /home/weifengsun/tangou1/domain_code/src/workdir/repos_filtered | |
| # 一级文件夹数量: 37736 | |
| # 代码文件数量: 3325264(取自下方后缀统计结果的总文件数,其中包含 .md/.markdown 及 1 个无后缀文件) | |
| # /home/weifengsun/tangou1/domain_code/src/workdir/keywords_expanded.json | |
| # 1. 统计项目文件夹数量 | |
| # from pathlib import Path | |
| # def count_subdirs_pathlib(path): | |
| # p = Path(path) | |
| # if not p.exists(): | |
| # print("路径不存在") | |
| # return 0 | |
| # # p.iterdir() 遍历目录下所有项 | |
| # # item.is_dir() 判断是否为目录 | |
| # # sum() 对生成器为每个目录产生的 1 求和,即得到目录数量 | |
| # count = sum(1 for item in p.iterdir() if item.is_dir()) | |
| # return count | |
| # # 使用示例 | |
| # folder_path = r'/home/weifengsun/tangou1/domain_code/src/workdir/repos_filtered' # 请替换为你的路径 | |
| # print(f"一级文件夹数量: {count_subdirs_pathlib(folder_path)}") | |
| # 2. 统计代码文件后缀名 | |
| # import os | |
| # from collections import Counter | |
| # def count_extensions_walk(folder_path): | |
| # ext_counter = Counter() | |
| # if not os.path.exists(folder_path): | |
| # print("路径不存在") | |
| # return | |
| # # os.walk 会自动递归遍历所有层级 | |
| # for root, dirs, files in os.walk(folder_path): | |
| # for filename in files: | |
| # # 分离文件名和扩展名 | |
| # # os.path.splitext('file.tar.gz') 会得到 ('file.tar', '.gz') | |
| # _, ext = os.path.splitext(filename) | |
| # # 转为小写 | |
| # ext = ext.lower() | |
| # if ext == '': | |
| # ext = '无后缀' | |
| # ext_counter[ext] += 1 | |
| # return ext_counter | |
| # # --- 使用示例 --- | |
| # target_folder = r'/home/weifengsun/tangou1/domain_code/src/workdir/repos_filtered' # 替换为你的目标文件夹路径 | |
| # result = count_extensions_walk(target_folder) | |
| # if result: | |
| # print(f"统计结果 (总文件数: {result.total()}):") | |
| # for ext, count in result.most_common(): | |
| # print(f"{ext}: {count}") | |
| # 统计结果 (总文件数: 3325264): | |
| # .py: 1078183 | |
| # .h: 352528 | |
| # .hpp: 333821 | |
| # .cpp: 259554 | |
| # .md: 218388 | |
| # .java: 180978 | |
| # .c: 160802 | |
| # .m: 149649 | |
| # .r: 124987 | |
| # .sh: 116917 | |
| # .ipynb: 82218 | |
| # .f: 70590 | |
| # .f90: 59562 | |
| # .cc: 36611 | |
| # .rs: 23565 | |
| # .cxx: 22101 | |
| # .hh: 19051 | |
| # .jl: 18052 | |
| # .go: 13904 | |
| # .for: 1847 | |
| # .bash: 1573 | |
| # .markdown: 346 | |
| # .f95: 36 | |
| # 无后缀: 1 | |