"""
Repository Structure Scanner
=============================

Scans the entire file structure of a repository and outputs a tree-like
Markdown representation. Handles massive (1GB+) repositories efficiently.

Usage:
    python scan_structure.py [path] [--output FILE] [--ignore PATTERN ...]
                             [--no-default-ignore]

Output:
    A Markdown file containing the full directory tree.
"""
|
|
|
import argparse
import fnmatch
import os
import subprocess
import sys
from collections import defaultdict
from pathlib import Path
|
|
|
|
|
|
|
|
|
|
|
# Entry names and wildcard patterns excluded from the scan unless the
# user passes --no-default-ignore.
DEFAULT_IGNORE = {
    # Version control
    ".git",
    # Python environments, caches and packaging artefacts
    "__pycache__",
    ".venv",
    "venv",
    "env",
    ".ipynb_checkpoints",
    ".mypy_cache",
    ".pytest_cache",
    ".tox",
    ".eggs",
    "*.egg-info",
    # JavaScript dependencies
    "node_modules",
    # OS-generated metadata files
    ".DS_Store",
    "Thumbs.db",
    "desktop.ini",
}
|
|
|
|
|
def should_ignore(name: str, ignore_set: set) -> bool:
    """Return True if *name* matches any entry in *ignore_set*.

    Args:
        name: Bare file or directory name (no path components).
        ignore_set: Literal names and/or shell-style wildcard patterns
            (``*``, ``?``, ``[seq]``, ``[!seq]``).

    Returns:
        True when *name* equals an entry exactly or matches one of the
        wildcard patterns; False otherwise (including for an empty set).

    Matching is case-sensitive on every platform.
    """
    # Fast path: most entries are literal names, so a single hash lookup
    # settles the common case without touching the pattern matcher.
    if name in ignore_set:
        return True
    # fnmatchcase (rather than fnmatch) keeps the comparison case-sensitive
    # regardless of the host OS, and supports patterns such as "test_*" or
    # "*.py[co]" in addition to the "*suffix" form.
    return any(fnmatch.fnmatchcase(name, pattern) for pattern in ignore_set)
|
|
|
|
|
def build_tree(root_path: str, ignore_set: set) -> list[str]:
    """Walk *root_path* depth-first and return tree-formatted lines.

    Within each directory, entries are listed directories first and then
    alphabetically by lower-cased name. Directory names get a trailing
    ``/``. Symbolic links are never followed: a symlink to a directory is
    listed like a file and is not descended into.

    Args:
        root_path: Directory whose contents are listed. The root itself
            is not included in the output.
        ignore_set: Names/patterns passed to ``should_ignore``; matching
            entries are omitted together with everything beneath them.

    Returns:
        One string per listed entry, already prefixed with the
        box-drawing connectors for its depth.

    Raises:
        OSError: Errors other than ``PermissionError`` raised while
            listing a directory propagate to the caller (for example
            ``FileNotFoundError`` when *root_path* does not exist).
            Directories that raise ``PermissionError`` are skipped
            silently: their own line is kept but no children are listed.
    """
    # Box-drawing glyphs are written as escapes so the source stays pure
    # ASCII and cannot be damaged by a wrong-encoding round trip.
    tee = "\u251c\u2500\u2500 "    # "├── " : entry with siblings below it
    elbow = "\u2514\u2500\u2500 "  # "└── " : last entry in its directory
    pipe = "\u2502   "             # "│   " : parent still has entries below
    blank = "    "                 # parent was the last entry

    lines: list[str] = []

    def _walk(current: str, prefix: str) -> None:
        """Append the lines for *current*'s children, then recurse."""
        try:
            # The context manager releases the directory handle even if
            # an is_dir() call fails part-way through the listing.
            with os.scandir(current) as scan:
                # Filter before sorting so ignored entries cost nothing,
                # and resolve is_dir() once per entry.
                children = [
                    (entry.is_dir(follow_symlinks=False), entry)
                    for entry in scan
                    if not should_ignore(entry.name, ignore_set)
                ]
        except PermissionError:
            return

        # Directories first (False sorts before True), then by name.
        children.sort(key=lambda item: (not item[0], item[1].name.lower()))

        last_idx = len(children) - 1
        for idx, (is_dir, entry) in enumerate(children):
            is_last = idx == last_idx
            connector = elbow if is_last else tee
            suffix = "/" if is_dir else ""
            lines.append(f"{prefix}{connector}{entry.name}{suffix}")

            if is_dir:
                _walk(entry.path, prefix + (blank if is_last else pipe))

    _walk(root_path, "")
    return lines
|
|
|
|
|
def main() -> None:
    """Parse command-line arguments, scan the tree and write the Markdown.

    Command-line arguments:
        path: Root directory to scan (default: current directory).
        --output / -o: Output file (default: ``STRUCTURE.md`` inside the
            scanned directory).
        --ignore: Extra names/patterns added to the ignore set.
        --no-default-ignore: Start from an empty ignore set instead of
            ``DEFAULT_IGNORE``.

    Side effects:
        Prints progress to stdout and (over)writes the output file as
        UTF-8. Exits through ``parser.error`` when *path* is not a
        directory.
    """
    parser = argparse.ArgumentParser(
        description="Scan repository file structure and output a Markdown tree."
    )
    parser.add_argument(
        "path",
        nargs="?",
        default=".",
        help="Root directory to scan (default: current directory).",
    )
    parser.add_argument(
        "--output",
        "-o",
        default=None,
        help="Output Markdown file path (default: STRUCTURE.md in scanned dir).",
    )
    parser.add_argument(
        "--ignore",
        nargs="*",
        default=None,
        help="Extra patterns to ignore (added to built-in defaults).",
    )
    parser.add_argument(
        "--no-default-ignore",
        action="store_true",
        help="Disable the built-in ignore list (scan everything).",
    )
    args = parser.parse_args()

    root = os.path.abspath(args.path)
    # Fail with a usage message rather than a traceback from the scan.
    if not os.path.isdir(root):
        parser.error(f"not a directory: {root}")
    root_name = os.path.basename(root)

    # Copy the defaults so the module-level constant is never mutated.
    ignore_set: set = set() if args.no_default_ignore else set(DEFAULT_IGNORE)
    if args.ignore:
        ignore_set.update(args.ignore)

    out_path = args.output or os.path.join(root, "STRUCTURE.md")

    # Keep the generated file out of its own listing, but only when it is
    # actually written inside the scanned tree; otherwise unrelated files
    # that merely share its name would be hidden.
    out_abs = os.path.abspath(out_path)
    try:
        output_in_tree = os.path.commonpath([root, out_abs]) == root
    except ValueError:
        # Raised e.g. for paths on different drives: cannot be inside root.
        output_in_tree = False
    if output_in_tree:
        ignore_set.add(os.path.basename(out_abs))

    print(f"Scanning: {root}")
    print(f"Ignoring: {', '.join(sorted(ignore_set))}")

    tree_lines = build_tree(root, ignore_set)

    # The embedded "\n" on the first and last items yields a blank line
    # after the heading and a trailing newline at end of file once joined.
    md_lines = [
        "## Project Structure\n",
        "```text",
        f"{root_name}/",
    ]
    md_lines.extend(tree_lines)
    md_lines.append("```\n")

    content = "\n".join(md_lines)

    with open(out_path, "w", encoding="utf-8") as fh:
        fh.write(content)

    total_entries = len(tree_lines)
    # \u2014 is an em dash, escaped so the source stays ASCII.
    print(f"Done \u2014 {total_entries} entries written to {out_path}")
|
|
|
|
|
# Run the scanner only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()
|
|
|