| | |
| | |
| |
|
| | """ |
| | Language Detector |
| | |
| | This module provides functionality for detecting programming languages in a repository. |
| | """ |
| |
|
| | import os |
| | import logging |
| | from collections import Counter |
| |
|
| | logger = logging.getLogger(__name__) |
| |
|
| | |
# Maps a file extension to the language it is attributed to.
# Keys are lower-case and include the leading dot, i.e. the form returned by
# os.path.splitext() for a lower-case file name.
# The table is written grouped by language and flattened into a plain
# {extension: language} dict. "C++" appears twice only so that the resulting
# key order stays the same as the original flat listing.
EXTENSION_TO_LANGUAGE = {
    extension: language
    for language, extensions in (
        ("Python", (".py",)),
        ("JavaScript", (".js", ".jsx")),
        ("TypeScript", (".ts", ".tsx")),
        ("Java", (".java",)),
        ("Go", (".go",)),
        ("Rust", (".rs",)),
        ("C++", (".cpp", ".cc", ".cxx")),
        ("C", (".c", ".h")),
        ("C++", (".hpp",)),
        ("C#", (".cs",)),
        ("PHP", (".php",)),
        ("Ruby", (".rb",)),
        ("Swift", (".swift",)),
        ("Kotlin", (".kt",)),
        ("Scala", (".scala",)),
        ("R", (".r",)),
        ("Shell", (".sh", ".bash", ".zsh")),
        ("HTML", (".html", ".htm")),
        ("CSS", (".css",)),
        ("SCSS", (".scss", ".sass")),
        ("Less", (".less",)),
        ("Markdown", (".md",)),
        ("JSON", (".json",)),
        ("XML", (".xml",)),
        ("YAML", (".yaml", ".yml")),
        ("SQL", (".sql",)),
        ("GraphQL", (".graphql", ".gql")),
    )
    for extension in extensions
}
| |
|
| | |
# Maps an exact file name (no directory part) to the language/tool it is
# attributed to. Written grouped by language and flattened into a plain
# {file_name: language} dict.
SPECIAL_FILES_TO_LANGUAGE = {
    file_name: language
    for language, file_names in (
        ("Docker", ("Dockerfile", "docker-compose.yml", "docker-compose.yaml")),
        ("Make", ("Makefile",)),
        ("CMake", ("CMakeLists.txt",)),
        ("JavaScript", ("package.json",)),
        ("TypeScript", ("tsconfig.json",)),
        ("Python", ("requirements.txt", "setup.py")),
        ("Java", ("pom.xml", "build.gradle")),
        ("Rust", ("Cargo.toml",)),
        ("Go", ("go.mod",)),
    )
    for file_name in file_names
}
| |
|
| |
|
class LanguageDetector:
    """
    Detects programming languages in a repository.

    A file is classified by its exact name first (``SPECIAL_FILES_TO_LANGUAGE``)
    and otherwise by its extension (``EXTENSION_TO_LANGUAGE``). Hidden
    directories and common dependency/build directories are never traversed.
    """

    # Directory names that are never descended into. Dot-directories such as
    # ".git" are excluded separately by prefix, so they need no entry here.
    _IGNORED_DIRS = frozenset(
        {"node_modules", "venv", "__pycache__", "dist", "build"}
    )

    # Languages that detect_languages() is allowed to report. Everything else
    # (markup, data and build formats) is counted but filtered out.
    # NOTE(review): "C" is not listed although '.c' and '.h' map to it, so
    # C-only repositories are reported as having no languages - confirm
    # whether that is intentional before adding it.
    _SUPPORTED_LANGUAGES = frozenset(
        {
            "Python", "JavaScript", "TypeScript", "Java",
            "Go", "Rust", "C++", "C#", "PHP", "Ruby",
            "Swift", "Kotlin", "Scala", "R", "Shell",
        }
    )

    def __init__(self):
        """
        Initialize the LanguageDetector.
        """
        logger.info("Initialized LanguageDetector")

    @staticmethod
    def _classify(file_name):
        """
        Return the language for a bare file name, or None if unrecognised.

        Exact file names take precedence over extensions, so e.g.
        'package.json' is attributed to JavaScript rather than JSON.
        """
        language = SPECIAL_FILES_TO_LANGUAGE.get(file_name)
        if language is not None:
            return language

        _, ext = os.path.splitext(file_name)
        # The extension table is keyed in lower case; normalise so that
        # 'analysis.R' or 'INDEX.HTML' are recognised as well.
        return EXTENSION_TO_LANGUAGE.get(ext.lower())

    def _iter_classified_files(self, repo_path):
        """
        Yield ``(file_path, language)`` for every recognised file under
        ``repo_path``, skipping hidden and ignored directories.
        """
        if not os.path.isdir(repo_path):
            # os.walk() silently yields nothing for a bad path; make the
            # reason for an empty result visible in the logs.
            logger.warning("Repository path is not a directory: %s", repo_path)
            return

        for root, dirs, files in os.walk(repo_path):
            # Prune in place: os.walk() only descends into what is left in
            # `dirs` when walking top-down.
            dirs[:] = [
                d for d in dirs
                if not d.startswith(".") and d not in self._IGNORED_DIRS
            ]

            for file_name in files:
                language = self._classify(file_name)
                if language is not None:
                    yield os.path.join(root, file_name), language

    def detect_languages(self, repo_path):
        """
        Detect programming languages in a repository.

        Args:
            repo_path (str): The path to the repository.

        Returns:
            list: The detected programming languages, most frequent first
                (by number of files). Languages with the same file count are
                ordered alphabetically so the result does not depend on
                filesystem enumeration order. Only languages in the
                supported set are reported; an empty list is returned when
                nothing is found or the path is not a directory.
        """
        logger.info("Detecting languages in repository: %s", repo_path)

        language_counter = Counter(
            language for _, language in self._iter_classified_files(repo_path)
        )

        # Highest count first, then name, for a deterministic order.
        ranked = sorted(
            language_counter.items(), key=lambda item: (-item[1], item[0])
        )
        detected_languages = [
            language for language, _ in ranked
            if language in self._SUPPORTED_LANGUAGES
        ]

        logger.info("Detected languages: %s", detected_languages)
        return detected_languages

    def get_language_breakdown(self, repo_path):
        """
        Get a breakdown of programming languages in a repository by line count.

        Every physical line is counted, including blank lines and comments.
        Unlike detect_languages(), no supported-language filter is applied,
        so formats such as JSON or Markdown appear in the result too.

        Args:
            repo_path (str): The path to the repository.

        Returns:
            dict: A dictionary mapping each language to its total number of
                lines. Files that cannot be read are logged and skipped.
        """
        logger.info("Getting language breakdown for repository: %s", repo_path)

        language_loc = {}

        for file_path, language in self._iter_classified_files(repo_path):
            try:
                # errors='ignore' keeps undecodable bytes from aborting the
                # count; only the number of lines matters here.
                with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
                    line_count = sum(1 for _ in f)
            except OSError as e:
                # Best effort: an unreadable file must not fail the whole scan.
                logger.warning("Error counting lines in %s: %s", file_path, e)
                continue

            language_loc[language] = language_loc.get(language, 0) + line_count

        logger.info("Language breakdown: %s", language_loc)
        return language_loc