File size: 11,632 Bytes
d62e71f
917b3f5
 
 
d62e71f
 
917b3f5
d62e71f
 
917b3f5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fe8e34a
 
 
 
 
d62e71f
 
917b3f5
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
import os, hashlib, re, base64, requests, gradio as gr
from typing import List, Dict, Optional, Any
import json

GH = "https://api.github.com"
TOKEN = os.getenv("GITHUB_TOKEN")
RULES_REPO = os.getenv("RULES_REPO", "stefanoallima/awesome-cursorrules")
DEFAULT_REF = os.getenv("DEFAULT_REF", "main")

def _hdr():
    """Build the request headers for GitHub REST API calls (bearer auth + JSON media type)."""
    return {
        "Accept": "application/vnd.github+json",
        "Authorization": f"Bearer {TOKEN}",
    }

def _sha256(b): 
    return hashlib.sha256(b).hexdigest()

def get_readme_content(ref: Optional[str] = None) -> str:
    """Fetch README.md content from the rules repository for context.

    Args:
        ref: Git reference (branch, tag, or SHA); defaults to DEFAULT_REF.

    Returns:
        The decoded README text, or an "Error fetching README: ..." string
        on any failure (network, auth, missing file).
    """
    ref = ref or DEFAULT_REF
    try:
        # Explicit timeout so a stalled GitHub API call cannot hang the server.
        r = requests.get(
            f"{GH}/repos/{RULES_REPO}/contents/README.md?ref={ref}",
            headers=_hdr(),
            timeout=30,
        )
        r.raise_for_status()
        j = r.json()
        # The Contents API normally base64-encodes file bodies.
        raw = base64.b64decode(j["content"]) if j.get("encoding") == "base64" else j["content"].encode()
        return raw.decode("utf-8", "replace")
    except Exception as e:
        # Best-effort: surface the error as text rather than raising into the UI.
        return f"Error fetching README: {str(e)}"

def extract_available_technologies(ref: Optional[str] = None) -> List[str]:
    """Extract all available technology names from the rules directory.

    Walks the repository tree and derives one human-readable technology name
    per `rules/<dir>` entry (stripping the "-cursorrules-prompt-file" suffix).

    Args:
        ref: Git reference (branch, tag, or SHA); defaults to DEFAULT_REF.

    Returns:
        Sorted list of unique technology names, or a one-element
        ["Error: ..."] list on failure.
    """
    ref = ref or DEFAULT_REF
    try:
        # Explicit timeout so a stalled GitHub API call cannot hang the server.
        r = requests.get(
            f"{GH}/repos/{RULES_REPO}/git/trees/{ref}?recursive=1",
            headers=_hdr(),
            timeout=30,
        )
        r.raise_for_status()

        # A set gives O(1) dedup instead of scanning a list per entry.
        technologies = set()
        for item in r.json().get("tree", []):
            if item.get("type") == "blob" and item["path"].startswith("rules/"):
                path_parts = item["path"].split("/")
                if len(path_parts) >= 2:
                    # Clean up the directory name to extract the technology.
                    tech_name = path_parts[1].replace("-cursorrules-prompt-file", "").replace("-", " ")
                    technologies.add(tech_name)

        return sorted(technologies)
    except Exception as e:
        return [f"Error: {str(e)}"]

def semantic_match_technologies(requested_techs: List[str], available_techs: List[str]) -> Dict[str, List[str]]:
    """Use simple semantic matching to find relevant technologies"""
    matches = {}
    
    for requested in requested_techs:
        requested_lower = requested.lower()
        matched_techs = []
        
        for available in available_techs:
            available_lower = available.lower()
            
            # Direct match
            if requested_lower == available_lower:
                matched_techs.append(available)
                continue
            
            # Partial match (contains)
            if requested_lower in available_lower or available_lower in requested_lower:
                matched_techs.append(available)
                continue
            
            # Common technology mappings
            tech_mappings = {
                'python': ['python', 'django', 'fastapi', 'flask'],
                'javascript': ['javascript', 'js', 'node', 'react', 'vue', 'angular'],
                'typescript': ['typescript', 'ts', 'react', 'angular', 'nextjs'],
                'react': ['react', 'nextjs', 'typescript'],
                'vue': ['vue', 'vuejs', 'nuxt'],
                'node': ['node', 'nodejs', 'javascript'],
                'postgres': ['postgres', 'postgresql', 'database'],
                'fastapi': ['fastapi', 'python', 'api'],
                'nextjs': ['nextjs', 'next', 'react', 'typescript']
            }
            
            # Check if requested tech maps to available tech
            if requested_lower in tech_mappings:
                for mapped_tech in tech_mappings[requested_lower]:
                    if mapped_tech in available_lower:
                        matched_techs.append(available)
                        break
        
        matches[requested] = matched_techs
    
    return matches

def list_rules(tech_key: Optional[str] = None, ref: Optional[str] = None) -> List[Dict[str, Any]]:
    """List available coding rules with enhanced metadata.

    Args:
        tech_key: Optional case-insensitive substring filter on the
            technology name.
        ref: Git reference (branch, tag, or SHA); defaults to DEFAULT_REF.

    Returns:
        One metadata dict per matching rule directory, or a one-element
        [{"error": ...}] list on failure.
    """
    ref = ref or DEFAULT_REF
    try:
        # Explicit timeout so a stalled GitHub API call cannot hang the server.
        r = requests.get(
            f"{GH}/repos/{RULES_REPO}/git/trees/{ref}?recursive=1",
            headers=_hdr(),
            timeout=30,
        )
        r.raise_for_status()

        rules = []
        seen_dirs = set()  # emit each rule directory once, not once per file it contains
        for item in r.json().get("tree", []):
            if item.get("type") != "blob" or not item["path"].startswith("rules/"):
                continue
            path_parts = item["path"].split("/")
            if len(path_parts) < 2:
                continue
            tech_dir = path_parts[1]
            if tech_dir in seen_dirs:
                continue
            seen_dirs.add(tech_dir)

            tech_name = tech_dir.replace("-cursorrules-prompt-file", "").replace("-", " ")
            if not tech_key or tech_key.lower() in tech_name.lower():
                rules.append({
                    "tech_key": tech_name,
                    "directory": tech_dir,
                    "path": item["path"],
                    "repo": RULES_REPO,
                    "commit_sha": ref,
                    "url": f"https://github.com/{RULES_REPO}/tree/{ref}/{item['path']}"
                })

        return rules
    except Exception as e:
        return [{"error": str(e)}]

def fetch_rule_content(tech_directory: str, ref: Optional[str] = None) -> Dict[str, Any]:
    """Fetch the actual rule content from a technology directory.

    Args:
        tech_directory: Directory name under `rules/` (e.g.
            "python-cursorrules-prompt-file").
        ref: Git reference (branch, tag, or SHA); defaults to DEFAULT_REF.

    Returns:
        Dict with the rule text plus provenance metadata (repo, ref,
        sha256, URL), or {"error": ...} on failure.
    """
    ref = ref or DEFAULT_REF
    try:
        # Get files in the specific rule directory.
        # Explicit timeout so a stalled GitHub API call cannot hang the server.
        r = requests.get(
            f"{GH}/repos/{RULES_REPO}/contents/rules/{tech_directory}?ref={ref}",
            headers=_hdr(),
            timeout=30,
        )
        r.raise_for_status()

        files = r.json()
        if not isinstance(files, list):
            # The Contents API returns a single dict when the path is a file.
            files = [files]

        # Look for the first .cursorrules or .md file.
        rule_file = next(
            (entry for entry in files if entry["name"].endswith(('.cursorrules', '.md'))),
            None,
        )
        if not rule_file:
            return {"error": f"No rule file found in {tech_directory}"}

        # Fetch the raw file content.
        content_r = requests.get(rule_file["download_url"], timeout=30)
        content_r.raise_for_status()

        return {
            "tech_key": tech_directory.replace("-cursorrules-prompt-file", "").replace("-", " "),
            "filename": rule_file["name"],
            "content": content_r.text,
            "directory": tech_directory,
            "repo": RULES_REPO,
            "commit_sha": ref,
            "sha256": _sha256(content_r.content),
            "url": rule_file["html_url"]
        }
    except Exception as e:
        return {"error": str(e)}

def fetch_rule(tech_key: str, ref: str = None) -> Dict[str, Any]:
    """Fetch a rule by technology name, falling back to semantic matching."""
    ref = ref or DEFAULT_REF

    # Direct lookup first: any rule whose name contains the requested key.
    direct = list_rules(tech_key=tech_key, ref=ref)
    if direct and "error" not in direct[0]:
        return fetch_rule_content(direct[0]["directory"], ref)

    # No direct hit — look for a semantically similar technology.
    available = extract_available_technologies(ref)
    candidates = semantic_match_technologies([tech_key], available).get(tech_key)
    if candidates:
        # Take the first candidate and rebuild its directory name.
        directory = candidates[0].replace(" ", "-") + "-cursorrules-prompt-file"
        return fetch_rule_content(directory, ref)

    return {"error": f"No rule found for '{tech_key}' in {RULES_REPO}@{ref}"}

def get_guidelines_for_stack(tech_stack: List[str], ref: str = None) -> Dict[str, Any]:
    """Collect coding guidelines for every technology in a stack."""
    ref = ref or DEFAULT_REF

    available_techs = extract_available_technologies(ref)
    matches = semantic_match_technologies(tech_stack, available_techs)

    guidelines: Dict[str, List[Dict[str, Any]]] = {}
    for requested, candidates in matches.items():
        fetched = []
        # Cap at the top three candidates per requested technology.
        for candidate in candidates[:3]:
            content = fetch_rule_content(candidate.replace(" ", "-") + "-cursorrules-prompt-file", ref)
            if "error" not in content:
                fetched.append(content)
        guidelines[requested] = fetched

    return {
        "tech_stack": tech_stack,
        "guidelines": guidelines,
        "available_technologies": available_techs,
        "matches": matches,
        "repo": RULES_REPO,
        "commit_sha": ref
    }

def analyze_project_stack(framework_list: str, ref: str = None) -> Dict[str, Any]:
    """Analyze a project's technology stack and return relevant guidelines."""
    ref = ref or DEFAULT_REF

    # Accept comma- and/or newline-separated technology names.
    pieces = framework_list.replace(",", "\n").split("\n")
    techs = [piece.strip() for piece in pieces if piece.strip()]

    if not techs:
        return {"error": "No technologies found in the provided list"}

    # README gives extra repository context alongside the guidelines.
    readme_content = get_readme_content(ref)
    stack_guidelines = get_guidelines_for_stack(techs, ref)

    # Truncate long READMEs so the response stays compact.
    if len(readme_content) > 1000:
        readme_excerpt = readme_content[:1000] + "..."
    else:
        readme_excerpt = readme_content

    matched_count = len([g for g in stack_guidelines['guidelines'].values() if g])
    return {
        "project_analysis": {
            "detected_technologies": techs,
            "readme_context": readme_excerpt,
        },
        "guidelines": stack_guidelines,
        "summary": f"Found guidelines for {matched_count} out of {len(techs)} requested technologies"
    }

# Gradio Interface
# Three tabs mirror the three retrieval granularities (single technology,
# whole stack, catalog listing); the same functions are registered below
# via gr.api so MCP/agent clients can call them programmatically.
with gr.Blocks(title="Enhanced MCP Coding Guidelines Server") as demo:
    gr.Markdown("# 🚀 Enhanced MCP Coding Guidelines Server")
    gr.Markdown("Intelligent coding guideline retrieval with semantic matching")
    
    # Tab 1: fetch guidelines for one technology (fetch_rule includes a
    # semantic-matching fallback when there is no direct name match).
    with gr.Tab("Single Technology"):
        with gr.Row():
            tech_input = gr.Textbox(label="Technology", placeholder="e.g., python, react, fastapi")
            ref_input = gr.Textbox(label="Git Reference", value="main", placeholder="main")
        fetch_btn = gr.Button("Fetch Guidelines")
        single_output = gr.JSON(label="Guidelines")
        
        fetch_btn.click(
            fn=fetch_rule,
            inputs=[tech_input, ref_input],
            outputs=single_output
        )
    
    # Tab 2: analyze a comma/newline-separated stack and return guidelines
    # for every matched technology at once.
    with gr.Tab("Technology Stack"):
        stack_input = gr.Textbox(
            label="Technology Stack", 
            placeholder="python, fastapi, postgres, react, typescript",
            lines=3
        )
        stack_ref_input = gr.Textbox(label="Git Reference", value="main")
        analyze_btn = gr.Button("Analyze Stack")
        stack_output = gr.JSON(label="Stack Analysis")
        
        analyze_btn.click(
            fn=analyze_project_stack,
            inputs=[stack_input, stack_ref_input],
            outputs=stack_output
        )
    
    # Tab 3: list every technology that has a rules directory at the ref.
    with gr.Tab("Available Technologies"):
        list_ref_input = gr.Textbox(label="Git Reference", value="main")
        list_btn = gr.Button("List Available Technologies")
        list_output = gr.JSON(label="Available Technologies")
        
        list_btn.click(
            fn=extract_available_technologies,
            inputs=[list_ref_input],
            outputs=list_output
        )
    
    # Register MCP API endpoints (exposed alongside the web UI).
    gr.api(fn=list_rules)
    gr.api(fn=fetch_rule)
    gr.api(fn=get_guidelines_for_stack)
    gr.api(fn=analyze_project_stack)
    gr.api(fn=extract_available_technologies)

# Launch with the MCP server enabled so the registered endpoints are also
# exposed over the Model Context Protocol, not just the web UI.
if __name__ == "__main__":
    demo.launch(mcp_server=True)