File size: 11,632 Bytes
d62e71f
917b3f5
 
 
d62e71f
 
917b3f5
d62e71f
 
917b3f5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fe8e34a
 
 
 
 
d62e71f
 
917b3f5
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
import os, hashlib, re, base64, requests, gradio as gr
from typing import List, Dict, Optional, Any
import json

GH = "https://api.github.com"
TOKEN = os.getenv("GITHUB_TOKEN")
RULES_REPO = os.getenv("RULES_REPO", "stefanoallima/awesome-cursorrules")
DEFAULT_REF = os.getenv("DEFAULT_REF", "main")

def _hdr():
    """Build the request headers for GitHub REST API calls (bearer auth + JSON media type)."""
    return {
        "Accept": "application/vnd.github+json",
        "Authorization": f"Bearer {TOKEN}",
    }

def _sha256(b): 
    return hashlib.sha256(b).hexdigest()

def get_readme_content(ref: Optional[str] = None) -> str:
    """Fetch README.md content from the rules repository for context.

    Args:
        ref: Git reference (branch, tag, or SHA); defaults to DEFAULT_REF.

    Returns:
        The decoded README text, or an "Error fetching README: ..." string
        on any failure (network, auth, missing file).
    """
    ref = ref or DEFAULT_REF
    try:
        # Explicit timeout so a stalled GitHub API call cannot hang the server.
        r = requests.get(
            f"{GH}/repos/{RULES_REPO}/contents/README.md?ref={ref}",
            headers=_hdr(),
            timeout=30,
        )
        r.raise_for_status()
        j = r.json()
        # The Contents API normally base64-encodes file bodies.
        raw = base64.b64decode(j["content"]) if j.get("encoding") == "base64" else j["content"].encode()
        return raw.decode("utf-8", "replace")
    except Exception as e:
        # Best-effort: surface the error as text rather than raising into the UI.
        return f"Error fetching README: {str(e)}"

def extract_available_technologies(ref: Optional[str] = None) -> List[str]:
    """Extract all available technology names from the rules directory.

    Walks the repository tree and derives one human-readable technology name
    per `rules/<dir>` entry (stripping the "-cursorrules-prompt-file" suffix).

    Args:
        ref: Git reference (branch, tag, or SHA); defaults to DEFAULT_REF.

    Returns:
        Sorted list of unique technology names, or a one-element
        ["Error: ..."] list on failure.
    """
    ref = ref or DEFAULT_REF
    try:
        # Explicit timeout so a stalled GitHub API call cannot hang the server.
        r = requests.get(
            f"{GH}/repos/{RULES_REPO}/git/trees/{ref}?recursive=1",
            headers=_hdr(),
            timeout=30,
        )
        r.raise_for_status()

        # A set gives O(1) dedup instead of scanning a list per entry.
        technologies = set()
        for item in r.json().get("tree", []):
            if item.get("type") == "blob" and item["path"].startswith("rules/"):
                path_parts = item["path"].split("/")
                if len(path_parts) >= 2:
                    # Clean up the directory name to extract the technology.
                    tech_name = path_parts[1].replace("-cursorrules-prompt-file", "").replace("-", " ")
                    technologies.add(tech_name)

        return sorted(technologies)
    except Exception as e:
        return [f"Error: {str(e)}"]

def semantic_match_technologies(requested_techs: List[str], available_techs: List[str]) -> Dict[str, List[str]]:
    """Use simple semantic matching to find relevant technologies"""
    matches = {}
    
    for requested in requested_techs:
        requested_lower = requested.lower()
        matched_techs = []
        
        for available in available_techs:
            available_lower = available.lower()
            
            # Direct match
            if requested_lower == available_lower:
                matched_techs.append(available)
                continue
            
            # Partial match (contains)
            if requested_lower in available_lower or available_lower in requested_lower:
                matched_techs.append(available)
                continue
            
            # Common technology mappings
            tech_mappings = {
                'python': ['python', 'django', 'fastapi', 'flask'],
                'javascript': ['javascript', 'js', 'node', 'react', 'vue', 'angular'],
                'typescript': ['typescript', 'ts', 'react', 'angular', 'nextjs'],
                'react': ['react', 'nextjs', 'typescript'],
                'vue': ['vue', 'vuejs', 'nuxt'],
                'node': ['node', 'nodejs', 'javascript'],
                'postgres': ['postgres', 'postgresql', 'database'],
                'fastapi': ['fastapi', 'python', 'api'],
                'nextjs': ['nextjs', 'next', 'react', 'typescript']
            }
            
            # Check if requested tech maps to available tech
            if requested_lower in tech_mappings:
                for mapped_tech in tech_mappings[requested_lower]:
                    if mapped_tech in available_lower:
                        matched_techs.append(available)
                        break
        
        matches[requested] = matched_techs
    
    return matches

def list_rules(tech_key: Optional[str] = None, ref: Optional[str] = None) -> List[Dict[str, Any]]:
    """List available coding rules with enhanced metadata.

    Args:
        tech_key: Optional case-insensitive substring filter on the
            technology name.
        ref: Git reference (branch, tag, or SHA); defaults to DEFAULT_REF.

    Returns:
        One metadata dict per matching rule directory, or a one-element
        [{"error": ...}] list on failure.
    """
    ref = ref or DEFAULT_REF
    try:
        # Explicit timeout so a stalled GitHub API call cannot hang the server.
        r = requests.get(
            f"{GH}/repos/{RULES_REPO}/git/trees/{ref}?recursive=1",
            headers=_hdr(),
            timeout=30,
        )
        r.raise_for_status()

        rules = []
        seen_dirs = set()  # emit each rule directory once, not once per file it contains
        for item in r.json().get("tree", []):
            if item.get("type") != "blob" or not item["path"].startswith("rules/"):
                continue
            path_parts = item["path"].split("/")
            if len(path_parts) < 2:
                continue
            tech_dir = path_parts[1]
            if tech_dir in seen_dirs:
                continue
            seen_dirs.add(tech_dir)

            tech_name = tech_dir.replace("-cursorrules-prompt-file", "").replace("-", " ")
            if not tech_key or tech_key.lower() in tech_name.lower():
                rules.append({
                    "tech_key": tech_name,
                    "directory": tech_dir,
                    "path": item["path"],
                    "repo": RULES_REPO,
                    "commit_sha": ref,
                    "url": f"https://github.com/{RULES_REPO}/tree/{ref}/{item['path']}"
                })

        return rules
    except Exception as e:
        return [{"error": str(e)}]

def fetch_rule_content(tech_directory: str, ref: Optional[str] = None) -> Dict[str, Any]:
    """Fetch the actual rule content from a technology directory.

    Args:
        tech_directory: Directory name under `rules/` (e.g.
            "python-cursorrules-prompt-file").
        ref: Git reference (branch, tag, or SHA); defaults to DEFAULT_REF.

    Returns:
        Dict with the rule text plus provenance metadata (repo, ref,
        sha256, URL), or {"error": ...} on failure.
    """
    ref = ref or DEFAULT_REF
    try:
        # Get files in the specific rule directory.
        # Explicit timeout so a stalled GitHub API call cannot hang the server.
        r = requests.get(
            f"{GH}/repos/{RULES_REPO}/contents/rules/{tech_directory}?ref={ref}",
            headers=_hdr(),
            timeout=30,
        )
        r.raise_for_status()

        files = r.json()
        if not isinstance(files, list):
            # The Contents API returns a single dict when the path is a file.
            files = [files]

        # Look for the first .cursorrules or .md file.
        rule_file = next(
            (entry for entry in files if entry["name"].endswith(('.cursorrules', '.md'))),
            None,
        )
        if not rule_file:
            return {"error": f"No rule file found in {tech_directory}"}

        # Fetch the raw file content.
        content_r = requests.get(rule_file["download_url"], timeout=30)
        content_r.raise_for_status()

        return {
            "tech_key": tech_directory.replace("-cursorrules-prompt-file", "").replace("-", " "),
            "filename": rule_file["name"],
            "content": content_r.text,
            "directory": tech_directory,
            "repo": RULES_REPO,
            "commit_sha": ref,
            "sha256": _sha256(content_r.content),
            "url": rule_file["html_url"]
        }
    except Exception as e:
        return {"error": str(e)}

def fetch_rule(tech_key: str, ref: str = None) -> Dict[str, Any]:
    """Fetch a rule by technology name, falling back to semantic matching."""
    ref = ref or DEFAULT_REF

    # Direct lookup first: any rule whose name contains the requested key.
    direct = list_rules(tech_key=tech_key, ref=ref)
    if direct and "error" not in direct[0]:
        return fetch_rule_content(direct[0]["directory"], ref)

    # No direct hit — look for a semantically similar technology.
    available = extract_available_technologies(ref)
    candidates = semantic_match_technologies([tech_key], available).get(tech_key)
    if candidates:
        # Take the first candidate and rebuild its directory name.
        directory = candidates[0].replace(" ", "-") + "-cursorrules-prompt-file"
        return fetch_rule_content(directory, ref)

    return {"error": f"No rule found for '{tech_key}' in {RULES_REPO}@{ref}"}

def get_guidelines_for_stack(tech_stack: List[str], ref: str = None) -> Dict[str, Any]:
    """Collect coding guidelines for every technology in a stack."""
    ref = ref or DEFAULT_REF

    available_techs = extract_available_technologies(ref)
    matches = semantic_match_technologies(tech_stack, available_techs)

    guidelines: Dict[str, List[Dict[str, Any]]] = {}
    for requested, candidates in matches.items():
        fetched = []
        # Cap at the top three candidates per requested technology.
        for candidate in candidates[:3]:
            content = fetch_rule_content(candidate.replace(" ", "-") + "-cursorrules-prompt-file", ref)
            if "error" not in content:
                fetched.append(content)
        guidelines[requested] = fetched

    return {
        "tech_stack": tech_stack,
        "guidelines": guidelines,
        "available_technologies": available_techs,
        "matches": matches,
        "repo": RULES_REPO,
        "commit_sha": ref
    }

def analyze_project_stack(framework_list: str, ref: str = None) -> Dict[str, Any]:
    """Analyze a project's technology stack and return relevant guidelines."""
    ref = ref or DEFAULT_REF

    # Accept comma- and/or newline-separated technology names.
    pieces = framework_list.replace(",", "\n").split("\n")
    techs = [piece.strip() for piece in pieces if piece.strip()]

    if not techs:
        return {"error": "No technologies found in the provided list"}

    # README gives extra repository context alongside the guidelines.
    readme_content = get_readme_content(ref)
    stack_guidelines = get_guidelines_for_stack(techs, ref)

    # Truncate long READMEs so the response stays compact.
    if len(readme_content) > 1000:
        readme_excerpt = readme_content[:1000] + "..."
    else:
        readme_excerpt = readme_content

    matched_count = len([g for g in stack_guidelines['guidelines'].values() if g])
    return {
        "project_analysis": {
            "detected_technologies": techs,
            "readme_context": readme_excerpt,
        },
        "guidelines": stack_guidelines,
        "summary": f"Found guidelines for {matched_count} out of {len(techs)} requested technologies"
    }

# Gradio Interface
# Three tabs mirror the three retrieval granularities (single technology,
# whole stack, catalog listing); the same functions are registered below
# via gr.api so MCP/agent clients can call them programmatically.
with gr.Blocks(title="Enhanced MCP Coding Guidelines Server") as demo:
    gr.Markdown("# 🚀 Enhanced MCP Coding Guidelines Server")
    gr.Markdown("Intelligent coding guideline retrieval with semantic matching")
    
    # Tab 1: fetch guidelines for one technology (fetch_rule includes a
    # semantic-matching fallback when there is no direct name match).
    with gr.Tab("Single Technology"):
        with gr.Row():
            tech_input = gr.Textbox(label="Technology", placeholder="e.g., python, react, fastapi")
            ref_input = gr.Textbox(label="Git Reference", value="main", placeholder="main")
        fetch_btn = gr.Button("Fetch Guidelines")
        single_output = gr.JSON(label="Guidelines")
        
        fetch_btn.click(
            fn=fetch_rule,
            inputs=[tech_input, ref_input],
            outputs=single_output
        )
    
    # Tab 2: analyze a comma/newline-separated stack and return guidelines
    # for every matched technology at once.
    with gr.Tab("Technology Stack"):
        stack_input = gr.Textbox(
            label="Technology Stack", 
            placeholder="python, fastapi, postgres, react, typescript",
            lines=3
        )
        stack_ref_input = gr.Textbox(label="Git Reference", value="main")
        analyze_btn = gr.Button("Analyze Stack")
        stack_output = gr.JSON(label="Stack Analysis")
        
        analyze_btn.click(
            fn=analyze_project_stack,
            inputs=[stack_input, stack_ref_input],
            outputs=stack_output
        )
    
    # Tab 3: list every technology that has a rules directory at the ref.
    with gr.Tab("Available Technologies"):
        list_ref_input = gr.Textbox(label="Git Reference", value="main")
        list_btn = gr.Button("List Available Technologies")
        list_output = gr.JSON(label="Available Technologies")
        
        list_btn.click(
            fn=extract_available_technologies,
            inputs=[list_ref_input],
            outputs=list_output
        )
    
    # Register MCP API endpoints (exposed alongside the web UI).
    gr.api(fn=list_rules)
    gr.api(fn=fetch_rule)
    gr.api(fn=get_guidelines_for_stack)
    gr.api(fn=analyze_project_stack)
    gr.api(fn=extract_available_technologies)

# Launch with the MCP server enabled so the registered endpoints are also
# exposed over the Model Context Protocol, not just the web UI.
if __name__ == "__main__":
    demo.launch(mcp_server=True)