# Enhanced MCP Coding Guidelines Server — Hugging Face Space app
# (non-code viewer chrome from the Spaces page removed)
import base64
import hashlib
import json
import os
import re
from typing import Any, Dict, List, Optional

import requests

import gradio as gr
# GitHub REST API base URL.
GH = "https://api.github.com"
# Optional personal access token; when unset this is None and _hdr() will
# still emit an "Authorization: Bearer None" header — TODO confirm GitHub
# tolerates that for anonymous rate limits.
TOKEN = os.getenv("GITHUB_TOKEN")
# "owner/name" of the repository holding the .cursorrules files.
RULES_REPO = os.getenv("RULES_REPO", "stefanoallima/awesome-cursorrules")
# Git reference used whenever a caller passes ref=None.
DEFAULT_REF = os.getenv("DEFAULT_REF", "main")
def _hdr():
    """Standard headers for GitHub REST calls: bearer auth + v3 JSON media type."""
    auth = f"Bearer {TOKEN}"
    return {"Authorization": auth, "Accept": "application/vnd.github+json"}
def _sha256(b):
return hashlib.sha256(b).hexdigest()
def get_readme_content(ref: Optional[str] = None) -> str:
    """Fetch the repository README as text, for use as matching context.

    Args:
        ref: Git reference (branch/tag/sha); defaults to DEFAULT_REF.

    Returns:
        The decoded README contents, or an "Error fetching README: ..."
        string on any failure (network, missing file, bad payload).
    """
    ref = ref or DEFAULT_REF
    try:
        r = requests.get(
            f"{GH}/repos/{RULES_REPO}/contents/README.md?ref={ref}",
            headers=_hdr(),
            timeout=30,  # never hang the UI on a stalled connection
        )
        r.raise_for_status()
        j = r.json()
        # The contents API base64-encodes file bodies; fall back to the raw
        # string if a different encoding ever appears.
        raw = base64.b64decode(j["content"]) if j.get("encoding") == "base64" else j["content"].encode()
        return raw.decode("utf-8", "replace")
    except Exception as e:
        # Best-effort: callers embed this string in their output instead of crashing.
        return f"Error fetching README: {str(e)}"
def extract_available_technologies(ref: Optional[str] = None) -> List[str]:
    """List every technology that has a rules/ directory in the repo.

    Directory names such as "react-cursorrules-prompt-file" are normalized
    to "react" (suffix stripped, hyphens replaced with spaces).

    Args:
        ref: Git reference to inspect; defaults to DEFAULT_REF.

    Returns:
        Sorted unique technology names, or a single-element
        ["Error: ..."] list on failure.
    """
    ref = ref or DEFAULT_REF
    try:
        r = requests.get(
            f"{GH}/repos/{RULES_REPO}/git/trees/{ref}?recursive=1",
            headers=_hdr(),
            timeout=30,
        )
        r.raise_for_status()
        # Set gives O(1) dedup; the original scanned a list per item (O(n^2)).
        technologies = set()
        for item in r.json().get("tree", []):
            if item.get("type") == "blob" and item["path"].startswith("rules/"):
                path_parts = item["path"].split("/")
                if len(path_parts) >= 2:
                    # rules/<tech-dir>/<file> -> human-readable tech name
                    tech_name = path_parts[1].replace("-cursorrules-prompt-file", "").replace("-", " ")
                    technologies.add(tech_name)
        return sorted(technologies)
    except Exception as e:
        return [f"Error: {str(e)}"]
# Hand-curated aliases: a requested technology (key) should also match rules
# written for any of the related technologies (values). Hoisted to module
# level — the original rebuilt this dict inside the inner matching loop.
_TECH_MAPPINGS = {
    'python': ['python', 'django', 'fastapi', 'flask'],
    'javascript': ['javascript', 'js', 'node', 'react', 'vue', 'angular'],
    'typescript': ['typescript', 'ts', 'react', 'angular', 'nextjs'],
    'react': ['react', 'nextjs', 'typescript'],
    'vue': ['vue', 'vuejs', 'nuxt'],
    'node': ['node', 'nodejs', 'javascript'],
    'postgres': ['postgres', 'postgresql', 'database'],
    'fastapi': ['fastapi', 'python', 'api'],
    'nextjs': ['nextjs', 'next', 'react', 'typescript'],
}


def semantic_match_technologies(requested_techs: List[str], available_techs: List[str]) -> Dict[str, List[str]]:
    """Match requested technology names against the available rule names.

    Matching is case-insensitive and tries, in order: exact equality,
    substring containment (either direction), then the _TECH_MAPPINGS
    alias table.

    Args:
        requested_techs: Technology names the caller asked about.
        available_techs: Technology names extracted from the rules repo.

    Returns:
        Mapping of each requested name to the (possibly empty) list of
        matching available names, in available_techs order.
    """
    matches: Dict[str, List[str]] = {}
    for requested in requested_techs:
        requested_lower = requested.lower()
        aliases = _TECH_MAPPINGS.get(requested_lower, ())
        matched_techs: List[str] = []
        for available in available_techs:
            available_lower = available.lower()
            # Exact match.
            if requested_lower == available_lower:
                matched_techs.append(available)
                continue
            # Substring match in either direction.
            if requested_lower in available_lower or available_lower in requested_lower:
                matched_techs.append(available)
                continue
            # Alias match via the curated mapping table.
            if any(alias in available_lower for alias in aliases):
                matched_techs.append(available)
        matches[requested] = matched_techs
    return matches
def list_rules(tech_key: Optional[str] = None, ref: Optional[str] = None) -> List[Dict[str, Any]]:
    """List rule files under rules/, optionally filtered by technology.

    Args:
        tech_key: Case-insensitive substring filter on the normalized
            technology name; None lists everything.
        ref: Git reference to inspect; defaults to DEFAULT_REF.

    Returns:
        One metadata dict per matching rule file (tech_key, directory, path,
        repo, commit_sha, url), or [{"error": ...}] on failure.
    """
    ref = ref or DEFAULT_REF
    try:
        r = requests.get(
            f"{GH}/repos/{RULES_REPO}/git/trees/{ref}?recursive=1",
            headers=_hdr(),
            timeout=30,
        )
        r.raise_for_status()
        rules = []
        for item in r.json().get("tree", []):
            # Guard clauses: only files inside rules/<tech-dir>/ qualify.
            if item.get("type") != "blob" or not item["path"].startswith("rules/"):
                continue
            path_parts = item["path"].split("/")
            if len(path_parts) < 2:
                continue
            tech_dir = path_parts[1]
            tech_name = tech_dir.replace("-cursorrules-prompt-file", "").replace("-", " ")
            if not tech_key or tech_key.lower() in tech_name.lower():
                rules.append({
                    "tech_key": tech_name,
                    "directory": tech_dir,
                    "path": item["path"],
                    "repo": RULES_REPO,
                    "commit_sha": ref,
                    "url": f"https://github.com/{RULES_REPO}/tree/{ref}/{item['path']}",
                })
        return rules
    except Exception as e:
        return [{"error": str(e)}]
def fetch_rule_content(tech_directory: str, ref: Optional[str] = None) -> Dict[str, Any]:
    """Download the rule text for one technology directory.

    Args:
        tech_directory: Directory name under rules/, e.g.
            "react-cursorrules-prompt-file".
        ref: Git reference; defaults to DEFAULT_REF.

    Returns:
        Dict with the rule's metadata and "content" text, or
        {"error": ...} when no rule file exists or a request fails.
    """
    ref = ref or DEFAULT_REF
    try:
        # List the files inside the technology's directory.
        r = requests.get(
            f"{GH}/repos/{RULES_REPO}/contents/rules/{tech_directory}?ref={ref}",
            headers=_hdr(),
            timeout=30,
        )
        r.raise_for_status()
        files = r.json()
        if not isinstance(files, list):  # single-file responses come back as a dict
            files = [files]
        # First .cursorrules or .md file wins (renamed from `file` to avoid
        # shadowing the builtin).
        rule_file = next(
            (entry for entry in files if entry["name"].endswith((".cursorrules", ".md"))),
            None,
        )
        if not rule_file:
            return {"error": f"No rule file found in {tech_directory}"}
        # Fetch the raw file body.
        content_r = requests.get(rule_file["download_url"], timeout=30)
        content_r.raise_for_status()
        return {
            "tech_key": tech_directory.replace("-cursorrules-prompt-file", "").replace("-", " "),
            "filename": rule_file["name"],
            "content": content_r.text,
            "directory": tech_directory,
            "repo": RULES_REPO,
            "commit_sha": ref,
            "sha256": _sha256(content_r.content),  # integrity fingerprint of the raw bytes
            "url": rule_file["html_url"],
        }
    except Exception as e:
        return {"error": str(e)}
def fetch_rule(tech_key: str, ref: Optional[str] = None) -> Dict[str, Any]:
    """Fetch the rule for one technology, with a semantic-match fallback.

    Args:
        tech_key: Technology to look up (e.g. "python", "react").
        ref: Git reference; defaults to DEFAULT_REF.

    Returns:
        The rule dict from fetch_rule_content, or {"error": ...} when
        nothing matches.
    """
    ref = ref or DEFAULT_REF
    # 1) Direct substring match against the listed rules.
    rules = list_rules(tech_key=tech_key, ref=ref)
    if rules and "error" not in rules[0]:
        return fetch_rule_content(rules[0]["directory"], ref)
    # 2) Fall back to semantic matching over all available technologies.
    available_techs = extract_available_technologies(ref)
    matches = semantic_match_technologies([tech_key], available_techs)
    matched = matches.get(tech_key)
    if matched:
        # Rebuild the repo directory name from the best (first) match.
        tech_directory = matched[0].replace(" ", "-") + "-cursorrules-prompt-file"
        return fetch_rule_content(tech_directory, ref)
    return {"error": f"No rule found for '{tech_key}' in {RULES_REPO}@{ref}"}
def get_guidelines_for_stack(tech_stack: List[str], ref: Optional[str] = None) -> Dict[str, Any]:
    """Collect coding guidelines for every technology in a stack.

    Args:
        tech_stack: Technology names to look up.
        ref: Git reference; defaults to DEFAULT_REF.

    Returns:
        Dict with per-technology "guidelines", the raw "matches", the full
        "available_technologies" list, and repo/ref provenance.
    """
    ref = ref or DEFAULT_REF
    available_techs = extract_available_technologies(ref)
    matches = semantic_match_technologies(tech_stack, available_techs)
    guidelines: Dict[str, List[Dict[str, Any]]] = {}
    for requested_tech, matched_techs in matches.items():
        guidelines[requested_tech] = []
        # Cap at the top 3 matches to bound the number of GitHub API calls.
        for matched_tech in matched_techs[:3]:
            tech_directory = matched_tech.replace(" ", "-") + "-cursorrules-prompt-file"
            rule_content = fetch_rule_content(tech_directory, ref)
            if "error" not in rule_content:
                guidelines[requested_tech].append(rule_content)
    return {
        "tech_stack": tech_stack,
        "guidelines": guidelines,
        "available_technologies": available_techs,
        "matches": matches,
        "repo": RULES_REPO,
        "commit_sha": ref,
    }
def analyze_project_stack(framework_list: str, ref: Optional[str] = None) -> Dict[str, Any]:
    """Parse a free-form technology list and return matching guidelines.

    Args:
        framework_list: Comma- and/or newline-separated technology names.
        ref: Git reference; defaults to DEFAULT_REF.

    Returns:
        Dict with a "project_analysis" section (detected techs + truncated
        README context), the full "guidelines" payload, and a one-line
        "summary"; or {"error": ...} when no technologies were supplied.
    """
    ref = ref or DEFAULT_REF
    # Normalize commas to newlines, then keep each non-empty trimmed entry.
    techs = [tech.strip() for tech in framework_list.replace(",", "\n").split("\n") if tech.strip()]
    if not techs:
        return {"error": "No technologies found in the provided list"}
    # README gives human-readable context for the analysis.
    readme_content = get_readme_content(ref)
    stack_guidelines = get_guidelines_for_stack(techs, ref)
    # Truncate the README so the response payload stays small.
    readme_context = readme_content[:1000] + "..." if len(readme_content) > 1000 else readme_content
    matched_count = len([g for g in stack_guidelines["guidelines"].values() if g])
    return {
        "project_analysis": {
            "detected_technologies": techs,
            "readme_context": readme_context,
        },
        "guidelines": stack_guidelines,
        "summary": f"Found guidelines for {matched_count} out of {len(techs)} requested technologies",
    }
# Gradio Interface
# Three tabs mirror the three lookup modes; the same functions are also
# exposed as MCP/API endpoints below via gr.api.
# NOTE(review): original indentation was lost in extraction — the Row is
# assumed to hold the two textboxes; confirm against the deployed layout.
with gr.Blocks(title="Enhanced MCP Coding Guidelines Server") as demo:
    gr.Markdown("# 🚀 Enhanced MCP Coding Guidelines Server")
    gr.Markdown("Intelligent coding guideline retrieval with semantic matching")
    # Tab 1: fetch the rule for a single technology (with semantic fallback).
    with gr.Tab("Single Technology"):
        with gr.Row():
            tech_input = gr.Textbox(label="Technology", placeholder="e.g., python, react, fastapi")
            ref_input = gr.Textbox(label="Git Reference", value="main", placeholder="main")
        fetch_btn = gr.Button("Fetch Guidelines")
        single_output = gr.JSON(label="Guidelines")
        fetch_btn.click(
            fn=fetch_rule,
            inputs=[tech_input, ref_input],
            outputs=single_output
        )
    # Tab 2: analyze a whole comma/newline-separated technology stack.
    with gr.Tab("Technology Stack"):
        stack_input = gr.Textbox(
            label="Technology Stack",
            placeholder="python, fastapi, postgres, react, typescript",
            lines=3
        )
        stack_ref_input = gr.Textbox(label="Git Reference", value="main")
        analyze_btn = gr.Button("Analyze Stack")
        stack_output = gr.JSON(label="Stack Analysis")
        analyze_btn.click(
            fn=analyze_project_stack,
            inputs=[stack_input, stack_ref_input],
            outputs=stack_output
        )
    # Tab 3: enumerate every technology present in the rules repo.
    with gr.Tab("Available Technologies"):
        list_ref_input = gr.Textbox(label="Git Reference", value="main")
        list_btn = gr.Button("List Available Technologies")
        list_output = gr.JSON(label="Available Technologies")
        list_btn.click(
            fn=extract_available_technologies,
            inputs=[list_ref_input],
            outputs=list_output
        )
    # Register MCP API endpoints (exposed to MCP clients when
    # demo.launch(mcp_server=True) runs below).
    gr.api(fn=list_rules)
    gr.api(fn=fetch_rule)
    gr.api(fn=get_guidelines_for_stack)
    gr.api(fn=analyze_project_stack)
    gr.api(fn=extract_available_technologies)

if __name__ == "__main__":
    demo.launch(mcp_server=True)