Spaces:
Sleeping
Sleeping
| import logging | |
| from typing import List, Dict, Any, Set | |
| from src.recommendation_engine.context_builder import build_project_context | |
| from src.recommendation_engine.prompt_builder import build_feature_prompt | |
| from src.recommendation_engine.llm_client import generate_text | |
| from src.recommendation_engine.validator import validate_generated_list | |
| from src.recommendation_engine.novelty_checker import is_feature_novel | |
| from src.similarity_model import compare_two_ideas | |
| from src.recommendation_engine.config import ( | |
| DEFAULT_FEATURE_COUNT, | |
| GENERATION_BATCH_SIZE | |
| ) | |
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Upper bound on the number of generation attempts made by generate_features.
MAX_RETRIES = 5

# Score from compare_two_ideas at or above which is_duplicate_local treats
# two features as duplicates of each other.
SIMILARITY_THRESHOLD_LOCAL = 0.82
def normalize(text: str) -> str:
    """Return *text* lower-cased with whitespace runs collapsed to one space.

    The argument is passed through ``str()`` first, so non-string values are
    accepted and normalized by their string form. Leading and trailing
    whitespace is removed.
    """
    lowered = str(text).strip().lower()
    tokens = lowered.split()
    return " ".join(tokens)
# Lower-case phrases that mark a feature as too generic. is_generic_feature
# rejects any feature whose normalized text contains one of these as a
# substring.
GENERIC_PATTERNS = [
    "dashboard",
    "login",
    "signup",
    "authentication",
    "analytics module",
    "ai module",
    "admin panel",
    "settings page",
    "reports system",
    "user management"
]
def is_generic_feature(text: str) -> bool:
    """Tell whether *text* is too generic to be recommended.

    A feature is generic when its normalized form has fewer than two words,
    or when it contains any entry of ``GENERIC_PATTERNS`` as a substring.
    """
    lowered = normalize(text)
    too_short = len(lowered.split()) < 2
    return too_short or any(pattern in lowered for pattern in GENERIC_PATTERNS)
def clean_features(features: List[str]) -> List[str]:
    """Filter raw candidate features down to usable ones.

    Each item is converted with ``str()`` and stripped. A candidate is kept
    only if it is non-empty, has between 3 and 10 whitespace-separated words
    (inclusive), and is not flagged by ``is_generic_feature``. Input order is
    preserved; duplicates are not removed here.
    """

    def _acceptable(candidate: str) -> bool:
        # Cheap structural checks run first; the generic check runs last.
        if not candidate:
            return False
        word_count = len(candidate.split())
        if word_count < 3 or word_count > 10:
            return False
        return not is_generic_feature(candidate)

    stripped = (str(item).strip() for item in features)
    return [candidate for candidate in stripped if _acceptable(candidate)]
def is_duplicate_local(feature: str, existing: List[str]) -> bool:
    """Tell whether *feature* is a near-duplicate of any entry in *existing*.

    Each entry is scored against *feature* with ``compare_two_ideas``. The
    first score at or above ``SIMILARITY_THRESHOLD_LOCAL`` is logged and
    makes the function return True. An empty *existing* list yields False.
    """
    for prior in existing:
        similarity = compare_two_ideas(feature, prior)
        is_match = similarity >= SIMILARITY_THRESHOLD_LOCAL
        if is_match:
            logger.info(f"[LOCAL DUPLICATE] {feature} ~ {prior} ({similarity:.2f})")
            return True
    return False
def fallback_features(title: str) -> List[str]:
    """Return five canned feature suggestions chosen by keywords in *title*.

    The title is lower-cased (``None`` or empty is treated as ``""``) and
    checked for domain keywords by substring match. Domains are tried in a
    fixed order (health, education, security) and the first match wins. If
    none matches, a domain-neutral list is returned. A new list is built on
    every call.
    """
    lowered_title = (title or "").lower()

    # Ordered (keywords, suggestions) pairs; the order defines precedence.
    domain_table = (
        (
            ("health", "hospital", "medical", "clinic"),
            [
                "Real-time patient monitoring",
                "Emergency alert notification system",
                "AI-assisted diagnosis support",
                "Medical data visualization dashboard",
                "Predictive patient risk analysis",
            ],
        ),
        (
            ("education", "learning", "student", "school"),
            [
                "Adaptive learning recommendation engine",
                "Student performance prediction system",
                "Automated assignment evaluation",
                "Gamified engagement tracking",
                "Personalized study path generation",
            ],
        ),
        (
            ("security", "cyber", "threat"),
            [
                "Real-time threat detection engine",
                "Behavior anomaly monitoring",
                "Automated attack alert system",
                "Security event visualization",
                "Risk prediction analytics",
            ],
        ),
    )

    for keywords, suggestions in domain_table:
        for keyword in keywords:
            if keyword in lowered_title:
                return suggestions

    # No domain keyword matched: fall back to the neutral list.
    return [
        "Real-time intelligent monitoring",
        "Predictive analytics engine",
        "Smart recommendation system",
        "Automated decision support",
        "Dynamic performance optimization",
    ]
def generate_features(
    title: str,
    description: str,
    abstract: str = "",
    features: List[str] = None,
    previous_generated_features: List[str] = None,
    top_k: int = DEFAULT_FEATURE_COUNT
) -> Dict[str, Any]:
    """Recommend up to ``top_k`` new features for a project.

    A project context is built from the inputs, then the text generator is
    prompted up to ``MAX_RETRIES`` times. Each response is parsed, cleaned,
    and filtered so that an accepted feature is:

    * not an exact (normalized) match of an already accepted feature, an
      existing project feature, or a previously generated feature;
    * accepted by ``is_feature_novel`` against the existing features;
    * not a near-duplicate of an already accepted feature
      (``is_duplicate_local``).

    If fewer than ``top_k`` features were collected, the list is topped up
    from ``fallback_features(title)``.

    Args:
        title: Project title; also selects the fallback list.
        description: Project description, forwarded to the context builder.
        abstract: Optional abstract, forwarded to the context builder.
        features: Features the project already has; ``None`` means none.
        previous_generated_features: Features returned by earlier calls that
            should not be recommended again; ``None`` means none.
        top_k: Desired number of recommendations, clamped to 1..20.

    Returns:
        A dict with the keys ``project_title``, ``current_features``,
        ``recommended_features``, ``originality_score`` and
        ``similar_projects``. ``recommended_features`` may hold fewer than
        ``top_k`` entries when generation and fallback together run dry.
    """
    features = features or []
    previous_generated_features = previous_generated_features or []
    top_k = max(1, min(top_k, 20))

    logger.info(f"Starting feature generation | title={title}")

    context = build_project_context(
        title=title,
        description=description,
        abstract=abstract,
        features=features
    )

    final_features: List[str] = []
    final_norm_set: Set[str] = set()

    existing_features = context.get("features", [])
    existing_norm = {normalize(f) for f in existing_features}
    previous_norm = {normalize(f) for f in previous_generated_features}

    # Over-generate so enough candidates survive filtering. The value does
    # not change between attempts, so it is computed once.
    generation_count = max(
        top_k * 4,
        GENERATION_BATCH_SIZE
    )

    attempts = 0
    while len(final_features) < top_k and attempts < MAX_RETRIES:
        attempts += 1
        logger.info(f"Generation attempt #{attempts}")

        prompt = build_feature_prompt(
            context=context,
            count=generation_count,
            previous_features=previous_generated_features
        )
        raw_text = generate_text(
            prompt,
            task="feature"
        )
        if not raw_text:
            # An empty response still consumes one attempt.
            logger.warning("Empty feature response")
            continue

        generated = validate_generated_list(
            text=raw_text,
            top_k=generation_count
        )
        generated = clean_features(generated)
        logger.info(f"Generated {len(generated)} candidate features")

        for feat in generated:
            norm = normalize(feat)
            if not norm:
                continue
            # Cheap exact-match checks first, similarity checks afterwards.
            if (
                norm in final_norm_set
                or norm in existing_norm
                or norm in previous_norm
            ):
                continue
            if not is_feature_novel(feat, existing_features):
                continue
            if is_duplicate_local(feat, final_features):
                continue

            final_features.append(feat)
            final_norm_set.add(norm)
            logger.info(f"[NEW FEATURE] {feat}")

            if len(final_features) >= top_k:
                break

    if len(final_features) < top_k:
        logger.warning("Using fallback features")
        # Prefer fallback entries the caller has not been shown before.
        # sorted() is stable and False sorts before True, so previously
        # generated entries move to the back and are used only as a last
        # resort; the number of features returned is not reduced.
        fallback = sorted(
            fallback_features(title),
            key=lambda feat: normalize(feat) in previous_norm
        )
        for feat in fallback:
            norm = normalize(feat)
            if norm in final_norm_set or norm in existing_norm:
                continue
            final_features.append(feat)
            final_norm_set.add(norm)
            if len(final_features) >= top_k:
                break

    logger.info(f"Final generated features: {final_features}")

    return {
        "project_title": context.get("project_title", title),
        "current_features": existing_features,
        "recommended_features": final_features,
        "originality_score": context.get("originality_score", 1.0),
        "similar_projects": context.get("similar_titles", [])
    }