import gradio as gr from selenium import webdriver from selenium.webdriver.common.by import By from selenium.webdriver.chrome.options import Options from deep_translator import GoogleTranslator from transformers import AutoTokenizer, AutoModelForCausalLM import torch from ddgs import DDGS # ========================= # ✅ MODEL # ========================= model_id = "Qwen/Qwen2.5-0.5B-Instruct" tokenizer = AutoTokenizer.from_pretrained(model_id) model = AutoModelForCausalLM.from_pretrained( model_id, device_map="auto", dtype=torch.float16 ) # ========================= # ✅ SEARCH (4 texte + 1 image) # ========================= def search_wiki(query): text_results = [] image_url = None with DDGS() as ddgs: # ✅ 4 résultats texte results = list(ddgs.text(query, max_results=2)) for r in results: text_results.append({ "title": r.get("title"), "link": r.get("href"), "description": r.get("body") }) # ✅ 1 image (5e résultat) images = list(ddgs.images(query, max_results=2)) if images: image_url = images[0].get("image") return text_results, image_url # ========================= # ✅ PIPELINE # ========================= def run_pipeline(user_query): results, img = search_wiki(user_query) if not results: return "❌ Aucun résultat trouvé.", None link = results[0]["link"] try: options = Options() options.add_argument("--headless") options.add_argument("--no-sandbox") # Obligatoire pour Docker options.add_argument("--disable-dev-shm-usage") # Obligatoire pour Docker # Sur HF Spaces, le driver est installé dans /usr/bin/chromedriver service = Service("/usr/bin/chromedriver") driver = webdriver.Chrome(service=service,options=options) driver.get(link) paragraphs = driver.find_elements(By.TAG_NAME, "p") translator = GoogleTranslator(source='auto', target='fr') texte_total = "" for p in paragraphs: texte = p.text.strip() if texte and len(texte) > 50: try: traduction = translator.translate(texte) texte_total += traduction + "\n" except: pass driver.quit() texte_total = texte_total[:6000] prompt = ( "Fais un résumé clair et structuré en français :\n\n" + texte_total ) inputs = tokenizer(prompt, return_tensors="pt").to(model.device) outputs = model.generate( **inputs, max_new_tokens=300, temperature=0.7, do_sample=True ) response = tokenizer.decode(outputs[0], skip_special_tokens=True) return f"🔗 {link}\n\n📄 {response}", img except Exception as e: return f"❌ Erreur : {str(e)}", None # ========================= # ✅ STYLE # ========================= css = """ body { background: #0f1117; color: white; } .container { max-width: 900px; margin: auto; padding-top: 40px; } .title { text-align: center; font-size: 30px; font-weight: bold; margin-bottom: 20px; } textarea { background: #1a1d26 !important; color: white !important; border-radius: 12px !important; } button { background: linear-gradient(90deg, #00c6ff, #0072ff) !important; border-radius: 12px !important; } """ # ========================= # ✅ UI # ========================= with gr.Blocks(css=css) as app: with gr.Column(elem_classes="container"): gr.Markdown("