Spaces:

KTXStudio
/

SeleniumShearchBot

Running

File size: 4,248 Bytes

ac450cf
 
 
e0dbd75
ac450cf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1c2e5c2
ac450cf
 
 
1c2e5c2
ac450cf
 
1c2e5c2
 
ac450cf
 
 
 
 
 
 
 
1c2e5c2
 
 
 
ac450cf
1c2e5c2
ac450cf
 
 
 
 
1c2e5c2
ac450cf
 
1c2e5c2
ac450cf
 
 
 
 
 
1c2e5c2
 
e0dbd75
1c2e5c2
 
 
 
ac450cf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1c2e5c2
ac450cf
 
1c2e5c2
ac450cf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1c2e5c2
ac450cf
 
 
 
1c2e5c2
 
ac450cf
 
 
 
1c2e5c2
ac450cf
 
 
 
 
1c2e5c2

import logging

import gradio as gr
import torch
from ddgs import DDGS
from deep_translator import GoogleTranslator
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from transformers import AutoTokenizer, AutoModelForCausalLM

# =========================
# ✅ MODEL
# =========================
# Hugging Face model identifier shared by the tokenizer and the model loader.
model_id = "Qwen/Qwen2.5-0.5B-Instruct"

tokenizer = AutoTokenizer.from_pretrained(model_id)

# Weights are requested in half precision (float16); device placement is
# delegated to the library through device_map="auto".
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", dtype=torch.float16)

# =========================
# ✅ SEARCH (2 text results + 1 image)
# =========================
def search_wiki(query, max_text_results=2):
    """Run a DDGS text search and an image search for *query*.

    Args:
        query: Search string forwarded unchanged to DDGS.
        max_text_results: Maximum number of text hits to request
            (defaults to 2, the value previously hard-coded).

    Returns:
        A ``(text_results, image_url)`` tuple. ``text_results`` is a list of
        dicts with the keys ``"title"``, ``"link"`` and ``"description"``.
        ``image_url`` is the ``"image"`` field of the first image hit, or
        ``None`` when no image is available.

    Raises:
        Whatever the DDGS text search raises; only the image lookup is
        treated as best-effort.
    """
    image_url = None

    with DDGS() as ddgs:
        # Normalise the raw hits to the keys the rest of the app relies on.
        text_results = [
            {
                "title": hit.get("title"),
                "link": hit.get("href"),
                "description": hit.get("body"),
            }
            for hit in ddgs.text(query, max_results=max_text_results)
        ]

        # The image is optional decoration: a failing image search must not
        # discard the text results that were already retrieved.
        try:
            images = list(ddgs.images(query, max_results=2))
        except Exception:
            logging.getLogger(__name__).warning(
                "Image search failed for %r", query, exc_info=True
            )
            images = []

        # Only the first image hit is used.
        if images:
            image_url = images[0].get("image")

    return text_results, image_url

# =========================
# ✅ PIPELINE
# =========================
def _fetch_translated_text(link, max_chars=6000):
    """Load *link* in headless Chrome and return its paragraphs translated to French.

    Only ``<p>`` elements whose stripped text is longer than 50 characters are
    kept. Translation stops as soon as *max_chars* characters have been
    collected, and the result is truncated to *max_chars*.
    """
    options = Options()
    options.add_argument("--headless")
    options.add_argument("--no-sandbox")  # required when running in Docker
    options.add_argument("--disable-dev-shm-usage")  # required when running in Docker

    # On HF Spaces, the driver is installed at /usr/bin/chromedriver.
    service = Service("/usr/bin/chromedriver")
    driver = webdriver.Chrome(service=service, options=options)
    try:
        driver.get(link)
        # Element text can only be read while the session is alive, so copy
        # it out before the browser is shut down.
        paragraphs = [
            element.text.strip()
            for element in driver.find_elements(By.TAG_NAME, "p")
        ]
    finally:
        # Always release the Chrome process, even when navigation fails.
        driver.quit()

    translator = GoogleTranslator(source='auto', target='fr')
    pieces = []
    collected = 0

    for texte in paragraphs:
        if len(texte) <= 50:
            continue
        if collected >= max_chars:
            # Anything further would be cut off by the final truncation.
            break
        try:
            traduction = translator.translate(texte)
        except Exception:
            # Best-effort: one untranslatable paragraph must not abort the run.
            logging.getLogger(__name__).warning(
                "Translation failed for a paragraph of %s", link, exc_info=True
            )
            continue
        if not isinstance(traduction, str):
            continue
        pieces.append(traduction + "\n")
        collected += len(traduction) + 1

    return "".join(pieces)[:max_chars]


def _summarize(texte):
    """Ask the language model for a French summary of *texte* and return only the generated part."""
    prompt = (
        "Fais un résumé clair et structuré en français :\n\n"
        + texte
    )

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    outputs = model.generate(
        **inputs,
        max_new_tokens=300,
        temperature=0.7,
        do_sample=True
    )

    # The returned sequence starts with the prompt tokens; drop them so the
    # caller gets the summary instead of an echo of the source text.
    prompt_length = inputs["input_ids"].shape[1]
    generated = outputs[0][prompt_length:]

    return tokenizer.decode(generated, skip_special_tokens=True).strip()


def run_pipeline(user_query):
    """Search for *user_query*, scrape the first hit, translate it and summarise it.

    Args:
        user_query: Free-text query typed by the user.

    Returns:
        A ``(message, image_url)`` tuple. On success ``message`` contains the
        source link followed by the generated summary and ``image_url`` is the
        image returned by the search (possibly ``None``). When the search
        fails, finds nothing, or any later step raises, ``message`` is a
        string starting with "❌" and ``image_url`` is ``None``. When the page
        yields no usable text, ``message`` reports it and the image is still
        returned.
    """
    try:
        results, img = search_wiki(user_query)
    except Exception as e:
        # Search errors are reported the same way as later pipeline errors.
        return f"❌ Erreur : {str(e)}", None

    if not results:
        return "❌ Aucun résultat trouvé.", None

    link = results[0]["link"]

    try:
        texte_total = _fetch_translated_text(link)

        if not texte_total.strip():
            # Nothing to summarise: do not let the model invent content.
            return f"🔗 {link}\n\n❌ Aucun texte exploitable trouvé sur la page.", img

        response = _summarize(texte_total)

        return f"🔗 {link}\n\n📄 {response}", img

    except Exception as e:
        return f"❌ Erreur : {str(e)}", None

# =========================
# ✅ STYLE
# =========================
# Custom stylesheet handed to gr.Blocks below: dark page background, a centred
# 900px container, a large centred title, and rounded textareas and buttons.
css = """
body { background: #0f1117; color: white; }

.container {
    max-width: 900px;
    margin: auto;
    padding-top: 40px;
}

.title {
    text-align: center;
    font-size: 30px;
    font-weight: bold;
    margin-bottom: 20px;
}

textarea {
    background: #1a1d26 !important;
    color: white !important;
    border-radius: 12px !important;
}

button {
    background: linear-gradient(90deg, #00c6ff, #0072ff) !important;
    border-radius: 12px !important;
}
"""

# =========================
# ✅ UI
# =========================
with gr.Blocks(css=css) as app:

    # Single column carrying the "container" CSS class defined in `css`.
    with gr.Column(elem_classes="container"):
        gr.Markdown("<div class='title'>🚀 KTXStudio AI</div>")

        # Input: the user's search query.
        query = gr.Textbox(placeholder="Ex : Ninjago Dragon Rising saison 4")
        btn = gr.Button("⚡ Générer")

        # Outputs: pipeline message and the image found by the search.
        output_text = gr.Textbox(lines=15)
        output_img = gr.Image(label="Image (résultat 5)")

        # Clicking the button runs the pipeline and fills both outputs.
        btn.click(
            fn=run_pipeline,
            inputs=query,
            outputs=[output_text, output_img],
        )

# =========================
# ✅ RUN
# =========================
# Start the Gradio app with sharing enabled and a custom favicon file.
app.launch(
    share=True,
    favicon_path="favicon.png",
)