import os

import gradio as gr
import pandas as pd
from bertopic import BERTopic
from sentence_transformers import SentenceTransformer

# Lazily-created, module-level embedder. Loading SentenceTransformer weights
# is expensive, so do it once per process instead of on every button click.
_EMBEDDER = None


def _get_embedder():
    """Return the shared SentenceTransformer, creating it on first use."""
    global _EMBEDDER
    if _EMBEDDER is None:
        _EMBEDDER = SentenceTransformer("all-MiniLM-L6-v2")
    return _EMBEDDER


def _read_text(file):
    """Extract UTF-8 text from a Gradio file input.

    Accepts raw bytes, an open file-like object, or a filesystem path
    (Gradio's NamedString is a str subclass holding the temp-file path).
    Returns a ``(text, error_message)`` pair; at most one is non-empty.
    """
    if hasattr(file, "decode"):  # raw bytes payload
        try:
            return file.decode("utf-8"), ""
        except Exception as e:
            return "", f"Error decoding NamedString: {e}"
    if hasattr(file, "read"):  # open file object
        try:
            return file.read().decode("utf-8"), ""
        except Exception as e:
            return "", f"Error reading/decoding file object: {e}"
    if isinstance(file, str) and os.path.exists(file):  # temp-file path
        try:
            with open(file, "r", encoding="utf-8") as f:
                return f.read(), ""
        except Exception as e:
            return "", f"Error reading file from path: {e}"
    # Unknown input shape: no text, no specific error — caller reports it.
    return "", ""


def _format_topic_weights(topic_model):
    """Render per-topic word/weight tables as a plain-text report."""
    lines = [
        "=" * 80,
        "TOPIC WEIGHTS (Word Importance Scores)",
        "=" * 80,
        "",
    ]
    # Topic -1 is BERTopic's outlier bucket; skip it in the report.
    for topic_id in (t for t in topic_model.get_topics() if t != -1):
        lines.append(f"TOPIC {topic_id}")
        lines.append("-" * 40)
        topic_words = topic_model.get_topic(topic_id)
        if topic_words:
            for word, weight in topic_words[:10]:  # top 10 words
                lines.append(f"  {word:20s} {weight:8.4f}")
        lines.append("")
    return "\n".join(lines) + "\n"


def run_from_textfile(file):
    """Run BERTopic over an uploaded .txt file (one document per line).

    Returns a 4-tuple matching the Gradio outputs:
    (topic overview, topic-weight report, doc->topic assignments, figure).
    On any failure the first slot carries the error message and the
    figure slot is None.
    """
    if file is None:
        return "Please upload a .txt file.", "", "", None

    text, error = _read_text(file)
    if error:
        return error, "", "", None
    if not text:
        return "Could not read the file content. Please check the file type and content.", "", "", None

    # Split the text into documents (one per non-blank line).
    docs = [line.strip() for line in text.splitlines() if line.strip()]
    if len(docs) < 3:
        return "Need at least 3 documents (one per line).", "", "", None

    # ---- Topic Modeling (probabilities are not used downstream) ----
    topic_model = BERTopic(embedding_model=_get_embedder())
    topics, _ = topic_model.fit_transform(docs)

    # ---- Topic Summary ----
    topic_info = topic_model.get_topic_info().to_string(index=False)

    # ---- Topic weights report ----
    weights_output = _format_topic_weights(topic_model)

    # ---- Document -> Topic Assignments ----
    assignments = "\n".join(
        f"Doc {i + 1}: Topic {topic}" for i, topic in enumerate(topics)
    )

    # visualize_barchart raises when every document lands in the outlier
    # topic (-1); degrade to "no figure" instead of crashing the UI.
    try:
        fig = topic_model.visualize_barchart(top_n_topics=10)
    except Exception:
        fig = None

    return topic_info, weights_output, assignments, fig


# ---- Gradio Interface ----
with gr.Blocks() as demo:
    gr.Markdown("# 🧠 Topic Modeling from TXT File (BERTopic)")
    gr.Markdown(
        "Upload a plain text (.txt) file. Each line should contain **one LLM response**.\n"
        "\nExample format:\n```\nResponse 1...\nResponse 2...\nResponse 3...\n```"
    )
    file_input = gr.File(label="Upload .txt file")
    run_button = gr.Button("Run Topic Modeling")
    topic_output = gr.Textbox(label="Topic Overview", lines=12)
    weights_output = gr.Textbox(label="📊 Topic Weights (Word Importance)", lines=20)
    assignment_output = gr.Textbox(label="Document → Topic Assignments", lines=12)
    fig_output = gr.Plot(label="Topic Visualization")

    run_button.click(
        fn=run_from_textfile,
        inputs=file_input,
        outputs=[topic_output, weights_output, assignment_output, fig_output],
    )

# Launch app
demo.launch()