| | import json |
| | from sklearn.feature_extraction.text import TfidfVectorizer |
| | from sklearn.metrics.pairwise import cosine_similarity |
| | from transformers import pipeline |
| | import gradio as gr |
| |
|
| | |
# Load the retrieval corpus once at import time.
# JSON text is UTF-8 by specification, so decode it explicitly instead of
# relying on the platform's locale encoding.
# NOTE(review): the code below indexes `corpus` by integer position and joins
# its entries with " ", so it presumably is a JSON array of strings -- confirm
# against the data file.
with open("electricity_corpus.json", "r", encoding="utf-8") as f:
    corpus = json.load(f)

# Fit a TF-IDF model on the corpus; row i of `tfidf_matrix` corresponds to
# corpus[i].
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(corpus)

# Extractive question-answering model used to pull an answer span out of the
# retrieved context.
qa_pipeline = pipeline("question-answering", model="distilbert-base-cased-distilled-squad")
| |
|
| | |
def get_top_contexts(question, top_k=3):
    """Return the corpus entries most similar to *question*.

    The question is projected with the module-level fitted ``vectorizer`` and
    compared against every row of ``tfidf_matrix`` using cosine similarity.

    Args:
        question: The user's question text.
        top_k: Maximum number of entries to return. Values <= 0 yield an
            empty list.

    Returns:
        A list of up to ``top_k`` corpus entries, ordered from most to least
        similar. Entries are selected by rank only, so entries with zero
        similarity may be included when there are few or no matches.
    """
    # Guard first: with top_k == 0 the slice `[-0:]` below would select the
    # *entire* array, and a negative top_k would produce a meaningless slice.
    if top_k <= 0:
        return []

    question_vec = vectorizer.transform([question])
    similarities = cosine_similarity(question_vec, tfidf_matrix).flatten()
    # argsort is ascending: take the last top_k indices and reverse them so
    # the best match comes first.
    top_indices = similarities.argsort()[-top_k:][::-1]
    return [corpus[i] for i in top_indices]
| |
|
| | |
def answer_question(question, top_k=3):
    """Answer *question* using the best-matching corpus entries as context.

    Args:
        question: The user's question text. ``None``, non-string, empty, or
            whitespace-only input is rejected with a prompt message.
        top_k: Number of corpus entries to retrieve as context.

    Returns:
        The answer string extracted by the QA pipeline, or a short
        user-facing message when the input is invalid or no context could be
        retrieved.
    """
    # The UI/API layer may hand over None instead of an empty string; treat
    # anything that is not non-blank text as invalid instead of crashing.
    if not isinstance(question, str) or not question.strip():
        return "Please enter a valid question."

    contexts = get_top_contexts(question, top_k)
    # Cap the combined context at 4096 characters before handing it to the
    # model.
    combined_context = " ".join(contexts)[:4096]
    # Never call the QA pipeline with a blank context; return a message
    # instead.
    if not combined_context.strip():
        return "No relevant context was found for this question."

    result = qa_pipeline(question=question, context=combined_context)
    return result["answer"]
| |
|
| | |
# Gradio front end: a single text box in, a single text box out, wired to
# answer_question.
iface = gr.Interface(
    title="🔌 Electricity Data Q&A",
    description=(
        "Ask questions like 'What was the price for residential in Texas "
        "in Jan 2001?' or 'Which state had highest revenue in Jan 2001?'"
    ),
    fn=answer_question,
    inputs=gr.Textbox(label="Ask your question about electricity usage..."),
    outputs=gr.Textbox(label="Answer"),
)


# Start the web UI only when the file is run as a script, not when imported.
if __name__ == "__main__":
    iface.launch()
| |
|
| |
|