# app.py - Main Streamlit Application
#
# Single-page Streamlit UI for the VersionRAG demo: document upload/indexing,
# query interface with baseline comparison, evaluation, version exploration,
# analytics, and multi-user session management.
import streamlit as st
import os
import json
import hashlib
import time
from datetime import datetime
from pathlib import Path
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
from typing import List, Dict, Optional, Tuple
import uuid

# Import custom modules
from version_rag import VersionRAG, BaselineRAG
from graph_manager import GraphManager
from evaluation import Evaluator, VersionQADataset
from utils import DocumentProcessor, ChangeDetector, PersistentStorage

# Page configuration (must run before any other st.* call)
st.set_page_config(
    page_title="VersionRAG - Version-Aware RAG System",
    page_icon="📚",
    layout="wide",
    initial_sidebar_state="expanded"
)


def init_session_state():
    """Seed every session_state key this app reads, so later code can assume
    the keys exist. Idempotent: runs on every rerun, only fills missing keys."""
    if 'user_id' not in st.session_state:
        st.session_state.user_id = str(uuid.uuid4())
    if 'version_rag' not in st.session_state:
        st.session_state.version_rag = None
    if 'baseline_rag' not in st.session_state:
        st.session_state.baseline_rag = None
    if 'graph_manager' not in st.session_state:
        st.session_state.graph_manager = None
    if 'uploaded_files' not in st.session_state:
        st.session_state.uploaded_files = {}
    if 'chat_history' not in st.session_state:
        st.session_state.chat_history = []
    if 'evaluation_results' not in st.session_state:
        st.session_state.evaluation_results = None
    if 'feedback_data' not in st.session_state:
        st.session_state.feedback_data = []
    if 'persistent_storage' not in st.session_state:
        st.session_state.persistent_storage = None


init_session_state()

# Custom CSS (currently empty; class names used below are inert until filled in)
st.markdown("""
""", unsafe_allow_html=True)

# ---------------------------------------------------------------------------
# Sidebar: session info, model settings, retrieval parameters, initialization
# ---------------------------------------------------------------------------
with st.sidebar:
    st.markdown("### 🔐 User Session")
    st.info(f"User ID: {st.session_state.user_id[:8]}...")

    st.markdown("### ⚙️ Settings")

    # API Key input (pre-filled from the environment if available)
    api_key = st.text_input("OpenAI API Key", type="password",
                            value=os.getenv("OPENAI_API_KEY", ""))
    if api_key:
        os.environ["OPENAI_API_KEY"] = api_key

    # Model selection
    model_name = st.selectbox(
        "LLM Model",
        ["gpt-3.5-turbo", "gpt-4", "gpt-4-turbo-preview"],
        index=0
    )

    # Embedding model
    embedding_model = st.selectbox(
        "Embedding Model",
        ["text-embedding-3-small", "text-embedding-3-large", "text-embedding-ada-002"],
        index=0
    )

    # Retrieval parameters (top_k / similarity_threshold are read by the tabs below)
    st.markdown("### 🎯 Retrieval Parameters")
    top_k = st.slider("Top K Results", 1, 10, 5)
    similarity_threshold = st.slider("Similarity Threshold", 0.0, 1.0, 0.7)

    # Initialize both RAG systems and the version graph for this user
    if st.button("🚀 Initialize Systems", type="primary"):
        with st.spinner("Initializing VersionRAG and Baseline systems..."):
            try:
                st.session_state.version_rag = VersionRAG(
                    user_id=st.session_state.user_id,
                    model_name=model_name,
                    embedding_model=embedding_model
                )
                st.session_state.baseline_rag = BaselineRAG(
                    user_id=st.session_state.user_id,
                    model_name=model_name,
                    embedding_model=embedding_model
                )
                st.session_state.graph_manager = GraphManager(
                    user_id=st.session_state.user_id
                )
                st.success("✅ Systems initialized successfully!")
            except Exception as e:
                st.error(f"❌ Initialization error: {str(e)}")

    # Knowledge base status
    if st.session_state.uploaded_files:
        st.markdown("### 📚 Knowledge Base")
        for filename, info in st.session_state.uploaded_files.items():
            # FIX: show the actual filename (was a hard-coded "(unknown)" placeholder)
            with st.expander(f"📄 {filename}"):
                st.write(f"**Version:** {info['version']}")
                st.write(f"**Uploaded:** {info['timestamp']}")
                st.write(f"**Hash:** {info['hash'][:12]}...")

# Main header
st.markdown(
    '<h1 class="main-header">📚 VersionRAG: Version-Aware RAG System</h1>',
    unsafe_allow_html=True
)

# Create tabs
tab1, tab2, tab3, tab4, tab5, tab6 = st.tabs([
    "📤 Document Upload",
    "💬 Query Interface",
    "📊 Evaluation",
    "🔍 Version Explorer",
    "📈 Analytics",
    "👥 Multi-User Management"
])

# ---------------------------------------------------------------------------
# Tab 1: Document Upload — per-file metadata entry, hashing, diff-aware indexing
# ---------------------------------------------------------------------------
with tab1:
    st.header("Document Upload & Indexing")
    col1, col2 = st.columns([2, 1])

    with col1:
        uploaded_files = st.file_uploader(
            "Upload versioned documents (PDF, TXT)",
            type=["pdf", "txt"],
            accept_multiple_files=True
        )

        if uploaded_files:
            st.markdown("### 📋 File Metadata")
            for idx, file in enumerate(uploaded_files):
                with st.expander(f"📄 {file.name}", expanded=True):
                    col_a, col_b = st.columns(2)
                    with col_a:
                        version = st.text_input(
                            "Version",
                            key=f"version_{idx}",
                            value="1.0.0"
                        )
                    with col_b:
                        domain = st.selectbox(
                            "Domain",
                            ["Software", "Healthcare", "Finance", "Industrial", "Other"],
                            key=f"domain_{idx}"
                        )
                    topic = st.text_input(
                        "Topic/Module",
                        key=f"topic_{idx}",
                        value=file.name.split('.')[0]
                    )

                    if st.button(f"Process {file.name}", key=f"process_{idx}"):
                        if not st.session_state.version_rag:
                            st.error("Please initialize systems first!")
                        else:
                            with st.spinner(f"Processing {file.name}..."):
                                try:
                                    # Read file content
                                    content = file.read()
                                    if file.type == "application/pdf":
                                        text = DocumentProcessor.extract_text_from_pdf(content)
                                    else:
                                        text = content.decode('utf-8')

                                    # Content hash drives the skip/re-index decision
                                    file_hash = hashlib.sha256(content).hexdigest()

                                    # Reupload handling: skip unchanged files,
                                    # diff-index changed ones
                                    if file.name in st.session_state.uploaded_files:
                                        old_hash = st.session_state.uploaded_files[file.name]['hash']
                                        if old_hash == file_hash:
                                            st.info("File unchanged, skipping indexing.")
                                            continue
                                        else:
                                            st.info("File changed, re-indexing with diff analysis...")
                                            # Perform diff analysis against the stored text
                                            old_text = st.session_state.uploaded_files[file.name]['text']
                                            changes = ChangeDetector.compute_diff(old_text, text)
                                            # Record the change set in the version graph
                                            st.session_state.graph_manager.add_version_with_changes(
                                                document_name=topic,
                                                version=version,
                                                changes=changes
                                            )

                                    # Add to VersionRAG (full metadata)
                                    st.session_state.version_rag.add_documents(
                                        texts=[text],
                                        metadatas=[{
                                            'filename': file.name,
                                            'version': version,
                                            'domain': domain,
                                            'topic': topic,
                                            'hash': file_hash,
                                            'timestamp': datetime.now().isoformat()
                                        }]
                                    )

                                    # Add to Baseline RAG (minimal metadata, by design)
                                    st.session_state.baseline_rag.add_documents(
                                        texts=[text],
                                        metadatas=[{
                                            'filename': file.name,
                                            'version': version
                                        }]
                                    )

                                    # Add to version graph
                                    st.session_state.graph_manager.add_document_version(
                                        document_name=topic,
                                        version=version,
                                        content=text,
                                        metadata={
                                            'domain': domain,
                                            'filename': file.name
                                        }
                                    )

                                    # Store in session state (text kept for future diffs)
                                    st.session_state.uploaded_files[file.name] = {
                                        'version': version,
                                        'domain': domain,
                                        'topic': topic,
                                        'hash': file_hash,
                                        'text': text,
                                        'timestamp': datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                                    }
                                    st.success(f"✅ Successfully processed {file.name}")
                                except Exception as e:
                                    st.error(f"❌ Error processing {file.name}: {str(e)}")

    with col2:
        st.markdown("### 📊 Upload Statistics")
        if st.session_state.uploaded_files:
            stats_data = {
                'Total Files': len(st.session_state.uploaded_files),
                'Domains': len(set(f['domain'] for f in st.session_state.uploaded_files.values())),
                'Total Versions': len(set(f['version'] for f in st.session_state.uploaded_files.values()))
            }
            for key, value in stats_data.items():
                st.metric(key, value)

            # Domain distribution
            domain_counts = {}
            for file_info in st.session_state.uploaded_files.values():
                domain = file_info['domain']
                domain_counts[domain] = domain_counts.get(domain, 0) + 1

            fig = px.pie(
                values=list(domain_counts.values()),
                names=list(domain_counts.keys()),
                title="Documents by Domain"
            )
            st.plotly_chart(fig, use_container_width=True)

# ---------------------------------------------------------------------------
# Tab 2: Query Interface — VersionRAG query with optional baseline comparison
# ---------------------------------------------------------------------------
with tab2:
    st.header("Interactive Query Interface")

    if not st.session_state.version_rag:
        st.warning("⚠️ Please initialize the systems first from the sidebar!")
    else:
        # Query type selection
        query_type = st.radio(
            "Query Type",
            ["Content Retrieval", "Version Inquiry", "Change Retrieval"],
            horizontal=True
        )

        # Query input
        col1, col2 = st.columns([3, 1])
        with col1:
            query = st.text_input(
                "Enter your query",
                placeholder="e.g., What is the assert module in Node.js v20.0?"
            )
        with col2:
            compare_mode = st.checkbox("Compare with Baseline", value=True)

        # Version filter (only meaningful for content retrieval)
        if query_type == "Content Retrieval":
            version_filter = st.text_input(
                "Version Filter (optional)",
                placeholder="e.g., 1.2.0"
            )
        else:
            version_filter = None

        if st.button("🔍 Search", type="primary"):
            if not query:
                st.warning("Please enter a query!")
            else:
                with st.spinner("Searching..."):
                    start_time = time.time()

                    # VersionRAG query — dispatch on query type
                    if query_type == "Content Retrieval":
                        vrag_result = st.session_state.version_rag.query(
                            query=query,
                            version_filter=version_filter,
                            top_k=top_k
                        )
                    elif query_type == "Version Inquiry":
                        vrag_result = st.session_state.version_rag.version_inquiry(
                            query=query
                        )
                    else:  # Change Retrieval
                        vrag_result = st.session_state.version_rag.change_retrieval(
                            query=query
                        )
                    vrag_time = time.time() - start_time

                    # Baseline query (if comparison enabled)
                    if compare_mode:
                        start_time = time.time()
                        baseline_result = st.session_state.baseline_rag.query(
                            query=query,
                            top_k=top_k
                        )
                        baseline_time = time.time() - start_time

                # Display results
                if compare_mode:
                    col1, col2 = st.columns(2)
                    with col1:
                        st.markdown("### 🚀 VersionRAG Response")
                        st.markdown(f"**Response Time:** {vrag_time:.3f}s")
                        st.markdown("---")
                        st.markdown(vrag_result['answer'])
                        if 'sources' in vrag_result:
                            with st.expander("📚 Sources"):
                                for idx, source in enumerate(vrag_result['sources']):
                                    st.markdown(f"**Source {idx+1}**")
                                    st.markdown(f"- Version: `{source.get('version', 'N/A')}`")
                                    st.markdown(f"- File: `{source.get('filename', 'N/A')}`")
                                    st.markdown(f"- Similarity: {source.get('similarity', 0):.3f}")
                                    st.markdown(f"```\n{source.get('content', '')[:200]}...\n```")
                    with col2:
                        st.markdown("### 📊 Baseline RAG Response")
                        st.markdown(f"**Response Time:** {baseline_time:.3f}s")
                        st.markdown("---")
                        st.markdown(baseline_result['answer'])
                        if 'sources' in baseline_result:
                            with st.expander("📚 Sources"):
                                for idx, source in enumerate(baseline_result['sources']):
                                    st.markdown(f"**Source {idx+1}**")
                                    st.markdown(f"```\n{source.get('content', '')[:200]}...\n```")
                else:
                    st.markdown("### 🚀 VersionRAG Response")
                    st.markdown(f"**Response Time:** {vrag_time:.3f}s")
                    st.markdown("---")
                    st.markdown(vrag_result['answer'])
                    if 'sources' in vrag_result:
                        with st.expander("📚 Sources"):
                            for idx, source in enumerate(vrag_result['sources']):
                                st.markdown(f"**Source {idx+1}**")
                                st.markdown(f"- Version: `{source.get('version', 'N/A')}`")
                                st.markdown(f"- File: `{source.get('filename', 'N/A')}`")
                                st.markdown(f"- Similarity: {source.get('similarity', 0):.3f}")
                                st.markdown(f"```\n{source.get('content', '')[:200]}...\n```")

                # Feedback
                # NOTE(review): this button is nested inside the "Search" button
                # branch. In Streamlit, clicking it triggers a rerun in which
                # the outer button is False, so this branch never executes and
                # feedback is silently lost. Fix requires persisting the search
                # result in st.session_state and rendering feedback outside the
                # search branch — left as-is to avoid restructuring here.
                st.markdown("### 📝 Feedback")
                col1, col2, col3 = st.columns([1, 1, 2])
                with col1:
                    rating = st.slider("Rate this answer", 1, 5, 3)
                with col2:
                    if st.button("Submit Feedback"):
                        st.session_state.feedback_data.append({
                            'query': query,
                            'query_type': query_type,
                            'rating': rating,
                            'timestamp': datetime.now().isoformat(),
                            'response_time': vrag_time
                        })
                        st.success("Thank you for your feedback!")

                # Add to chat history (baseline fields only when comparison ran)
                st.session_state.chat_history.append({
                    'query': query,
                    'query_type': query_type,
                    'vrag_answer': vrag_result['answer'],
                    'vrag_time': vrag_time,
                    'baseline_answer': baseline_result['answer'] if compare_mode else None,
                    'baseline_time': baseline_time if compare_mode else None,
                    'timestamp': datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                })

        # Chat history (last five, newest first)
        if st.session_state.chat_history:
            st.markdown("### 💭 Query History")
            for idx, chat in enumerate(reversed(st.session_state.chat_history[-5:])):
                with st.expander(f"{chat['timestamp']} - {chat['query'][:50]}..."):
                    st.markdown(f"**Query Type:** {chat['query_type']}")
                    st.markdown(f"**VersionRAG Answer:** {chat['vrag_answer'][:200]}...")
                    st.markdown(f"**Response Time:** {chat['vrag_time']:.3f}s")

# ---------------------------------------------------------------------------
# Tab 3: Evaluation — VersionRAG vs baseline on a VersionQA dataset
# ---------------------------------------------------------------------------
with tab3:
    st.header("System Evaluation")

    if not st.session_state.version_rag:
        st.warning("⚠️ Please initialize the systems first!")
    else:
        st.markdown("""
        This section evaluates VersionRAG against the baseline system using the
        Mini-VersionQA dataset. Metrics include Hit@k, MRR, Accuracy, and
        Version-Sensitive Accuracy (VSA).
        """)

        # Evaluation dataset configuration
        st.markdown("### 📋 Evaluation Dataset Configuration")
        use_custom_dataset = st.checkbox("Use custom evaluation dataset")

        # FIX: initialize up front — previously qa_data was undefined (NameError)
        # when the custom-dataset box was checked but no file had been uploaded.
        qa_data = None
        if use_custom_dataset:
            uploaded_qa_file = st.file_uploader(
                "Upload QA Dataset (JSON)",
                type=["json"]
            )
            if uploaded_qa_file:
                qa_data = json.load(uploaded_qa_file)
                st.success(f"Loaded {len(qa_data)} questions")
        else:
            st.info("Using default Mini-VersionQA dataset")

        if st.button("🚀 Run Evaluation", type="primary"):
            with st.spinner("Running evaluation..."):
                try:
                    # Initialize evaluator
                    evaluator = Evaluator(
                        version_rag=st.session_state.version_rag,
                        baseline_rag=st.session_state.baseline_rag
                    )

                    # Create or load dataset
                    if qa_data:
                        dataset = VersionQADataset.from_dict(qa_data)
                    else:
                        dataset = VersionQADataset.create_mini_versionqa()

                    # Run evaluation
                    results = evaluator.evaluate(dataset)
                    st.session_state.evaluation_results = results

                    # Display results
                    st.markdown("### 📊 Evaluation Results")

                    # Overall comparison
                    col1, col2 = st.columns(2)
                    with col1:
                        st.markdown("#### 🚀 VersionRAG")
                        st.metric("Accuracy", f"{results['versionrag']['accuracy']:.2%}")
                        st.metric("Hit@5", f"{results['versionrag']['hit_at_5']:.2%}")
                        st.metric("MRR", f"{results['versionrag']['mrr']:.3f}")
                        st.metric("VSA", f"{results['versionrag']['vsa']:.2%}")
                        st.metric("Avg Latency", f"{results['versionrag']['avg_latency']:.3f}s")
                    with col2:
                        st.markdown("#### 📊 Baseline RAG")
                        st.metric("Accuracy", f"{results['baseline']['accuracy']:.2%}")
                        st.metric("Hit@5", f"{results['baseline']['hit_at_5']:.2%}")
                        st.metric("MRR", f"{results['baseline']['mrr']:.3f}")
                        st.metric("VSA", f"{results['baseline']['vsa']:.2%}")
                        st.metric("Avg Latency", f"{results['baseline']['avg_latency']:.3f}s")

                    # Performance improvement (in percentage points; metrics are
                    # assumed to be fractions in [0, 1] — consistent with the
                    # :.2% formatting above)
                    st.markdown("### 📈 Performance Improvement")
                    improvement = {
                        'Accuracy': (results['versionrag']['accuracy'] - results['baseline']['accuracy']) * 100,
                        'Hit@5': (results['versionrag']['hit_at_5'] - results['baseline']['hit_at_5']) * 100,
                        'MRR': (results['versionrag']['mrr'] - results['baseline']['mrr']) * 100,
                        'VSA': (results['versionrag']['vsa'] - results['baseline']['vsa']) * 100
                    }

                    fig = go.Figure(data=[
                        go.Bar(name='Improvement',
                               x=list(improvement.keys()),
                               y=list(improvement.values()),
                               marker_color='lightblue')
                    ])
                    fig.add_hline(y=25, line_dash="dash", line_color="red",
                                  annotation_text="Target: 25 points")
                    fig.update_layout(
                        title="VersionRAG vs Baseline - Performance Improvement (percentage points)",
                        yaxis_title="Improvement (%)",
                        showlegend=False
                    )
                    st.plotly_chart(fig, use_container_width=True)

                    # Query type breakdown
                    # NOTE(review): the ≥85/90/60 criteria below imply by_type
                    # scores are on a 0-100 scale, unlike the 0-1 fractions
                    # above — confirm against Evaluator's output contract.
                    st.markdown("### 🔍 Performance by Query Type")
                    query_types = ['Content Retrieval', 'Version Inquiry', 'Change Retrieval']
                    vrag_scores = [
                        results['versionrag']['by_type']['content_retrieval'],
                        results['versionrag']['by_type']['version_inquiry'],
                        results['versionrag']['by_type']['change_retrieval']
                    ]
                    baseline_scores = [
                        results['baseline']['by_type']['content_retrieval'],
                        results['baseline']['by_type']['version_inquiry'],
                        results['baseline']['by_type']['change_retrieval']
                    ]

                    fig = go.Figure(data=[
                        go.Bar(name='VersionRAG', x=query_types, y=vrag_scores),
                        go.Bar(name='Baseline', x=query_types, y=baseline_scores)
                    ])
                    fig.update_layout(
                        title="Accuracy by Query Type",
                        yaxis_title="Accuracy (%)",
                        barmode='group'
                    )
                    st.plotly_chart(fig, use_container_width=True)

                    # Success criteria check
                    st.markdown("### ✅ Success Criteria")
                    criteria = {
                        'VSA Improvement ≥ 25 points': improvement['VSA'] >= 25,
                        'Content Retrieval ≥ 85%': vrag_scores[0] >= 85,
                        'Version Inquiry ≥ 90%': vrag_scores[1] >= 90,
                        'Change Retrieval ≥ 60%': vrag_scores[2] >= 60
                    }
                    for criterion, passed in criteria.items():
                        if passed:
                            st.success(f"✅ {criterion}")
                        else:
                            st.error(f"❌ {criterion}")
                except Exception as e:
                    st.error(f"Evaluation error: {str(e)}")

# ---------------------------------------------------------------------------
# Tab 4: Version Explorer — per-document version timeline and pairwise diffs
# ---------------------------------------------------------------------------
with tab4:
    st.header("Version Explorer")

    if not st.session_state.graph_manager:
        st.warning("⚠️ Please initialize the systems first!")
    else:
        # Document selection
        documents = st.session_state.graph_manager.get_all_documents()

        if not documents:
            st.info("No documents uploaded yet. Please upload documents in the 'Document Upload' tab.")
        else:
            selected_doc = st.selectbox("Select Document", documents)

            if selected_doc:
                # Get versions for selected document
                versions = st.session_state.graph_manager.get_document_versions(selected_doc)

                st.markdown(f"### 📚 {selected_doc}")
                st.markdown(f"**Total Versions:** {len(versions)}")

                # Version timeline
                if len(versions) > 1:
                    st.markdown("### 📅 Version Timeline")
                    timeline_data = []
                    for v in sorted(versions):
                        version_info = st.session_state.graph_manager.get_version_info(
                            selected_doc, v
                        )
                        timeline_data.append({
                            'Version': v,
                            'Date': version_info.get('timestamp', 'N/A')
                        })
                    df = pd.DataFrame(timeline_data)
                    st.dataframe(df, use_container_width=True)

                # Version comparison
                st.markdown("### 🔄 Version Comparison")
                col1, col2 = st.columns(2)
                with col1:
                    version1 = st.selectbox("Version 1", sorted(versions), index=0)
                with col2:
                    # Default to the second version when one exists
                    version2 = st.selectbox("Version 2", sorted(versions),
                                            index=min(1, len(versions) - 1))

                if version1 and version2 and version1 != version2:
                    if st.button("Compare Versions"):
                        with st.spinner("Computing differences..."):
                            changes = st.session_state.graph_manager.get_changes_between_versions(
                                selected_doc, version1, version2
                            )

                            st.markdown("### 📝 Changes Detected")
                            if changes['additions']:
                                st.markdown("#### ➕ Additions")
                                for add in changes['additions']:
                                    st.markdown(f'<div class="diff-add">{add}</div>',
                                                unsafe_allow_html=True)
                            if changes['deletions']:
                                st.markdown("#### ➖ Deletions")
                                for delete in changes['deletions']:
                                    st.markdown(f'<div class="diff-del">{delete}</div>',
                                                unsafe_allow_html=True)
                            if changes['modifications']:
                                st.markdown("#### 🔄 Modifications")
                                for mod in changes['modifications']:
                                    st.markdown(f"- {mod}")

                            # Visualize changes
                            st.markdown("### 📊 Change Statistics")
                            change_stats = {
                                'Additions': len(changes['additions']),
                                'Deletions': len(changes['deletions']),
                                'Modifications': len(changes['modifications'])
                            }
                            fig = px.bar(
                                x=list(change_stats.keys()),
                                y=list(change_stats.values()),
                                title=f"Changes from {version1} to {version2}",
                                labels={'x': 'Change Type', 'y': 'Count'}
                            )
                            st.plotly_chart(fig, use_container_width=True)

# ---------------------------------------------------------------------------
# Tab 5: Analytics — usage stats, query-type distribution, feedback, exports
# ---------------------------------------------------------------------------
with tab5:
    st.header("System Analytics")

    # System statistics
    col1, col2, col3, col4 = st.columns(4)
    with col1:
        st.metric("Total Queries", len(st.session_state.chat_history))
    with col2:
        if st.session_state.feedback_data:
            avg_rating = (sum(f['rating'] for f in st.session_state.feedback_data)
                          / len(st.session_state.feedback_data))
            st.metric("Avg Rating", f"{avg_rating:.2f} / 5")
        else:
            st.metric("Avg Rating", "N/A")
    with col3:
        if st.session_state.chat_history:
            avg_response_time = (sum(c['vrag_time'] for c in st.session_state.chat_history)
                                 / len(st.session_state.chat_history))
            st.metric("Avg Response Time", f"{avg_response_time:.3f}s")
        else:
            st.metric("Avg Response Time", "N/A")
    with col4:
        st.metric("Total Documents", len(st.session_state.uploaded_files))

    # Query type distribution
    if st.session_state.chat_history:
        st.markdown("### 📊 Query Type Distribution")
        query_type_counts = {}
        for chat in st.session_state.chat_history:
            qtype = chat['query_type']
            query_type_counts[qtype] = query_type_counts.get(qtype, 0) + 1

        fig = px.pie(
            values=list(query_type_counts.values()),
            names=list(query_type_counts.keys()),
            title="Distribution of Query Types"
        )
        st.plotly_chart(fig, use_container_width=True)

        # Response time trend
        if len(st.session_state.chat_history) > 1:
            st.markdown("### ⏱️ Response Time Trend")
            times = [c['vrag_time'] for c in st.session_state.chat_history]
            fig = go.Figure(data=go.Scatter(
                y=times,
                mode='lines+markers',
                name='Response Time'
            ))
            fig.update_layout(
                title="Response Time Over Queries",
                xaxis_title="Query Number",
                yaxis_title="Response Time (s)"
            )
            st.plotly_chart(fig, use_container_width=True)

    # Feedback analysis
    if st.session_state.feedback_data:
        st.markdown("### 📝 User Feedback Analysis")
        # Rating distribution
        rating_counts = {}
        for feedback in st.session_state.feedback_data:
            rating = feedback['rating']
            rating_counts[rating] = rating_counts.get(rating, 0) + 1

        fig = go.Figure(data=[
            go.Bar(x=list(rating_counts.keys()), y=list(rating_counts.values()))
        ])
        fig.update_layout(
            title="Rating Distribution",
            xaxis_title="Rating",
            yaxis_title="Count"
        )
        st.plotly_chart(fig, use_container_width=True)

    # Export analytics
    st.markdown("### 💾 Export Data")
    col1, col2 = st.columns(2)
    with col1:
        if st.button("Export Chat History"):
            if st.session_state.chat_history:
                df = pd.DataFrame(st.session_state.chat_history)
                csv = df.to_csv(index=False)
                st.download_button(
                    "Download CSV",
                    csv,
                    "chat_history.csv",
                    "text/csv"
                )
    with col2:
        if st.button("Export Feedback Data"):
            if st.session_state.feedback_data:
                df = pd.DataFrame(st.session_state.feedback_data)
                csv = df.to_csv(index=False)
                st.download_button(
                    "Download CSV",
                    csv,
                    "feedback_data.csv",
                    "text/csv"
                )

# ---------------------------------------------------------------------------
# Tab 6: Multi-User Management — session isolation, KB status, session tools
# ---------------------------------------------------------------------------
with tab6:
    st.header("Multi-User Management")

    st.markdown("""
    This section demonstrates VersionRAG's multi-user capabilities with logical
    data separation and persistent knowledge base management.
    """)

    # User session info
    st.markdown("### 👤 Current Session")
    col1, col2, col3 = st.columns(3)
    with col1:
        st.info(f"**User ID:** {st.session_state.user_id[:16]}...")
    with col2:
        st.info(f"**Documents:** {len(st.session_state.uploaded_files)}")
    with col3:
        st.info(f"**Queries:** {len(st.session_state.chat_history)}")

    # Data isolation demonstration
    st.markdown("### 🔒 Data Isolation")
    st.markdown("""
    Each user's knowledge base is logically separated using `tenant_id` metadata in ChromaDB.
    This ensures:
    - No data leakage between users
    - Independent query results
    - Isolated document management
    """)

    # Knowledge base status
    st.markdown("### 📚 Knowledge Base Status")
    if st.session_state.uploaded_files:
        kb_data = []
        for filename, info in st.session_state.uploaded_files.items():
            kb_data.append({
                'File': filename,
                'Version': info['version'],
                'Domain': info['domain'],
                'Topic': info['topic'],
                'Uploaded': info['timestamp'],
                'Hash': info['hash'][:12] + "..."
            })
        df = pd.DataFrame(kb_data)
        st.dataframe(df, use_container_width=True)

        # Persistent storage info
        st.success("""
        ✅ **Persistent Storage Active**
        - All documents are stored with file hash tracking
        - Unchanged files skip re-indexing
        - Automatic diff-based updates for modified files
        """)
    else:
        st.info("No documents in knowledge base. Upload documents to get started.")

    # Session management
    st.markdown("### 🔄 Session Management")
    col1, col2 = st.columns(2)
    with col1:
        # FIX: the confirmation checkbox must render BEFORE the button is
        # evaluated. The original nested the checkbox inside the button branch,
        # so it only appeared after a click and the rerun reset the button —
        # the session could never actually be reset.
        confirm_reset = st.checkbox("Confirm session reset")
        if st.button("🆕 Create New Session"):
            if confirm_reset:
                st.session_state.user_id = str(uuid.uuid4())
                st.session_state.version_rag = None
                st.session_state.baseline_rag = None
                st.session_state.graph_manager = None
                st.session_state.uploaded_files = {}
                st.session_state.chat_history = []
                st.success("New session created!")
                st.rerun()
    with col2:
        if st.button("💾 Export Session Data"):
            session_data = {
                'user_id': st.session_state.user_id,
                'uploaded_files': st.session_state.uploaded_files,
                'chat_history': st.session_state.chat_history,
                'feedback_data': st.session_state.feedback_data,
                'timestamp': datetime.now().isoformat()
            }
            json_str = json.dumps(session_data, indent=2)
            st.download_button(
                "Download Session JSON",
                json_str,
                f"session_{st.session_state.user_id[:8]}.json",
                "application/json"
            )

    # UX Metrics
    st.markdown("### 📊 UX Metrics")
    col1, col2, col3 = st.columns(3)
    with col1:
        # TODO: track actual reuploads (same name, different hash); hard-coded 0
        reupload_count = 0
        st.metric("Reupload Count", reupload_count,
                  help="Number of times files were reuploaded")
    with col2:
        if st.session_state.chat_history:
            avg_response = (sum(c['vrag_time'] for c in st.session_state.chat_history)
                            / len(st.session_state.chat_history))
            st.metric("Avg Response Time", f"{avg_response:.3f}s")
        else:
            st.metric("Avg Response Time", "N/A")
    with col3:
        cross_contamination = 0  # This would be detected in production
        st.metric("Cross-User Contamination", cross_contamination,
                  help="Number of cross-user data leakage incidents")

# Footer
st.markdown("---")
st.markdown("""
<div style="text-align: center;">
<p>VersionRAG - Version-Aware Retrieval-Augmented Generation System</p>
<p>Built with Streamlit, LangChain, and ChromaDB</p>
</div>
""", unsafe_allow_html=True)