Spaces:
Sleeping
Sleeping
| # app.py - Main Streamlit Application | |
import hashlib
import html
import json
import os
import re
import time
import uuid
from datetime import datetime
from pathlib import Path
from typing import List, Dict, Optional, Tuple

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import streamlit as st

# Import custom modules
from evaluation import Evaluator, VersionQADataset
from graph_manager import GraphManager
from utils import DocumentProcessor, ChangeDetector, PersistentStorage
from version_rag import VersionRAG, BaselineRAG
# Page configuration (title, icon, wide layout, sidebar open on first load).
_PAGE_CONFIG = {
    "page_title": "VersionRAG - Version-Aware RAG System",
    "page_icon": "π",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**_PAGE_CONFIG)
# Initialize session state
def init_session_state():
    """Seed ``st.session_state`` with every key the app reads.

    Keys that already exist are left untouched, so values survive reruns.
    """
    # Factories rather than values: each missing key gets its own fresh
    # list/dict, and a UUID is generated only when ``user_id`` is absent.
    default_factories = (
        ('user_id', lambda: str(uuid.uuid4())),
        ('version_rag', lambda: None),
        ('baseline_rag', lambda: None),
        ('graph_manager', lambda: None),
        ('uploaded_files', dict),
        ('chat_history', list),
        ('evaluation_results', lambda: None),
        ('feedback_data', list),
        ('persistent_storage', lambda: None),
    )
    for key, make_default in default_factories:
        if key not in st.session_state:
            st.session_state[key] = make_default()


init_session_state()
# Custom CSS: header, metric cards, diff highlighting, version tags, tab spacing.
_CUSTOM_CSS = """
<style>
.main-header {
font-size: 2.5rem;
font-weight: bold;
color: #1f77b4;
text-align: center;
padding: 1rem 0;
}
.metric-card {
background-color: #f0f2f6;
padding: 1rem;
border-radius: 0.5rem;
margin: 0.5rem 0;
}
.diff-added {
background-color: #d4edda;
padding: 0.2rem 0.5rem;
border-radius: 0.3rem;
}
.diff-removed {
background-color: #f8d7da;
padding: 0.2rem 0.5rem;
border-radius: 0.3rem;
}
.version-tag {
background-color: #e7f3ff;
color: #0366d6;
padding: 0.2rem 0.5rem;
border-radius: 0.3rem;
font-weight: bold;
}
.stTabs [data-baseweb="tab-list"] {
gap: 2rem;
}
</style>
"""
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
# Sidebar: session info, model settings, system initialization, knowledge-base list.
with st.sidebar:
    st.markdown("### π User Session")
    st.info(f"User ID: {st.session_state.user_id[:8]}...")
    st.markdown("### βοΈ Settings")

    # API key input. The field is deliberately NOT pre-filled from the
    # environment: a widget's default value is delivered to the browser, which
    # would hand a server-configured key to every visitor. Leaving the field
    # empty keeps whatever OPENAI_API_KEY the process already has.
    api_key = st.text_input(
        "OpenAI API Key",
        type="password",
        help="Leave empty to use the key configured on the server, if any.",
    )
    if api_key:
        # NOTE(review): os.environ is process-wide, so a key entered here is
        # visible to every session served by this process. Confirm whether
        # VersionRAG/BaselineRAG can accept the key explicitly instead.
        os.environ["OPENAI_API_KEY"] = api_key

    # Model selection
    model_name = st.selectbox(
        "LLM Model",
        ["gpt-3.5-turbo", "gpt-4", "gpt-4-turbo-preview"],
        index=0
    )

    # Embedding model
    embedding_model = st.selectbox(
        "Embedding Model",
        ["text-embedding-3-small", "text-embedding-3-large", "text-embedding-ada-002"],
        index=0
    )

    # Retrieval parameters
    st.markdown("### π― Retrieval Parameters")
    top_k = st.slider("Top K Results", 1, 10, 5)
    # NOTE(review): similarity_threshold is collected but not referenced
    # anywhere else in the visible code; confirm whether queries should use it.
    similarity_threshold = st.slider("Similarity Threshold", 0.0, 1.0, 0.7)

    # Initialize systems button
    if st.button("π Initialize Systems", type="primary"):
        with st.spinner("Initializing VersionRAG and Baseline systems..."):
            try:
                # Build all three components first and publish them to session
                # state only when every constructor succeeded. Otherwise a
                # failure halfway through would leave version_rag set while
                # baseline_rag/graph_manager are still None.
                new_version_rag = VersionRAG(
                    user_id=st.session_state.user_id,
                    model_name=model_name,
                    embedding_model=embedding_model
                )
                new_baseline_rag = BaselineRAG(
                    user_id=st.session_state.user_id,
                    model_name=model_name,
                    embedding_model=embedding_model
                )
                new_graph_manager = GraphManager(
                    user_id=st.session_state.user_id
                )
            except Exception as e:
                st.error(f"β Initialization error: {str(e)}")
            else:
                st.session_state.version_rag = new_version_rag
                st.session_state.baseline_rag = new_baseline_rag
                st.session_state.graph_manager = new_graph_manager
                st.success("β Systems initialized successfully!")

    # Knowledge base status
    if st.session_state.uploaded_files:
        st.markdown("### π Knowledge Base")
        for filename, info in st.session_state.uploaded_files.items():
            with st.expander(f"π {filename}"):
                st.write(f"**Version:** {info['version']}")
                st.write(f"**Uploaded:** {info['timestamp']}")
                st.write(f"**Hash:** {info['hash'][:12]}...")
# Main content: page banner followed by the six top-level tabs.
st.markdown(
    '<div class="main-header">π VersionRAG: Version-Aware RAG System</div>',
    unsafe_allow_html=True,
)

# Create tabs (the order here fixes which section each tabN variable refers to).
_TAB_LABELS = [
    "π€ Document Upload",
    "π¬ Query Interface",
    "π Evaluation",
    "π Version Explorer",
    "π Analytics",
    "π₯ Multi-User Management",
]
tab1, tab2, tab3, tab4, tab5, tab6 = st.tabs(_TAB_LABELS)
# Tab 1: Document Upload
def _index_uploaded_file(file, version, domain, topic):
    """Index one uploaded file into VersionRAG, the baseline RAG and the graph.

    A re-upload with the same name and the same SHA-256 is skipped. A re-upload
    with a different hash is first diffed against the previously stored text
    and the changes are recorded in the graph. Any exception is reported in the
    UI rather than propagated.
    """
    state = st.session_state
    try:
        # Read file content
        raw = file.read()
        is_pdf = file.type == "application/pdf"
        text = DocumentProcessor.extract_text_from_pdf(raw) if is_pdf else raw.decode('utf-8')

        # Calculate hash
        file_hash = hashlib.sha256(raw).hexdigest()

        # Check if file already exists
        if file.name in state.uploaded_files:
            previous = state.uploaded_files[file.name]
            if previous['hash'] == file_hash:
                st.info("File unchanged, skipping indexing.")
                return
            st.info("File changed, re-indexing with diff analysis...")
            # Perform diff analysis against the stored text and record it
            changes = ChangeDetector.compute_diff(previous['text'], text)
            state.graph_manager.add_version_with_changes(
                document_name=topic,
                version=version,
                changes=changes,
            )

        # Add to VersionRAG (full metadata)
        rich_metadata = {
            'filename': file.name,
            'version': version,
            'domain': domain,
            'topic': topic,
            'hash': file_hash,
            'timestamp': datetime.now().isoformat(),
        }
        state.version_rag.add_documents(texts=[text], metadatas=[rich_metadata])

        # Add to Baseline RAG (filename and version only)
        state.baseline_rag.add_documents(
            texts=[text],
            metadatas=[{'filename': file.name, 'version': version}],
        )

        # Add to graph
        state.graph_manager.add_document_version(
            document_name=topic,
            version=version,
            content=text,
            metadata={'domain': domain, 'filename': file.name},
        )

        # Store in session state; 'text' is kept for diffing a later re-upload
        state.uploaded_files[file.name] = {
            'version': version,
            'domain': domain,
            'topic': topic,
            'hash': file_hash,
            'text': text,
            'timestamp': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        }
        st.success(f"β Successfully processed {file.name}")
    except Exception as e:
        st.error(f"β Error processing {file.name}: {str(e)}")


with tab1:
    st.header("Document Upload & Indexing")
    col1, col2 = st.columns([2, 1])

    with col1:
        uploaded_files = st.file_uploader(
            "Upload versioned documents (PDF, TXT)",
            type=["pdf", "txt"],
            accept_multiple_files=True,
        )
        if uploaded_files:
            st.markdown("### π File Metadata")
            for idx, file in enumerate(uploaded_files):
                with st.expander(f"π {file.name}", expanded=True):
                    col_a, col_b = st.columns(2)
                    with col_a:
                        version = st.text_input("Version", key=f"version_{idx}", value="1.0.0")
                    with col_b:
                        domain = st.selectbox(
                            "Domain",
                            ["Software", "Healthcare", "Finance", "Industrial", "Other"],
                            key=f"domain_{idx}",
                        )
                    topic = st.text_input(
                        "Topic/Module",
                        key=f"topic_{idx}",
                        value=file.name.split('.')[0],
                    )
                    if st.button(f"Process {file.name}", key=f"process_{idx}"):
                        if not st.session_state.version_rag:
                            st.error("Please initialize systems first!")
                        else:
                            with st.spinner(f"Processing {file.name}..."):
                                _index_uploaded_file(file, version, domain, topic)

    with col2:
        st.markdown("### π Upload Statistics")
        if st.session_state.uploaded_files:
            records = list(st.session_state.uploaded_files.values())
            st.metric('Total Files', len(st.session_state.uploaded_files))
            st.metric('Domains', len({record['domain'] for record in records}))
            st.metric('Total Versions', len({record['version'] for record in records}))

            # Domain distribution
            domain_counts = {}
            for record in records:
                domain_counts.setdefault(record['domain'], 0)
                domain_counts[record['domain']] += 1
            fig = px.pie(
                values=list(domain_counts.values()),
                names=list(domain_counts.keys()),
                title="Documents by Domain",
            )
            st.plotly_chart(fig, use_container_width=True)
# Tab 2: Query Interface
def _render_sources(sources, show_metadata=True):
    """Render retrieved sources in a collapsible expander.

    ``show_metadata`` adds the version/file/similarity lines (used for
    VersionRAG); the baseline view shows only the content preview.
    """
    with st.expander("π Sources"):
        for idx, source in enumerate(sources):
            st.markdown(f"**Source {idx+1}**")
            if show_metadata:
                st.markdown(f"- Version: `{source.get('version', 'N/A')}`")
                st.markdown(f"- File: `{source.get('filename', 'N/A')}`")
                st.markdown(f"- Similarity: {source.get('similarity', 0):.3f}")
            st.markdown(f"```\n{source.get('content', '')[:200]}...\n```")


def _render_answer(heading, result, elapsed, show_metadata=True):
    """Render one system's answer: heading, latency, answer text, sources."""
    st.markdown(heading)
    st.markdown(f"**Response Time:** {elapsed:.3f}s")
    st.markdown("---")
    st.markdown(result['answer'])
    if 'sources' in result:
        _render_sources(result['sources'], show_metadata)


with tab2:
    st.header("Interactive Query Interface")
    if not st.session_state.version_rag:
        st.warning("β οΈ Please initialize the systems first from the sidebar!")
    else:
        # Query type selection
        query_type = st.radio(
            "Query Type",
            ["Content Retrieval", "Version Inquiry", "Change Retrieval"],
            horizontal=True
        )

        # Query input
        col1, col2 = st.columns([3, 1])
        with col1:
            query = st.text_input(
                "Enter your query",
                placeholder="e.g., What is the assert module in Node.js v20.0?"
            )
        with col2:
            compare_mode = st.checkbox("Compare with Baseline", value=True)

        # Version filter (for content retrieval)
        if query_type == "Content Retrieval":
            version_filter = st.text_input(
                "Version Filter (optional)",
                placeholder="e.g., 1.2.0"
            )
        else:
            version_filter = None

        if st.button("π Search", type="primary"):
            if not query:
                st.warning("Please enter a query!")
            else:
                run_baseline = compare_mode
                if compare_mode and not st.session_state.baseline_rag:
                    st.warning("Baseline system is not initialized; showing VersionRAG only.")
                    run_baseline = False
                try:
                    with st.spinner("Searching..."):
                        # VersionRAG query
                        start_time = time.time()
                        if query_type == "Content Retrieval":
                            vrag_result = st.session_state.version_rag.query(
                                query=query,
                                version_filter=version_filter,
                                top_k=top_k
                            )
                        elif query_type == "Version Inquiry":
                            vrag_result = st.session_state.version_rag.version_inquiry(
                                query=query
                            )
                        else:  # Change Retrieval
                            vrag_result = st.session_state.version_rag.change_retrieval(
                                query=query
                            )
                        vrag_time = time.time() - start_time

                        # Baseline query (if comparison enabled)
                        baseline_result = None
                        baseline_time = None
                        if run_baseline:
                            start_time = time.time()
                            baseline_result = st.session_state.baseline_rag.query(
                                query=query,
                                top_k=top_k
                            )
                            baseline_time = time.time() - start_time
                except Exception as e:
                    # Do not leave a stale answer on screen next to the error.
                    st.session_state.pop('last_query_result', None)
                    st.error(f"Query error: {str(e)}")
                else:
                    # Persist the result. A click on any widget below (the
                    # feedback slider or button) reruns the script with the
                    # Search button reporting False, so anything rendered only
                    # inside this branch would vanish before feedback could be
                    # recorded.
                    st.session_state['last_query_result'] = {
                        'user_id': st.session_state.user_id,
                        'query': query,
                        'query_type': query_type,
                        'vrag_result': vrag_result,
                        'vrag_time': vrag_time,
                        'baseline_result': baseline_result,
                        'baseline_time': baseline_time,
                    }
                    # Add to chat history
                    st.session_state.chat_history.append({
                        'query': query,
                        'query_type': query_type,
                        'vrag_answer': vrag_result['answer'],
                        'vrag_time': vrag_time,
                        'baseline_answer': baseline_result['answer'] if baseline_result else None,
                        'baseline_time': baseline_time,
                        'timestamp': datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                    })

        # Display the most recent result for this session (survives reruns).
        last = st.session_state.get('last_query_result')
        if last and last.get('user_id') == st.session_state.user_id:
            if last['baseline_result'] is not None:
                col1, col2 = st.columns(2)
                with col1:
                    _render_answer("### π VersionRAG Response",
                                   last['vrag_result'], last['vrag_time'])
                with col2:
                    _render_answer("### π Baseline RAG Response",
                                   last['baseline_result'], last['baseline_time'],
                                   show_metadata=False)
            else:
                _render_answer("### π VersionRAG Response",
                               last['vrag_result'], last['vrag_time'])

            # Feedback (recorded against the query that produced the answer,
            # not whatever is currently typed in the input box).
            st.markdown("### π Feedback")
            col1, col2, col3 = st.columns([1, 1, 2])
            with col1:
                rating = st.slider("Rate this answer", 1, 5, 3)
            with col2:
                if st.button("Submit Feedback"):
                    st.session_state.feedback_data.append({
                        'query': last['query'],
                        'query_type': last['query_type'],
                        'rating': rating,
                        'timestamp': datetime.now().isoformat(),
                        'response_time': last['vrag_time']
                    })
                    st.success("Thank you for your feedback!")

        # Chat history (five most recent, newest first)
        if st.session_state.chat_history:
            st.markdown("### π Query History")
            for idx, chat in enumerate(reversed(st.session_state.chat_history[-5:])):
                with st.expander(f"{chat['timestamp']} - {chat['query'][:50]}..."):
                    st.markdown(f"**Query Type:** {chat['query_type']}")
                    st.markdown(f"**VersionRAG Answer:** {chat['vrag_answer'][:200]}...")
                    st.markdown(f"**Response Time:** {chat['vrag_time']:.3f}s")
# Tab 3: Evaluation
def _show_system_metrics(heading, metrics):
    """Render the headline metrics of one system under ``heading``."""
    st.markdown(heading)
    st.metric("Accuracy", f"{metrics['accuracy']:.2%}")
    st.metric("Hit@5", f"{metrics['hit_at_5']:.2%}")
    st.metric("MRR", f"{metrics['mrr']:.3f}")
    st.metric("VSA", f"{metrics['vsa']:.2%}")
    st.metric("Avg Latency", f"{metrics['avg_latency']:.3f}s")


with tab3:
    st.header("System Evaluation")
    if not st.session_state.version_rag:
        st.warning("β οΈ Please initialize the systems first!")
    else:
        st.markdown("""
This section evaluates VersionRAG against the baseline system using the Mini-VersionQA dataset.
Metrics include Hit@k, MRR, Accuracy, and Version-Sensitive Accuracy (VSA).
""")

        # Evaluation dataset configuration
        st.markdown("### π Evaluation Dataset Configuration")
        # Always bound: previously this name was undefined when the checkbox was
        # ticked but no file had been uploaded, so "Run Evaluation" failed with
        # a NameError. None means "use the default dataset".
        qa_data = None
        use_custom_dataset = st.checkbox("Use custom evaluation dataset")
        if use_custom_dataset:
            uploaded_qa_file = st.file_uploader(
                "Upload QA Dataset (JSON)",
                type=["json"]
            )
            if uploaded_qa_file:
                try:
                    qa_data = json.load(uploaded_qa_file)
                except ValueError as e:
                    # JSONDecodeError and UnicodeDecodeError are ValueErrors.
                    st.error(f"Could not parse QA dataset: {str(e)}")
                else:
                    st.success(f"Loaded {len(qa_data)} questions")
            else:
                st.info("No file uploaded yet; the default Mini-VersionQA dataset will be used.")
        else:
            st.info("Using default Mini-VersionQA dataset")

        if st.button("π Run Evaluation", type="primary"):
            with st.spinner("Running evaluation..."):
                try:
                    # Initialize evaluator
                    evaluator = Evaluator(
                        version_rag=st.session_state.version_rag,
                        baseline_rag=st.session_state.baseline_rag
                    )

                    # Create or load dataset
                    if qa_data:
                        dataset = VersionQADataset.from_dict(qa_data)
                    else:
                        dataset = VersionQADataset.create_mini_versionqa()

                    # Run evaluation
                    results = evaluator.evaluate(dataset)
                    st.session_state.evaluation_results = results
                    vrag_metrics = results['versionrag']
                    baseline_metrics = results['baseline']

                    # Display results
                    st.markdown("### π Evaluation Results")

                    # Overall comparison
                    col1, col2 = st.columns(2)
                    with col1:
                        _show_system_metrics("#### π VersionRAG", vrag_metrics)
                    with col2:
                        _show_system_metrics("#### π Baseline RAG", baseline_metrics)

                    # Performance improvement, in percentage points
                    st.markdown("### π Performance Improvement")
                    improvement = {
                        label: (vrag_metrics[key] - baseline_metrics[key]) * 100
                        for label, key in (
                            ('Accuracy', 'accuracy'),
                            ('Hit@5', 'hit_at_5'),
                            ('MRR', 'mrr'),
                            ('VSA', 'vsa'),
                        )
                    }
                    fig = go.Figure(data=[
                        go.Bar(name='Improvement', x=list(improvement.keys()),
                               y=list(improvement.values()),
                               marker_color='lightblue')
                    ])
                    fig.add_hline(y=25, line_dash="dash", line_color="red",
                                  annotation_text="Target: 25 points")
                    fig.update_layout(
                        title="VersionRAG vs Baseline - Performance Improvement (percentage points)",
                        yaxis_title="Improvement (%)",
                        showlegend=False
                    )
                    st.plotly_chart(fig, use_container_width=True)

                    # Query type breakdown
                    st.markdown("### π Performance by Query Type")
                    query_types = ['Content Retrieval', 'Version Inquiry', 'Change Retrieval']
                    type_keys = ('content_retrieval', 'version_inquiry', 'change_retrieval')
                    vrag_scores = [vrag_metrics['by_type'][key] for key in type_keys]
                    baseline_scores = [baseline_metrics['by_type'][key] for key in type_keys]
                    fig = go.Figure(data=[
                        go.Bar(name='VersionRAG', x=query_types, y=vrag_scores),
                        go.Bar(name='Baseline', x=query_types, y=baseline_scores)
                    ])
                    fig.update_layout(
                        title="Accuracy by Query Type",
                        yaxis_title="Accuracy (%)",
                        barmode='group'
                    )
                    st.plotly_chart(fig, use_container_width=True)

                    # Success criteria check
                    # NOTE(review): the overall metrics above are formatted as
                    # fractions (':.2%'), while the by_type scores are compared
                    # against 85/90/60. This assumes by_type values are already
                    # percentages; confirm against Evaluator.
                    st.markdown("### β Success Criteria")
                    criteria = {
                        'VSA Improvement β₯ 25 points': improvement['VSA'] >= 25,
                        'Content Retrieval β₯ 85%': vrag_scores[0] >= 85,
                        'Version Inquiry β₯ 90%': vrag_scores[1] >= 90,
                        'Change Retrieval β₯ 60%': vrag_scores[2] >= 60
                    }
                    for criterion, passed in criteria.items():
                        if passed:
                            st.success(f"β {criterion}")
                        else:
                            st.error(f"β {criterion}")
                except Exception as e:
                    st.error(f"Evaluation error: {str(e)}")
# Tab 4: Version Explorer
def _version_sort_key(version):
    """Return a natural-order sort key for a version label.

    Digit runs compare numerically, so "1.2.0" sorts before "1.10.0" (plain
    string sorting puts "1.10.0" first). This is natural ordering, not full
    semantic-versioning precedence: a suffixed label such as "1.0.0-beta"
    sorts after "1.0.0".
    """
    # re.split with a capturing group alternates text, digits, text, ...;
    # odd positions are therefore always the digit runs.
    tokens = re.split(r'(\d+)', str(version))
    return [
        (0, int(token), '') if position % 2 else (1, 0, token)
        for position, token in enumerate(tokens)
        if token
    ]


with tab4:
    st.header("Version Explorer")
    if not st.session_state.graph_manager:
        st.warning("β οΈ Please initialize the systems first!")
    else:
        # Document selection
        documents = st.session_state.graph_manager.get_all_documents()
        if not documents:
            st.info("No documents uploaded yet. Please upload documents in the 'Document Upload' tab.")
        else:
            selected_doc = st.selectbox("Select Document", documents)
            if selected_doc:
                # Get versions for selected document, ordered numerically
                versions = st.session_state.graph_manager.get_document_versions(selected_doc)
                ordered_versions = sorted(versions, key=_version_sort_key)
                st.markdown(f"### π {selected_doc}")
                st.markdown(f"**Total Versions:** {len(versions)}")

                # Version timeline
                if len(ordered_versions) > 1:
                    st.markdown("### π Version Timeline")
                    timeline_data = []
                    for v in ordered_versions:
                        version_info = st.session_state.graph_manager.get_version_info(
                            selected_doc, v
                        )
                        timeline_data.append({
                            'Version': v,
                            'Date': version_info.get('timestamp', 'N/A')
                        })
                    df = pd.DataFrame(timeline_data)
                    st.dataframe(df, use_container_width=True)

                # Version comparison
                st.markdown("### π Version Comparison")
                if len(ordered_versions) < 2:
                    # With fewer than two versions there is nothing to compare,
                    # and an empty list would make the second selectbox's
                    # default index negative.
                    st.info("At least two versions are needed for a comparison.")
                else:
                    col1, col2 = st.columns(2)
                    with col1:
                        version1 = st.selectbox("Version 1", ordered_versions, index=0)
                    with col2:
                        version2 = st.selectbox("Version 2", ordered_versions, index=1)

                    if version1 and version2 and version1 != version2:
                        if st.button("Compare Versions"):
                            with st.spinner("Computing differences..."):
                                changes = st.session_state.graph_manager.get_changes_between_versions(
                                    selected_doc, version1, version2
                                )
                            st.markdown("### π Changes Detected")
                            # The diff text comes from uploaded documents and is
                            # rendered with unsafe_allow_html, so it is escaped
                            # to keep document content from being interpreted
                            # as markup.
                            if changes['additions']:
                                st.markdown("#### β Additions")
                                for add in changes['additions']:
                                    st.markdown(
                                        f'<div class="diff-added">{html.escape(str(add))}</div>',
                                        unsafe_allow_html=True)
                            if changes['deletions']:
                                st.markdown("#### β Deletions")
                                for delete in changes['deletions']:
                                    st.markdown(
                                        f'<div class="diff-removed">{html.escape(str(delete))}</div>',
                                        unsafe_allow_html=True)
                            if changes['modifications']:
                                st.markdown("#### π Modifications")
                                for mod in changes['modifications']:
                                    st.markdown(f"- {mod}")

                            # Visualize changes
                            st.markdown("### π Change Statistics")
                            change_stats = {
                                'Additions': len(changes['additions']),
                                'Deletions': len(changes['deletions']),
                                'Modifications': len(changes['modifications'])
                            }
                            fig = px.bar(
                                x=list(change_stats.keys()),
                                y=list(change_stats.values()),
                                title=f"Changes from {version1} to {version2}",
                                labels={'x': 'Change Type', 'y': 'Count'}
                            )
                            st.plotly_chart(fig, use_container_width=True)
# Tab 5: Analytics (all figures are derived from this session's history and feedback).
with tab5:
    st.header("System Analytics")
    history = st.session_state.chat_history
    feedback_entries = st.session_state.feedback_data

    # System statistics
    col1, col2, col3, col4 = st.columns(4)
    with col1:
        st.metric("Total Queries", len(history))
    with col2:
        rating_label = "N/A"
        if feedback_entries:
            avg_rating = sum(f['rating'] for f in feedback_entries) / len(feedback_entries)
            rating_label = f"{avg_rating:.2f} / 5"
        st.metric("Avg Rating", rating_label)
    with col3:
        latency_label = "N/A"
        if history:
            avg_response_time = sum(c['vrag_time'] for c in history) / len(history)
            latency_label = f"{avg_response_time:.3f}s"
        st.metric("Avg Response Time", latency_label)
    with col4:
        st.metric("Total Documents", len(st.session_state.uploaded_files))

    # Query type distribution
    if history:
        st.markdown("### π Query Type Distribution")
        query_type_counts = {}
        for entry in history:
            query_type_counts.setdefault(entry['query_type'], 0)
            query_type_counts[entry['query_type']] += 1
        fig = px.pie(
            values=list(query_type_counts.values()),
            names=list(query_type_counts.keys()),
            title="Distribution of Query Types",
        )
        st.plotly_chart(fig, use_container_width=True)

        # Response time trend (a trend needs at least two points)
        if len(history) > 1:
            st.markdown("### β±οΈ Response Time Trend")
            latencies = [entry['vrag_time'] for entry in history]
            trace = go.Scatter(y=latencies, mode='lines+markers', name='Response Time')
            fig = go.Figure(data=trace)
            fig.update_layout(
                title="Response Time Over Queries",
                xaxis_title="Query Number",
                yaxis_title="Response Time (s)",
            )
            st.plotly_chart(fig, use_container_width=True)

    # Feedback analysis
    if feedback_entries:
        st.markdown("### π User Feedback Analysis")
        # Rating distribution
        rating_counts = {}
        for entry in feedback_entries:
            rating_counts.setdefault(entry['rating'], 0)
            rating_counts[entry['rating']] += 1
        fig = go.Figure(data=[
            go.Bar(x=list(rating_counts.keys()), y=list(rating_counts.values()))
        ])
        fig.update_layout(
            title="Rating Distribution",
            xaxis_title="Rating",
            yaxis_title="Count",
        )
        st.plotly_chart(fig, use_container_width=True)

    # Export analytics: each button reveals a CSV download when data exists.
    st.markdown("### πΎ Export Data")
    col1, col2 = st.columns(2)
    export_specs = (
        (col1, "Export Chat History", history, "chat_history.csv"),
        (col2, "Export Feedback Data", feedback_entries, "feedback_data.csv"),
    )
    for column, button_label, rows, csv_name in export_specs:
        with column:
            # The button is always rendered; the download appears only after a
            # click and only when there is something to export.
            if st.button(button_label) and rows:
                st.download_button(
                    "Download CSV",
                    pd.DataFrame(rows).to_csv(index=False),
                    csv_name,
                    "text/csv",
                )
# Tab 6: Multi-User Management
def _reset_session():
    """Button callback: start a fresh session once the reset is confirmed.

    Runs before the script reruns, so the whole page is rendered with the new
    state and the confirmation checkbox can be cleared here. The outcome is
    left in ``session_reset_notice`` for the UI to display once.
    """
    if not st.session_state.get("confirm_session_reset"):
        st.session_state["session_reset_notice"] = "unconfirmed"
        return
    st.session_state.user_id = str(uuid.uuid4())
    st.session_state.version_rag = None
    st.session_state.baseline_rag = None
    st.session_state.graph_manager = None
    st.session_state.uploaded_files = {}
    st.session_state.chat_history = []
    # Feedback and evaluation results belong to the old session as well.
    st.session_state.feedback_data = []
    st.session_state.evaluation_results = None
    # Require a fresh confirmation for the next reset.
    st.session_state["confirm_session_reset"] = False
    st.session_state["session_reset_notice"] = "done"


with tab6:
    st.header("Multi-User Management")
    st.markdown("""
This section demonstrates VersionRAG's multi-user capabilities with logical data separation
and persistent knowledge base management.
""")

    # User session info
    st.markdown("### π€ Current Session")
    col1, col2, col3 = st.columns(3)
    with col1:
        st.info(f"**User ID:** {st.session_state.user_id[:16]}...")
    with col2:
        st.info(f"**Documents:** {len(st.session_state.uploaded_files)}")
    with col3:
        st.info(f"**Queries:** {len(st.session_state.chat_history)}")

    # Data isolation demonstration
    st.markdown("### π Data Isolation")
    st.markdown("""
Each user's knowledge base is logically separated using `tenant_id` metadata in ChromaDB.
This ensures:
- No data leakage between users
- Independent query results
- Isolated document management
""")

    # Knowledge base status
    st.markdown("### π Knowledge Base Status")
    if st.session_state.uploaded_files:
        kb_data = [
            {
                'File': filename,
                'Version': info['version'],
                'Domain': info['domain'],
                'Topic': info['topic'],
                'Uploaded': info['timestamp'],
                'Hash': info['hash'][:12] + "..."
            }
            for filename, info in st.session_state.uploaded_files.items()
        ]
        df = pd.DataFrame(kb_data)
        st.dataframe(df, use_container_width=True)

        # Persistent storage info
        st.success("""
β **Persistent Storage Active**
- All documents are stored with file hash tracking
- Unchanged files skip re-indexing
- Automatic diff-based updates for modified files
""")
    else:
        st.info("No documents in knowledge base. Upload documents to get started.")

    # Session management
    st.markdown("### π Session Management")
    col1, col2 = st.columns(2)
    with col1:
        # The checkbox must be rendered on every run. When it was created only
        # inside the button's branch, ticking it triggered a rerun in which the
        # button reported False, so the reset could never be confirmed.
        st.checkbox("Confirm session reset", key="confirm_session_reset")
        st.button("π Create New Session", on_click=_reset_session)
        reset_notice = st.session_state.pop("session_reset_notice", None)
        if reset_notice == "done":
            st.success("New session created!")
        elif reset_notice == "unconfirmed":
            st.warning("Tick 'Confirm session reset' before creating a new session.")
    with col2:
        if st.button("πΎ Export Session Data"):
            session_data = {
                'user_id': st.session_state.user_id,
                'uploaded_files': st.session_state.uploaded_files,
                'chat_history': st.session_state.chat_history,
                'feedback_data': st.session_state.feedback_data,
                'timestamp': datetime.now().isoformat()
            }
            json_str = json.dumps(session_data, indent=2)
            st.download_button(
                "Download Session JSON",
                json_str,
                f"session_{st.session_state.user_id[:8]}.json",
                "application/json"
            )

    # UX Metrics
    st.markdown("### π UX Metrics")
    col1, col2, col3 = st.columns(3)
    with col1:
        # NOTE(review): placeholder. Nothing in the visible code records
        # re-uploads, so this is always 0 until such a counter is tracked.
        reupload_count = 0
        st.metric("Reupload Count", reupload_count,
                  help="Number of times files were reuploaded")
    with col2:
        if st.session_state.chat_history:
            avg_response = sum(c['vrag_time'] for c in st.session_state.chat_history) / len(st.session_state.chat_history)
            st.metric("Avg Response Time", f"{avg_response:.3f}s")
        else:
            st.metric("Avg Response Time", "N/A")
    with col3:
        cross_contamination = 0  # Placeholder: not measured anywhere in this file
        st.metric("Cross-User Contamination", cross_contamination,
                  help="Number of cross-user data leakage incidents")
# Footer: horizontal rule followed by a centered attribution block.
_FOOTER_HTML = """
<div style='text-align: center; color: #666;'>
<p>VersionRAG - Version-Aware Retrieval-Augmented Generation System</p>
<p>Built with Streamlit, LangChain, and ChromaDB</p>
</div>
"""
st.markdown("---")
st.markdown(_FOOTER_HTML, unsafe_allow_html=True)