Spaces:

shahbazdev0
/

VersionRAG

Sleeping

App Files Files Community

VersionRAG / src /streamlit_app.py

shahbazdev0

Update src/streamlit_app.py

028477b verified 3 months ago

raw

history blame contribute delete

39.5 kB


	# streamlit_app.py - Main Streamlit Application
	import hashlib
	import html
	import json
	import os
	import time
	import uuid
	from datetime import datetime
	from pathlib import Path
	from typing import List, Dict, Optional, Tuple

	import pandas as pd
	import plotly.express as px
	import plotly.graph_objects as go
	import streamlit as st

	# Import custom modules
	from version_rag import VersionRAG, BaselineRAG
	from graph_manager import GraphManager
	from evaluation import Evaluator, VersionQADataset
	from utils import DocumentProcessor, ChangeDetector, PersistentStorage

	# Page configuration: the settings are gathered in one mapping and applied
	# with a single call, which is the first Streamlit call in this script.
	page_settings = {
	    "page_title": "VersionRAG - Version-Aware RAG System",
	    "page_icon": "📚",
	    "layout": "wide",
	    "initial_sidebar_state": "expanded",
	}
	st.set_page_config(**page_settings)

	# Initialize session state
	def init_session_state():
	    """Seed ``st.session_state`` with a default for every key that is missing.

	    Keys that already exist are left untouched, so calling this on every
	    script run does not overwrite values set earlier in the session.
	    Defaults are produced by factories so that a fresh UUID / list / dict is
	    only created when the key is actually absent.
	    """
	    default_factories = {
	        'user_id': lambda: str(uuid.uuid4()),
	        'version_rag': lambda: None,
	        'baseline_rag': lambda: None,
	        'graph_manager': lambda: None,
	        'uploaded_files': dict,
	        'chat_history': list,
	        'evaluation_results': lambda: None,
	        'feedback_data': list,
	        'persistent_storage': lambda: None,
	    }
	    for state_key, make_default in default_factories.items():
	        if state_key not in st.session_state:
	            st.session_state[state_key] = make_default()


	init_session_state()

	# Custom CSS
	# Style rules for the page header, metric cards, diff highlighting,
	# version tags and the spacing of the tab bar.
	CUSTOM_CSS = """
	<style>
	.main-header {
	font-size: 2.5rem;
	font-weight: bold;
	color: #1f77b4;
	text-align: center;
	padding: 1rem 0;
	}
	.metric-card {
	background-color: #f0f2f6;
	padding: 1rem;
	border-radius: 0.5rem;
	margin: 0.5rem 0;
	}
	.diff-added {
	background-color: #d4edda;
	padding: 0.2rem 0.5rem;
	border-radius: 0.3rem;
	}
	.diff-removed {
	background-color: #f8d7da;
	padding: 0.2rem 0.5rem;
	border-radius: 0.3rem;
	}
	.version-tag {
	background-color: #e7f3ff;
	color: #0366d6;
	padding: 0.2rem 0.5rem;
	border-radius: 0.3rem;
	font-weight: bold;
	}
	.stTabs [data-baseweb="tab-list"] {
	gap: 2rem;
	}
	</style>
	"""
	# Raw HTML is required so the <style> element reaches the page unescaped.
	st.markdown(CUSTOM_CSS, unsafe_allow_html=True)

	# Sidebar: session info, model settings, retrieval parameters, system
	# initialisation and a summary of the indexed files.
	with st.sidebar:
	    st.markdown("### 🔐 User Session")
	    st.info(f"User ID: {st.session_state.user_id[:8]}...")

	    st.markdown("### ⚙️ Settings")

	    # API key input.
	    # The field is deliberately NOT pre-filled from the environment: a
	    # widget's value is sent to the browser, so echoing the server-side key
	    # (even into a masked password field) would hand it to every visitor.
	    api_key = st.text_input(
	        "OpenAI API Key",
	        type="password",
	        help="Leave empty to use the key already configured on the server, if any.",
	    )
	    if api_key:
	        # NOTE(review): os.environ is process-wide, so a key entered here is
	        # visible to every session served by this process. Passing the key
	        # to the RAG constructors directly would be safer if they accept it.
	        os.environ["OPENAI_API_KEY"] = api_key

	    # Model selection
	    model_name = st.selectbox(
	        "LLM Model",
	        ["gpt-3.5-turbo", "gpt-4", "gpt-4-turbo-preview"],
	        index=0,
	    )

	    # Embedding model
	    embedding_model = st.selectbox(
	        "Embedding Model",
	        ["text-embedding-3-small", "text-embedding-3-large", "text-embedding-ada-002"],
	        index=0,
	    )

	    # Retrieval parameters
	    st.markdown("### 🎯 Retrieval Parameters")
	    top_k = st.slider("Top K Results", 1, 10, 5)
	    # NOTE(review): similarity_threshold is not read anywhere in the visible
	    # part of this script.
	    similarity_threshold = st.slider("Similarity Threshold", 0.0, 1.0, 0.7)

	    # Initialize systems button
	    if st.button("🚀 Initialize Systems", type="primary"):
	        with st.spinner("Initializing VersionRAG and Baseline systems..."):
	            # Top-level UI boundary: any failure is reported to the user
	            # instead of crashing the page.
	            try:
	                st.session_state.version_rag = VersionRAG(
	                    user_id=st.session_state.user_id,
	                    model_name=model_name,
	                    embedding_model=embedding_model,
	                )
	                st.session_state.baseline_rag = BaselineRAG(
	                    user_id=st.session_state.user_id,
	                    model_name=model_name,
	                    embedding_model=embedding_model,
	                )
	                st.session_state.graph_manager = GraphManager(
	                    user_id=st.session_state.user_id
	                )
	                st.success("✅ Systems initialized successfully!")
	            except Exception as e:
	                st.error(f"❌ Initialization error: {str(e)}")

	    # Knowledge base status
	    if st.session_state.uploaded_files:
	        st.markdown("### 📚 Knowledge Base")
	        for filename, info in st.session_state.uploaded_files.items():
	            with st.expander(f"📄 {filename}"):
	                st.write(f"Version: {info['version']}")
	                st.write(f"Uploaded: {info['timestamp']}")
	                st.write(f"Hash: {info['hash'][:12]}...")

	# Main content: page banner, styled by the .main-header CSS class.
	header_html = '<div class="main-header">📚 VersionRAG: Version-Aware RAG System</div>'
	st.markdown(header_html, unsafe_allow_html=True)

	# Create tabs: one container per application area, in display order.
	tab_labels = [
	    "📤 Document Upload",
	    "💬 Query Interface",
	    "📊 Evaluation",
	    "🔍 Version Explorer",
	    "📈 Analytics",
	    "👥 Multi-User Management",
	]
	tab1, tab2, tab3, tab4, tab5, tab6 = st.tabs(tab_labels)

	# Tab 1: Document Upload
	# Lets the user attach metadata to each uploaded file and index it into
	# VersionRAG, the baseline RAG and the version graph. Files are tracked by
	# SHA-256 hash so an unchanged re-upload is skipped and a changed one is
	# diffed against the previously stored text.
	with tab1:
	    st.header("Document Upload & Indexing")

	    col1, col2 = st.columns([2, 1])

	    with col1:
	        uploaded_files = st.file_uploader(
	            "Upload versioned documents (PDF, TXT)",
	            type=["pdf", "txt"],
	            accept_multiple_files=True,
	        )

	        if uploaded_files:
	            st.markdown("### 📋 File Metadata")
	            for idx, file in enumerate(uploaded_files):
	                with st.expander(f"📄 {file.name}", expanded=True):
	                    col_a, col_b = st.columns(2)
	                    with col_a:
	                        version = st.text_input(
	                            "Version",
	                            key=f"version_{idx}",
	                            value="1.0.0",
	                        )
	                    with col_b:
	                        domain = st.selectbox(
	                            "Domain",
	                            ["Software", "Healthcare", "Finance", "Industrial", "Other"],
	                            key=f"domain_{idx}",
	                        )

	                    topic = st.text_input(
	                        "Topic/Module",
	                        key=f"topic_{idx}",
	                        value=file.name.split('.')[0],
	                    )

	                    if not st.button(f"Process {file.name}", key=f"process_{idx}"):
	                        continue
	                    if not st.session_state.version_rag:
	                        st.error("Please initialize systems first!")
	                        continue

	                    with st.spinner(f"Processing {file.name}..."):
	                        try:
	                            # getvalue() returns the whole buffer regardless
	                            # of the current stream position, unlike read().
	                            content = file.getvalue()
	                            file_hash = hashlib.sha256(content).hexdigest()

	                            # Check the hash BEFORE extracting text so an
	                            # unchanged file costs no PDF parsing at all.
	                            previous = st.session_state.uploaded_files.get(file.name)
	                            if previous is not None and previous['hash'] == file_hash:
	                                st.info("File unchanged, skipping indexing.")
	                                continue

	                            if file.type == "application/pdf":
	                                text = DocumentProcessor.extract_text_from_pdf(content)
	                            else:
	                                text = content.decode('utf-8')

	                            if previous is not None:
	                                st.info("File changed, re-indexing with diff analysis...")
	                                # Diff against the text stored for the
	                                # previous upload of the same filename.
	                                changes = ChangeDetector.compute_diff(previous['text'], text)
	                                st.session_state.graph_manager.add_version_with_changes(
	                                    document_name=topic,
	                                    version=version,
	                                    changes=changes,
	                                )

	                            processed_at = datetime.now()

	                            # Add to VersionRAG
	                            st.session_state.version_rag.add_documents(
	                                texts=[text],
	                                metadatas=[{
	                                    'filename': file.name,
	                                    'version': version,
	                                    'domain': domain,
	                                    'topic': topic,
	                                    'hash': file_hash,
	                                    'timestamp': processed_at.isoformat(),
	                                }],
	                            )

	                            # Add to Baseline RAG
	                            st.session_state.baseline_rag.add_documents(
	                                texts=[text],
	                                metadatas=[{
	                                    'filename': file.name,
	                                    'version': version,
	                                }],
	                            )

	                            # Add to graph
	                            st.session_state.graph_manager.add_document_version(
	                                document_name=topic,
	                                version=version,
	                                content=text,
	                                metadata={
	                                    'domain': domain,
	                                    'filename': file.name,
	                                },
	                            )

	                            # Store in session state; 'text' and 'hash' are
	                            # what the next upload of this file is compared to.
	                            st.session_state.uploaded_files[file.name] = {
	                                'version': version,
	                                'domain': domain,
	                                'topic': topic,
	                                'hash': file_hash,
	                                'text': text,
	                                'timestamp': processed_at.strftime("%Y-%m-%d %H:%M:%S"),
	                            }

	                            st.success(f"✅ Successfully processed {file.name}")

	                        except Exception as e:
	                            # UI boundary: report the failure for this file
	                            # and keep rendering the remaining uploads.
	                            st.error(f"❌ Error processing {file.name}: {str(e)}")

	    with col2:
	        st.markdown("### 📊 Upload Statistics")
	        if st.session_state.uploaded_files:
	            indexed = list(st.session_state.uploaded_files.values())
	            stats_data = {
	                'Total Files': len(indexed),
	                'Domains': len({f['domain'] for f in indexed}),
	                'Total Versions': len({f['version'] for f in indexed}),
	            }

	            for key, value in stats_data.items():
	                st.metric(key, value)

	            # Domain distribution
	            domain_counts = {}
	            for file_info in indexed:
	                file_domain = file_info['domain']
	                domain_counts[file_domain] = domain_counts.get(file_domain, 0) + 1

	            fig = px.pie(
	                values=list(domain_counts.values()),
	                names=list(domain_counts.keys()),
	                title="Documents by Domain",
	            )
	            st.plotly_chart(fig, use_container_width=True)

	# Tab 2: Query Interface
	def _render_sources(sources, show_metadata=True):
	    """Render retrieved source snippets inside a collapsed expander.

	    ``sources`` is iterated as dict-like items read with ``.get``;
	    ``show_metadata`` toggles the version / filename / similarity lines.
	    Only the first 200 characters of each source's content are shown.
	    """
	    with st.expander("📚 Sources"):
	        for position, source in enumerate(sources, start=1):
	            st.markdown(f"Source {position}")
	            if show_metadata:
	                st.markdown(f"- Version: `{source.get('version', 'N/A')}`")
	                st.markdown(f"- File: `{source.get('filename', 'N/A')}`")
	                st.markdown(f"- Similarity: {source.get('similarity', 0):.3f}")
	            st.markdown(f"```\n{source.get('content', '')[:200]}...\n```")


	with tab2:
	    st.header("Interactive Query Interface")

	    if not st.session_state.version_rag:
	        st.warning("⚠️ Please initialize the systems first from the sidebar!")
	    else:
	        # Query type selection
	        query_type = st.radio(
	            "Query Type",
	            ["Content Retrieval", "Version Inquiry", "Change Retrieval"],
	            horizontal=True,
	        )

	        # Query input
	        col1, col2 = st.columns([3, 1])
	        with col1:
	            query = st.text_input(
	                "Enter your query",
	                placeholder="e.g., What is the assert module in Node.js v20.0?",
	            )

	        with col2:
	            compare_mode = st.checkbox("Compare with Baseline", value=True)

	        # Version filter (for content retrieval)
	        if query_type == "Content Retrieval":
	            version_filter = st.text_input(
	                "Version Filter (optional)",
	                placeholder="e.g., 1.2.0",
	            )
	        else:
	            version_filter = None

	        if st.button("🔍 Search", type="primary"):
	            if not query:
	                st.warning("Please enter a query!")
	            else:
	                with st.spinner("Searching..."):
	                    started = time.perf_counter()

	                    # VersionRAG query
	                    if query_type == "Content Retrieval":
	                        vrag_result = st.session_state.version_rag.query(
	                            query=query,
	                            version_filter=version_filter,
	                            top_k=top_k,
	                        )
	                    elif query_type == "Version Inquiry":
	                        vrag_result = st.session_state.version_rag.version_inquiry(
	                            query=query
	                        )
	                    else:  # Change Retrieval
	                        vrag_result = st.session_state.version_rag.change_retrieval(
	                            query=query
	                        )

	                    vrag_time = time.perf_counter() - started

	                    # Baseline query (if comparison enabled)
	                    baseline_result = None
	                    baseline_time = None
	                    if compare_mode:
	                        started = time.perf_counter()
	                        baseline_result = st.session_state.baseline_rag.query(
	                            query=query,
	                            top_k=top_k,
	                        )
	                        baseline_time = time.perf_counter() - started

	                # Persist the outcome. A button is only True during the run
	                # triggered by its own click, so anything that must survive
	                # the next interaction (e.g. pressing "Submit Feedback")
	                # cannot live solely inside this branch.
	                st.session_state['last_query_result'] = {
	                    'user_id': st.session_state.user_id,
	                    'query': query,
	                    'query_type': query_type,
	                    'vrag_result': vrag_result,
	                    'vrag_time': vrag_time,
	                    'baseline_result': baseline_result,
	                    'baseline_time': baseline_time,
	                }

	                # Add to chat history
	                st.session_state.chat_history.append({
	                    'query': query,
	                    'query_type': query_type,
	                    'vrag_answer': vrag_result['answer'],
	                    'vrag_time': vrag_time,
	                    'baseline_answer': baseline_result['answer'] if compare_mode else None,
	                    'baseline_time': baseline_time,
	                    'timestamp': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
	                })

	        # Display the most recent result on every run, ignoring one that
	        # belongs to a previous user id (i.e. from before a session reset).
	        last_result = st.session_state.get('last_query_result')
	        if last_result and last_result['user_id'] == st.session_state.user_id:
	            shown_vrag = last_result['vrag_result']
	            shown_baseline = last_result['baseline_result']

	            if shown_baseline is not None:
	                col1, col2 = st.columns(2)

	                with col1:
	                    st.markdown("### 🚀 VersionRAG Response")
	                    st.markdown(f"Response Time: {last_result['vrag_time']:.3f}s")
	                    st.markdown("---")
	                    st.markdown(shown_vrag['answer'])
	                    if 'sources' in shown_vrag:
	                        _render_sources(shown_vrag['sources'])

	                with col2:
	                    st.markdown("### 📊 Baseline RAG Response")
	                    st.markdown(f"Response Time: {last_result['baseline_time']:.3f}s")
	                    st.markdown("---")
	                    st.markdown(shown_baseline['answer'])
	                    if 'sources' in shown_baseline:
	                        _render_sources(shown_baseline['sources'], show_metadata=False)
	            else:
	                st.markdown("### 🚀 VersionRAG Response")
	                st.markdown(f"Response Time: {last_result['vrag_time']:.3f}s")
	                st.markdown("---")
	                st.markdown(shown_vrag['answer'])
	                if 'sources' in shown_vrag:
	                    _render_sources(shown_vrag['sources'])

	            # Feedback: rendered outside the Search branch so that the
	            # click on "Submit Feedback" is actually observed.
	            st.markdown("### 📝 Feedback")
	            col1, col2, col3 = st.columns([1, 1, 2])
	            with col1:
	                rating = st.slider("Rate this answer", 1, 5, 3)
	            with col2:
	                if st.button("Submit Feedback"):
	                    st.session_state.feedback_data.append({
	                        'query': last_result['query'],
	                        'query_type': last_result['query_type'],
	                        'rating': rating,
	                        'timestamp': datetime.now().isoformat(),
	                        'response_time': last_result['vrag_time'],
	                    })
	                    st.success("Thank you for your feedback!")

	        # Chat history: the five most recent queries, newest first.
	        if st.session_state.chat_history:
	            st.markdown("### 💭 Query History")
	            for chat in reversed(st.session_state.chat_history[-5:]):
	                with st.expander(f"{chat['timestamp']} - {chat['query'][:50]}..."):
	                    st.markdown(f"Query Type: {chat['query_type']}")
	                    st.markdown(f"VersionRAG Answer: {chat['vrag_answer'][:200]}...")
	                    st.markdown(f"Response Time: {chat['vrag_time']:.3f}s")

	# Tab 3: Evaluation
	# Runs the Evaluator over a QA dataset (uploaded JSON or the built-in
	# Mini-VersionQA) and charts VersionRAG against the baseline.
	with tab3:
	    st.header("System Evaluation")

	    if not st.session_state.version_rag:
	        st.warning("⚠️ Please initialize the systems first!")
	    else:
	        st.markdown("""
	This section evaluates VersionRAG against the baseline system using the Mini-VersionQA dataset.
	Metrics include Hit@k, MRR, Accuracy, and Version-Sensitive Accuracy (VSA).
	""")

	        # Evaluation dataset configuration
	        st.markdown("### 📋 Evaluation Dataset Configuration")

	        # Always bound, so "custom dataset" ticked without an upload falls
	        # back to the default dataset instead of raising NameError later.
	        qa_data = None

	        use_custom_dataset = st.checkbox("Use custom evaluation dataset")

	        if use_custom_dataset:
	            uploaded_qa_file = st.file_uploader(
	                "Upload QA Dataset (JSON)",
	                type=["json"],
	            )
	            if uploaded_qa_file:
	                try:
	                    qa_data = json.load(uploaded_qa_file)
	                except (json.JSONDecodeError, UnicodeDecodeError) as e:
	                    st.error(f"Could not parse QA dataset: {e}")
	                else:
	                    st.success(f"Loaded {len(qa_data)} questions")
	            else:
	                st.info("No file uploaded yet; the default Mini-VersionQA dataset will be used.")
	        else:
	            st.info("Using default Mini-VersionQA dataset")

	        if st.button("🚀 Run Evaluation", type="primary"):
	            with st.spinner("Running evaluation..."):
	                try:
	                    # Initialize evaluator
	                    evaluator = Evaluator(
	                        version_rag=st.session_state.version_rag,
	                        baseline_rag=st.session_state.baseline_rag,
	                    )

	                    # Create or load dataset (an empty upload also falls
	                    # back to the default dataset).
	                    if qa_data:
	                        dataset = VersionQADataset.from_dict(qa_data)
	                    else:
	                        dataset = VersionQADataset.create_mini_versionqa()

	                    # Run evaluation
	                    results = evaluator.evaluate(dataset)
	                    st.session_state.evaluation_results = results

	                    # Display results
	                    st.markdown("### 📊 Evaluation Results")

	                    # (label, results key, display format) for each headline metric.
	                    headline_metrics = [
	                        ("Accuracy", 'accuracy', "{:.2%}"),
	                        ("Hit@5", 'hit_at_5', "{:.2%}"),
	                        ("MRR", 'mrr', "{:.3f}"),
	                        ("VSA", 'vsa', "{:.2%}"),
	                    ]

	                    # Overall comparison: one column per system.
	                    systems = [
	                        ("#### 🚀 VersionRAG", 'versionrag'),
	                        ("#### 📊 Baseline RAG", 'baseline'),
	                    ]
	                    for column, (heading, system) in zip(st.columns(2), systems):
	                        with column:
	                            st.markdown(heading)
	                            for label, field, fmt in headline_metrics:
	                                st.metric(label, fmt.format(results[system][field]))
	                            st.metric("Avg Latency", f"{results[system]['avg_latency']:.3f}s")

	                    # Performance improvement, in points (difference * 100).
	                    st.markdown("### 📈 Performance Improvement")
	                    improvement = {
	                        label: (results['versionrag'][field] - results['baseline'][field]) * 100
	                        for label, field, _ in headline_metrics
	                    }

	                    fig = go.Figure(data=[
	                        go.Bar(name='Improvement', x=list(improvement.keys()),
	                               y=list(improvement.values()),
	                               marker_color='lightblue')
	                    ])
	                    fig.add_hline(y=25, line_dash="dash", line_color="red",
	                                  annotation_text="Target: 25 points")
	                    fig.update_layout(
	                        title="VersionRAG vs Baseline - Performance Improvement (percentage points)",
	                        yaxis_title="Improvement (%)",
	                        showlegend=False,
	                    )
	                    st.plotly_chart(fig, use_container_width=True)

	                    # Query type breakdown
	                    st.markdown("### 🔍 Performance by Query Type")

	                    type_fields = {
	                        'Content Retrieval': 'content_retrieval',
	                        'Version Inquiry': 'version_inquiry',
	                        'Change Retrieval': 'change_retrieval',
	                    }
	                    query_types = list(type_fields)
	                    vrag_scores = [
	                        results['versionrag']['by_type'][field]
	                        for field in type_fields.values()
	                    ]
	                    baseline_scores = [
	                        results['baseline']['by_type'][field]
	                        for field in type_fields.values()
	                    ]

	                    fig = go.Figure(data=[
	                        go.Bar(name='VersionRAG', x=query_types, y=vrag_scores),
	                        go.Bar(name='Baseline', x=query_types, y=baseline_scores),
	                    ])
	                    fig.update_layout(
	                        title="Accuracy by Query Type",
	                        yaxis_title="Accuracy (%)",
	                        barmode='group',
	                    )
	                    st.plotly_chart(fig, use_container_width=True)

	                    # Success criteria check
	                    # NOTE(review): the thresholds below treat by_type scores
	                    # as percentages (0-100), while the headline metrics are
	                    # formatted with ':.2%' as fractions (0-1). Confirm the
	                    # scale Evaluator uses for 'by_type'.
	                    st.markdown("### ✅ Success Criteria")
	                    criteria = {
	                        'VSA Improvement ≥ 25 points': improvement['VSA'] >= 25,
	                        'Content Retrieval ≥ 85%': vrag_scores[0] >= 85,
	                        'Version Inquiry ≥ 90%': vrag_scores[1] >= 90,
	                        'Change Retrieval ≥ 60%': vrag_scores[2] >= 60,
	                    }

	                    for criterion, passed in criteria.items():
	                        if passed:
	                            st.success(f"✅ {criterion}")
	                        else:
	                            st.error(f"❌ {criterion}")

	                except Exception as e:
	                    # UI boundary: surface evaluation failures in the page.
	                    st.error(f"Evaluation error: {str(e)}")

	# Tab 4: Version Explorer
	def _version_sort_key(version):
	    """Return a sort key that orders dotted versions numerically.

	    Each dot-separated part that is purely decimal is compared as an
	    integer, so "1.10.0" sorts after "1.2.0"; any other part is compared as
	    text and sorts after numeric parts in the same position.
	    """
	    key = []
	    for part in str(version).split('.'):
	        if part.isdecimal():
	            key.append((0, int(part), ''))
	        else:
	            key.append((1, 0, part))
	    return key


	with tab4:
	    st.header("Version Explorer")

	    if not st.session_state.graph_manager:
	        st.warning("⚠️ Please initialize the systems first!")
	    else:
	        # Document selection
	        documents = st.session_state.graph_manager.get_all_documents()

	        if not documents:
	            st.info("No documents uploaded yet. Please upload documents in the 'Document Upload' tab.")
	        else:
	            selected_doc = st.selectbox("Select Document", documents)

	            if selected_doc:
	                # Get versions for selected document
	                versions = st.session_state.graph_manager.get_document_versions(selected_doc)
	                # Plain sorted() is lexicographic and would misplace
	                # multi-digit components; sort once with the numeric key.
	                ordered_versions = sorted(versions, key=_version_sort_key)

	                st.markdown(f"### 📚 {selected_doc}")
	                st.markdown(f"Total Versions: {len(versions)}")

	                # Version timeline
	                if len(versions) > 1:
	                    st.markdown("### 📅 Version Timeline")
	                    timeline_data = []
	                    for v in ordered_versions:
	                        version_info = st.session_state.graph_manager.get_version_info(
	                            selected_doc, v
	                        )
	                        timeline_data.append({
	                            'Version': v,
	                            'Date': version_info.get('timestamp', 'N/A'),
	                        })

	                    df = pd.DataFrame(timeline_data)
	                    st.dataframe(df, use_container_width=True)

	                # Version comparison
	                st.markdown("### 🔄 Version Comparison")
	                col1, col2 = st.columns(2)

	                with col1:
	                    version1 = st.selectbox("Version 1", ordered_versions, index=0)
	                with col2:
	                    # Default to the second version when there is one; the
	                    # max() keeps the index non-negative for an empty list.
	                    version2 = st.selectbox(
	                        "Version 2",
	                        ordered_versions,
	                        index=min(1, max(len(ordered_versions) - 1, 0)),
	                    )

	                if version1 and version2 and version1 != version2:
	                    if st.button("Compare Versions"):
	                        with st.spinner("Computing differences..."):
	                            changes = st.session_state.graph_manager.get_changes_between_versions(
	                                selected_doc, version1, version2
	                            )

	                        st.markdown("### 📝 Changes Detected")

	                        # Diff text originates from uploaded documents, so
	                        # it is escaped before being embedded in raw HTML.
	                        if changes['additions']:
	                            st.markdown("#### ➕ Additions")
	                            for add in changes['additions']:
	                                st.markdown(
	                                    f'<div class="diff-added">{html.escape(str(add))}</div>',
	                                    unsafe_allow_html=True,
	                                )

	                        if changes['deletions']:
	                            st.markdown("#### ➖ Deletions")
	                            for delete in changes['deletions']:
	                                st.markdown(
	                                    f'<div class="diff-removed">{html.escape(str(delete))}</div>',
	                                    unsafe_allow_html=True,
	                                )

	                        if changes['modifications']:
	                            st.markdown("#### 🔄 Modifications")
	                            for mod in changes['modifications']:
	                                st.markdown(f"- {mod}")

	                        # Visualize changes
	                        st.markdown("### 📊 Change Statistics")
	                        change_stats = {
	                            'Additions': len(changes['additions']),
	                            'Deletions': len(changes['deletions']),
	                            'Modifications': len(changes['modifications']),
	                        }

	                        fig = px.bar(
	                            x=list(change_stats.keys()),
	                            y=list(change_stats.values()),
	                            title=f"Changes from {version1} to {version2}",
	                            labels={'x': 'Change Type', 'y': 'Count'},
	                        )
	                        st.plotly_chart(fig, use_container_width=True)

	# Tab 5: Analytics
	# Summarises the current session: headline counters, query-type mix,
	# response-time trend, rating histogram and CSV exports.
	with tab5:
	    st.header("System Analytics")

	    history = st.session_state.chat_history
	    feedback_entries = st.session_state.feedback_data

	    # System statistics: values are computed first, then laid out in order.
	    rating_text = "N/A"
	    if feedback_entries:
	        avg_rating = sum(item['rating'] for item in feedback_entries) / len(feedback_entries)
	        rating_text = f"{avg_rating:.2f} / 5"

	    latency_text = "N/A"
	    if history:
	        avg_response_time = sum(item['vrag_time'] for item in history) / len(history)
	        latency_text = f"{avg_response_time:.3f}s"

	    headline = [
	        ("Total Queries", len(history)),
	        ("Avg Rating", rating_text),
	        ("Avg Response Time", latency_text),
	        ("Total Documents", len(st.session_state.uploaded_files)),
	    ]
	    for column, (label, value) in zip(st.columns(4), headline):
	        with column:
	            st.metric(label, value)

	    # Query type distribution
	    if history:
	        st.markdown("### 📊 Query Type Distribution")
	        asked_types = [item['query_type'] for item in history]
	        # dict.fromkeys keeps first-seen order while removing duplicates.
	        distinct_types = list(dict.fromkeys(asked_types))

	        fig = px.pie(
	            values=[asked_types.count(kind) for kind in distinct_types],
	            names=distinct_types,
	            title="Distribution of Query Types"
	        )
	        st.plotly_chart(fig, use_container_width=True)

	        # Response time trend (needs at least two points to be a trend)
	        if len(history) > 1:
	            st.markdown("### ⏱️ Response Time Trend")
	            trend = go.Scatter(
	                y=[item['vrag_time'] for item in history],
	                mode='lines+markers',
	                name='Response Time'
	            )
	            fig = go.Figure(data=trend)
	            fig.update_layout(
	                title="Response Time Over Queries",
	                xaxis_title="Query Number",
	                yaxis_title="Response Time (s)"
	            )
	            st.plotly_chart(fig, use_container_width=True)

	    # Feedback analysis
	    if feedback_entries:
	        st.markdown("### 📝 User Feedback Analysis")

	        # Rating distribution, bars in first-seen order of the ratings.
	        given_ratings = [item['rating'] for item in feedback_entries]
	        distinct_ratings = list(dict.fromkeys(given_ratings))

	        fig = go.Figure(data=[
	            go.Bar(
	                x=distinct_ratings,
	                y=[given_ratings.count(score) for score in distinct_ratings],
	            )
	        ])
	        fig.update_layout(
	            title="Rating Distribution",
	            xaxis_title="Rating",
	            yaxis_title="Count"
	        )
	        st.plotly_chart(fig, use_container_width=True)

	    # Export analytics: each export button reveals a CSV download button
	    # for the run in which it was clicked, provided there is data.
	    st.markdown("### 💾 Export Data")
	    col1, col2 = st.columns(2)

	    with col1:
	        if st.button("Export Chat History") and history:
	            history_csv = pd.DataFrame(history).to_csv(index=False)
	            st.download_button(
	                "Download CSV",
	                history_csv,
	                "chat_history.csv",
	                "text/csv"
	            )

	    with col2:
	        if st.button("Export Feedback Data") and feedback_entries:
	            feedback_csv = pd.DataFrame(feedback_entries).to_csv(index=False)
	            st.download_button(
	                "Download CSV",
	                feedback_csv,
	                "feedback_data.csv",
	                "text/csv"
	            )

	# Tab 6: Multi-User Management
	# Shows the current session's identity and knowledge base, and offers
	# session reset and export.
	with tab6:
	    st.header("Multi-User Management")

	    st.markdown("""
	This section demonstrates VersionRAG's multi-user capabilities with logical data separation
	and persistent knowledge base management.
	""")

	    # User session info
	    st.markdown("### 👤 Current Session")
	    col1, col2, col3 = st.columns(3)

	    with col1:
	        st.info(f"User ID: {st.session_state.user_id[:16]}...")
	    with col2:
	        st.info(f"Documents: {len(st.session_state.uploaded_files)}")
	    with col3:
	        st.info(f"Queries: {len(st.session_state.chat_history)}")

	    # Data isolation demonstration
	    st.markdown("### 🔒 Data Isolation")
	    st.markdown("""
	Each user's knowledge base is logically separated using `tenant_id` metadata in ChromaDB.
	This ensures:
	- No data leakage between users
	- Independent query results
	- Isolated document management
	""")

	    # Knowledge base status
	    st.markdown("### 📚 Knowledge Base Status")

	    if st.session_state.uploaded_files:
	        kb_data = [
	            {
	                'File': filename,
	                'Version': info['version'],
	                'Domain': info['domain'],
	                'Topic': info['topic'],
	                'Uploaded': info['timestamp'],
	                'Hash': info['hash'][:12] + "...",
	            }
	            for filename, info in st.session_state.uploaded_files.items()
	        ]

	        df = pd.DataFrame(kb_data)
	        st.dataframe(df, use_container_width=True)

	        # Persistent storage info
	        st.success("""
	✅ Persistent Storage Active
	- All documents are stored with file hash tracking
	- Unchanged files skip re-indexing
	- Automatic diff-based updates for modified files
	""")
	    else:
	        st.info("No documents in knowledge base. Upload documents to get started.")

	    # Session management
	    st.markdown("### 🔄 Session Management")

	    col1, col2 = st.columns(2)

	    with col1:
	        # The confirmation must exist BEFORE the button is pressed: a button
	        # is True only during the run caused by its click, so a checkbox
	        # created inside that branch could never be ticked in the same run.
	        confirm_reset = st.checkbox("Confirm session reset")
	        if st.button("🆕 Create New Session"):
	            if confirm_reset:
	                st.session_state.user_id = str(uuid.uuid4())
	                st.session_state.version_rag = None
	                st.session_state.baseline_rag = None
	                st.session_state.graph_manager = None
	                st.session_state.uploaded_files = {}
	                st.session_state.chat_history = []
	                # Also drop data derived from the old session so it does
	                # not show up in the new session's analytics and exports.
	                st.session_state.feedback_data = []
	                st.session_state.evaluation_results = None
	                # Rerun immediately so the whole page reflects the new id;
	                # a message emitted before the rerun would not be displayed.
	                st.rerun()
	            else:
	                st.warning("Please tick 'Confirm session reset' first.")

	    with col2:
	        if st.button("💾 Export Session Data"):
	            session_data = {
	                'user_id': st.session_state.user_id,
	                'uploaded_files': st.session_state.uploaded_files,
	                'chat_history': st.session_state.chat_history,
	                'feedback_data': st.session_state.feedback_data,
	                'timestamp': datetime.now().isoformat(),
	            }

	            json_str = json.dumps(session_data, indent=2)
	            st.download_button(
	                "Download Session JSON",
	                json_str,
	                f"session_{st.session_state.user_id[:8]}.json",
	                "application/json",
	            )

	    # UX Metrics
	    st.markdown("### 📊 UX Metrics")

	    col1, col2, col3 = st.columns(3)

	    with col1:
	        # NOTE(review): placeholder. Re-uploads are not counted anywhere in
	        # this script, so the metric always shows 0.
	        reupload_count = 0
	        st.metric("Reupload Count", reupload_count,
	                  help="Number of times files were reuploaded")

	    with col2:
	        if st.session_state.chat_history:
	            avg_response = (
	                sum(c['vrag_time'] for c in st.session_state.chat_history)
	                / len(st.session_state.chat_history)
	            )
	            st.metric("Avg Response Time", f"{avg_response:.3f}s")
	        else:
	            st.metric("Avg Response Time", "N/A")

	    with col3:
	        cross_contamination = 0  # Hard-coded; this would be detected in production
	        st.metric("Cross-User Contamination", cross_contamination,
	                  help="Number of cross-user data leakage incidents")

	# Footer: a divider followed by a centred credits block rendered as raw HTML.
	FOOTER_HTML = """
	<div style='text-align: center; color: #666;'>
	<p>VersionRAG - Version-Aware Retrieval-Augmented Generation System</p>
	<p>Built with Streamlit, LangChain, and ChromaDB</p>
	</div>
	"""
	st.markdown("---")
	st.markdown(FOOTER_HTML, unsafe_allow_html=True)