Spaces:
Build error
Build error
Changes Pro max
Browse files — This view is limited to 50 files because it contains too many changes. See raw diff
- .env.example +21 -8
- .gitignore +25 -3
- Dockerfile +37 -9
- README.md +46 -195
- backend/app/__init__.py +1 -0
- backend/app/auth.py +89 -0
- backend/app/config.py +59 -0
- backend/app/database.py +40 -0
- backend/app/main.py +137 -0
- backend/app/models.py +62 -0
- backend/app/rag/__init__.py +1 -0
- backend/app/rag/agent.py +214 -0
- backend/app/rag/chunker.py +107 -0
- backend/app/rag/embeddings.py +45 -0
- backend/app/rag/prompts.py +53 -0
- backend/app/rag/retriever.py +96 -0
- backend/app/rag/vectorstore.py +186 -0
- backend/app/routes/__init__.py +1 -0
- backend/app/routes/auth.py +73 -0
- backend/app/routes/chat.py +202 -0
- backend/app/routes/documents.py +245 -0
- backend/app/schemas.py +98 -0
- backend/requirements.txt +42 -0
- docker-compose.yml +10 -22
- frontend/.gitignore +41 -0
- frontend/AGENTS.md +5 -0
- frontend/CLAUDE.md +1 -0
- frontend/README.md +36 -0
- frontend/components.json +25 -0
- frontend/eslint.config.mjs +18 -0
- frontend/next.config.ts +10 -0
- frontend/package-lock.json +0 -0
- frontend/package.json +38 -0
- frontend/postcss.config.mjs +7 -0
- frontend/public/file.svg +1 -0
- frontend/public/globe.svg +1 -0
- frontend/public/next.svg +1 -0
- frontend/public/vercel.svg +1 -0
- frontend/public/window.svg +1 -0
- frontend/src/app/dashboard/page.tsx +121 -0
- frontend/src/app/favicon.ico +0 -0
- frontend/src/app/globals.css +215 -0
- frontend/src/app/layout.tsx +36 -0
- frontend/src/app/login/page.tsx +118 -0
- frontend/src/app/page.tsx +121 -0
- frontend/src/app/register/page.tsx +133 -0
- frontend/src/components/chat/ChatPanel.tsx +250 -0
- frontend/src/components/chat/MessageBubble.tsx +61 -0
- frontend/src/components/chat/SourceCard.tsx +101 -0
- frontend/src/components/document/DocumentSidebar.tsx +209 -0
.env.example
CHANGED
|
@@ -1,11 +1,24 @@
|
|
| 1 |
-
|
|
|
|
|
|
|
| 2 |
|
| 3 |
-
|
| 4 |
-
|
| 5 |
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
|
|
|
| 9 |
|
| 10 |
-
|
| 11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ── App Config ───────────────────────────────────────
|
| 2 |
+
SECRET_KEY=change-me-in-production
|
| 3 |
+
DATABASE_URL=sqlite:///./data/app.db
|
| 4 |
|
| 5 |
+
# ── HuggingFace (Required for LLM) ──────────────────
|
| 6 |
+
HF_TOKEN=your_huggingface_token_here
|
| 7 |
|
| 8 |
+
# ── LLM Model (Optional — defaults shown) ───────────
|
| 9 |
+
# LLM_MODEL=mistralai/Mistral-7B-Instruct-v0.2
|
| 10 |
+
# LLM_TEMPERATURE=0.3
|
| 11 |
+
# LLM_MAX_NEW_TOKENS=1024
|
| 12 |
|
| 13 |
+
# ── Embeddings (Optional — defaults shown) ───────────
|
| 14 |
+
# EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2
|
| 15 |
+
|
| 16 |
+
# ── RAG Config (Optional — defaults shown) ───────────
|
| 17 |
+
# CHUNK_SIZE=1000
|
| 18 |
+
# CHUNK_OVERLAP=200
|
| 19 |
+
# TOP_K_RETRIEVAL=10
|
| 20 |
+
# TOP_K_RERANK=5
|
| 21 |
+
|
| 22 |
+
# ── Upload (Optional) ───────────────────────────────
|
| 23 |
+
# UPLOAD_DIR=./data/uploads
|
| 24 |
+
# MAX_FILE_SIZE_MB=50
|
.gitignore
CHANGED
|
@@ -1,8 +1,30 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
vectorstore/
|
| 4 |
__pycache__/
|
| 5 |
*.pyc
|
| 6 |
.venv/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
|
|
|
|
|
|
|
|
|
|
| 8 |
|
|
|
|
|
|
|
|
|
| 1 |
+
# Dependencies
|
| 2 |
+
node_modules/
|
|
|
|
| 3 |
__pycache__/
|
| 4 |
*.pyc
|
| 5 |
.venv/
|
| 6 |
+
*.egg-info/
|
| 7 |
+
|
| 8 |
+
# Data (runtime generated)
|
| 9 |
+
data/
|
| 10 |
+
*.db
|
| 11 |
+
|
| 12 |
+
# Environment
|
| 13 |
+
.env
|
| 14 |
+
backend/.env
|
| 15 |
+
|
| 16 |
+
# Build outputs
|
| 17 |
+
frontend/out/
|
| 18 |
+
frontend/.next/
|
| 19 |
+
.next/
|
| 20 |
+
|
| 21 |
+
# OS
|
| 22 |
+
.DS_Store
|
| 23 |
+
Thumbs.db
|
| 24 |
|
| 25 |
+
# IDE
|
| 26 |
+
.vscode/
|
| 27 |
+
.idea/
|
| 28 |
|
| 29 |
+
# Misc
|
| 30 |
+
*.log
|
Dockerfile
CHANGED
|
@@ -1,4 +1,22 @@
|
|
| 1 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
WORKDIR /app
|
| 4 |
|
|
@@ -9,15 +27,25 @@ RUN apt-get update && apt-get install -y \
|
|
| 9 |
--no-install-recommends && \
|
| 10 |
rm -rf /var/lib/apt/lists/*
|
| 11 |
|
| 12 |
-
# Install
|
| 13 |
-
COPY requirements.txt .
|
| 14 |
RUN pip install --no-cache-dir -r requirements.txt
|
| 15 |
|
| 16 |
-
# Copy
|
| 17 |
-
COPY
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
|
| 19 |
-
#
|
| 20 |
-
|
| 21 |
|
| 22 |
-
#
|
| 23 |
-
CMD ["
|
|
|
|
| 1 |
+
# ────────────────────────────────────────────────────────
|
| 2 |
+
# Stage 1: Build Next.js Frontend
|
| 3 |
+
# ────────────────────────────────────────────────────────
|
| 4 |
+
FROM node:20-alpine AS frontend-builder
|
| 5 |
+
|
| 6 |
+
WORKDIR /app/frontend
|
| 7 |
+
|
| 8 |
+
# Install dependencies
|
| 9 |
+
COPY frontend/package.json frontend/package-lock.json ./
|
| 10 |
+
RUN npm ci --no-audit
|
| 11 |
+
|
| 12 |
+
# Copy frontend source and build
|
| 13 |
+
COPY frontend/ ./
|
| 14 |
+
RUN npm run build
|
| 15 |
+
|
| 16 |
+
# ────────────────────────────────────────────────────────
|
| 17 |
+
# Stage 2: Python Backend + Serve Frontend
|
| 18 |
+
# ────────────────────────────────────────────────────────
|
| 19 |
+
FROM python:3.11-slim
|
| 20 |
|
| 21 |
WORKDIR /app
|
| 22 |
|
|
|
|
| 27 |
--no-install-recommends && \
|
| 28 |
rm -rf /var/lib/apt/lists/*
|
| 29 |
|
| 30 |
+
# Install Python dependencies
|
| 31 |
+
COPY backend/requirements.txt ./requirements.txt
|
| 32 |
RUN pip install --no-cache-dir -r requirements.txt
|
| 33 |
|
| 34 |
+
# Copy backend code
|
| 35 |
+
COPY backend/ ./backend/
|
| 36 |
+
|
| 37 |
+
# Copy frontend build from stage 1
|
| 38 |
+
COPY --from=frontend-builder /app/frontend/out ./frontend/out
|
| 39 |
+
|
| 40 |
+
# Create data directories
|
| 41 |
+
RUN mkdir -p data/uploads data/chroma_db
|
| 42 |
+
|
| 43 |
+
# Copy entrypoint
|
| 44 |
+
COPY start.sh ./start.sh
|
| 45 |
+
RUN chmod +x start.sh
|
| 46 |
|
| 47 |
+
# HuggingFace Spaces requires port 7860
|
| 48 |
+
EXPOSE 7860
|
| 49 |
|
| 50 |
+
# Entrypoint
|
| 51 |
+
CMD ["./start.sh"]
|
README.md
CHANGED
|
@@ -1,215 +1,66 @@
|
|
| 1 |
-
# π RAG PDF Assistant
|
| 2 |
-
|
| 3 |
-
A **Retrieval-Augmented Generation (RAG)** document assistant built with Flask, Pinecone, Gemini Embeddings, Groq API, and Google Gemini. Upload PDFs, DOCX, TXT, or MD files and intuitively chat with them using modern AI models.
|
| 4 |
-
|
| 5 |
---
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
- π€ **Data Isolation**: Per-user namespaces in Pinecone for complete privacy.
|
| 16 |
-
- π‘οΈ **Admin Dashboard**: Admin panel to monitor users and uploaded files.
|
| 17 |
-
- ποΈ **Data Management**: Intuitive UI to delete files and clear vector stores.
|
| 18 |
-
- π± **Responsive UI**: Minimal and modern front-end for seamless user experience.
|
| 19 |
-
- βοΈ **Lightweight & Cloud-Native**: Zero local ML models β all embeddings and LLM calls are cloud-based API calls, requiring minimal server RAM.
|
| 20 |
-
|
| 21 |
---
|
| 22 |
|
| 23 |
-
#
|
| 24 |
|
| 25 |
-
|
| 26 |
-
|-------|------------|
|
| 27 |
-
| **Backend** | Flask (Python) |
|
| 28 |
-
| **Authentication** | Flask-Login + Flask-Dance (Google OAuth) |
|
| 29 |
-
| **Embeddings** | Google Gemini (`gemini-embedding-001`) |
|
| 30 |
-
| **Vector Store** | Pinecone (Serverless) |
|
| 31 |
-
| **LLMs** | Groq API (Llama 3.3 70B) & Google Gemini |
|
| 32 |
-
| **User Database** | MongoDB Atlas |
|
| 33 |
-
| **Frontend** | HTML, CSS, Vanilla JS |
|
| 34 |
|
| 35 |
-
|
| 36 |
|
| 37 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
|
| 39 |
-
|
| 40 |
-
RAG_App/
|
| 41 |
-
βββ app.py # Main Flask application & routes
|
| 42 |
-
βββ models.py # MongoDB user model & encrypted key storage
|
| 43 |
-
βββ config.py # Configuration & env variables
|
| 44 |
-
βββ requirements.txt # Python dependencies
|
| 45 |
-
βββ render.yaml # Render deployment blueprint
|
| 46 |
-
βββ Dockerfile # Docker containerization
|
| 47 |
-
βββ .env.example # Environment variable template
|
| 48 |
-
βββ rag/
|
| 49 |
-
β βββ chunker.py # Document parsing & chunking logic
|
| 50 |
-
β βββ embeddings.py # Gemini embeddings + Pinecone upsert
|
| 51 |
-
β βββ retriever.py # Pinecone semantic search & retrieval
|
| 52 |
-
β βββ generator.py # LLM integration for answer generation
|
| 53 |
-
βββ templates/
|
| 54 |
-
β βββ index.html # File management & upload dashboard
|
| 55 |
-
β βββ chat.html # RAG chat interface
|
| 56 |
-
β βββ login.html # User login page
|
| 57 |
-
β βββ register.html # User registration page
|
| 58 |
-
β βββ admin.html # Admin dashboard
|
| 59 |
-
β βββ profile.html # User profile & API key settings
|
| 60 |
-
βββ static/ # Static assets (CSS, JS, profile_pics)
|
| 61 |
-
βββ uploads/ # User-uploaded files (isolated per user)
|
| 62 |
-
βββ .github/workflows/
|
| 63 |
-
βββ devsecops.yml # Security scanning pipeline
|
| 64 |
-
βββ deploy.yml # Docker build & GHCR push pipeline
|
| 65 |
-
```
|
| 66 |
-
|
| 67 |
-
---
|
| 68 |
-
|
| 69 |
-
## βοΈ Setup & Installation
|
| 70 |
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
|
| 77 |
-
##
|
| 78 |
-
```bash
|
| 79 |
-
python -m venv .venv
|
| 80 |
|
| 81 |
-
|
| 82 |
-
.
|
|
|
|
|
|
|
| 83 |
|
| 84 |
-
#
|
| 85 |
-
source .venv/bin/activate
|
| 86 |
-
```
|
| 87 |
|
| 88 |
-
### 3. Install Dependencies
|
| 89 |
```bash
|
|
|
|
|
|
|
| 90 |
pip install -r requirements.txt
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
### 4. Configure Environment Variables
|
| 94 |
-
Create a `.env` file using the template:
|
| 95 |
-
```bash
|
| 96 |
-
cp .env.example .env
|
| 97 |
-
```
|
| 98 |
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
SECRET_KEY=<your-secret-key>
|
| 102 |
-
ENCRYPTION_KEY=<your-fernet-key>
|
| 103 |
-
MONGO_URI=<your-mongodb-atlas-uri>
|
| 104 |
-
GOOGLE_CLIENT_ID=<your-google-client-id>
|
| 105 |
-
GOOGLE_CLIENT_SECRET=<your-google-client-secret>
|
| 106 |
```
|
| 107 |
|
| 108 |
-
|
| 109 |
-
> ```bash
|
| 110 |
-
> # SECRET_KEY
|
| 111 |
-
> python -c "import secrets; print(secrets.token_hex(32))"
|
| 112 |
-
> # ENCRYPTION_KEY
|
| 113 |
-
> python -c "from cryptography.fernet import Fernet; print(Fernet.generate_key().decode())"
|
| 114 |
-
> ```
|
| 115 |
-
|
| 116 |
-
### 5. Run the Application
|
| 117 |
-
```bash
|
| 118 |
-
python app.py
|
| 119 |
-
```
|
| 120 |
-
|
| 121 |
-
### 6. Access in Browser
|
| 122 |
-
Visit `http://localhost:5000` in your web browser.
|
| 123 |
-
|
| 124 |
-
---
|
| 125 |
-
|
| 126 |
-
## π User Setup (Per-User API Keys)
|
| 127 |
-
|
| 128 |
-
After registering/logging in, each user must add their own API keys on the **Profile** page:
|
| 129 |
-
|
| 130 |
-
| Service | Required? | Where to Get | Notes |
|
| 131 |
-
|---------|-----------|--------------|-------|
|
| 132 |
-
| **Gemini API Key** | β
Required | [aistudio.google.com](https://aistudio.google.com) | Free β used for embeddings & chat |
|
| 133 |
-
| **Pinecone API Key** | β
Required | [app.pinecone.io](https://app.pinecone.io) | Free tier available |
|
| 134 |
-
| **Pinecone Index Name** | β
Required | Pinecone Dashboard | Create: dim `3072`, metric `cosine` |
|
| 135 |
-
| **Groq API Key** | Optional | [console.groq.com](https://console.groq.com) | For Llama 3 chat generation |
|
| 136 |
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
3. Copy your API key and index name into the Profile page
|
| 143 |
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
## π Google OAuth Setup
|
| 147 |
-
1. Go to **Google Cloud Console** β [console.cloud.google.com](https://console.cloud.google.com)
|
| 148 |
-
2. Create a new project and navigate to **APIs & Services** β **Credentials**
|
| 149 |
-
3. Click **Create Credentials** β **OAuth Client ID**
|
| 150 |
-
4. Set the Authorized redirect URI to: `http://localhost:5000/login/google/authorized`
|
| 151 |
-
5. Copy your `Client ID` and `Client Secret` into the `.env` file
|
| 152 |
-
|
| 153 |
-
---
|
| 154 |
-
|
| 155 |
-
## π How It Works (The RAG Pipeline)
|
| 156 |
-
|
| 157 |
-
1. **Upload**: User uploads a document (PDF, DOCX, TXT, or MD).
|
| 158 |
-
2. **Chunking**: The document is parsed and split into manageable textual chunks.
|
| 159 |
-
3. **Embedding**: Chunks are converted to 3072-dimensional vectors using `gemini-embedding-001`.
|
| 160 |
-
4. **Vector Storage**: Vectors are stored in the user's Pinecone namespace.
|
| 161 |
-
5. **Querying**: The user submits a question.
|
| 162 |
-
6. **Retrieval**: Pinecone retrieves the most semantically relevant chunks.
|
| 163 |
-
7. **Generation**: The retrieved context is passed to the selected LLM (Groq or Gemini) to generate an accurate, grounded answer.
|
| 164 |
-
|
| 165 |
-
---
|
| 166 |
-
|
| 167 |
-
## π Deployment
|
| 168 |
-
|
| 169 |
-
### Deploy to Render (Recommended β Free)
|
| 170 |
-
1. Push your code to GitHub
|
| 171 |
-
2. Go to [Render](https://dashboard.render.com) β **New** β **Web Service**
|
| 172 |
-
3. Connect your GitHub repository
|
| 173 |
-
4. Render auto-detects `render.yaml` and configures everything
|
| 174 |
-
5. Add environment variables: `SECRET_KEY`, `ENCRYPTION_KEY`, `MONGO_URI`, `GOOGLE_CLIENT_ID`, `GOOGLE_CLIENT_SECRET`
|
| 175 |
-
6. Update Google OAuth redirect URI to: `https://your-app.onrender.com/login/google/authorized`
|
| 176 |
-
7. Deploy!
|
| 177 |
-
|
| 178 |
-
### Deploy with Docker
|
| 179 |
-
```bash
|
| 180 |
-
docker build -t rag-app .
|
| 181 |
-
docker run -p 5000:5000 --env-file .env rag-app
|
| 182 |
-
```
|
| 183 |
-
|
| 184 |
-
---
|
| 185 |
-
|
| 186 |
-
## π DevSecOps Pipeline
|
| 187 |
-
|
| 188 |
-
| Tool | Purpose |
|
| 189 |
-
|------|---------|
|
| 190 |
-
| `GitHub Actions` | CI/CD Pipeline |
|
| 191 |
-
| `Bandit` | SAST β Python security vulnerability scanning |
|
| 192 |
-
| `Gitleaks` | Hardcoded secret and credential detection |
|
| 193 |
-
| `Trivy` | Container and dependency vulnerability checking |
|
| 194 |
-
| `Snyk` | Advanced dependency vulnerability scanning |
|
| 195 |
-
| `OWASP ZAP` | DAST β Dynamic web security scanning |
|
| 196 |
-
| `SonarCloud` | Overall code quality and security analysis |
|
| 197 |
-
| `GHCR` | Docker image hosting via GitHub Container Registry |
|
| 198 |
-
|
| 199 |
-
---
|
| 200 |
-
|
| 201 |
-
## π¨βπ» Author
|
| 202 |
-
|
| 203 |
-
- **Name:** Paramjit Singh (param20h)
|
| 204 |
-
|
| 205 |
-
---
|
| 206 |
-
|
| 207 |
-
## π License
|
| 208 |
-
|
| 209 |
-
This project is licensed under the **MIT License**. Check the `LICENSE` file for more details.
|
| 210 |
-
|
| 211 |
-
---
|
| 212 |
-
|
| 213 |
-
## β Show Some Support!
|
| 214 |
|
| 215 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: Document AI Analyst
|
| 3 |
+
emoji: 🧠
|
| 4 |
+
colorFrom: indigo
|
| 5 |
+
colorTo: purple
|
| 6 |
+
sdk: docker
|
| 7 |
+
app_port: 7860
|
| 8 |
+
pinned: true
|
| 9 |
+
license: mit
|
| 10 |
+
short_description: Enterprise Agentic RAG — upload PDFs and chat with AI
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
---
|
| 12 |
|
| 13 |
+
# 🧠 Document AI Analyst — Enterprise Agentic RAG System
|
| 14 |
|
| 15 |
+
Upload complex PDFs, financial reports, legal contracts, or research papers and chat with an AI agent that provides **accurate, cited insights** powered by Retrieval-Augmented Generation.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
|
| 17 |
+
## ✨ Features
|
| 18 |
|
| 19 |
+
- **Multi-Format Upload** — PDF, DOCX, TXT, Markdown with smart chunking
|
| 20 |
+
- **Semantic Search** — Two-stage retrieval with cross-encoder reranking
|
| 21 |
+
- **Streaming Chat** — Real-time AI responses with inline source citations
|
| 22 |
+
- **Data Isolation** — Per-user vector collections for complete privacy
|
| 23 |
+
- **Open-Source LLMs** — Powered by Mistral-7B and HuggingFace ecosystem
|
| 24 |
|
| 25 |
+
## 🏗️ Architecture
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
|
| 27 |
+
| Layer | Technology |
|
| 28 |
+
|---|---|
|
| 29 |
+
| **Frontend** | Next.js 16, Tailwind CSS v4, Shadcn UI v2 |
|
| 30 |
+
| **Backend** | FastAPI, SQLAlchemy, JWT Auth |
|
| 31 |
+
| **Embeddings** | sentence-transformers/all-MiniLM-L6-v2 (local) |
|
| 32 |
+
| **Vector Store** | ChromaDB (persistent, per-user collections) |
|
| 33 |
+
| **Reranker** | cross-encoder/ms-marco-MiniLM-L-6-v2 |
|
| 34 |
+
| **LLM** | Mistral-7B-Instruct via HuggingFace Inference API |
|
| 35 |
+
| **Deployment** | Docker multi-stage build on HuggingFace Spaces |
|
| 36 |
|
| 37 |
+
## 🚀 Quick Start
|
|
|
|
|
|
|
| 38 |
|
| 39 |
+
1. **Register** an account
|
| 40 |
+
2. **Upload** a PDF document
|
| 41 |
+
3. **Wait** for processing (chunking + embedding)
|
| 42 |
+
4. **Ask** questions and get cited answers!
|
| 43 |
|
| 44 |
+
## 🔧 Local Development
|
|
|
|
|
|
|
| 45 |
|
|
|
|
| 46 |
```bash
|
| 47 |
+
# Backend
|
| 48 |
+
cd backend && python3 -m venv .venv && source .venv/bin/activate
|
| 49 |
pip install -r requirements.txt
|
| 50 |
+
uvicorn app.main:app --port 7860
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
|
| 52 |
+
# Frontend
|
| 53 |
+
cd frontend && npm install && npm run dev
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
```
|
| 55 |
|
| 56 |
+
## 📦 Environment Variables
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
|
| 58 |
+
| Variable | Required | Description |
|
| 59 |
+
|---|---|---|
|
| 60 |
+
| `HF_TOKEN` | ✅ | HuggingFace API token for LLM inference |
|
| 61 |
+
| `SECRET_KEY` | ✅ | JWT signing secret |
|
| 62 |
+
| `DATABASE_URL` | ❌ | SQLite path (default: `sqlite:///./data/app.db`) |
|
|
|
|
| 63 |
|
| 64 |
+
## 🛠️ Tech Stack
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
|
| 66 |
+
Built with: **FastAPI** • **LangChain** • **ChromaDB** • **HuggingFace** • **Next.js 16** • **Tailwind CSS** • **Shadcn UI**
|
backend/app/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# Enterprise Agentic RAG System — Backend
|
backend/app/auth.py
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
JWT authentication — password hashing, token creation/verification, and FastAPI auth dependencies.
|
| 3 |
+
"""
|
| 4 |
+
from datetime import datetime, timedelta, timezone
|
| 5 |
+
from typing import Optional
|
| 6 |
+
|
| 7 |
+
import jwt
|
| 8 |
+
import bcrypt
|
| 9 |
+
from fastapi import Depends, HTTPException, status
|
| 10 |
+
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
|
| 11 |
+
from sqlalchemy.orm import Session
|
| 12 |
+
|
| 13 |
+
from app.config import get_settings
|
| 14 |
+
from app.database import get_db
|
| 15 |
+
from app.models import User
|
| 16 |
+
|
| 17 |
+
settings = get_settings()
|
| 18 |
+
security = HTTPBearer()
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
# ── Password Hashing ─────────────────────────────────
|
| 22 |
+
|
| 23 |
+
def hash_password(password: str) -> str:
    """Hash ``password`` with bcrypt and a freshly generated salt.

    Args:
        password: Plain-text password; it is UTF-8 encoded before hashing.

    Returns:
        The bcrypt hash, decoded to a ``str`` so it can be stored as text.
    """
    secret = password.encode("utf-8")
    digest = bcrypt.hashpw(secret, bcrypt.gensalt())
    return digest.decode("utf-8")
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
def verify_password(plain: str, hashed: str) -> bool:
    """Check a plain-text password against a stored bcrypt hash.

    Args:
        plain: Password supplied by the caller.
        hashed: Stored hash, as produced by ``hash_password``.

    Returns:
        ``True`` when the password matches the hash, ``False`` otherwise,
        including when ``hashed`` is not a usable bcrypt hash.
    """
    try:
        return bcrypt.checkpw(plain.encode("utf-8"), hashed.encode("utf-8"))
    except ValueError:
        # bcrypt raises ValueError (e.g. "Invalid salt") when the stored value
        # is not a well-formed bcrypt hash. Treat that as a failed match so a
        # corrupt row does not surface as an unhandled error.
        return False
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
# ── JWT Token ────────────────────────────────────────
|
| 32 |
+
|
| 33 |
+
def create_token(user_id: str) -> str:
    """Create a signed JWT whose subject (``sub``) is ``user_id``.

    The token is signed with ``settings.SECRET_KEY`` using
    ``settings.JWT_ALGORITHM`` and expires ``settings.JWT_EXPIRY_HOURS``
    hours after it is issued.

    Args:
        user_id: Identifier stored in the ``sub`` claim.

    Returns:
        The encoded JWT.
    """
    # Read the clock once so that exp - iat is exactly the configured lifetime.
    issued_at = datetime.now(timezone.utc)
    payload = {
        "sub": user_id,
        "exp": issued_at + timedelta(hours=settings.JWT_EXPIRY_HOURS),
        "iat": issued_at,
    }
    return jwt.encode(payload, settings.SECRET_KEY, algorithm=settings.JWT_ALGORITHM)
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
def decode_token(token: str) -> Optional[str]:
    """Validate ``token`` and return its ``sub`` claim.

    Args:
        token: Encoded JWT, verified with ``settings.SECRET_KEY`` and
            ``settings.JWT_ALGORITHM``.

    Returns:
        The value of the ``sub`` claim, or ``None`` when the token is expired
        or otherwise invalid (or carries no ``sub`` claim).
    """
    try:
        claims = jwt.decode(token, settings.SECRET_KEY, algorithms=[settings.JWT_ALGORITHM])
    except (jwt.ExpiredSignatureError, jwt.InvalidTokenError):
        # Expired and malformed tokens are reported the same way to callers.
        return None
    return claims.get("sub")
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
# ── FastAPI Dependencies ─────────────────────────────
|
| 55 |
+
|
| 56 |
+
def get_current_user(
    credentials: HTTPAuthorizationCredentials = Depends(security),
    db: Session = Depends(get_db),
) -> User:
    """FastAPI dependency: resolve the ``User`` named by the bearer token.

    Args:
        credentials: Bearer credentials extracted by ``HTTPBearer``.
        db: Database session for the current request.

    Returns:
        The ``User`` row whose ``id`` equals the token's subject.

    Raises:
        HTTPException: 401 when the token is invalid or expired, or when no
            user with the token's subject exists.
    """
    user_id = decode_token(credentials.credentials)

    if not user_id:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Invalid or expired token",
            headers={"WWW-Authenticate": "Bearer"},
        )

    user = db.query(User).filter(User.id == user_id).first()

    if not user:
        # Send the same challenge header as the other 401 above, so both
        # authentication failures look alike to clients.
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="User not found",
            headers={"WWW-Authenticate": "Bearer"},
        )

    return user
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
def get_admin_user(user: User = Depends(get_current_user)) -> User:
    """FastAPI dependency: allow the request only for admin users.

    Args:
        user: The authenticated user from ``get_current_user``.

    Returns:
        ``user`` unchanged when ``user.is_admin`` is truthy.

    Raises:
        HTTPException: 403 when the user is not an admin.
    """
    if user.is_admin:
        return user
    raise HTTPException(
        status_code=status.HTTP_403_FORBIDDEN,
        detail="Admin access required",
    )
|
backend/app/config.py
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Application configuration via pydantic-settings.
|
| 3 |
+
All config is loaded from environment variables with sensible defaults.
|
| 4 |
+
"""
|
| 5 |
+
import os
|
| 6 |
+
from pydantic_settings import BaseSettings
|
| 7 |
+
from functools import lru_cache
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
class Settings(BaseSettings):
    """Application settings with in-code defaults.

    The inner ``Config`` points pydantic-settings at a ``.env`` file (UTF-8)
    and tells it to ignore keys that are not declared here.
    """

    # ── App ──────────────────────────────────────────────
    APP_NAME: str = "Document AI Analyst"
    # Placeholder default; real deployments should supply their own value.
    SECRET_KEY: str = "change-me-in-production-please"
    DEBUG: bool = False

    # ── Database ─────────────────────────────────────────
    DATABASE_URL: str = "sqlite:///./data/app.db"

    # ── Auth ─────────────────────────────────────────────
    JWT_ALGORITHM: str = "HS256"
    JWT_EXPIRY_HOURS: int = 72

    # ── File Upload ──────────────────────────────────────
    UPLOAD_DIR: str = "./data/uploads"
    MAX_FILE_SIZE_MB: int = 50
    ALLOWED_EXTENSIONS: set = {"pdf", "docx", "txt", "md"}

    # ── RAG Pipeline ─────────────────────────────────────
    CHUNK_SIZE: int = 1000
    CHUNK_OVERLAP: int = 200
    TOP_K_RETRIEVAL: int = 10
    TOP_K_RERANK: int = 5

    # ── Embeddings (local HuggingFace model) ─────────────
    EMBEDDING_MODEL: str = "sentence-transformers/all-MiniLM-L6-v2"
    # NOTE(review): presumably the output width of EMBEDDING_MODEL; keep the
    # two in sync if the model is changed (confirm against the vector store).
    EMBEDDING_DIMENSION: int = 384

    # ── ChromaDB ─────────────────────────────────────────
    CHROMA_PERSIST_DIR: str = "./data/chroma_db"

    # ── LLM (HuggingFace Inference API) ──────────────────
    HF_TOKEN: str = ""
    LLM_MODEL: str = "mistralai/Mistral-7B-Instruct-v0.2"
    LLM_MAX_NEW_TOKENS: int = 1024
    LLM_TEMPERATURE: float = 0.3

    # ── Reranker ─────────────────────────────────────────
    RERANKER_MODEL: str = "cross-encoder/ms-marco-MiniLM-L-6-v2"

    class Config:
        env_file = ".env"
        env_file_encoding = "utf-8"
        extra = "ignore"
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
@lru_cache()
def get_settings() -> Settings:
    """Return the shared ``Settings`` instance.

    ``lru_cache`` memoizes the first call, so ``Settings()`` is built once and
    every later call returns that same object.
    """
    loaded = Settings()
    return loaded
|
backend/app/database.py
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
SQLAlchemy database setup with SQLite.
|
| 3 |
+
Uses synchronous SQLAlchemy for simplicity and compatibility.
|
| 4 |
+
"""
|
| 5 |
+
import os
|
| 6 |
+
from sqlalchemy import create_engine
|
| 7 |
+
from sqlalchemy.orm import sessionmaker, declarative_base
|
| 8 |
+
from app.config import get_settings
|
| 9 |
+
|
| 10 |
+
settings = get_settings()

# ── Ensure data directory exists ─────────────────────
# Only a file-backed SQLite URL ("sqlite:///<path>") has a directory to create.
# For any other URL (another backend, or an in-memory SQLite database), treating
# the URL text as a filesystem path would create meaningless directories.
_SQLITE_FILE_PREFIX = "sqlite:///"
_uses_sqlite = settings.DATABASE_URL.startswith("sqlite")

db_path = settings.DATABASE_URL.replace("sqlite:///", "")
if settings.DATABASE_URL.startswith(_SQLITE_FILE_PREFIX) and db_path != ":memory:":
    os.makedirs(os.path.dirname(db_path) or ".", exist_ok=True)

# ── Engine & Session ─────────────────────────────────
engine = create_engine(
    settings.DATABASE_URL,
    # check_same_thread is a SQLite-driver option; other drivers reject it.
    connect_args={"check_same_thread": False} if _uses_sqlite else {},
    echo=settings.DEBUG,
)

SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)

Base = declarative_base()
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def get_db():
    """FastAPI dependency that yields one database session.

    A new ``SessionLocal`` session is opened, handed to the caller, and closed
    in the ``finally`` clause once the caller is done, whether or not an
    exception was raised.
    """
    session = SessionLocal()
    try:
        yield session
    finally:
        session.close()
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
def init_db():
    """Create every table registered on ``Base`` (called at startup)."""
    # Imported here for its side effect: loading the module registers the ORM
    # models on Base.metadata before create_all runs.
    from app import models  # noqa: F401

    Base.metadata.create_all(bind=engine)
|
backend/app/main.py
ADDED
|
@@ -0,0 +1,137 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
FastAPI application entry point.
|
| 3 |
+
Mounts all routes, configures CORS, and serves the Next.js frontend build.
|
| 4 |
+
"""
|
| 5 |
+
import os
|
| 6 |
+
import logging
|
| 7 |
+
from contextlib import asynccontextmanager
|
| 8 |
+
|
| 9 |
+
from fastapi import FastAPI
|
| 10 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 11 |
+
from fastapi.staticfiles import StaticFiles
|
| 12 |
+
from fastapi.responses import FileResponse
|
| 13 |
+
|
| 14 |
+
from app.config import get_settings
|
| 15 |
+
from app.database import init_db
|
| 16 |
+
|
| 17 |
+
# Configure logging
|
| 18 |
+
logging.basicConfig(
|
| 19 |
+
level=logging.INFO,
|
| 20 |
+
format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
|
| 21 |
+
)
|
| 22 |
+
logger = logging.getLogger(__name__)
|
| 23 |
+
|
| 24 |
+
settings = get_settings()
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Run startup work before the app serves requests and log on shutdown.

    Startup: create database tables, make sure the upload and ChromaDB
    directories exist, and try to load the embedding model ahead of the first
    request. A failure to pre-load the model is logged as a warning and does
    not stop startup.
    """
    # ── Startup ──────────────────────────────────────
    logger.info(f"Starting {settings.APP_NAME}")

    init_db()
    logger.info("Database initialized")

    # Runtime data directories (uploads first, then the vector store).
    for directory in (settings.UPLOAD_DIR, settings.CHROMA_PERSIST_DIR):
        os.makedirs(directory, exist_ok=True)

    # Warm up the embedding model; best-effort only.
    try:
        from app.rag.embeddings import get_embedding_model

        get_embedding_model()
        logger.info("Embedding model pre-loaded")
    except Exception as exc:
        logger.warning(f"Failed to pre-load embedding model: {exc}")

    yield

    # ── Shutdown ─────────────────────────────────────
    logger.info("Shutting down")
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
# ── Create App ───────────────────────────────────────
|
| 56 |
+
app = FastAPI(
    title=settings.APP_NAME,
    # The dash in this description had been mis-encoded; restored to an em dash.
    description="Enterprise Agentic RAG System — Upload PDFs and chat with AI",
    version="2.0.0",
    lifespan=lifespan,
)

# ── CORS (allow frontend dev server) ─────────────────
# NOTE(review): "*" makes the two explicit localhost origins redundant, and a
# wildcard origin combined with allow_credentials=True is very permissive.
# Confirm whether credentialed cross-origin requests are actually needed before
# tightening this; left unchanged here to avoid breaking existing clients.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["http://localhost:3000", "http://localhost:7860", "*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# ── Mount API Routes ─────────────────────────────────
# Routers are imported after the app object is created.
from app.routes.auth import router as auth_router
from app.routes.documents import router as documents_router
from app.routes.chat import router as chat_router

app.include_router(auth_router, prefix="/api/v1")
app.include_router(documents_router, prefix="/api/v1")
app.include_router(chat_router, prefix="/api/v1")
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
# ── Health Check ─────────────────────────────────────
|
| 83 |
+
@app.get("/api/health")
def health_check():
    """Report a fixed "healthy" status with the app name and version."""
    report = {
        "status": "healthy",
        "app": settings.APP_NAME,
        "version": "2.0.0",
    }
    return report
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
# ── Serve Next.js Frontend (production) ──────────────
|
| 93 |
+
# Resolved to an absolute, symlink-free path so request paths can be checked
# for containment against it.
FRONTEND_BUILD_DIR = os.path.realpath(
    os.path.join(os.path.dirname(__file__), "..", "..", "frontend", "out")
)


def _find_frontend_file(full_path: str):
    """Return the build file that should answer ``full_path``, or ``None``.

    Candidates are tried in order: the exact path, the path with ``.html``
    appended, ``<path>/index.html``, and finally the root ``index.html``
    (SPA fallback). A candidate is accepted only if it is a regular file
    located inside ``FRONTEND_BUILD_DIR``.
    """
    candidates = (
        os.path.join(FRONTEND_BUILD_DIR, full_path),
        os.path.join(FRONTEND_BUILD_DIR, f"{full_path}.html"),
        os.path.join(FRONTEND_BUILD_DIR, full_path, "index.html"),
        os.path.join(FRONTEND_BUILD_DIR, "index.html"),
    )
    for candidate in candidates:
        resolved = os.path.realpath(candidate)
        # ``full_path`` comes straight from the URL. ".." segments or an
        # absolute path would otherwise let os.path.join point outside the
        # build directory, so reject anything that resolves elsewhere.
        if not resolved.startswith(FRONTEND_BUILD_DIR + os.sep):
            continue
        if os.path.isfile(resolved):
            return resolved
    return None


if os.path.exists(FRONTEND_BUILD_DIR):
    # Serve static assets (JS, CSS, images)
    app.mount("/_next", StaticFiles(directory=os.path.join(FRONTEND_BUILD_DIR, "_next")), name="next_static")

    # Serve other static files if they exist
    static_dir = os.path.join(FRONTEND_BUILD_DIR, "static")
    if os.path.exists(static_dir):
        app.mount("/static", StaticFiles(directory=static_dir), name="static")

    @app.get("/{full_path:path}")
    async def serve_frontend(full_path: str):
        """Serve the Next.js static export for any path not matched earlier.

        Tries the exact file, then ``.html``, then a directory ``index.html``,
        then the root ``index.html``. Returns ``{"error": "Not found"}`` when
        none of those exist inside the build directory.
        """
        found = _find_frontend_file(full_path)
        if found is not None:
            return FileResponse(found)
        return {"error": "Not found"}
else:
    logger.info("No frontend build found — running in API-only mode")

    @app.get("/")
    def root():
        """Describe the API when no frontend build is present."""
        return {
            "message": f"Welcome to {settings.APP_NAME} API",
            "docs": "/docs",
            "health": "/api/health",
        }
|
backend/app/models.py
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
SQLAlchemy ORM models for users, documents, and chat messages.
|
| 3 |
+
"""
|
| 4 |
+
import uuid
|
| 5 |
+
from datetime import datetime, timezone
|
| 6 |
+
from sqlalchemy import Column, String, Integer, DateTime, ForeignKey, Text, Boolean
|
| 7 |
+
from sqlalchemy.orm import relationship
|
| 8 |
+
from app.database import Base
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def generate_uuid():
    """Return a fresh random (version 4) UUID in its canonical string form."""
    new_id = uuid.uuid4()
    return str(new_id)
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
class User(Base):
    """Account row; parent of the user's documents and chat messages."""

    __tablename__ = "users"

    # String primary key filled in by generate_uuid when not supplied.
    id = Column(String, primary_key=True, default=generate_uuid)
    username = Column(
        String(80),
        unique=True,
        nullable=False,
        index=True,
    )
    email = Column(
        String(120),
        unique=True,
        nullable=False,
        index=True,
    )
    hashed_password = Column(String(255), nullable=False)
    is_admin = Column(Boolean, default=False)
    # Evaluated per insert so each row gets its own UTC timestamp.
    created_at = Column(DateTime, default=lambda: datetime.now(timezone.utc))

    # Relationships: children are removed together with the user.
    documents = relationship(
        "Document",
        back_populates="owner",
        cascade="all, delete-orphan",
    )
    messages = relationship(
        "ChatMessage",
        back_populates="user",
        cascade="all, delete-orphan",
    )
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
class Document(Base):
    """Uploaded file record, including ingestion status and counters."""

    __tablename__ = "documents"

    id = Column(String, primary_key=True, default=generate_uuid)
    user_id = Column(
        String,
        ForeignKey("users.id"),
        nullable=False,
        index=True,
    )
    # Stored filename (UUID-based)
    filename = Column(String(255), nullable=False)
    # User's original filename
    original_name = Column(String(255), nullable=False)
    # Size in bytes
    file_size = Column(Integer, default=0)
    page_count = Column(Integer, default=0)
    chunk_count = Column(Integer, default=0)
    # pending | processing | ready | failed
    status = Column(String(20), default="pending")
    error_message = Column(Text, nullable=True)
    # Evaluated per insert so each row gets its own UTC timestamp.
    uploaded_at = Column(DateTime, default=lambda: datetime.now(timezone.utc))

    # Relationships
    owner = relationship("User", back_populates="documents")
    messages = relationship(
        "ChatMessage",
        back_populates="document",
        cascade="all, delete-orphan",
    )
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
class ChatMessage(Base):
    """One chat turn, optionally tied to a specific document."""

    __tablename__ = "chat_messages"

    id = Column(String, primary_key=True, default=generate_uuid)
    user_id = Column(
        String,
        ForeignKey("users.id"),
        nullable=False,
        index=True,
    )
    # Nullable: a message does not have to reference a document.
    document_id = Column(
        String,
        ForeignKey("documents.id"),
        nullable=True,
        index=True,
    )
    # "user" | "assistant"
    role = Column(String(20), nullable=False)
    content = Column(Text, nullable=False)
    # JSON string of source citations
    sources_json = Column(Text, nullable=True)
    # Evaluated per insert so each row gets its own UTC timestamp.
    created_at = Column(DateTime, default=lambda: datetime.now(timezone.utc))

    # Relationships
    user = relationship("User", back_populates="messages")
    document = relationship("Document", back_populates="messages")
|
backend/app/rag/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# RAG Pipeline
|
backend/app/rag/agent.py
ADDED
|
@@ -0,0 +1,214 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
RAG Agent β generation with HuggingFace Inference API (chat completion).
|
| 3 |
+
Supports both streaming (SSE) and non-streaming responses.
|
| 4 |
+
"""
|
| 5 |
+
import logging
|
| 6 |
+
import json
|
| 7 |
+
from typing import List, Dict, Any, Optional, Generator
|
| 8 |
+
|
| 9 |
+
from huggingface_hub import InferenceClient
|
| 10 |
+
from app.config import get_settings
|
| 11 |
+
from app.rag.retriever import retrieve
|
| 12 |
+
from app.rag.prompts import SYSTEM_PROMPT, RAG_PROMPT_TEMPLATE, GREETING_PROMPT
|
| 13 |
+
|
| 14 |
+
logger = logging.getLogger(__name__)
|
| 15 |
+
settings = get_settings()
|
| 16 |
+
|
| 17 |
+
# ββ Singleton LLM client βββββββββββββββββββββββββββββ
|
| 18 |
+
_llm_client = None
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
def get_llm_client() -> InferenceClient:
    """Return the module-wide InferenceClient, building it on first use."""
    global _llm_client

    # Fast path: the client was already created by an earlier call.
    if _llm_client is not None:
        return _llm_client

    _llm_client = InferenceClient(token=settings.HF_TOKEN)
    logger.info(f"LLM client initialized for model: {settings.LLM_MODEL}")
    return _llm_client
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def is_greeting(question: str) -> bool:
    """Detect whether the input is a casual greeting rather than a document query.

    The text is lower-cased, curly apostrophes are normalised, runs of
    whitespace are collapsed, and trailing punctuation/space is removed
    before an exact match against a fixed set of phrases.

    Args:
        question: Raw user input; empty or None is treated as "not a greeting".

    Returns:
        True only when the whole normalised input equals a known greeting.
    """
    greetings = {
        "hi", "hello", "hey", "how are you", "what's up", "whats up",
        "good morning", "good evening", "good afternoon", "thanks", "thank you",
        "bye", "goodbye", "help", "what can you do", "who are you",
    }
    if not question:
        return False

    # split()/join collapses internal whitespace and trims both ends.
    normalized = " ".join(question.lower().replace("\u2019", "'").split())
    # Strip punctuation and any space left in front of it ("hello !").
    normalized = normalized.rstrip("!?., ")
    return normalized in greetings
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
def build_context(chunks: List[Dict[str, Any]]) -> str:
    """Render retrieved chunks as numbered excerpts separated by rules.

    Each chunk must provide 'filename', 'page' and 'text'; a missing
    'confidence' is shown as 0. An empty list yields a fixed notice.
    """
    if len(chunks) == 0:
        return "No relevant document context was found."

    def _render(position: int, item: Dict[str, Any]) -> str:
        relevance = item.get("confidence", 0)
        header = (
            f"### Excerpt {position} β {item['filename']}, Page {item['page']} "
            f"(Relevance: {relevance}%)"
        )
        return header + f"\n\n{item['text']}"

    separator = "\n\n---\n\n"
    return separator.join(
        _render(position, item) for position, item in enumerate(chunks, 1)
    )
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
def _chat_messages(system: str, user_content: str) -> list:
|
| 61 |
+
"""Build messages list for chat completion API."""
|
| 62 |
+
return [
|
| 63 |
+
{"role": "system", "content": system},
|
| 64 |
+
{"role": "user", "content": user_content},
|
| 65 |
+
]
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
def generate_answer(
    question: str,
    user_id: str,
    document_id: Optional[str] = None,
) -> Dict[str, Any]:
    """
    Full RAG pipeline: retrieve, build context, generate an answer.

    Greetings skip retrieval and get a short conversational reply. LLM
    failures on either path are logged and replaced by a generic apology;
    the raw exception text is no longer returned to the end user.

    Args:
        question: The user's message.
        user_id: Passed to retrieve() to scope the search.
        document_id: Optional document filter passed to retrieve().

    Returns:
        Dict with 'answer' (str) and 'sources' (list of citation dicts
        with 'text' truncated to 300 chars, 'filename', 'page', 'score',
        'confidence').
    """
    client = get_llm_client()
    failure_answer = "I encountered an error generating a response. Please try again."

    def _complete(messages: list, max_tokens: int, temperature: float) -> str:
        """Run one non-streaming chat completion and return its text."""
        response = client.chat_completion(
            messages=messages,
            model=settings.LLM_MODEL,
            max_tokens=max_tokens,
            temperature=temperature,
        )
        # content can be None (e.g. empty generation); treat it as "".
        return (response.choices[0].message.content or "").strip()

    # -- Handle greetings ---------------------------------------------------
    if is_greeting(question):
        messages = _chat_messages(
            "You are Document AI Analyst, a friendly AI assistant for document analysis.",
            question,
        )
        try:
            answer = _complete(messages, 256, 0.7)
        except Exception:
            logger.exception("LLM generation error (greeting)")
            answer = failure_answer
        return {"answer": answer, "sources": []}

    # -- Retrieve relevant chunks --------------------------------------------
    chunks = retrieve(
        query=question,
        user_id=user_id,
        document_id=document_id,
    )

    # -- Build prompt --------------------------------------------------------
    context = build_context(chunks)
    user_content = RAG_PROMPT_TEMPLATE.format(context=context, question=question)
    messages = _chat_messages(SYSTEM_PROMPT, user_content)

    # -- Generate answer -----------------------------------------------------
    try:
        answer = _complete(
            messages,
            settings.LLM_MAX_NEW_TOKENS,
            settings.LLM_TEMPERATURE,
        )
    except Exception:
        logger.exception("LLM generation error")
        answer = failure_answer

    # -- Format sources ------------------------------------------------------
    sources = [
        {
            "text": chunk["text"][:300] + ("..." if len(chunk["text"]) > 300 else ""),
            "filename": chunk["filename"],
            "page": chunk["page"],
            "score": chunk["score"],
            "confidence": chunk["confidence"],
        }
        for chunk in chunks
    ]

    return {"answer": answer, "sources": sources}
|
| 132 |
+
|
| 133 |
+
|
| 134 |
+
def generate_answer_stream(
    question: str,
    user_id: str,
    document_id: Optional[str] = None,
) -> Generator[str, None, None]:
    """
    Streaming RAG pipeline that yields SSE-formatted strings.

    Event order: one 'sources' event, zero or more 'token' events, an
    optional 'error' event, and always a final 'done' event. Retrieval and
    LLM failures are logged and reported as an 'error' event with a generic
    message, so the stream always terminates cleanly.

    Args:
        question: The user's message.
        user_id: Passed to retrieve() to scope the search.
        document_id: Optional document filter passed to retrieve().

    Yields:
        Strings of the form 'data: <json>\\n\\n'.
    """
    client = get_llm_client()
    failure_notice = "The response could not be generated. Please try again."

    def _sse(event_type: str, **fields) -> str:
        """Format one server-sent event carrying a JSON payload."""
        return f"data: {json.dumps({'type': event_type, **fields})}\n\n"

    def _token_events(messages: list, max_tokens: int, temperature: float):
        """Stream a chat completion as 'token' events."""
        stream = client.chat_completion(
            messages=messages,
            model=settings.LLM_MODEL,
            max_tokens=max_tokens,
            temperature=temperature,
            stream=True,
        )
        for part in stream:
            # Guard against stream chunks that carry no choices.
            if not part.choices:
                continue
            delta = part.choices[0].delta.content
            if delta:
                yield _sse("token", data=delta)

    # -- Handle greetings ---------------------------------------------------
    if is_greeting(question):
        yield _sse("sources", data=[])

        try:
            messages = _chat_messages(
                "You are Document AI Analyst, a friendly AI assistant for document analysis.",
                question,
            )
            yield from _token_events(messages, 256, 0.7)
        except Exception:
            logger.exception("LLM streaming error (greeting)")
            yield _sse("error", data=failure_notice)

        yield _sse("done")
        return

    # -- Retrieve relevant chunks --------------------------------------------
    # A failure here must not abort the generator, or the client would see
    # a truncated stream with neither an 'error' nor a 'done' event.
    try:
        chunks = retrieve(
            query=question,
            user_id=user_id,
            document_id=document_id,
        )
    except Exception:
        logger.exception("Retrieval error during streaming")
        yield _sse("sources", data=[])
        yield _sse("error", data=failure_notice)
        yield _sse("done")
        return

    # -- Yield sources first -------------------------------------------------
    sources = [
        {
            "text": chunk["text"][:300] + ("..." if len(chunk["text"]) > 300 else ""),
            "filename": chunk["filename"],
            "page": chunk["page"],
            "score": chunk["score"],
            "confidence": chunk["confidence"],
        }
        for chunk in chunks
    ]
    yield _sse("sources", data=sources)

    # -- Build prompt --------------------------------------------------------
    context = build_context(chunks)
    user_content = RAG_PROMPT_TEMPLATE.format(context=context, question=question)
    messages = _chat_messages(SYSTEM_PROMPT, user_content)

    # -- Stream answer tokens ------------------------------------------------
    try:
        yield from _token_events(
            messages,
            settings.LLM_MAX_NEW_TOKENS,
            settings.LLM_TEMPERATURE,
        )
    except Exception:
        logger.exception("LLM streaming error")
        yield _sse("error", data=failure_notice)

    yield _sse("done")
|
backend/app/rag/chunker.py
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Smart document chunking using LangChain's RecursiveCharacterTextSplitter.
|
| 3 |
+
Supports PDF, DOCX, TXT, and Markdown files with page-level metadata.
|
| 4 |
+
"""
|
| 5 |
+
import fitz # PyMuPDF
|
| 6 |
+
import docx
|
| 7 |
+
from typing import List, Dict, Any
|
| 8 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
| 9 |
+
from app.config import get_settings
|
| 10 |
+
|
| 11 |
+
settings = get_settings()
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def extract_pdf(filepath: str) -> List[Dict[str, Any]]:
    """Extract text from a PDF, one entry per non-empty page.

    Args:
        filepath: Path handed to fitz.open.

    Returns:
        List of {'text', 'page'} dicts with 1-based page numbers; pages
        whose text is empty or whitespace-only are skipped.
    """
    doc = fitz.open(filepath)
    pages = []

    # try/finally so the document handle is released even if a page fails.
    try:
        for page_num, page in enumerate(doc):
            text = page.get_text()
            if text.strip():
                pages.append({
                    "text": text,
                    "page": page_num + 1,
                })
    finally:
        doc.close()

    return pages
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
def extract_docx(filepath: str) -> List[Dict[str, Any]]:
    """Join the non-blank paragraphs of a DOCX file into one 'page 1' entry."""
    document = docx.Document(filepath)

    kept_paragraphs = []
    for paragraph in document.paragraphs:
        if paragraph.text.strip():
            kept_paragraphs.append(paragraph.text)

    full_text = "\n".join(kept_paragraphs)
    if not full_text:
        return []
    return [{"text": full_text, "page": 1}]
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
def extract_txt(filepath: str) -> List[Dict[str, Any]]:
    """Read a TXT/Markdown file as UTF-8 (undecodable bytes dropped).

    Returns a single 'page 1' entry, or an empty list when the file holds
    only whitespace.
    """
    with open(filepath, "r", encoding="utf-8", errors="ignore") as handle:
        content = handle.read()

    if not content.strip():
        return []
    return [{"text": content, "page": 1}]
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
def chunk_document(filepath: str) -> List[Dict[str, Any]]:
    """
    Load a document, extract text per page, and split it into chunks.

    Args:
        filepath: Path to a .pdf, .docx, .txt or .md file (case-insensitive).

    Returns:
        List of dicts with 'text', 'page' and a document-wide, zero-based
        'chunk_index'. Empty when no text could be extracted.

    Raises:
        ValueError: If the file extension is missing or unsupported.
    """
    import os

    # splitext only looks at the final path component, so dots in directory
    # names or extension-less files are not mistaken for an extension.
    ext = os.path.splitext(filepath)[1].lstrip(".").lower()

    # -- Extract text by file type -------------------------------------------
    if ext == "pdf":
        pages = extract_pdf(filepath)
    elif ext == "docx":
        pages = extract_docx(filepath)
    elif ext in ("txt", "md"):
        pages = extract_txt(filepath)
    else:
        raise ValueError(f"Unsupported file type: {ext or '<no extension>'}")

    if not pages:
        return []

    # -- LangChain recursive splitter ----------------------------------------
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=settings.CHUNK_SIZE,
        chunk_overlap=settings.CHUNK_OVERLAP,
        separators=["\n\n", "\n", ". ", " ", ""],
        length_function=len,
    )

    all_chunks = []
    chunk_index = 0

    for page_data in pages:
        text = page_data["text"]
        page_num = page_data["page"]

        # Split page by page so every chunk keeps its page number.
        splits = splitter.split_text(text)

        for split_text in splits:
            if split_text.strip():
                all_chunks.append({
                    "text": split_text.strip(),
                    "page": page_num,
                    "chunk_index": chunk_index,
                })
                chunk_index += 1

    return all_chunks
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
def get_page_count(filepath: str) -> int:
    """Get the total page count of a document.

    Args:
        filepath: Path to the document; only the extension of the final
            path component decides how it is counted.

    Returns:
        The real page count for PDFs; 1 for everything else, since
        DOCX, TXT and MD are treated as single-page.
    """
    import os

    # splitext avoids treating an extension-less file called "pdf", or a
    # dotted directory name, as a PDF extension.
    ext = os.path.splitext(filepath)[1].lstrip(".").lower()

    if ext == "pdf":
        doc = fitz.open(filepath)
        try:
            return len(doc)
        finally:
            # Release the handle even if counting fails.
            doc.close()

    return 1  # DOCX, TXT, MD are treated as single-page
|
backend/app/rag/embeddings.py
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
HuggingFace local embeddings using sentence-transformers.
|
| 3 |
+
Loads the model once via singleton pattern for efficiency.
|
| 4 |
+
"""
|
| 5 |
+
import logging
|
| 6 |
+
from typing import List
|
| 7 |
+
from langchain_huggingface import HuggingFaceEmbeddings
|
| 8 |
+
from app.config import get_settings
|
| 9 |
+
|
| 10 |
+
logger = logging.getLogger(__name__)
|
| 11 |
+
settings = get_settings()
|
| 12 |
+
|
| 13 |
+
# ββ Singleton embedding model ββββββββββββββββββββββββ
|
| 14 |
+
_embedding_model = None
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
def get_embedding_model() -> HuggingFaceEmbeddings:
    """
    Return the shared embedding model, loading it on the first call.

    The model name comes from settings.EMBEDDING_MODEL; it runs on CPU with
    normalised embeddings and an encode batch size of 32.
    """
    global _embedding_model

    # Fast path: already loaded by an earlier call.
    if _embedding_model is not None:
        return _embedding_model

    logger.info(f"Loading embedding model: {settings.EMBEDDING_MODEL}")
    device_options = {"device": "cpu"}
    encode_options = {"normalize_embeddings": True, "batch_size": 32}
    _embedding_model = HuggingFaceEmbeddings(
        model_name=settings.EMBEDDING_MODEL,
        model_kwargs=device_options,
        encode_kwargs=encode_options,
    )
    logger.info("Embedding model loaded successfully")
    return _embedding_model
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
def embed_texts(texts: List[str]) -> List[List[float]]:
    """Return one embedding vector per input text, using the shared model."""
    return get_embedding_model().embed_documents(texts)
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
def embed_query(query: str) -> List[float]:
    """Return the embedding vector for one query string, using the shared model."""
    return get_embedding_model().embed_query(query)
|
backend/app/rag/prompts.py
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Prompt templates for the RAG agent.
|
| 3 |
+
Enforces citation format and chain-of-thought reasoning.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
SYSTEM_PROMPT = """You are Document AI Analyst, an expert AI assistant specialized in analyzing documents and providing accurate, well-cited answers.
|
| 7 |
+
|
| 8 |
+
IMPORTANT RULES:
|
| 9 |
+
1. Answer ONLY based on the provided document context. Do not use external knowledge.
|
| 10 |
+
2. Always cite your sources using this exact format: [Source: filename, Page X]
|
| 11 |
+
3. If the context doesn't contain enough information to answer, say: "I couldn't find sufficient information in the uploaded documents to answer this question."
|
| 12 |
+
4. Be precise, clear, and well-structured in your responses.
|
| 13 |
+
5. Use bullet points and formatting when listing multiple items.
|
| 14 |
+
6. For numerical data or key facts, quote the relevant text directly.
|
| 15 |
+
|
| 16 |
+
FORMATTING:
|
| 17 |
+
- Use **bold** for key terms and important findings
|
| 18 |
+
- Use bullet points for lists
|
| 19 |
+
- Use > blockquotes for direct quotes from documents
|
| 20 |
+
- Include citations inline with your answer"""
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
RAG_PROMPT_TEMPLATE = """Based on the following document excerpts, answer the user's question accurately and cite your sources.
|
| 24 |
+
|
| 25 |
+
## Document Context
|
| 26 |
+
|
| 27 |
+
{context}
|
| 28 |
+
|
| 29 |
+
## User Question
|
| 30 |
+
|
| 31 |
+
{question}
|
| 32 |
+
|
| 33 |
+
## Instructions
|
| 34 |
+
|
| 35 |
+
Provide a comprehensive answer based strictly on the document context above. Include inline citations using [Source: filename, Page X] format for every claim you make. If the documents don't contain relevant information, clearly state that.
|
| 36 |
+
|
| 37 |
+
## Answer
|
| 38 |
+
"""
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
GREETING_PROMPT = """You are Document AI Analyst, a friendly and professional AI assistant. The user has greeted you or asked a general question not related to any specific document.
|
| 42 |
+
|
| 43 |
+
Respond naturally and briefly. Let them know you can help them:
|
| 44 |
+
- Upload and analyze PDF, DOCX, TXT, and Markdown documents
|
| 45 |
+
- Answer questions about their uploaded documents
|
| 46 |
+
- Extract key insights, summaries, and specific data points
|
| 47 |
+
- Provide accurate citations with page numbers
|
| 48 |
+
|
| 49 |
+
Keep the response concise and friendly.
|
| 50 |
+
|
| 51 |
+
User: {question}
|
| 52 |
+
|
| 53 |
+
Response:"""
|
backend/app/rag/retriever.py
ADDED
|
@@ -0,0 +1,96 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Two-stage retrieval: ChromaDB similarity search + cross-encoder reranking.
|
| 3 |
+
"""
|
| 4 |
+
import logging
|
| 5 |
+
from typing import List, Dict, Any, Optional
|
| 6 |
+
from app.config import get_settings
|
| 7 |
+
from app.rag.embeddings import embed_query
|
| 8 |
+
from app.rag.vectorstore import query_chunks
|
| 9 |
+
|
| 10 |
+
logger = logging.getLogger(__name__)
|
| 11 |
+
settings = get_settings()
|
| 12 |
+
|
| 13 |
+
# ββ Singleton reranker βββββββββββββββββββββββββββββββ
|
| 14 |
+
_reranker = None
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
def get_reranker():
    """Return the shared cross-encoder reranker, or None if it cannot load.

    A failed load is remembered via the "disabled" sentinel so the model is
    not re-attempted on every call.
    """
    global _reranker

    if _reranker is None:
        try:
            from sentence_transformers import CrossEncoder
            logger.info(f"Loading reranker: {settings.RERANKER_MODEL}")
            _reranker = CrossEncoder(settings.RERANKER_MODEL, max_length=512)
            logger.info("Reranker loaded successfully")
        except Exception as e:
            logger.warning(f"Failed to load reranker: {e}. Falling back to embedding-only retrieval.")
            _reranker = "disabled"

    if _reranker == "disabled":
        return None
    return _reranker
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def retrieve(
    query: str,
    user_id: str,
    document_id: Optional[str] = None,
) -> List[Dict[str, Any]]:
    """
    Two-stage retrieval pipeline:
    1. Vector similarity search (settings.TOP_K_RETRIEVAL candidates)
    2. Cross-encoder reranking, keeping settings.TOP_K_RERANK chunks

    Args:
        query: The user's question.
        user_id: Passed to query_chunks to scope the search.
        document_id: Optional document filter passed to query_chunks.

    Returns:
        The top chunks, each with 'score' (rounded rerank score when
        reranking succeeded, otherwise the stage-1 score) and 'confidence',
        a percentage in [0, 100] relative to the best returned chunk.
    """
    import math

    def _logistic(value: float) -> float:
        """Numerically stable sigmoid."""
        if value >= 0:
            return 1.0 / (1.0 + math.exp(-value))
        exp_value = math.exp(value)
        return exp_value / (1.0 + exp_value)

    # -- Stage 1: Embedding search -------------------------------------------
    query_vector = embed_query(query)
    candidates = query_chunks(
        query_embedding=query_vector,
        user_id=user_id,
        document_id=document_id,
        top_k=settings.TOP_K_RETRIEVAL,
    )

    if not candidates:
        return []

    # -- Stage 2: Cross-encoder reranking ------------------------------------
    reranker = get_reranker()

    if reranker is not None and len(candidates) > 1:
        try:
            pairs = [(query, chunk["text"]) for chunk in candidates]
            # Convert everything first so a failure cannot leave only some
            # candidates carrying a rerank score (mixed score scales).
            rerank_scores = [float(score) for score in reranker.predict(pairs)]
            if len(rerank_scores) != len(candidates):
                raise ValueError("reranker returned an unexpected number of scores")
        except Exception as e:
            logger.warning(f"Reranking failed, using embedding scores: {e}")
        else:
            for chunk, score in zip(candidates, rerank_scores):
                chunk["rerank_score"] = score
            # Sort by rerank score (descending)
            candidates.sort(key=lambda x: x.get("rerank_score", 0), reverse=True)

    # -- Take top-K after reranking ------------------------------------------
    top_chunks = candidates[:settings.TOP_K_RERANK]

    # -- Calculate confidence percentages ------------------------------------
    if top_chunks:
        raw_scores = [
            chunk.get("rerank_score", chunk.get("score", 0))
            for chunk in top_chunks
        ]
        # Scores outside [0, 1] look like unbounded logits (cross-encoders
        # commonly emit negative values); squash them so the ratio below is
        # meaningful. Scores already in [0, 1] are used unchanged.
        if any(raw < 0 or raw > 1 for raw in raw_scores):
            unit_scores = [_logistic(raw) for raw in raw_scores]
        else:
            unit_scores = list(raw_scores)

        max_score = max(max(unit_scores), 0.001)  # Avoid division by zero

        for chunk, unit in zip(top_chunks, unit_scores):
            confidence = round((unit / max_score) * 100, 1)
            chunk["confidence"] = max(0.0, min(100.0, confidence))
            # Expose the rerank score as 'score' and drop the internal key.
            if "rerank_score" in chunk:
                chunk["score"] = round(chunk["rerank_score"], 4)
                del chunk["rerank_score"]

    return top_chunks
|
backend/app/rag/vectorstore.py
ADDED
|
@@ -0,0 +1,186 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
ChromaDB vector store operations.
|
| 3 |
+
Per-user collections for data isolation.
|
| 4 |
+
"""
|
| 5 |
+
import logging
|
| 6 |
+
from typing import List, Dict, Any, Optional
|
| 7 |
+
import chromadb
|
| 8 |
+
from chromadb.config import Settings as ChromaSettings
|
| 9 |
+
from app.config import get_settings
|
| 10 |
+
from app.rag.embeddings import get_embedding_model
|
| 11 |
+
|
| 12 |
+
logger = logging.getLogger(__name__)
|
| 13 |
+
settings = get_settings()
|
| 14 |
+
|
| 15 |
+
# ββ Singleton ChromaDB client ββββββββββββββββββββββββ
|
| 16 |
+
_chroma_client = None
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def get_chroma_client() -> chromadb.ClientAPI:
    """Return the shared persistent ChromaDB client, creating it on first use."""
    global _chroma_client

    # Fast path: already initialised by an earlier call.
    if _chroma_client is not None:
        return _chroma_client

    import os

    persist_dir = settings.CHROMA_PERSIST_DIR
    os.makedirs(persist_dir, exist_ok=True)

    chroma_options = ChromaSettings(anonymized_telemetry=False)
    _chroma_client = chromadb.PersistentClient(
        path=persist_dir,
        settings=chroma_options,
    )
    logger.info(f"ChromaDB initialized at {settings.CHROMA_PERSIST_DIR}")
    return _chroma_client
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
def get_collection_name(user_id: str) -> str:
    """Generate a valid ChromaDB collection name for a user.

    Hyphens become underscores (unchanged from before, so UUID-based names
    are stable); any other character outside [A-Za-z0-9._] is also replaced
    by an underscore. The result is capped at 63 characters and trailing
    separators are removed, since the name must end with an alphanumeric.

    Args:
        user_id: The user's identifier (a UUID string in this app).

    Returns:
        A name of the form 'user_<sanitised id>'.
    """
    import re

    clean_id = re.sub(r"[^A-Za-z0-9._]", "_", str(user_id))
    name = f"user_{clean_id}"
    # Truncate first, then trim, so the cut cannot leave a trailing separator.
    return name[:63].rstrip("._-")
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
def store_chunks(
    chunks: List[Dict[str, Any]],
    document_id: str,
    filename: str,
    user_id: str,
) -> int:
    """Embed document chunks and upsert them into the user's ChromaDB collection.

    Each chunk is stored under the ID ``"{document_id}_{chunk_index}"`` with
    its text, filename, document ID, page and chunk index as metadata. Records
    are written with ``upsert`` so that re-ingesting the same document
    overwrites its existing vectors instead of leaving stale ones behind.

    Args:
        chunks: Dicts that must each provide ``"text"``, ``"page"`` and
            ``"chunk_index"``.
        document_id: ID of the document the chunks belong to.
        filename: Name recorded in each chunk's metadata.
        user_id: Owner; selects the per-user collection.

    Returns:
        The number of chunks written (0 when ``chunks`` is empty).
    """
    if not chunks:
        return 0

    client = get_chroma_client()
    embedding_model = get_embedding_model()

    collection_name = get_collection_name(user_id)
    collection = client.get_or_create_collection(
        name=collection_name,
        metadata={"hnsw:space": "cosine"},
    )

    # Build all payloads up front so a malformed chunk fails before any write.
    texts = [chunk["text"] for chunk in chunks]
    ids = [f"{document_id}_{chunk['chunk_index']}" for chunk in chunks]
    metadatas = [
        {
            "text": chunk["text"],
            "filename": filename,
            "document_id": document_id,
            "page": chunk["page"],
            "chunk_index": chunk["chunk_index"],
        }
        for chunk in chunks
    ]

    # Embed and upsert in fixed-size batches.
    batch_size = 50
    total_stored = 0

    for start in range(0, len(texts), batch_size):
        end = start + batch_size
        batch_texts = texts[start:end]

        embeddings = embedding_model.embed_documents(batch_texts)

        collection.upsert(
            ids=ids[start:end],
            embeddings=embeddings,
            metadatas=metadatas[start:end],
            documents=batch_texts,
        )
        total_stored += len(batch_texts)

    logger.info(f"Stored {total_stored} chunks for document {document_id}")
    return total_stored
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
def query_chunks(
    query_embedding: List[float],
    user_id: str,
    document_id: Optional[str] = None,
    top_k: int = 10,
) -> List[Dict[str, Any]]:
    """Run a similarity search over the user's collection.

    Args:
        query_embedding: Embedding vector of the query.
        user_id: Owner; selects the per-user collection.
        document_id: When given, restricts the search to that document.
        top_k: Maximum number of results requested from ChromaDB.

    Returns:
        One dict per hit with ``text``, ``filename``, ``document_id``,
        ``page`` and ``score`` (``1 - distance``, rounded to 4 decimals).
        Returns an empty list when the collection cannot be opened or the
        query yields no documents.
    """
    client = get_chroma_client()
    collection_name = get_collection_name(user_id)

    try:
        collection = client.get_collection(name=collection_name)
    except Exception:
        logger.warning(f"Collection {collection_name} not found")
        return []

    # Optional metadata filter limiting the search to a single document.
    where_filter = {"document_id": {"$eq": document_id}} if document_id else None

    results = collection.query(
        query_embeddings=[query_embedding],
        n_results=top_k,
        where=where_filter,
        include=["documents", "metadatas", "distances"],
    )

    if not (results and results["documents"] and results["documents"][0]):
        return []

    all_metadatas = results["metadatas"]
    all_distances = results["distances"]

    hits = []
    for idx, text in enumerate(results["documents"][0]):
        meta = all_metadatas[0][idx] if all_metadatas else {}
        dist = all_distances[0][idx] if all_distances else 0

        hits.append({
            "text": text,
            "filename": meta.get("filename", ""),
            "document_id": meta.get("document_id", ""),
            "page": meta.get("page", 1),
            # Cosine distance converted to a similarity score.
            "score": round(1 - dist, 4),
        })

    return hits
|
| 156 |
+
|
| 157 |
+
|
| 158 |
+
def delete_document_chunks(document_id: str, user_id: str):
    """Remove every stored chunk of one document from the user's collection.

    Best-effort: any error (including a missing collection) is logged as a
    warning and not re-raised.
    """
    client = get_chroma_client()
    collection_name = get_collection_name(user_id)
    doc_filter = {"document_id": {"$eq": document_id}}

    try:
        collection = client.get_collection(name=collection_name)
        # Only the IDs are needed, so nothing extra is included in the result.
        results = collection.get(where=doc_filter, include=[])
        if not results["ids"]:
            return
        collection.delete(ids=results["ids"])
        logger.info(f"Deleted {len(results['ids'])} chunks for document {document_id}")
    except Exception as e:
        logger.warning(f"Error deleting chunks: {e}")
|
| 175 |
+
|
| 176 |
+
|
| 177 |
+
def delete_user_collection(user_id: str):
    """Drop the whole ChromaDB collection belonging to a user.

    Best-effort: failures are logged as warnings and not re-raised.
    """
    collection_name = get_collection_name(user_id)
    client = get_chroma_client()

    try:
        client.delete_collection(name=collection_name)
    except Exception as e:
        logger.warning(f"Error deleting collection: {e}")
    else:
        logger.info(f"Deleted collection {collection_name}")
|
backend/app/routes/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# API Routes
|
backend/app/routes/auth.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Auth API routes — register, login, and user profile.
|
| 3 |
+
"""
|
| 4 |
+
from fastapi import APIRouter, Depends, HTTPException, status
|
| 5 |
+
from sqlalchemy.orm import Session
|
| 6 |
+
|
| 7 |
+
from app.database import get_db
|
| 8 |
+
from app.models import User
|
| 9 |
+
from app.schemas import UserRegister, UserLogin, TokenResponse, UserResponse
|
| 10 |
+
from app.auth import hash_password, verify_password, create_token, get_current_user
|
| 11 |
+
|
| 12 |
+
router = APIRouter(prefix="/auth", tags=["Authentication"])
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
@router.post("/register", response_model=TokenResponse, status_code=status.HTTP_201_CREATED)
def register(payload: UserRegister, db: Session = Depends(get_db)):
    """Register a new user account and return an access token for it.

    Raises:
        HTTPException: 409 when the username or e-mail is already in use,
            including when a concurrent request claims it between the
            pre-checks and the insert.
    """
    from sqlalchemy.exc import IntegrityError

    # Check existing username
    if db.query(User).filter(User.username == payload.username).first():
        raise HTTPException(
            status_code=status.HTTP_409_CONFLICT,
            detail="Username already taken",
        )

    # Check existing email
    if db.query(User).filter(User.email == payload.email).first():
        raise HTTPException(
            status_code=status.HTTP_409_CONFLICT,
            detail="Email already registered",
        )

    # Create user
    user = User(
        username=payload.username,
        email=payload.email,
        hashed_password=hash_password(payload.password),
    )
    db.add(user)
    try:
        db.commit()
    except IntegrityError:
        # The checks above are not atomic with the insert: a parallel request
        # may have taken the same username/e-mail in between.
        # NOTE(review): presumes User has unique constraints on these columns.
        db.rollback()
        raise HTTPException(
            status_code=status.HTTP_409_CONFLICT,
            detail="Username or email already registered",
        )
    db.refresh(user)

    # Generate token
    token = create_token(user.id)

    return TokenResponse(
        access_token=token,
        user=UserResponse.model_validate(user),
    )
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
@router.post("/login", response_model=TokenResponse)
def login(payload: UserLogin, db: Session = Depends(get_db)):
    """Authenticate by e-mail and password and return an access token.

    Raises:
        HTTPException: 401 when no user has that e-mail or the password does
            not verify. The same message is used for both cases.
    """
    account = db.query(User).filter(User.email == payload.email).first()

    # The password is only verified when an account was actually found.
    if not (account and verify_password(payload.password, account.hashed_password)):
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Invalid email or password",
        )

    return TokenResponse(
        access_token=create_token(account.id),
        user=UserResponse.model_validate(account),
    )
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
@router.get("/me", response_model=UserResponse)
def get_me(user: User = Depends(get_current_user)):
    """Return the profile of the user resolved by ``get_current_user``."""
    profile = UserResponse.model_validate(user)
    return profile
|
backend/app/routes/chat.py
ADDED
|
@@ -0,0 +1,202 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Chat routes — ask questions with RAG, stream responses via SSE, manage history.
|
| 3 |
+
"""
|
| 4 |
+
import json
|
| 5 |
+
import logging
|
| 6 |
+
from typing import Optional
|
| 7 |
+
|
| 8 |
+
from fastapi import APIRouter, Depends, HTTPException
|
| 9 |
+
from fastapi.responses import StreamingResponse
|
| 10 |
+
from sqlalchemy.orm import Session
|
| 11 |
+
|
| 12 |
+
from app.database import get_db
|
| 13 |
+
from app.models import User, ChatMessage, Document
|
| 14 |
+
from app.schemas import ChatRequest, ChatResponse, ChatMessageResponse, ChatHistoryResponse, SourceChunk
|
| 15 |
+
from app.auth import get_current_user
|
| 16 |
+
from app.rag.agent import generate_answer, generate_answer_stream
|
| 17 |
+
|
| 18 |
+
logger = logging.getLogger(__name__)
|
| 19 |
+
|
| 20 |
+
router = APIRouter(prefix="/chat", tags=["Chat"])
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
@router.post("/ask", response_model=ChatResponse)
def ask_question(
    payload: ChatRequest,
    user: User = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """Answer a question with RAG retrieval and return the full response at once.

    When ``payload.document_id`` is set, the document must belong to the
    caller and have status ``"ready"``. The question and the answer are both
    appended to the chat history.

    Raises:
        HTTPException: 404 if the document is not found for this user,
            400 if it is not ready yet.
    """
    target_id = payload.document_id

    # Validate document exists if specified
    if target_id:
        doc = (
            db.query(Document)
            .filter(Document.id == target_id, Document.user_id == user.id)
            .first()
        )
        if not doc:
            raise HTTPException(status_code=404, detail="Document not found")
        if doc.status != "ready":
            raise HTTPException(
                status_code=400,
                detail=f"Document is still {doc.status}. Please wait for processing to complete.",
            )

    # Generate answer
    result = generate_answer(
        question=payload.question,
        user_id=user.id,
        document_id=payload.document_id,
    )
    answer_text = result["answer"]
    answer_sources = result["sources"]

    # Save both sides of the exchange to chat history
    _save_message(db, user.id, payload.document_id, "user", payload.question)
    _save_message(db, user.id, payload.document_id, "assistant", answer_text, answer_sources)

    return ChatResponse(
        answer=answer_text,
        sources=[SourceChunk(**src) for src in answer_sources],
        document_id=payload.document_id,
    )
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
@router.post("/ask/stream")
def ask_question_stream(
    payload: ChatRequest,
    user: User = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """Answer a question with RAG and stream the response as server-sent events.

    The user's question is saved to history before streaming starts. While
    streaming, ``token`` events are accumulated into the full answer and the
    ``sources`` event is remembered; once the stream finishes, the assistant
    message is saved using a fresh database session.

    Raises:
        HTTPException: 404 if ``payload.document_id`` is not found for this
            user, 400 if that document is not ready yet.
    """
    # Snapshot plain values up front. The generator below runs lazily while
    # the response is being sent, i.e. after this function has returned and
    # after `_save_message` has committed on the request session, so reading
    # ORM attributes such as `user.id` at that point may hit an expired or
    # detached instance.
    # NOTE(review): depends on SessionLocal's expire_on_commit setting and on
    # when get_db closes the session — confirm against app.database.
    user_id = user.id
    question = payload.question
    document_id = payload.document_id

    # Validate document
    if document_id:
        doc = db.query(Document).filter(
            Document.id == document_id,
            Document.user_id == user_id,
        ).first()

        if not doc:
            raise HTTPException(status_code=404, detail="Document not found")

        if doc.status != "ready":
            raise HTTPException(
                status_code=400,
                detail=f"Document is still {doc.status}. Please wait for processing to complete.",
            )

    # Save user message immediately
    _save_message(db, user_id, document_id, "user", question)

    # Stream response
    def event_stream():
        full_answer = ""
        sources = []

        for chunk in generate_answer_stream(
            question=question,
            user_id=user_id,
            document_id=document_id,
        ):
            yield chunk

            # Parse to accumulate full answer for history. Parsing is
            # best-effort: a chunk that is not a JSON "data: " line must never
            # interrupt the stream, so failures are only logged.
            try:
                if chunk.startswith("data: "):
                    data = json.loads(chunk[6:].strip())
                    if data.get("type") == "token":
                        full_answer += data.get("data", "")
                    elif data.get("type") == "sources":
                        sources = data.get("data", [])
            except Exception:
                logger.debug("Skipping unparseable stream chunk", exc_info=True)

        # Save assistant response to history with its own session, since the
        # request-scoped one may no longer be usable here.
        from app.database import SessionLocal
        save_db = SessionLocal()
        try:
            _save_message(save_db, user_id, document_id, "assistant", full_answer, sources)
        finally:
            save_db.close()

    return StreamingResponse(
        event_stream(),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "X-Accel-Buffering": "no",
        },
    )
|
| 130 |
+
|
| 131 |
+
|
| 132 |
+
@router.get("/history/{document_id}", response_model=ChatHistoryResponse)
def get_chat_history(
    document_id: str,
    user: User = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """Return the caller's chat messages for one document, oldest first."""

    def _decode_sources(raw):
        # Stored sources are a JSON list; anything unreadable becomes [].
        if not raw:
            return []
        try:
            return [SourceChunk(**entry) for entry in json.loads(raw)]
        except Exception:
            return []

    history_query = (
        db.query(ChatMessage)
        .filter(
            ChatMessage.user_id == user.id,
            ChatMessage.document_id == document_id,
        )
        .order_by(ChatMessage.created_at.asc())
    )

    formatted = [
        ChatMessageResponse(
            id=msg.id,
            role=msg.role,
            content=msg.content,
            sources=_decode_sources(msg.sources_json),
            created_at=msg.created_at,
        )
        for msg in history_query.all()
    ]

    return ChatHistoryResponse(messages=formatted, document_id=document_id)
|
| 167 |
+
|
| 168 |
+
|
| 169 |
+
@router.delete("/history/{document_id}")
def clear_chat_history(
    document_id: str,
    user: User = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """Delete all of the caller's chat messages for one document."""
    own_messages = db.query(ChatMessage).filter(
        ChatMessage.user_id == user.id,
        ChatMessage.document_id == document_id,
    )
    own_messages.delete()
    db.commit()

    return {"message": "Chat history cleared"}
|
| 183 |
+
|
| 184 |
+
|
| 185 |
+
def _save_message(
    db: Session,
    user_id: str,
    document_id: Optional[str],
    role: str,
    content: str,
    sources: list = None,
):
    """Persist one chat message and commit immediately.

    ``sources`` is stored as a JSON string; an empty or missing list is
    stored as NULL.
    """
    serialized_sources = json.dumps(sources) if sources else None

    db.add(
        ChatMessage(
            user_id=user_id,
            document_id=document_id,
            role=role,
            content=content,
            sources_json=serialized_sources,
        )
    )
    db.commit()
|
backend/app/routes/documents.py
ADDED
|
@@ -0,0 +1,245 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Document management routes — upload, list, delete, and serve PDF files.
|
| 3 |
+
Background ingestion via FastAPI BackgroundTasks.
|
| 4 |
+
"""
|
| 5 |
+
import os
|
| 6 |
+
import uuid
|
| 7 |
+
import logging
|
| 8 |
+
from typing import Optional
|
| 9 |
+
|
| 10 |
+
from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, BackgroundTasks, status
|
| 11 |
+
from fastapi.responses import FileResponse
|
| 12 |
+
from sqlalchemy.orm import Session
|
| 13 |
+
|
| 14 |
+
from app.database import get_db
|
| 15 |
+
from app.models import User, Document
|
| 16 |
+
from app.schemas import DocumentResponse, DocumentListResponse
|
| 17 |
+
from app.auth import get_current_user
|
| 18 |
+
from app.config import get_settings
|
| 19 |
+
from app.rag.chunker import chunk_document, get_page_count
|
| 20 |
+
from app.rag.vectorstore import store_chunks, delete_document_chunks
|
| 21 |
+
|
| 22 |
+
logger = logging.getLogger(__name__)
|
| 23 |
+
settings = get_settings()
|
| 24 |
+
|
| 25 |
+
router = APIRouter(prefix="/documents", tags=["Documents"])
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def _ingest_document(document_id: str, filepath: str, original_name: str, user_id: str):
    """Background task: chunk a document, embed it and store it in ChromaDB.

    Runs with its own database session. The document's status moves from
    ``"processing"`` to ``"ready"`` on success, or to ``"failed"`` with an
    ``error_message`` when no text can be extracted or any step raises.

    Args:
        document_id: ID of the ``Document`` row to update.
        filepath: Location of the uploaded file on disk.
        original_name: Name stored as ``filename`` in chunk metadata.
        user_id: Owner; selects the per-user vector collection.
    """
    from app.database import SessionLocal

    db = SessionLocal()
    try:
        doc = db.query(Document).filter(Document.id == document_id).first()
        if not doc:
            logger.error(f"Document {document_id} not found for ingestion")
            return

        # Update status to processing
        doc.status = "processing"
        db.commit()

        # Get page count
        page_count = get_page_count(filepath)
        doc.page_count = page_count

        # Chunk the document
        chunks = chunk_document(filepath)

        if not chunks:
            doc.status = "failed"
            doc.error_message = "No text could be extracted from the document"
            db.commit()
            return

        # Store embeddings in ChromaDB
        chunk_count = store_chunks(
            chunks=chunks,
            document_id=document_id,
            filename=original_name,
            user_id=user_id,
        )

        # Update document record
        doc.chunk_count = chunk_count
        doc.status = "ready"
        db.commit()

        logger.info(f"Document {document_id} ingested: {page_count} pages, {chunk_count} chunks")

    except Exception as e:
        # logger.exception keeps the traceback, which the message alone loses.
        logger.exception(f"Ingestion error for {document_id}: {e}")
        try:
            # Discard any half-applied state first; after a failed flush or
            # commit the session cannot be used again until rolled back.
            db.rollback()
            doc = db.query(Document).filter(Document.id == document_id).first()
            if doc:
                doc.status = "failed"
                doc.error_message = str(e)[:500]
                db.commit()
        except Exception:
            # Still best-effort, but leave a trace instead of failing silently.
            logger.exception(f"Could not mark document {document_id} as failed")
    finally:
        db.close()
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
@router.post("/upload", response_model=DocumentResponse, status_code=status.HTTP_201_CREATED)
async def upload_document(
    background_tasks: BackgroundTasks,
    file: UploadFile = File(...),
    user: User = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """Upload a document for RAG processing.

    Validates the extension and size, writes the file under the user's upload
    directory with a random name, creates a ``pending`` document record and
    schedules background ingestion.

    Raises:
        HTTPException: 400 when the filename is missing, the extension is not
            allowed, or the file exceeds ``settings.MAX_FILE_SIZE_MB``.
    """
    # Validate file type
    if not file.filename:
        raise HTTPException(status_code=400, detail="No filename provided")

    # splitext yields "" for names without an extension, so a file literally
    # called "pdf" is no longer mistaken for a PDF.
    ext = os.path.splitext(file.filename)[1].lstrip(".").lower()
    if not ext or ext not in settings.ALLOWED_EXTENSIONS:
        raise HTTPException(
            status_code=400,
            detail=f"File type '.{ext}' not supported. Allowed: {', '.join(settings.ALLOWED_EXTENSIONS)}",
        )

    # Read and validate size
    content = await file.read()
    file_size = len(content)

    if file_size > settings.MAX_FILE_SIZE_MB * 1024 * 1024:
        raise HTTPException(
            status_code=400,
            detail=f"File too large. Maximum size: {settings.MAX_FILE_SIZE_MB}MB",
        )

    # Save file to disk
    user_dir = os.path.join(settings.UPLOAD_DIR, user.id)
    os.makedirs(user_dir, exist_ok=True)

    stored_filename = f"{uuid.uuid4().hex}.{ext}"
    filepath = os.path.join(user_dir, stored_filename)

    with open(filepath, "wb") as f:
        f.write(content)

    # Create database record
    document = Document(
        user_id=user.id,
        filename=stored_filename,
        original_name=file.filename,
        file_size=file_size,
        status="pending",
    )
    try:
        db.add(document)
        db.commit()
        db.refresh(document)
    except Exception:
        # Without a record nothing references the file; remove it so failed
        # uploads do not accumulate on disk, then let the error propagate.
        db.rollback()
        if os.path.exists(filepath):
            os.remove(filepath)
        raise

    # Trigger background ingestion
    background_tasks.add_task(
        _ingest_document,
        document_id=document.id,
        filepath=filepath,
        original_name=file.filename,
        user_id=user.id,
    )

    return DocumentResponse.model_validate(document)
|
| 149 |
+
|
| 150 |
+
|
| 151 |
+
@router.get("/", response_model=DocumentListResponse)
def list_documents(
    user: User = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """Return all of the caller's documents, most recently uploaded first."""
    owned = db.query(Document).filter(Document.user_id == user.id)
    docs = owned.order_by(Document.uploaded_at.desc()).all()

    items = [DocumentResponse.model_validate(d) for d in docs]
    return DocumentListResponse(documents=items, total=len(docs))
|
| 168 |
+
|
| 169 |
+
|
| 170 |
+
@router.get("/{document_id}", response_model=DocumentResponse)
def get_document(
    document_id: str,
    user: User = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """Return the details of one document owned by the caller.

    Raises:
        HTTPException: 404 when no such document exists for this user.
    """
    owned_doc = (
        db.query(Document)
        .filter(Document.id == document_id, Document.user_id == user.id)
        .first()
    )
    if owned_doc:
        return DocumentResponse.model_validate(owned_doc)

    raise HTTPException(status_code=404, detail="Document not found")
|
| 186 |
+
|
| 187 |
+
|
| 188 |
+
@router.get("/{document_id}/pdf")
def serve_pdf(
    document_id: str,
    user: User = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """Send the stored file of one of the caller's documents as a PDF response.

    Raises:
        HTTPException: 404 when the document is not found for this user or
            its file is missing on disk.
    """
    owned_doc = (
        db.query(Document)
        .filter(Document.id == document_id, Document.user_id == user.id)
        .first()
    )
    if not owned_doc:
        raise HTTPException(status_code=404, detail="Document not found")

    # Files live under <UPLOAD_DIR>/<user id>/<stored filename>.
    filepath = os.path.join(settings.UPLOAD_DIR, user.id, owned_doc.filename)
    if not os.path.exists(filepath):
        raise HTTPException(status_code=404, detail="File not found on disk")

    return FileResponse(
        filepath,
        media_type="application/pdf",
        filename=owned_doc.original_name,
    )
|
| 213 |
+
|
| 214 |
+
|
| 215 |
+
@router.delete("/{document_id}", status_code=status.HTTP_200_OK)
def delete_document(
    document_id: str,
    user: User = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """Delete a document, its file on disk and its vector embeddings.

    Vector deletion is best-effort: a failure is logged and does not prevent
    the database record from being removed.

    Raises:
        HTTPException: 404 when no such document exists for this user.
    """
    doc = db.query(Document).filter(
        Document.id == document_id,
        Document.user_id == user.id,
    ).first()

    if not doc:
        raise HTTPException(status_code=404, detail="Document not found")

    # Read everything needed from the row now; once it has been deleted and
    # the session committed, the instance should no longer be relied on.
    original_name = doc.original_name
    stored_filename = doc.filename

    # Delete file from disk
    filepath = os.path.join(settings.UPLOAD_DIR, user.id, stored_filename)
    if os.path.exists(filepath):
        os.remove(filepath)

    # Delete vectors from ChromaDB
    try:
        delete_document_chunks(document_id=document_id, user_id=user.id)
    except Exception as e:
        logger.warning(f"Error deleting vectors: {e}")

    # Delete from database (cascades to chat messages)
    db.delete(doc)
    db.commit()

    return {"message": f"Document '{original_name}' deleted successfully"}
|
backend/app/schemas.py
ADDED
|
@@ -0,0 +1,98 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Pydantic schemas for API request/response validation.
|
| 3 |
+
"""
|
| 4 |
+
from pydantic import BaseModel, EmailStr, Field
|
| 5 |
+
from typing import Optional, List
|
| 6 |
+
from datetime import datetime
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
# ── Auth ─────────────────────────────────────────────
|
| 10 |
+
|
| 11 |
+
class UserRegister(BaseModel):
    # Request body for account registration.

    # Login name, 3 to 80 characters.
    username: str = Field(..., min_length=3, max_length=80)
    # Validated as an e-mail address.
    email: EmailStr
    # Plain-text password, at least 6 characters.
    password: str = Field(..., min_length=6)
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
class UserLogin(BaseModel):
    # Request body for logging in with e-mail and password.

    # Validated as an e-mail address.
    email: EmailStr
    # Plain-text password; no length constraint is applied here.
    password: str
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
class TokenResponse(BaseModel):
    # Response returned after a successful register or login.

    access_token: str
    # Token scheme reported to the client; defaults to "bearer".
    token_type: str = "bearer"
    # Forward reference: UserResponse is declared further down in this module.
    user: "UserResponse"
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
class UserResponse(BaseModel):
    # Public view of a user account.

    # Pydantic v2 configuration: allow model_validate() to read values from
    # object attributes (e.g. ORM rows). Replaces the deprecated nested
    # `class Config`.
    model_config = {"from_attributes": True}

    id: str
    username: str
    email: str
    is_admin: bool
    created_at: datetime
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
# ── Documents ────────────────────────────────────────
|
| 40 |
+
|
| 41 |
+
class DocumentResponse(BaseModel):
    # API view of an uploaded document and its ingestion state.

    # Pydantic v2 configuration: allow model_validate() to read values from
    # object attributes (e.g. ORM rows). Replaces the deprecated nested
    # `class Config`.
    model_config = {"from_attributes": True}

    id: str
    # Filename as supplied by the uploader.
    original_name: str
    # Size of the uploaded content in bytes.
    file_size: int
    page_count: int
    chunk_count: int
    # Ingestion state, e.g. "pending", "processing", "ready" or "failed".
    status: str
    error_message: Optional[str] = None
    uploaded_at: datetime
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
class DocumentListResponse(BaseModel):
    # Response for the document listing endpoint.

    documents: List[DocumentResponse]
    # Number of documents in the list.
    total: int
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
# ── Chat ─────────────────────────────────────────────
|
| 61 |
+
|
| 62 |
+
class ChatRequest(BaseModel):
    # Request body for asking a question.

    # Question text, 1 to 2000 characters.
    question: str = Field(..., min_length=1, max_length=2000)
    # Optional document the question refers to.
    document_id: Optional[str] = None
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
class SourceChunk(BaseModel):
    # One retrieved chunk cited as a source for an answer.

    text: str
    filename: str
    page: int
    score: float
    confidence: float
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
class ChatResponse(BaseModel):
    # Response for a non-streaming question.

    answer: str
    # Chunks the answer was based on; empty when there are none.
    sources: List[SourceChunk] = []
    document_id: Optional[str] = None
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
class ChatMessageResponse(BaseModel):
    # One stored chat message as returned by the history endpoint.

    # Pydantic v2 configuration: allow model_validate() to read values from
    # object attributes (e.g. ORM rows). Replaces the deprecated nested
    # `class Config`.
    model_config = {"from_attributes": True}

    id: str
    # Message author, e.g. "user" or "assistant".
    role: str
    content: str
    sources: List[SourceChunk] = []
    created_at: datetime
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
class ChatHistoryResponse(BaseModel):
    # Response for the chat history endpoint.

    messages: List[ChatMessageResponse]
    document_id: Optional[str] = None
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
# Rebuild models for forward references
|
| 98 |
+
TokenResponse.model_rebuild()
|
backend/requirements.txt
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ── Enterprise Agentic RAG System — Backend Dependencies ──
|
| 2 |
+
|
| 3 |
+
# Core Framework
|
| 4 |
+
fastapi
|
| 5 |
+
uvicorn[standard]
|
| 6 |
+
python-multipart
|
| 7 |
+
|
| 8 |
+
# Database
|
| 9 |
+
sqlalchemy
|
| 10 |
+
aiosqlite
|
| 11 |
+
|
| 12 |
+
# Auth
|
| 13 |
+
pyjwt
|
| 14 |
+
passlib[bcrypt]
|
| 15 |
+
python-dotenv
|
| 16 |
+
|
| 17 |
+
# Config
|
| 18 |
+
pydantic-settings
|
| 19 |
+
pydantic[email]
|
| 20 |
+
|
| 21 |
+
# Document Processing
|
| 22 |
+
PyMuPDF
|
| 23 |
+
python-docx
|
| 24 |
+
|
| 25 |
+
# LangChain & RAG
|
| 26 |
+
langchain
|
| 27 |
+
langchain-community
|
| 28 |
+
langchain-huggingface
|
| 29 |
+
langchain-text-splitters
|
| 30 |
+
|
| 31 |
+
# Embeddings & ML
|
| 32 |
+
sentence-transformers
|
| 33 |
+
transformers
|
| 34 |
+
|
| 35 |
+
# Vector Database
|
| 36 |
+
chromadb
|
| 37 |
+
|
| 38 |
+
# LLM Inference
|
| 39 |
+
huggingface-hub
|
| 40 |
+
|
| 41 |
+
# Production
|
| 42 |
+
gunicorn
|
docker-compose.yml
CHANGED
|
@@ -1,31 +1,19 @@
|
|
| 1 |
version: '3.8'
|
| 2 |
|
| 3 |
services:
|
| 4 |
-
|
| 5 |
build: .
|
| 6 |
ports:
|
| 7 |
-
- "
|
| 8 |
volumes:
|
| 9 |
-
-
|
| 10 |
-
- ./vectorstore:/app/vectorstore
|
| 11 |
environment:
|
| 12 |
-
-
|
| 13 |
-
|
| 14 |
-
-
|
| 15 |
-
-
|
| 16 |
-
-
|
| 17 |
-
|
| 18 |
-
depends_on:
|
| 19 |
-
- mongo
|
| 20 |
-
restart: always
|
| 21 |
-
|
| 22 |
-
mongo:
|
| 23 |
-
image: mongo:6.0
|
| 24 |
-
ports:
|
| 25 |
-
- "27017:27017"
|
| 26 |
-
volumes:
|
| 27 |
-
- mongo_data:/data/db
|
| 28 |
-
restart: always
|
| 29 |
|
| 30 |
volumes:
|
| 31 |
-
|
|
|
|
| 1 |
version: '3.8'
|
| 2 |
|
| 3 |
services:
|
| 4 |
+
app:
|
| 5 |
build: .
|
| 6 |
ports:
|
| 7 |
+
- "7860:7860"
|
| 8 |
volumes:
|
| 9 |
+
- app_data:/app/data
|
|
|
|
| 10 |
environment:
|
| 11 |
+
- SECRET_KEY=${SECRET_KEY:-dev-secret-key-change-me}
|
| 12 |
+
- HF_TOKEN=${HF_TOKEN}
|
| 13 |
+
- DATABASE_URL=sqlite:///./data/app.db
|
| 14 |
+
- UPLOAD_DIR=./data/uploads
|
| 15 |
+
- CHROMA_PERSIST_DIR=./data/chroma_db
|
| 16 |
+
restart: unless-stopped
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
|
| 18 |
volumes:
|
| 19 |
+
app_data:
|
frontend/.gitignore
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
|
| 2 |
+
|
| 3 |
+
# dependencies
|
| 4 |
+
/node_modules
|
| 5 |
+
/.pnp
|
| 6 |
+
.pnp.*
|
| 7 |
+
.yarn/*
|
| 8 |
+
!.yarn/patches
|
| 9 |
+
!.yarn/plugins
|
| 10 |
+
!.yarn/releases
|
| 11 |
+
!.yarn/versions
|
| 12 |
+
|
| 13 |
+
# testing
|
| 14 |
+
/coverage
|
| 15 |
+
|
| 16 |
+
# next.js
|
| 17 |
+
/.next/
|
| 18 |
+
/out/
|
| 19 |
+
|
| 20 |
+
# production
|
| 21 |
+
/build
|
| 22 |
+
|
| 23 |
+
# misc
|
| 24 |
+
.DS_Store
|
| 25 |
+
*.pem
|
| 26 |
+
|
| 27 |
+
# debug
|
| 28 |
+
npm-debug.log*
|
| 29 |
+
yarn-debug.log*
|
| 30 |
+
yarn-error.log*
|
| 31 |
+
.pnpm-debug.log*
|
| 32 |
+
|
| 33 |
+
# env files (can opt-in for committing if needed)
|
| 34 |
+
.env*
|
| 35 |
+
|
| 36 |
+
# vercel
|
| 37 |
+
.vercel
|
| 38 |
+
|
| 39 |
+
# typescript
|
| 40 |
+
*.tsbuildinfo
|
| 41 |
+
next-env.d.ts
|
frontend/AGENTS.md
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!-- BEGIN:nextjs-agent-rules -->
|
| 2 |
+
# This is NOT the Next.js you know
|
| 3 |
+
|
| 4 |
+
This version has breaking changes — APIs, conventions, and file structure may all differ from your training data. Read the relevant guide in `node_modules/next/dist/docs/` before writing any code. Heed deprecation notices.
|
| 5 |
+
<!-- END:nextjs-agent-rules -->
|
frontend/CLAUDE.md
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
@AGENTS.md
|
frontend/README.md
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
This is a [Next.js](https://nextjs.org) project bootstrapped with [`create-next-app`](https://nextjs.org/docs/app/api-reference/cli/create-next-app).
|
| 2 |
+
|
| 3 |
+
## Getting Started
|
| 4 |
+
|
| 5 |
+
First, run the development server:
|
| 6 |
+
|
| 7 |
+
```bash
|
| 8 |
+
npm run dev
|
| 9 |
+
# or
|
| 10 |
+
yarn dev
|
| 11 |
+
# or
|
| 12 |
+
pnpm dev
|
| 13 |
+
# or
|
| 14 |
+
bun dev
|
| 15 |
+
```
|
| 16 |
+
|
| 17 |
+
Open [http://localhost:3000](http://localhost:3000) with your browser to see the result.
|
| 18 |
+
|
| 19 |
+
You can start editing the page by modifying `app/page.tsx`. The page auto-updates as you edit the file.
|
| 20 |
+
|
| 21 |
+
This project uses [`next/font`](https://nextjs.org/docs/app/building-your-application/optimizing/fonts) to automatically optimize and load [Geist](https://vercel.com/font), a new font family for Vercel.
|
| 22 |
+
|
| 23 |
+
## Learn More
|
| 24 |
+
|
| 25 |
+
To learn more about Next.js, take a look at the following resources:
|
| 26 |
+
|
| 27 |
+
- [Next.js Documentation](https://nextjs.org/docs) - learn about Next.js features and API.
|
| 28 |
+
- [Learn Next.js](https://nextjs.org/learn) - an interactive Next.js tutorial.
|
| 29 |
+
|
| 30 |
+
You can check out [the Next.js GitHub repository](https://github.com/vercel/next.js) - your feedback and contributions are welcome!
|
| 31 |
+
|
| 32 |
+
## Deploy on Vercel
|
| 33 |
+
|
| 34 |
+
The easiest way to deploy your Next.js app is to use the [Vercel Platform](https://vercel.com/new?utm_medium=default-template&filter=next.js&utm_source=create-next-app&utm_campaign=create-next-app-readme) from the creators of Next.js.
|
| 35 |
+
|
| 36 |
+
Check out our [Next.js deployment documentation](https://nextjs.org/docs/app/building-your-application/deploying) for more details.
|
frontend/components.json
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"$schema": "https://ui.shadcn.com/schema.json",
|
| 3 |
+
"style": "base-nova",
|
| 4 |
+
"rsc": true,
|
| 5 |
+
"tsx": true,
|
| 6 |
+
"tailwind": {
|
| 7 |
+
"config": "",
|
| 8 |
+
"css": "src/app/globals.css",
|
| 9 |
+
"baseColor": "neutral",
|
| 10 |
+
"cssVariables": true,
|
| 11 |
+
"prefix": ""
|
| 12 |
+
},
|
| 13 |
+
"iconLibrary": "lucide",
|
| 14 |
+
"rtl": false,
|
| 15 |
+
"aliases": {
|
| 16 |
+
"components": "@/components",
|
| 17 |
+
"utils": "@/lib/utils",
|
| 18 |
+
"ui": "@/components/ui",
|
| 19 |
+
"lib": "@/lib",
|
| 20 |
+
"hooks": "@/hooks"
|
| 21 |
+
},
|
| 22 |
+
"menuColor": "default",
|
| 23 |
+
"menuAccent": "subtle",
|
| 24 |
+
"registries": {}
|
| 25 |
+
}
|
frontend/eslint.config.mjs
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { defineConfig, globalIgnores } from "eslint/config";
|
| 2 |
+
import nextVitals from "eslint-config-next/core-web-vitals";
|
| 3 |
+
import nextTs from "eslint-config-next/typescript";
|
| 4 |
+
|
| 5 |
+
const eslintConfig = defineConfig([
|
| 6 |
+
...nextVitals,
|
| 7 |
+
...nextTs,
|
| 8 |
+
// Override default ignores of eslint-config-next.
|
| 9 |
+
globalIgnores([
|
| 10 |
+
// Default ignores of eslint-config-next:
|
| 11 |
+
".next/**",
|
| 12 |
+
"out/**",
|
| 13 |
+
"build/**",
|
| 14 |
+
"next-env.d.ts",
|
| 15 |
+
]),
|
| 16 |
+
]);
|
| 17 |
+
|
| 18 |
+
export default eslintConfig;
|
frontend/next.config.ts
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import type { NextConfig } from "next";
|
| 2 |
+
|
| 3 |
+
const nextConfig: NextConfig = {
|
| 4 |
+
output: "export",
|
| 5 |
+
images: { unoptimized: true },
|
| 6 |
+
// Turbopack config (Next.js 16 default bundler)
|
| 7 |
+
turbopack: {},
|
| 8 |
+
};
|
| 9 |
+
|
| 10 |
+
export default nextConfig;
|
frontend/package-lock.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
frontend/package.json
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"name": "frontend",
|
| 3 |
+
"version": "0.1.0",
|
| 4 |
+
"private": true,
|
| 5 |
+
"scripts": {
|
| 6 |
+
"dev": "next dev",
|
| 7 |
+
"build": "next build",
|
| 8 |
+
"start": "next start",
|
| 9 |
+
"lint": "eslint"
|
| 10 |
+
},
|
| 11 |
+
"dependencies": {
|
| 12 |
+
"@base-ui/react": "^1.4.1",
|
| 13 |
+
"class-variance-authority": "^0.7.1",
|
| 14 |
+
"clsx": "^2.1.1",
|
| 15 |
+
"lucide-react": "^1.8.0",
|
| 16 |
+
"next": "16.2.4",
|
| 17 |
+
"pdfjs-dist": "^5.6.205",
|
| 18 |
+
"react": "19.2.4",
|
| 19 |
+
"react-dom": "19.2.4",
|
| 20 |
+
"react-dropzone": "^15.0.0",
|
| 21 |
+
"react-markdown": "^10.1.0",
|
| 22 |
+
"react-pdf": "^10.4.1",
|
| 23 |
+
"remark-gfm": "^4.0.1",
|
| 24 |
+
"shadcn": "^4.3.1",
|
| 25 |
+
"tailwind-merge": "^3.5.0",
|
| 26 |
+
"tw-animate-css": "^1.4.0"
|
| 27 |
+
},
|
| 28 |
+
"devDependencies": {
|
| 29 |
+
"@tailwindcss/postcss": "^4",
|
| 30 |
+
"@types/node": "^20",
|
| 31 |
+
"@types/react": "^19",
|
| 32 |
+
"@types/react-dom": "^19",
|
| 33 |
+
"eslint": "^9",
|
| 34 |
+
"eslint-config-next": "16.2.4",
|
| 35 |
+
"tailwindcss": "^4",
|
| 36 |
+
"typescript": "^5"
|
| 37 |
+
}
|
| 38 |
+
}
|
frontend/postcss.config.mjs
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
const config = {
|
| 2 |
+
plugins: {
|
| 3 |
+
"@tailwindcss/postcss": {},
|
| 4 |
+
},
|
| 5 |
+
};
|
| 6 |
+
|
| 7 |
+
export default config;
|
frontend/public/file.svg
ADDED
|
|
frontend/public/globe.svg
ADDED
|
|
frontend/public/next.svg
ADDED
|
|
frontend/public/vercel.svg
ADDED
|
|
frontend/public/window.svg
ADDED
|
|
frontend/src/app/dashboard/page.tsx
ADDED
|
@@ -0,0 +1,121 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"use client";
|
| 2 |
+
|
| 3 |
+
import { useEffect, useState, useCallback } from "react";
|
| 4 |
+
import { useRouter } from "next/navigation";
|
| 5 |
+
import { useAuth } from "@/lib/auth";
|
| 6 |
+
import { api } from "@/lib/api";
|
| 7 |
+
import Header from "@/components/layout/Header";
|
| 8 |
+
import DocumentSidebar from "@/components/document/DocumentSidebar";
|
| 9 |
+
import ChatPanel from "@/components/chat/ChatPanel";
|
| 10 |
+
import PDFViewer from "@/components/document/PDFViewer";
|
| 11 |
+
|
| 12 |
+
export interface DocInfo {
|
| 13 |
+
id: string;
|
| 14 |
+
original_name: string;
|
| 15 |
+
file_size: number;
|
| 16 |
+
page_count: number;
|
| 17 |
+
chunk_count: number;
|
| 18 |
+
status: string;
|
| 19 |
+
error_message: string | null;
|
| 20 |
+
uploaded_at: string;
|
| 21 |
+
}
|
| 22 |
+
|
| 23 |
+
export default function DashboardPage() {
|
| 24 |
+
const { user, loading } = useAuth();
|
| 25 |
+
const router = useRouter();
|
| 26 |
+
|
| 27 |
+
const [documents, setDocuments] = useState<DocInfo[]>([]);
|
| 28 |
+
const [activeDoc, setActiveDoc] = useState<DocInfo | null>(null);
|
| 29 |
+
const [pdfPage, setPdfPage] = useState(1);
|
| 30 |
+
const [sidebarOpen, setSidebarOpen] = useState(true);
|
| 31 |
+
const [viewerOpen, setViewerOpen] = useState(true);
|
| 32 |
+
|
| 33 |
+
// Auth guard
|
| 34 |
+
useEffect(() => {
|
| 35 |
+
if (!loading && !user) router.replace("/login");
|
| 36 |
+
}, [user, loading, router]);
|
| 37 |
+
|
| 38 |
+
// Load documents
|
| 39 |
+
const loadDocuments = useCallback(async () => {
|
| 40 |
+
try {
|
| 41 |
+
const data = await api.get<{ documents: DocInfo[] }>("/api/v1/documents/");
|
| 42 |
+
setDocuments(data.documents);
|
| 43 |
+
} catch {
|
| 44 |
+
// silently fail
|
| 45 |
+
}
|
| 46 |
+
}, []);
|
| 47 |
+
|
| 48 |
+
useEffect(() => {
|
| 49 |
+
if (user) loadDocuments();
|
| 50 |
+
}, [user, loadDocuments]);
|
| 51 |
+
|
| 52 |
+
// Poll for processing status
|
| 53 |
+
useEffect(() => {
|
| 54 |
+
const hasPending = documents.some(
|
| 55 |
+
(d) => d.status === "pending" || d.status === "processing"
|
| 56 |
+
);
|
| 57 |
+
if (!hasPending) return;
|
| 58 |
+
|
| 59 |
+
const interval = setInterval(loadDocuments, 3000);
|
| 60 |
+
return () => clearInterval(interval);
|
| 61 |
+
}, [documents, loadDocuments]);
|
| 62 |
+
|
| 63 |
+
if (loading || !user) {
|
| 64 |
+
return (
|
| 65 |
+
<div className="min-h-screen flex items-center justify-center">
|
| 66 |
+
<div className="animate-pulse-glow w-12 h-12 rounded-full bg-primary/20" />
|
| 67 |
+
</div>
|
| 68 |
+
);
|
| 69 |
+
}
|
| 70 |
+
|
| 71 |
+
return (
|
| 72 |
+
<div className="h-screen flex flex-col overflow-hidden">
|
| 73 |
+
<Header
|
| 74 |
+
sidebarOpen={sidebarOpen}
|
| 75 |
+
onToggleSidebar={() => setSidebarOpen(!sidebarOpen)}
|
| 76 |
+
viewerOpen={viewerOpen}
|
| 77 |
+
onToggleViewer={() => setViewerOpen(!viewerOpen)}
|
| 78 |
+
/>
|
| 79 |
+
|
| 80 |
+
<div className="flex-1 flex overflow-hidden">
|
| 81 |
+
{/* ── Left: Document Sidebar ──────────────── */}
|
| 82 |
+
{sidebarOpen && (
|
| 83 |
+
<div className="w-72 flex-shrink-0 border-r border-border/50 overflow-hidden animate-fade-in-up">
|
| 84 |
+
<DocumentSidebar
|
| 85 |
+
documents={documents}
|
| 86 |
+
activeDoc={activeDoc}
|
| 87 |
+
onSelectDoc={(doc) => {
|
| 88 |
+
setActiveDoc(doc);
|
| 89 |
+
setPdfPage(1);
|
| 90 |
+
}}
|
| 91 |
+
onDocumentsChange={loadDocuments}
|
| 92 |
+
/>
|
| 93 |
+
</div>
|
| 94 |
+
)}
|
| 95 |
+
|
| 96 |
+
{/* ── Center: Chat Panel ─────────────────── */}
|
| 97 |
+
<div className="flex-1 min-w-0 flex flex-col">
|
| 98 |
+
<ChatPanel
|
| 99 |
+
activeDoc={activeDoc}
|
| 100 |
+
onCitationClick={(page) => {
|
| 101 |
+
setPdfPage(page);
|
| 102 |
+
if (!viewerOpen) setViewerOpen(true);
|
| 103 |
+
}}
|
| 104 |
+
/>
|
| 105 |
+
</div>
|
| 106 |
+
|
| 107 |
+
{/* ── Right: PDF Viewer ──────────────────── */}
|
| 108 |
+
{viewerOpen && activeDoc && activeDoc.original_name.endsWith(".pdf") && (
|
| 109 |
+
<div className="w-[480px] flex-shrink-0 border-l border-border/50 overflow-hidden animate-fade-in-up">
|
| 110 |
+
<PDFViewer
|
| 111 |
+
documentId={activeDoc.id}
|
| 112 |
+
currentPage={pdfPage}
|
| 113 |
+
onPageChange={setPdfPage}
|
| 114 |
+
totalPages={activeDoc.page_count}
|
| 115 |
+
/>
|
| 116 |
+
</div>
|
| 117 |
+
)}
|
| 118 |
+
</div>
|
| 119 |
+
</div>
|
| 120 |
+
);
|
| 121 |
+
}
|
frontend/src/app/favicon.ico
ADDED
|
|
frontend/src/app/globals.css
ADDED
|
@@ -0,0 +1,215 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
@import "tailwindcss";
|
| 2 |
+
@import "tw-animate-css";
|
| 3 |
+
@import "shadcn/tailwind.css";
|
| 4 |
+
|
| 5 |
+
@custom-variant dark (&:is(.dark *));
|
| 6 |
+
|
| 7 |
+
@theme inline {
|
| 8 |
+
--color-background: var(--background);
|
| 9 |
+
--color-foreground: var(--foreground);
|
| 10 |
+
--font-sans: var(--font-sans);
|
| 11 |
+
--font-mono: var(--font-geist-mono);
|
| 12 |
+
--font-heading: var(--font-sans);
|
| 13 |
+
--color-sidebar-ring: var(--sidebar-ring);
|
| 14 |
+
--color-sidebar-border: var(--sidebar-border);
|
| 15 |
+
--color-sidebar-accent-foreground: var(--sidebar-accent-foreground);
|
| 16 |
+
--color-sidebar-accent: var(--sidebar-accent);
|
| 17 |
+
--color-sidebar-primary-foreground: var(--sidebar-primary-foreground);
|
| 18 |
+
--color-sidebar-primary: var(--sidebar-primary);
|
| 19 |
+
--color-sidebar-foreground: var(--sidebar-foreground);
|
| 20 |
+
--color-sidebar: var(--sidebar);
|
| 21 |
+
--color-chart-5: var(--chart-5);
|
| 22 |
+
--color-chart-4: var(--chart-4);
|
| 23 |
+
--color-chart-3: var(--chart-3);
|
| 24 |
+
--color-chart-2: var(--chart-2);
|
| 25 |
+
--color-chart-1: var(--chart-1);
|
| 26 |
+
--color-ring: var(--ring);
|
| 27 |
+
--color-input: var(--input);
|
| 28 |
+
--color-border: var(--border);
|
| 29 |
+
--color-destructive: var(--destructive);
|
| 30 |
+
--color-accent-foreground: var(--accent-foreground);
|
| 31 |
+
--color-accent: var(--accent);
|
| 32 |
+
--color-muted-foreground: var(--muted-foreground);
|
| 33 |
+
--color-muted: var(--muted);
|
| 34 |
+
--color-secondary-foreground: var(--secondary-foreground);
|
| 35 |
+
--color-secondary: var(--secondary);
|
| 36 |
+
--color-primary-foreground: var(--primary-foreground);
|
| 37 |
+
--color-primary: var(--primary);
|
| 38 |
+
--color-popover-foreground: var(--popover-foreground);
|
| 39 |
+
--color-popover: var(--popover);
|
| 40 |
+
--color-card-foreground: var(--card-foreground);
|
| 41 |
+
--color-card: var(--card);
|
| 42 |
+
--radius-sm: calc(var(--radius) * 0.6);
|
| 43 |
+
--radius-md: calc(var(--radius) * 0.8);
|
| 44 |
+
--radius-lg: var(--radius);
|
| 45 |
+
--radius-xl: calc(var(--radius) * 1.4);
|
| 46 |
+
--radius-2xl: calc(var(--radius) * 1.8);
|
| 47 |
+
--radius-3xl: calc(var(--radius) * 2.2);
|
| 48 |
+
--radius-4xl: calc(var(--radius) * 2.6);
|
| 49 |
+
}
|
| 50 |
+
|
| 51 |
+
:root {
|
| 52 |
+
--background: oklch(0.145 0 0);
|
| 53 |
+
--foreground: oklch(0.985 0 0);
|
| 54 |
+
--card: oklch(0.178 0 0);
|
| 55 |
+
--card-foreground: oklch(0.985 0 0);
|
| 56 |
+
--popover: oklch(0.178 0 0);
|
| 57 |
+
--popover-foreground: oklch(0.985 0 0);
|
| 58 |
+
--primary: oklch(0.65 0.2 265);
|
| 59 |
+
--primary-foreground: oklch(0.985 0 0);
|
| 60 |
+
--secondary: oklch(0.22 0 0);
|
| 61 |
+
--secondary-foreground: oklch(0.985 0 0);
|
| 62 |
+
--muted: oklch(0.22 0 0);
|
| 63 |
+
--muted-foreground: oklch(0.6 0 0);
|
| 64 |
+
--accent: oklch(0.55 0.18 265);
|
| 65 |
+
--accent-foreground: oklch(0.985 0 0);
|
| 66 |
+
--destructive: oklch(0.704 0.191 22.216);
|
| 67 |
+
--border: oklch(1 0 0 / 10%);
|
| 68 |
+
--input: oklch(1 0 0 / 12%);
|
| 69 |
+
--ring: oklch(0.65 0.2 265);
|
| 70 |
+
--chart-1: oklch(0.75 0.18 265);
|
| 71 |
+
--chart-2: oklch(0.65 0.2 160);
|
| 72 |
+
--chart-3: oklch(0.55 0.23 30);
|
| 73 |
+
--chart-4: oklch(0.7 0.15 200);
|
| 74 |
+
--chart-5: oklch(0.6 0.2 300);
|
| 75 |
+
--radius: 0.625rem;
|
| 76 |
+
--sidebar: oklch(0.12 0 0);
|
| 77 |
+
--sidebar-foreground: oklch(0.985 0 0);
|
| 78 |
+
--sidebar-primary: oklch(0.65 0.2 265);
|
| 79 |
+
--sidebar-primary-foreground: oklch(0.985 0 0);
|
| 80 |
+
--sidebar-accent: oklch(0.22 0 0);
|
| 81 |
+
--sidebar-accent-foreground: oklch(0.985 0 0);
|
| 82 |
+
--sidebar-border: oklch(1 0 0 / 8%);
|
| 83 |
+
--sidebar-ring: oklch(0.65 0.2 265);
|
| 84 |
+
}
|
| 85 |
+
|
| 86 |
+
.light {
|
| 87 |
+
--background: oklch(0.985 0 0);
|
| 88 |
+
--foreground: oklch(0.145 0 0);
|
| 89 |
+
--card: oklch(1 0 0);
|
| 90 |
+
--card-foreground: oklch(0.145 0 0);
|
| 91 |
+
--popover: oklch(1 0 0);
|
| 92 |
+
--popover-foreground: oklch(0.145 0 0);
|
| 93 |
+
--primary: oklch(0.55 0.23 265);
|
| 94 |
+
--primary-foreground: oklch(0.985 0 0);
|
| 95 |
+
--secondary: oklch(0.95 0 0);
|
| 96 |
+
--secondary-foreground: oklch(0.205 0 0);
|
| 97 |
+
--muted: oklch(0.95 0 0);
|
| 98 |
+
--muted-foreground: oklch(0.45 0 0);
|
| 99 |
+
--accent: oklch(0.95 0.02 265);
|
| 100 |
+
--accent-foreground: oklch(0.45 0.2 265);
|
| 101 |
+
--destructive: oklch(0.577 0.245 27.325);
|
| 102 |
+
--border: oklch(0 0 0 / 10%);
|
| 103 |
+
--input: oklch(0 0 0 / 8%);
|
| 104 |
+
--ring: oklch(0.55 0.23 265);
|
| 105 |
+
--sidebar: oklch(0.97 0 0);
|
| 106 |
+
--sidebar-foreground: oklch(0.145 0 0);
|
| 107 |
+
--sidebar-primary: oklch(0.55 0.23 265);
|
| 108 |
+
--sidebar-primary-foreground: oklch(0.985 0 0);
|
| 109 |
+
--sidebar-accent: oklch(0.94 0 0);
|
| 110 |
+
--sidebar-accent-foreground: oklch(0.205 0 0);
|
| 111 |
+
--sidebar-border: oklch(0 0 0 / 8%);
|
| 112 |
+
--sidebar-ring: oklch(0.55 0.23 265);
|
| 113 |
+
}
|
| 114 |
+
|
| 115 |
+
@layer base {
|
| 116 |
+
* {
|
| 117 |
+
@apply border-border outline-ring/50;
|
| 118 |
+
}
|
| 119 |
+
body {
|
| 120 |
+
@apply bg-background text-foreground;
|
| 121 |
+
}
|
| 122 |
+
html {
|
| 123 |
+
@apply font-sans;
|
| 124 |
+
}
|
| 125 |
+
}
|
| 126 |
+
|
| 127 |
+
/* ── Custom Scrollbar ──────────────────────────────── */
|
| 128 |
+
::-webkit-scrollbar {
|
| 129 |
+
width: 6px;
|
| 130 |
+
height: 6px;
|
| 131 |
+
}
|
| 132 |
+
::-webkit-scrollbar-track {
|
| 133 |
+
background: transparent;
|
| 134 |
+
}
|
| 135 |
+
::-webkit-scrollbar-thumb {
|
| 136 |
+
background: oklch(0.4 0 0);
|
| 137 |
+
border-radius: 999px;
|
| 138 |
+
}
|
| 139 |
+
::-webkit-scrollbar-thumb:hover {
|
| 140 |
+
background: oklch(0.55 0 0);
|
| 141 |
+
}
|
| 142 |
+
|
| 143 |
+
/* ── Animations ────────────────────────────────────── */
|
| 144 |
+
@keyframes shimmer {
|
| 145 |
+
0% { background-position: -200% 0; }
|
| 146 |
+
100% { background-position: 200% 0; }
|
| 147 |
+
}
|
| 148 |
+
|
| 149 |
+
@keyframes fadeInUp {
|
| 150 |
+
from { opacity: 0; transform: translateY(12px); }
|
| 151 |
+
to { opacity: 1; transform: translateY(0); }
|
| 152 |
+
}
|
| 153 |
+
|
| 154 |
+
@keyframes pulse-glow {
|
| 155 |
+
0%, 100% { box-shadow: 0 0 8px oklch(0.65 0.2 265 / 30%); }
|
| 156 |
+
50% { box-shadow: 0 0 20px oklch(0.65 0.2 265 / 50%); }
|
| 157 |
+
}
|
| 158 |
+
|
| 159 |
+
.animate-fade-in-up {
|
| 160 |
+
animation: fadeInUp 0.4s ease-out forwards;
|
| 161 |
+
}
|
| 162 |
+
|
| 163 |
+
.animate-shimmer {
|
| 164 |
+
background: linear-gradient(90deg, transparent 33%, oklch(1 0 0 / 5%) 50%, transparent 66%);
|
| 165 |
+
background-size: 200% 100%;
|
| 166 |
+
animation: shimmer 1.5s infinite;
|
| 167 |
+
}
|
| 168 |
+
|
| 169 |
+
.animate-pulse-glow {
|
| 170 |
+
animation: pulse-glow 2s ease-in-out infinite;
|
| 171 |
+
}
|
| 172 |
+
|
| 173 |
+
/* ── Markdown Styles in Chat ───────────────────────── */
|
| 174 |
+
.prose-chat h1, .prose-chat h2, .prose-chat h3 {
|
| 175 |
+
font-weight: 600;
|
| 176 |
+
margin-top: 1em;
|
| 177 |
+
margin-bottom: 0.5em;
|
| 178 |
+
}
|
| 179 |
+
.prose-chat h1 { font-size: 1.25rem; }
|
| 180 |
+
.prose-chat h2 { font-size: 1.1rem; }
|
| 181 |
+
.prose-chat h3 { font-size: 1rem; }
|
| 182 |
+
.prose-chat p { margin-bottom: 0.75em; line-height: 1.7; }
|
| 183 |
+
.prose-chat ul, .prose-chat ol { padding-left: 1.5em; margin-bottom: 0.75em; }
|
| 184 |
+
.prose-chat li { margin-bottom: 0.25em; }
|
| 185 |
+
.prose-chat code {
|
| 186 |
+
background: oklch(1 0 0 / 8%);
|
| 187 |
+
padding: 0.15em 0.4em;
|
| 188 |
+
border-radius: 4px;
|
| 189 |
+
font-size: 0.9em;
|
| 190 |
+
}
|
| 191 |
+
.prose-chat pre {
|
| 192 |
+
background: oklch(0.12 0 0);
|
| 193 |
+
padding: 1em;
|
| 194 |
+
border-radius: 8px;
|
| 195 |
+
overflow-x: auto;
|
| 196 |
+
margin-bottom: 0.75em;
|
| 197 |
+
}
|
| 198 |
+
.prose-chat pre code {
|
| 199 |
+
background: none;
|
| 200 |
+
padding: 0;
|
| 201 |
+
}
|
| 202 |
+
.prose-chat blockquote {
|
| 203 |
+
border-left: 3px solid oklch(0.65 0.2 265);
|
| 204 |
+
padding-left: 1em;
|
| 205 |
+
margin-left: 0;
|
| 206 |
+
margin-bottom: 0.75em;
|
| 207 |
+
color: oklch(0.7 0 0);
|
| 208 |
+
}
|
| 209 |
+
.prose-chat strong { color: oklch(0.9 0 0); }
|
| 210 |
+
.prose-chat a { color: oklch(0.65 0.2 265); text-decoration: underline; }
|
| 211 |
+
|
| 212 |
+
.light .prose-chat code { background: oklch(0 0 0 / 6%); }
|
| 213 |
+
.light .prose-chat pre { background: oklch(0.96 0 0); }
|
| 214 |
+
.light .prose-chat strong { color: oklch(0.2 0 0); }
|
| 215 |
+
.light .prose-chat blockquote { color: oklch(0.4 0 0); }
|
frontend/src/app/layout.tsx
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import type { Metadata } from "next";
|
| 2 |
+
import { Inter } from "next/font/google";
|
| 3 |
+
import "./globals.css";
|
| 4 |
+
import { AuthProvider } from "@/lib/auth";
|
| 5 |
+
import { TooltipProvider } from "@/components/ui/tooltip";
|
| 6 |
+
|
| 7 |
+
const inter = Inter({
|
| 8 |
+
variable: "--font-sans",
|
| 9 |
+
subsets: ["latin"],
|
| 10 |
+
display: "swap",
|
| 11 |
+
});
|
| 12 |
+
|
| 13 |
+
export const metadata: Metadata = {
|
| 14 |
+
title: "Document AI Analyst — Enterprise RAG System",
|
| 15 |
+
description:
|
| 16 |
+
"Upload complex PDFs and chat with an AI agent that pulls specific insights, summarizes data, and accurately cites sources using Retrieval-Augmented Generation.",
|
| 17 |
+
keywords: ["RAG", "Document AI", "PDF Analysis", "LLM", "Vector Search"],
|
| 18 |
+
};
|
| 19 |
+
|
| 20 |
+
export default function RootLayout({
|
| 21 |
+
children,
|
| 22 |
+
}: Readonly<{
|
| 23 |
+
children: React.ReactNode;
|
| 24 |
+
}>) {
|
| 25 |
+
return (
|
| 26 |
+
<html lang="en" className={`${inter.variable} dark h-full antialiased`}>
|
| 27 |
+
<body className="min-h-full flex flex-col bg-background text-foreground">
|
| 28 |
+
<AuthProvider>
|
| 29 |
+
<TooltipProvider>
|
| 30 |
+
{children}
|
| 31 |
+
</TooltipProvider>
|
| 32 |
+
</AuthProvider>
|
| 33 |
+
</body>
|
| 34 |
+
</html>
|
| 35 |
+
);
|
| 36 |
+
}
|
frontend/src/app/login/page.tsx
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"use client";
|
| 2 |
+
|
| 3 |
+
import { useState } from "react";
|
| 4 |
+
import { useRouter } from "next/navigation";
|
| 5 |
+
import { useAuth } from "@/lib/auth";
|
| 6 |
+
import { Button } from "@/components/ui/button";
|
| 7 |
+
import { Input } from "@/components/ui/input";
|
| 8 |
+
import { Card, CardContent, CardHeader, CardTitle, CardDescription } from "@/components/ui/card";
|
| 9 |
+
import { Brain, Eye, EyeOff } from "lucide-react";
|
| 10 |
+
import Link from "next/link";
|
| 11 |
+
|
| 12 |
+
export default function LoginPage() {
|
| 13 |
+
const { login } = useAuth();
|
| 14 |
+
const router = useRouter();
|
| 15 |
+
const [email, setEmail] = useState("");
|
| 16 |
+
const [password, setPassword] = useState("");
|
| 17 |
+
const [showPw, setShowPw] = useState(false);
|
| 18 |
+
const [error, setError] = useState("");
|
| 19 |
+
const [loading, setLoading] = useState(false);
|
| 20 |
+
|
| 21 |
+
const handleSubmit = async (e: React.FormEvent) => {
|
| 22 |
+
e.preventDefault();
|
| 23 |
+
setError("");
|
| 24 |
+
setLoading(true);
|
| 25 |
+
|
| 26 |
+
try {
|
| 27 |
+
await login(email, password);
|
| 28 |
+
router.replace("/dashboard");
|
| 29 |
+
} catch (err: unknown) {
|
| 30 |
+
const message = err instanceof Error ? err.message : "Login failed";
|
| 31 |
+
setError(message);
|
| 32 |
+
} finally {
|
| 33 |
+
setLoading(false);
|
| 34 |
+
}
|
| 35 |
+
};
|
| 36 |
+
|
| 37 |
+
return (
|
| 38 |
+
<div className="min-h-screen flex items-center justify-center px-4">
|
| 39 |
+
{/* Background glow */}
|
| 40 |
+
<div className="absolute top-1/3 left-1/2 -translate-x-1/2 w-[500px] h-[300px] bg-primary/8 rounded-full blur-[100px] pointer-events-none" />
|
| 41 |
+
|
| 42 |
+
<Card className="w-full max-w-md relative z-10 bg-card/80 backdrop-blur-xl border-border/50 animate-fade-in-up">
|
| 43 |
+
<CardHeader className="text-center pb-2">
|
| 44 |
+
<div className="flex justify-center mb-4">
|
| 45 |
+
<div className="w-12 h-12 rounded-xl bg-primary/15 flex items-center justify-center">
|
| 46 |
+
<Brain className="w-6 h-6 text-primary" />
|
| 47 |
+
</div>
|
| 48 |
+
</div>
|
| 49 |
+
<CardTitle className="text-2xl font-bold">Welcome back</CardTitle>
|
| 50 |
+
<CardDescription>Sign in to your Document AI Analyst account</CardDescription>
|
| 51 |
+
</CardHeader>
|
| 52 |
+
|
| 53 |
+
<CardContent>
|
| 54 |
+
<form onSubmit={handleSubmit} className="space-y-4">
|
| 55 |
+
{error && (
|
| 56 |
+
<div className="p-3 rounded-lg bg-destructive/10 border border-destructive/30 text-sm text-destructive">
|
| 57 |
+
{error}
|
| 58 |
+
</div>
|
| 59 |
+
)}
|
| 60 |
+
|
| 61 |
+
<div className="space-y-2">
|
| 62 |
+
<label className="text-sm font-medium">Email</label>
|
| 63 |
+
<Input
|
| 64 |
+
id="login-email"
|
| 65 |
+
type="email"
|
| 66 |
+
placeholder="you@example.com"
|
| 67 |
+
value={email}
|
| 68 |
+
onChange={(e) => setEmail(e.target.value)}
|
| 69 |
+
required
|
| 70 |
+
className="h-11"
|
| 71 |
+
/>
|
| 72 |
+
</div>
|
| 73 |
+
|
| 74 |
+
<div className="space-y-2">
|
| 75 |
+
<label className="text-sm font-medium">Password</label>
|
| 76 |
+
<div className="relative">
|
| 77 |
+
<Input
|
| 78 |
+
id="login-password"
|
| 79 |
+
type={showPw ? "text" : "password"}
|
| 80 |
+
placeholder="••••••••"
|
| 81 |
+
value={password}
|
| 82 |
+
onChange={(e) => setPassword(e.target.value)}
|
| 83 |
+
required
|
| 84 |
+
className="h-11 pr-10"
|
| 85 |
+
/>
|
| 86 |
+
<button
|
| 87 |
+
type="button"
|
| 88 |
+
onClick={() => setShowPw(!showPw)}
|
| 89 |
+
className="absolute right-3 top-1/2 -translate-y-1/2 text-muted-foreground hover:text-foreground transition-colors"
|
| 90 |
+
>
|
| 91 |
+
{showPw ? <EyeOff className="w-4 h-4" /> : <Eye className="w-4 h-4" />}
|
| 92 |
+
</button>
|
| 93 |
+
</div>
|
| 94 |
+
</div>
|
| 95 |
+
|
| 96 |
+
<Button type="submit" className="w-full h-11 text-base" disabled={loading}>
|
| 97 |
+
{loading ? (
|
| 98 |
+
<span className="flex items-center gap-2">
|
| 99 |
+
<span className="w-4 h-4 border-2 border-primary-foreground/30 border-t-primary-foreground rounded-full animate-spin" />
|
| 100 |
+
Signing in...
|
| 101 |
+
</span>
|
| 102 |
+
) : (
|
| 103 |
+
"Sign In"
|
| 104 |
+
)}
|
| 105 |
+
</Button>
|
| 106 |
+
</form>
|
| 107 |
+
|
| 108 |
+
<p className="text-center text-sm text-muted-foreground mt-6">
|
| 109 |
+
Don't have an account?{" "}
|
| 110 |
+
<Link href="/register" className="text-primary hover:underline font-medium">
|
| 111 |
+
Create one
|
| 112 |
+
</Link>
|
| 113 |
+
</p>
|
| 114 |
+
</CardContent>
|
| 115 |
+
</Card>
|
| 116 |
+
</div>
|
| 117 |
+
);
|
| 118 |
+
}
|
frontend/src/app/page.tsx
ADDED
|
@@ -0,0 +1,121 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"use client";
|
| 2 |
+
|
| 3 |
+
import { useEffect } from "react";
|
| 4 |
+
import { useRouter } from "next/navigation";
|
| 5 |
+
import { useAuth } from "@/lib/auth";
|
| 6 |
+
import { FileText, MessageSquare, Brain, Shield, Zap, Search } from "lucide-react";
|
| 7 |
+
import { Button } from "@/components/ui/button";
|
| 8 |
+
import Link from "next/link";
|
| 9 |
+
|
| 10 |
+
export default function HomePage() {
|
| 11 |
+
const { user, loading } = useAuth();
|
| 12 |
+
const router = useRouter();
|
| 13 |
+
|
| 14 |
+
useEffect(() => {
|
| 15 |
+
if (!loading && user) {
|
| 16 |
+
router.replace("/dashboard");
|
| 17 |
+
}
|
| 18 |
+
}, [user, loading, router]);
|
| 19 |
+
|
| 20 |
+
if (loading) {
|
| 21 |
+
return (
|
| 22 |
+
<div className="min-h-screen flex items-center justify-center">
|
| 23 |
+
<div className="animate-pulse-glow w-12 h-12 rounded-full bg-primary/20" />
|
| 24 |
+
</div>
|
| 25 |
+
);
|
| 26 |
+
}
|
| 27 |
+
|
| 28 |
+
return (
|
| 29 |
+
<div className="min-h-screen flex flex-col">
|
| 30 |
+
{/* ── Hero ────────────────────────────────────── */}
|
| 31 |
+
<div className="flex-1 flex flex-col items-center justify-center px-6 py-20">
|
| 32 |
+
{/* Glow effect */}
|
| 33 |
+
<div className="absolute top-1/4 left-1/2 -translate-x-1/2 w-[600px] h-[400px] bg-primary/10 rounded-full blur-[120px] pointer-events-none" />
|
| 34 |
+
|
| 35 |
+
<div className="relative z-10 text-center max-w-3xl mx-auto animate-fade-in-up">
|
| 36 |
+
<div className="inline-flex items-center gap-2 px-4 py-1.5 rounded-full bg-primary/10 border border-primary/20 text-sm text-primary mb-8">
|
| 37 |
+
<Brain className="w-4 h-4" />
|
| 38 |
+
Enterprise Agentic RAG System
|
| 39 |
+
</div>
|
| 40 |
+
|
| 41 |
+
<h1 className="text-5xl sm:text-6xl font-bold tracking-tight mb-6 leading-[1.1]">
|
| 42 |
+
Chat with your{" "}
|
| 43 |
+
<span className="bg-gradient-to-r from-primary to-[oklch(0.65_0.2_200)] bg-clip-text text-transparent">
|
| 44 |
+
documents
|
| 45 |
+
</span>{" "}
|
| 46 |
+
intelligently
|
| 47 |
+
</h1>
|
| 48 |
+
|
| 49 |
+
<p className="text-lg text-muted-foreground max-w-xl mx-auto mb-10 leading-relaxed">
|
| 50 |
+
Upload financial reports, legal contracts, or research papers and get
|
| 51 |
+
accurate, cited insights powered by advanced AI retrieval.
|
| 52 |
+
</p>
|
| 53 |
+
|
| 54 |
+
<div className="flex gap-4 justify-center">
|
| 55 |
+
<Link href="/register">
|
| 56 |
+
<Button size="lg" className="px-8 text-base h-12">
|
| 57 |
+
Get Started Free
|
| 58 |
+
</Button>
|
| 59 |
+
</Link>
|
| 60 |
+
<Link href="/login">
|
| 61 |
+
<Button size="lg" variant="outline" className="px-8 text-base h-12">
|
| 62 |
+
Sign In
|
| 63 |
+
</Button>
|
| 64 |
+
</Link>
|
| 65 |
+
</div>
|
| 66 |
+
</div>
|
| 67 |
+
|
| 68 |
+
{/* ── Features Grid ────────────────────────── */}
|
| 69 |
+
<div className="relative z-10 mt-24 w-full max-w-4xl mx-auto grid grid-cols-1 sm:grid-cols-2 lg:grid-cols-3 gap-4">
|
| 70 |
+
{[
|
| 71 |
+
{
|
| 72 |
+
icon: FileText,
|
| 73 |
+
title: "Multi-Format Upload",
|
| 74 |
+
desc: "PDF, DOCX, TXT, and Markdown with smart chunking",
|
| 75 |
+
},
|
| 76 |
+
{
|
| 77 |
+
icon: Search,
|
| 78 |
+
title: "Semantic Search",
|
| 79 |
+
desc: "Two-stage retrieval with cross-encoder reranking",
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
icon: MessageSquare,
|
| 83 |
+
title: "Streaming Chat",
|
| 84 |
+
desc: "Real-time AI responses with source citations",
|
| 85 |
+
},
|
| 86 |
+
{
|
| 87 |
+
icon: Zap,
|
| 88 |
+
title: "Instant Insights",
|
| 89 |
+
desc: "Extract key facts, summaries, and data points",
|
| 90 |
+
},
|
| 91 |
+
{
|
| 92 |
+
icon: Shield,
|
| 93 |
+
title: "Data Isolation",
|
| 94 |
+
desc: "Per-user vector collections for complete privacy",
|
| 95 |
+
},
|
| 96 |
+
{
|
| 97 |
+
icon: Brain,
|
| 98 |
+
title: "Open-Source LLMs",
|
| 99 |
+
desc: "Powered by Mistral and HuggingFace ecosystem",
|
| 100 |
+
},
|
| 101 |
+
].map((f, i) => (
|
| 102 |
+
<div
|
| 103 |
+
key={i}
|
| 104 |
+
className="group p-5 rounded-xl border border-border/50 bg-card/50 backdrop-blur-sm hover:border-primary/30 hover:bg-card transition-all duration-300"
|
| 105 |
+
style={{ animationDelay: `${i * 80}ms` }}
|
| 106 |
+
>
|
| 107 |
+
<f.icon className="w-5 h-5 text-primary mb-3 group-hover:scale-110 transition-transform" />
|
| 108 |
+
<h3 className="font-semibold text-sm mb-1">{f.title}</h3>
|
| 109 |
+
<p className="text-xs text-muted-foreground leading-relaxed">{f.desc}</p>
|
| 110 |
+
</div>
|
| 111 |
+
))}
|
| 112 |
+
</div>
|
| 113 |
+
</div>
|
| 114 |
+
|
| 115 |
+
{/* ── Footer ──────────────────────────────────── */}
|
| 116 |
+
<footer className="text-center py-6 text-xs text-muted-foreground border-t border-border/50">
|
| 117 |
+
Built with FastAPI • LangChain • ChromaDB • HuggingFace • Next.js
|
| 118 |
+
</footer>
|
| 119 |
+
</div>
|
| 120 |
+
);
|
| 121 |
+
}
|
frontend/src/app/register/page.tsx
ADDED
|
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"use client";
|
| 2 |
+
|
| 3 |
+
import { useState } from "react";
|
| 4 |
+
import { useRouter } from "next/navigation";
|
| 5 |
+
import { useAuth } from "@/lib/auth";
|
| 6 |
+
import { Button } from "@/components/ui/button";
|
| 7 |
+
import { Input } from "@/components/ui/input";
|
| 8 |
+
import { Card, CardContent, CardHeader, CardTitle, CardDescription } from "@/components/ui/card";
|
| 9 |
+
import { Brain, Eye, EyeOff } from "lucide-react";
|
| 10 |
+
import Link from "next/link";
|
| 11 |
+
|
| 12 |
+
export default function RegisterPage() {
|
| 13 |
+
const { register } = useAuth();
|
| 14 |
+
const router = useRouter();
|
| 15 |
+
const [username, setUsername] = useState("");
|
| 16 |
+
const [email, setEmail] = useState("");
|
| 17 |
+
const [password, setPassword] = useState("");
|
| 18 |
+
const [showPw, setShowPw] = useState(false);
|
| 19 |
+
const [error, setError] = useState("");
|
| 20 |
+
const [loading, setLoading] = useState(false);
|
| 21 |
+
|
| 22 |
+
const handleSubmit = async (e: React.FormEvent) => {
|
| 23 |
+
e.preventDefault();
|
| 24 |
+
setError("");
|
| 25 |
+
setLoading(true);
|
| 26 |
+
|
| 27 |
+
try {
|
| 28 |
+
await register(username, email, password);
|
| 29 |
+
router.replace("/dashboard");
|
| 30 |
+
} catch (err: unknown) {
|
| 31 |
+
const message = err instanceof Error ? err.message : "Registration failed";
|
| 32 |
+
setError(message);
|
| 33 |
+
} finally {
|
| 34 |
+
setLoading(false);
|
| 35 |
+
}
|
| 36 |
+
};
|
| 37 |
+
|
| 38 |
+
return (
|
| 39 |
+
<div className="min-h-screen flex items-center justify-center px-4">
|
| 40 |
+
<div className="absolute top-1/3 left-1/2 -translate-x-1/2 w-[500px] h-[300px] bg-primary/8 rounded-full blur-[100px] pointer-events-none" />
|
| 41 |
+
|
| 42 |
+
<Card className="w-full max-w-md relative z-10 bg-card/80 backdrop-blur-xl border-border/50 animate-fade-in-up">
|
| 43 |
+
<CardHeader className="text-center pb-2">
|
| 44 |
+
<div className="flex justify-center mb-4">
|
| 45 |
+
<div className="w-12 h-12 rounded-xl bg-primary/15 flex items-center justify-center">
|
| 46 |
+
<Brain className="w-6 h-6 text-primary" />
|
| 47 |
+
</div>
|
| 48 |
+
</div>
|
| 49 |
+
<CardTitle className="text-2xl font-bold">Create Account</CardTitle>
|
| 50 |
+
<CardDescription>Start analyzing documents with AI</CardDescription>
|
| 51 |
+
</CardHeader>
|
| 52 |
+
|
| 53 |
+
<CardContent>
|
| 54 |
+
<form onSubmit={handleSubmit} className="space-y-4">
|
| 55 |
+
{error && (
|
| 56 |
+
<div className="p-3 rounded-lg bg-destructive/10 border border-destructive/30 text-sm text-destructive">
|
| 57 |
+
{error}
|
| 58 |
+
</div>
|
| 59 |
+
)}
|
| 60 |
+
|
| 61 |
+
<div className="space-y-2">
|
| 62 |
+
<label className="text-sm font-medium">Username</label>
|
| 63 |
+
<Input
|
| 64 |
+
id="reg-username"
|
| 65 |
+
type="text"
|
| 66 |
+
placeholder="paramjit"
|
| 67 |
+
value={username}
|
| 68 |
+
onChange={(e) => setUsername(e.target.value)}
|
| 69 |
+
required
|
| 70 |
+
minLength={3}
|
| 71 |
+
className="h-11"
|
| 72 |
+
/>
|
| 73 |
+
</div>
|
| 74 |
+
|
| 75 |
+
<div className="space-y-2">
|
| 76 |
+
<label className="text-sm font-medium">Email</label>
|
| 77 |
+
<Input
|
| 78 |
+
id="reg-email"
|
| 79 |
+
type="email"
|
| 80 |
+
placeholder="you@example.com"
|
| 81 |
+
value={email}
|
| 82 |
+
onChange={(e) => setEmail(e.target.value)}
|
| 83 |
+
required
|
| 84 |
+
className="h-11"
|
| 85 |
+
/>
|
| 86 |
+
</div>
|
| 87 |
+
|
| 88 |
+
<div className="space-y-2">
|
| 89 |
+
<label className="text-sm font-medium">Password</label>
|
| 90 |
+
<div className="relative">
|
| 91 |
+
<Input
|
| 92 |
+
id="reg-password"
|
| 93 |
+
type={showPw ? "text" : "password"}
|
| 94 |
+
placeholder="Minimum 6 characters"
|
| 95 |
+
value={password}
|
| 96 |
+
onChange={(e) => setPassword(e.target.value)}
|
| 97 |
+
required
|
| 98 |
+
minLength={6}
|
| 99 |
+
className="h-11 pr-10"
|
| 100 |
+
/>
|
| 101 |
+
<button
|
| 102 |
+
type="button"
|
| 103 |
+
onClick={() => setShowPw(!showPw)}
|
| 104 |
+
className="absolute right-3 top-1/2 -translate-y-1/2 text-muted-foreground hover:text-foreground transition-colors"
|
| 105 |
+
>
|
| 106 |
+
{showPw ? <EyeOff className="w-4 h-4" /> : <Eye className="w-4 h-4" />}
|
| 107 |
+
</button>
|
| 108 |
+
</div>
|
| 109 |
+
</div>
|
| 110 |
+
|
| 111 |
+
<Button type="submit" className="w-full h-11 text-base" disabled={loading}>
|
| 112 |
+
{loading ? (
|
| 113 |
+
<span className="flex items-center gap-2">
|
| 114 |
+
<span className="w-4 h-4 border-2 border-primary-foreground/30 border-t-primary-foreground rounded-full animate-spin" />
|
| 115 |
+
Creating account...
|
| 116 |
+
</span>
|
| 117 |
+
) : (
|
| 118 |
+
"Create Account"
|
| 119 |
+
)}
|
| 120 |
+
</Button>
|
| 121 |
+
</form>
|
| 122 |
+
|
| 123 |
+
<p className="text-center text-sm text-muted-foreground mt-6">
|
| 124 |
+
Already have an account?{" "}
|
| 125 |
+
<Link href="/login" className="text-primary hover:underline font-medium">
|
| 126 |
+
Sign in
|
| 127 |
+
</Link>
|
| 128 |
+
</p>
|
| 129 |
+
</CardContent>
|
| 130 |
+
</Card>
|
| 131 |
+
</div>
|
| 132 |
+
);
|
| 133 |
+
}
|
frontend/src/components/chat/ChatPanel.tsx
ADDED
|
@@ -0,0 +1,250 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"use client";
|
| 2 |
+
|
| 3 |
+
import { useState, useRef, useEffect } from "react";
|
| 4 |
+
import type { DocInfo } from "@/app/dashboard/page";
|
| 5 |
+
import { api } from "@/lib/api";
|
| 6 |
+
import { ScrollArea } from "@/components/ui/scroll-area";
|
| 7 |
+
import { Button } from "@/components/ui/button";
|
| 8 |
+
import { Textarea } from "@/components/ui/textarea";
|
| 9 |
+
import MessageBubble from "./MessageBubble";
|
| 10 |
+
import SourceCard from "./SourceCard";
|
| 11 |
+
import { Send, Loader2, Trash2, MessageSquare } from "lucide-react";
|
| 12 |
+
|
| 13 |
+
export interface SourceChunk {
|
| 14 |
+
text: string;
|
| 15 |
+
filename: string;
|
| 16 |
+
page: number;
|
| 17 |
+
score: number;
|
| 18 |
+
confidence: number;
|
| 19 |
+
}
|
| 20 |
+
|
| 21 |
+
export interface ChatMsg {
|
| 22 |
+
id: string;
|
| 23 |
+
role: "user" | "assistant";
|
| 24 |
+
content: string;
|
| 25 |
+
sources: SourceChunk[];
|
| 26 |
+
isStreaming?: boolean;
|
| 27 |
+
}
|
| 28 |
+
|
| 29 |
+
interface Props {
|
| 30 |
+
activeDoc: DocInfo | null;
|
| 31 |
+
onCitationClick: (page: number) => void;
|
| 32 |
+
}
|
| 33 |
+
|
| 34 |
+
export default function ChatPanel({ activeDoc, onCitationClick }: Props) {
|
| 35 |
+
const [messages, setMessages] = useState<ChatMsg[]>([]);
|
| 36 |
+
const [input, setInput] = useState("");
|
| 37 |
+
const [streaming, setStreaming] = useState(false);
|
| 38 |
+
const scrollRef = useRef<HTMLDivElement>(null);
|
| 39 |
+
const prevDocId = useRef<string | null>(null);
|
| 40 |
+
|
| 41 |
+
// Auto-scroll to bottom
|
| 42 |
+
useEffect(() => {
|
| 43 |
+
if (scrollRef.current) {
|
| 44 |
+
scrollRef.current.scrollTop = scrollRef.current.scrollHeight;
|
| 45 |
+
}
|
| 46 |
+
}, [messages]);
|
| 47 |
+
|
| 48 |
+
// Load history on doc change
|
| 49 |
+
useEffect(() => {
|
| 50 |
+
if (!activeDoc || activeDoc.id === prevDocId.current) return;
|
| 51 |
+
prevDocId.current = activeDoc.id;
|
| 52 |
+
|
| 53 |
+
api
|
| 54 |
+
.get<{ messages: Array<{ id: string; role: string; content: string; sources?: SourceChunk[] }> }>(
|
| 55 |
+
`/api/v1/chat/history/${activeDoc.id}`
|
| 56 |
+
)
|
| 57 |
+
.then((data) => {
|
| 58 |
+
setMessages(
|
| 59 |
+
data.messages.map((m) => ({
|
| 60 |
+
id: m.id,
|
| 61 |
+
role: m.role as "user" | "assistant",
|
| 62 |
+
content: m.content,
|
| 63 |
+
sources: m.sources || [],
|
| 64 |
+
}))
|
| 65 |
+
);
|
| 66 |
+
})
|
| 67 |
+
.catch(() => setMessages([]));
|
| 68 |
+
}, [activeDoc]);
|
| 69 |
+
|
| 70 |
+
const handleSend = async () => {
|
| 71 |
+
if (!input.trim() || streaming) return;
|
| 72 |
+
|
| 73 |
+
const question = input.trim();
|
| 74 |
+
setInput("");
|
| 75 |
+
|
| 76 |
+
// Add user message
|
| 77 |
+
const userMsg: ChatMsg = {
|
| 78 |
+
id: `user-${Date.now()}`,
|
| 79 |
+
role: "user",
|
| 80 |
+
content: question,
|
| 81 |
+
sources: [],
|
| 82 |
+
};
|
| 83 |
+
setMessages((prev) => [...prev, userMsg]);
|
| 84 |
+
|
| 85 |
+
// Add placeholder assistant message
|
| 86 |
+
const assistantId = `assistant-${Date.now()}`;
|
| 87 |
+
const assistantMsg: ChatMsg = {
|
| 88 |
+
id: assistantId,
|
| 89 |
+
role: "assistant",
|
| 90 |
+
content: "",
|
| 91 |
+
sources: [],
|
| 92 |
+
isStreaming: true,
|
| 93 |
+
};
|
| 94 |
+
setMessages((prev) => [...prev, assistantMsg]);
|
| 95 |
+
setStreaming(true);
|
| 96 |
+
|
| 97 |
+
try {
|
| 98 |
+
const stream = api.streamPost("/api/v1/chat/ask/stream", {
|
| 99 |
+
question,
|
| 100 |
+
document_id: activeDoc?.id || null,
|
| 101 |
+
});
|
| 102 |
+
|
| 103 |
+
for await (const event of stream) {
|
| 104 |
+
if (event.type === "token") {
|
| 105 |
+
setMessages((prev) =>
|
| 106 |
+
prev.map((m) =>
|
| 107 |
+
m.id === assistantId
|
| 108 |
+
? { ...m, content: m.content + (event.data as string) }
|
| 109 |
+
: m
|
| 110 |
+
)
|
| 111 |
+
);
|
| 112 |
+
} else if (event.type === "sources") {
|
| 113 |
+
setMessages((prev) =>
|
| 114 |
+
prev.map((m) =>
|
| 115 |
+
m.id === assistantId
|
| 116 |
+
? { ...m, sources: event.data as SourceChunk[] }
|
| 117 |
+
: m
|
| 118 |
+
)
|
| 119 |
+
);
|
| 120 |
+
} else if (event.type === "error") {
|
| 121 |
+
setMessages((prev) =>
|
| 122 |
+
prev.map((m) =>
|
| 123 |
+
m.id === assistantId
|
| 124 |
+
? { ...m, content: `Error: ${event.data}`, isStreaming: false }
|
| 125 |
+
: m
|
| 126 |
+
)
|
| 127 |
+
);
|
| 128 |
+
} else if (event.type === "done") {
|
| 129 |
+
setMessages((prev) =>
|
| 130 |
+
prev.map((m) =>
|
| 131 |
+
m.id === assistantId ? { ...m, isStreaming: false } : m
|
| 132 |
+
)
|
| 133 |
+
);
|
| 134 |
+
}
|
| 135 |
+
}
|
| 136 |
+
} catch (err) {
|
| 137 |
+
setMessages((prev) =>
|
| 138 |
+
prev.map((m) =>
|
| 139 |
+
m.id === assistantId
|
| 140 |
+
? {
|
| 141 |
+
...m,
|
| 142 |
+
content: `Failed to get response: ${err instanceof Error ? err.message : "Unknown error"}`,
|
| 143 |
+
isStreaming: false,
|
| 144 |
+
}
|
| 145 |
+
: m
|
| 146 |
+
)
|
| 147 |
+
);
|
| 148 |
+
} finally {
|
| 149 |
+
setStreaming(false);
|
| 150 |
+
}
|
| 151 |
+
};
|
| 152 |
+
|
| 153 |
+
const handleClear = async () => {
|
| 154 |
+
if (!activeDoc || !confirm("Clear all chat history for this document?")) return;
|
| 155 |
+
try {
|
| 156 |
+
await api.delete(`/api/v1/chat/history/${activeDoc.id}`);
|
| 157 |
+
setMessages([]);
|
| 158 |
+
} catch {
|
| 159 |
+
// silently fail
|
| 160 |
+
}
|
| 161 |
+
};
|
| 162 |
+
|
| 163 |
+
const handleKeyDown = (e: React.KeyboardEvent) => {
|
| 164 |
+
if (e.key === "Enter" && !e.shiftKey) {
|
| 165 |
+
e.preventDefault();
|
| 166 |
+
handleSend();
|
| 167 |
+
}
|
| 168 |
+
};
|
| 169 |
+
|
| 170 |
+
return (
|
| 171 |
+
<div className="h-full flex flex-col">
|
| 172 |
+
{/* ── Chat Messages ──────────────────────────── */}
|
| 173 |
+
<ScrollArea className="flex-1 px-4" ref={scrollRef}>
|
| 174 |
+
{messages.length === 0 ? (
|
| 175 |
+
<div className="h-full flex flex-col items-center justify-center py-20">
|
| 176 |
+
<div className="w-16 h-16 rounded-2xl bg-primary/10 flex items-center justify-center mb-4">
|
| 177 |
+
<MessageSquare className="w-8 h-8 text-primary/60" />
|
| 178 |
+
</div>
|
| 179 |
+
<h3 className="text-lg font-semibold mb-1">
|
| 180 |
+
{activeDoc ? "Ask about your document" : "Select a document"}
|
| 181 |
+
</h3>
|
| 182 |
+
<p className="text-sm text-muted-foreground text-center max-w-sm">
|
| 183 |
+
{activeDoc
|
| 184 |
+
? `"${activeDoc.original_name}" is ready. Ask any question and get cited answers.`
|
| 185 |
+
: "Upload and select a document from the sidebar to start chatting."}
|
| 186 |
+
</p>
|
| 187 |
+
</div>
|
| 188 |
+
) : (
|
| 189 |
+
<div className="py-4 space-y-1 max-w-3xl mx-auto">
|
| 190 |
+
{messages.map((msg) => (
|
| 191 |
+
<div key={msg.id}>
|
| 192 |
+
<MessageBubble message={msg} />
|
| 193 |
+
{msg.role === "assistant" && msg.sources.length > 0 && (
|
| 194 |
+
<div className="ml-10 mt-1 mb-3">
|
| 195 |
+
<SourceCard sources={msg.sources} onPageClick={onCitationClick} />
|
| 196 |
+
</div>
|
| 197 |
+
)}
|
| 198 |
+
</div>
|
| 199 |
+
))}
|
| 200 |
+
</div>
|
| 201 |
+
)}
|
| 202 |
+
</ScrollArea>
|
| 203 |
+
|
| 204 |
+
{/* ── Input Area ─────────────────────────────── */}
|
| 205 |
+
<div className="border-t border-border/50 p-4 bg-card/30 backdrop-blur-sm">
|
| 206 |
+
<div className="max-w-3xl mx-auto flex gap-2 items-end">
|
| 207 |
+
<Textarea
|
| 208 |
+
id="chat-input"
|
| 209 |
+
value={input}
|
| 210 |
+
onChange={(e) => setInput(e.target.value)}
|
| 211 |
+
onKeyDown={handleKeyDown}
|
| 212 |
+
placeholder={
|
| 213 |
+
activeDoc
|
| 214 |
+
? `Ask about "${activeDoc.original_name}"...`
|
| 215 |
+
: "Select a document first..."
|
| 216 |
+
}
|
| 217 |
+
disabled={streaming}
|
| 218 |
+
className="min-h-[44px] max-h-32 resize-none bg-background/50 border-border/50"
|
| 219 |
+
rows={1}
|
| 220 |
+
/>
|
| 221 |
+
<div className="flex gap-1.5 shrink-0">
|
| 222 |
+
<Button
|
| 223 |
+
id="send-btn"
|
| 224 |
+
size="icon"
|
| 225 |
+
onClick={handleSend}
|
| 226 |
+
disabled={!input.trim() || streaming}
|
| 227 |
+
className="h-[44px] w-[44px]"
|
| 228 |
+
>
|
| 229 |
+
{streaming ? (
|
| 230 |
+
<Loader2 className="w-4 h-4 animate-spin" />
|
| 231 |
+
) : (
|
| 232 |
+
<Send className="w-4 h-4" />
|
| 233 |
+
)}
|
| 234 |
+
</Button>
|
| 235 |
+
{messages.length > 0 && (
|
| 236 |
+
<Button
|
| 237 |
+
variant="ghost"
|
| 238 |
+
size="icon"
|
| 239 |
+
onClick={handleClear}
|
| 240 |
+
className="h-[44px] w-[44px] text-muted-foreground hover:text-destructive"
|
| 241 |
+
>
|
| 242 |
+
<Trash2 className="w-4 h-4" />
|
| 243 |
+
</Button>
|
| 244 |
+
)}
|
| 245 |
+
</div>
|
| 246 |
+
</div>
|
| 247 |
+
</div>
|
| 248 |
+
</div>
|
| 249 |
+
);
|
| 250 |
+
}
|
frontend/src/components/chat/MessageBubble.tsx
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"use client";
|
| 2 |
+
|
| 3 |
+
import ReactMarkdown from "react-markdown";
|
| 4 |
+
import remarkGfm from "remark-gfm";
|
| 5 |
+
import type { ChatMsg } from "./ChatPanel";
|
| 6 |
+
import { Brain, User } from "lucide-react";
|
| 7 |
+
|
| 8 |
+
interface Props {
|
| 9 |
+
message: ChatMsg;
|
| 10 |
+
}
|
| 11 |
+
|
| 12 |
+
export default function MessageBubble({ message }: Props) {
|
| 13 |
+
const isUser = message.role === "user";
|
| 14 |
+
|
| 15 |
+
return (
|
| 16 |
+
<div
|
| 17 |
+
className={`flex gap-3 py-3 animate-fade-in-up ${isUser ? "justify-end" : "justify-start"}`}
|
| 18 |
+
>
|
| 19 |
+
{!isUser && (
|
| 20 |
+
<div className="w-8 h-8 rounded-lg bg-primary/15 flex items-center justify-center shrink-0 mt-0.5">
|
| 21 |
+
<Brain className="w-4 h-4 text-primary" />
|
| 22 |
+
</div>
|
| 23 |
+
)}
|
| 24 |
+
|
| 25 |
+
<div
|
| 26 |
+
className={`max-w-[80%] rounded-xl px-4 py-3 ${
|
| 27 |
+
isUser
|
| 28 |
+
? "bg-primary text-primary-foreground rounded-br-sm"
|
| 29 |
+
: "bg-card border border-border/50 rounded-bl-sm"
|
| 30 |
+
}`}
|
| 31 |
+
>
|
| 32 |
+
{isUser ? (
|
| 33 |
+
<p className="text-sm leading-relaxed whitespace-pre-wrap">{message.content}</p>
|
| 34 |
+
) : (
|
| 35 |
+
<div className="prose-chat text-sm">
|
| 36 |
+
{message.content ? (
|
| 37 |
+
<ReactMarkdown remarkPlugins={[remarkGfm]}>
|
| 38 |
+
{message.content}
|
| 39 |
+
</ReactMarkdown>
|
| 40 |
+
) : message.isStreaming ? (
|
| 41 |
+
<div className="flex items-center gap-1.5">
|
| 42 |
+
<span className="w-1.5 h-1.5 rounded-full bg-primary/60 animate-bounce [animation-delay:0ms]" />
|
| 43 |
+
<span className="w-1.5 h-1.5 rounded-full bg-primary/60 animate-bounce [animation-delay:150ms]" />
|
| 44 |
+
<span className="w-1.5 h-1.5 rounded-full bg-primary/60 animate-bounce [animation-delay:300ms]" />
|
| 45 |
+
</div>
|
| 46 |
+
) : null}
|
| 47 |
+
{message.isStreaming && message.content && (
|
| 48 |
+
<span className="inline-block w-0.5 h-4 bg-primary/60 animate-pulse ml-0.5 align-text-bottom" />
|
| 49 |
+
)}
|
| 50 |
+
</div>
|
| 51 |
+
)}
|
| 52 |
+
</div>
|
| 53 |
+
|
| 54 |
+
{isUser && (
|
| 55 |
+
<div className="w-8 h-8 rounded-lg bg-primary/20 flex items-center justify-center shrink-0 mt-0.5">
|
| 56 |
+
<User className="w-4 h-4 text-primary-foreground" />
|
| 57 |
+
</div>
|
| 58 |
+
)}
|
| 59 |
+
</div>
|
| 60 |
+
);
|
| 61 |
+
}
|
frontend/src/components/chat/SourceCard.tsx
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"use client";
|
| 2 |
+
|
| 3 |
+
import { useState } from "react";
|
| 4 |
+
import type { SourceChunk } from "./ChatPanel";
|
| 5 |
+
import { Badge } from "@/components/ui/badge";
|
| 6 |
+
import { Button } from "@/components/ui/button";
|
| 7 |
+
import { ChevronDown, ChevronUp, FileText, Eye } from "lucide-react";
|
| 8 |
+
|
| 9 |
+
interface Props {
|
| 10 |
+
sources: SourceChunk[];
|
| 11 |
+
onPageClick: (page: number) => void;
|
| 12 |
+
}
|
| 13 |
+
|
| 14 |
+
export default function SourceCard({ sources, onPageClick }: Props) {
|
| 15 |
+
const [expanded, setExpanded] = useState(false);
|
| 16 |
+
|
| 17 |
+
if (sources.length === 0) return null;
|
| 18 |
+
|
| 19 |
+
return (
|
| 20 |
+
<div className="rounded-lg border border-border/50 bg-card/50 overflow-hidden">
|
| 21 |
+
{/* ── Header ──────────────────────────────────── */}
|
| 22 |
+
<button
|
| 23 |
+
onClick={() => setExpanded(!expanded)}
|
| 24 |
+
className="w-full flex items-center justify-between px-3 py-2 text-xs hover:bg-accent/30 transition-colors"
|
| 25 |
+
>
|
| 26 |
+
<span className="flex items-center gap-1.5 text-muted-foreground">
|
| 27 |
+
<FileText className="w-3.5 h-3.5" />
|
| 28 |
+
{sources.length} source{sources.length > 1 ? "s" : ""} cited
|
| 29 |
+
</span>
|
| 30 |
+
{expanded ? (
|
| 31 |
+
<ChevronUp className="w-3.5 h-3.5 text-muted-foreground" />
|
| 32 |
+
) : (
|
| 33 |
+
<ChevronDown className="w-3.5 h-3.5 text-muted-foreground" />
|
| 34 |
+
)}
|
| 35 |
+
</button>
|
| 36 |
+
|
| 37 |
+
{/* ── Collapsed: Mini badges ──────────────────── */}
|
| 38 |
+
{!expanded && (
|
| 39 |
+
<div className="px-3 pb-2 flex flex-wrap gap-1">
|
| 40 |
+
{sources.map((src, i) => (
|
| 41 |
+
<Badge
|
| 42 |
+
key={i}
|
| 43 |
+
variant="secondary"
|
| 44 |
+
className="text-[10px] h-5 cursor-pointer hover:bg-primary/20 transition-colors"
|
| 45 |
+
onClick={() => onPageClick(src.page)}
|
| 46 |
+
>
|
| 47 |
+
p.{src.page} • {src.confidence}%
|
| 48 |
+
</Badge>
|
| 49 |
+
))}
|
| 50 |
+
</div>
|
| 51 |
+
)}
|
| 52 |
+
|
| 53 |
+
{/* ── Expanded: Full source cards ─────────────── */}
|
| 54 |
+
{expanded && (
|
| 55 |
+
<div className="border-t border-border/30">
|
| 56 |
+
{sources.map((src, i) => (
|
| 57 |
+
<div
|
| 58 |
+
key={i}
|
| 59 |
+
className="px-3 py-2.5 border-b border-border/20 last:border-b-0 hover:bg-accent/20 transition-colors"
|
| 60 |
+
>
|
| 61 |
+
<div className="flex items-center justify-between mb-1.5">
|
| 62 |
+
<div className="flex items-center gap-2">
|
| 63 |
+
<span className="text-[10px] font-medium text-muted-foreground">
|
| 64 |
+
{src.filename}
|
| 65 |
+
</span>
|
| 66 |
+
<Badge variant="outline" className="text-[9px] h-4 px-1.5">
|
| 67 |
+
Page {src.page}
|
| 68 |
+
</Badge>
|
| 69 |
+
<Badge
|
| 70 |
+
variant="secondary"
|
| 71 |
+
className={`text-[9px] h-4 px-1.5 ${
|
| 72 |
+
src.confidence >= 80
|
| 73 |
+
? "text-emerald-400 bg-emerald-400/10"
|
| 74 |
+
: src.confidence >= 50
|
| 75 |
+
? "text-yellow-400 bg-yellow-400/10"
|
| 76 |
+
: "text-muted-foreground"
|
| 77 |
+
}`}
|
| 78 |
+
>
|
| 79 |
+
{src.confidence}% match
|
| 80 |
+
</Badge>
|
| 81 |
+
</div>
|
| 82 |
+
<Button
|
| 83 |
+
variant="ghost"
|
| 84 |
+
size="sm"
|
| 85 |
+
className="h-6 px-2 text-[10px]"
|
| 86 |
+
onClick={() => onPageClick(src.page)}
|
| 87 |
+
>
|
| 88 |
+
<Eye className="w-3 h-3 mr-1" />
|
| 89 |
+
View
|
| 90 |
+
</Button>
|
| 91 |
+
</div>
|
| 92 |
+
<p className="text-[11px] text-muted-foreground leading-relaxed line-clamp-3">
|
| 93 |
+
{src.text}
|
| 94 |
+
</p>
|
| 95 |
+
</div>
|
| 96 |
+
))}
|
| 97 |
+
</div>
|
| 98 |
+
)}
|
| 99 |
+
</div>
|
| 100 |
+
);
|
| 101 |
+
}
|
frontend/src/components/document/DocumentSidebar.tsx
ADDED
|
@@ -0,0 +1,209 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"use client";
|
| 2 |
+
|
| 3 |
+
import { useState, useCallback } from "react";
|
| 4 |
+
import type { DocInfo } from "@/app/dashboard/page";
|
| 5 |
+
import { api } from "@/lib/api";
|
| 6 |
+
import { ScrollArea } from "@/components/ui/scroll-area";
|
| 7 |
+
import { Button } from "@/components/ui/button";
|
| 8 |
+
import { Badge } from "@/components/ui/badge";
|
| 9 |
+
import { Progress } from "@/components/ui/progress";
|
| 10 |
+
import {
|
| 11 |
+
FileText, Upload, Trash2, FileCheck, Clock, AlertCircle, Loader2, FolderOpen,
|
| 12 |
+
} from "lucide-react";
|
| 13 |
+
import { useDropzone } from "react-dropzone";
|
| 14 |
+
|
| 15 |
+
interface Props {
|
| 16 |
+
documents: DocInfo[];
|
| 17 |
+
activeDoc: DocInfo | null;
|
| 18 |
+
onSelectDoc: (doc: DocInfo) => void;
|
| 19 |
+
onDocumentsChange: () => void;
|
| 20 |
+
}
|
| 21 |
+
|
| 22 |
+
export default function DocumentSidebar({ documents, activeDoc, onSelectDoc, onDocumentsChange }: Props) {
|
| 23 |
+
const [uploading, setUploading] = useState(false);
|
| 24 |
+
const [uploadProgress, setUploadProgress] = useState(0);
|
| 25 |
+
const [deleting, setDeleting] = useState<string | null>(null);
|
| 26 |
+
|
| 27 |
+
/**
 * Upload every accepted file sequentially, one multipart request per file.
 *
 * A failure on one file is logged and the remaining files are still
 * attempted. The document list is refreshed whenever at least one upload
 * succeeded, so partially successful batches show up in the sidebar.
 * Progress is reported as the percentage of files processed so far.
 */
const onDrop = useCallback(
  async (acceptedFiles: File[]) => {
    if (acceptedFiles.length === 0) return;
    setUploading(true);
    setUploadProgress(0);

    let uploadedCount = 0;
    try {
      for (let i = 0; i < acceptedFiles.length; i++) {
        const file = acceptedFiles[i];
        try {
          const formData = new FormData();
          formData.append("file", file);
          await api.postForm("/api/v1/documents/upload", formData);
          uploadedCount += 1;
        } catch (err) {
          // Keep going: one bad file should not block the rest of the batch.
          console.error("Upload failed:", file.name, err);
        }
        setUploadProgress(((i + 1) / acceptedFiles.length) * 100);
      }
      // Refresh even after partial failures so uploaded files become visible.
      if (uploadedCount > 0) onDocumentsChange();
    } catch (err) {
      console.error("Upload failed:", err);
    } finally {
      setUploading(false);
      setUploadProgress(0);
    }
  },
  [onDocumentsChange]
);
|
| 50 |
+
|
| 51 |
+
// Client-side size cap; matches the "max 50MB" hint shown in the upload zone.
const MAX_UPLOAD_BYTES = 50 * 1024 * 1024;

// Dropzone wiring: restricts uploads to the advertised file types and size,
// and is disabled while a batch is in flight so drops cannot overlap.
const { getRootProps, getInputProps, isDragActive } = useDropzone({
  onDrop,
  // Surface rejected files (wrong type / too large) instead of dropping them silently.
  onDropRejected: (fileRejections) => {
    for (const { file, errors } of fileRejections) {
      console.warn(
        "Upload rejected:",
        file.name,
        errors.map((rejection) => rejection.message).join("; ")
      );
    }
  },
  accept: {
    "application/pdf": [".pdf"],
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document": [".docx"],
    "text/plain": [".txt"],
    "text/markdown": [".md"],
  },
  maxSize: MAX_UPLOAD_BYTES,
  disabled: uploading,
});
|
| 61 |
+
|
| 62 |
+
/**
 * Delete a document after the user confirms.
 *
 * Marks the document as "deleting" for the duration of the request, notifies
 * the parent on success, and logs (without rethrowing) on failure.
 */
const handleDelete = async (docId: string, e: React.MouseEvent) => {
  // Stop the click from reaching any ancestor click handler.
  e.stopPropagation();

  const userConfirmed = confirm("Delete this document and all its data?");
  if (!userConfirmed) {
    return;
  }

  setDeleting(docId);
  try {
    const endpoint = `/api/v1/documents/${docId}`;
    await api.delete(endpoint);
    onDocumentsChange();
  } catch (deleteError) {
    console.error("Delete failed:", deleteError);
  } finally {
    // Always clear the per-document spinner, whether or not the request succeeded.
    setDeleting(null);
  }
};
|
| 75 |
+
|
| 76 |
+
/**
 * Pick the small icon shown next to a document for the given status.
 * Unrecognised statuses fall back to a plain file icon.
 */
const statusIcon = (status: string) => {
  if (status === "ready") {
    return <FileCheck className="w-3.5 h-3.5 text-emerald-400" />;
  }
  if (status === "processing") {
    return <Loader2 className="w-3.5 h-3.5 text-primary animate-spin" />;
  }
  if (status === "pending") {
    return <Clock className="w-3.5 h-3.5 text-yellow-400" />;
  }
  if (status === "failed") {
    return <AlertCircle className="w-3.5 h-3.5 text-destructive" />;
  }
  return <FileText className="w-3.5 h-3.5" />;
};
|
| 90 |
+
|
| 91 |
+
/**
 * Format a byte count as a short human-readable size: B, KB, MB or GB.
 *
 * Uses binary multiples (1 KB = 1024 B) with one decimal place above bytes.
 * Non-finite or negative input (e.g. a missing size from the API) is rendered
 * as "0 B" rather than leaking "NaN MB" / "null B" into the UI.
 */
const formatSize = (bytes: number) => {
  const KB = 1024;
  const MB = KB * 1024;
  const GB = MB * 1024;

  if (!Number.isFinite(bytes) || bytes < 0) return "0 B";
  if (bytes < KB) return `${bytes} B`;
  if (bytes < MB) return `${(bytes / KB).toFixed(1)} KB`;
  if (bytes < GB) return `${(bytes / MB).toFixed(1)} MB`;
  return `${(bytes / GB).toFixed(1)} GB`;
};
|
| 96 |
+
|
| 97 |
+
return (
  <div className="h-full flex flex-col bg-sidebar">
    {/* Upload zone: drag-and-drop target that also opens the file picker on click. */}
    <div className="p-3 border-b border-sidebar-border">
      <div
        {...getRootProps()}
        className={`relative rounded-lg border-2 border-dashed p-4 text-center cursor-pointer transition-all duration-200
          ${isDragActive ? "border-primary bg-primary/10 scale-[1.02]" : "border-sidebar-border hover:border-primary/40 hover:bg-sidebar-accent/50"}
          ${uploading ? "pointer-events-none opacity-60" : ""}`}
      >
        <input {...getInputProps()} />
        {uploading ? (
          <div className="space-y-2">
            <Loader2 className="w-5 h-5 mx-auto animate-spin text-primary" />
            <p className="text-xs text-muted-foreground">Uploading...</p>
            <Progress value={uploadProgress} className="h-1" />
          </div>
        ) : (
          <>
            <Upload className="w-5 h-5 mx-auto text-muted-foreground mb-2" />
            <p className="text-xs text-muted-foreground">
              {isDragActive ? "Drop files here" : "Drop files or click to upload"}
            </p>
            <p className="text-[10px] text-muted-foreground/60 mt-1">
              PDF, DOCX, TXT, MD (max 50MB)
            </p>
          </>
        )}
      </div>
    </div>

    {/* Documents list header with the current document count. */}
    <div className="px-3 pt-3 pb-1">
      <h3 className="text-[11px] font-semibold uppercase tracking-wider text-muted-foreground mb-2">
        Documents ({documents.length})
      </h3>
    </div>

    <ScrollArea className="flex-1 px-3">
      {documents.length === 0 ? (
        <div className="text-center py-12">
          <FolderOpen className="w-8 h-8 mx-auto text-muted-foreground/40 mb-3" />
          <p className="text-sm text-muted-foreground">No documents yet</p>
          <p className="text-xs text-muted-foreground/60 mt-1">Upload a file to get started</p>
        </div>
      ) : (
        <div className="space-y-1 pb-3">
          {documents.map((doc) => {
            const isReady = doc.status === "ready";
            const isActive = activeDoc?.id === doc.id;

            return (
              // The row is a plain container holding two sibling buttons
              // (select + delete). Nesting a <button> inside a <button> is
              // invalid HTML and makes React report DOM-nesting errors.
              <div
                key={doc.id}
                className={`flex items-start rounded-lg transition-all duration-200 group
                  ${isActive
                    ? "bg-primary/15 border border-primary/30"
                    : "hover:bg-sidebar-accent border border-transparent"}
                  ${isReady ? "" : "opacity-60"}`}
              >
                <button
                  type="button"
                  aria-disabled={!isReady}
                  // Only documents that finished processing can be selected.
                  onClick={() => isReady && onSelectDoc(doc)}
                  className={`flex-1 min-w-0 flex items-start gap-2.5 text-left p-2.5 rounded-lg
                    ${isReady ? "cursor-pointer" : "cursor-default"}`}
                >
                  {statusIcon(doc.status)}
                  <div className="flex-1 min-w-0">
                    <p className="text-sm font-medium truncate leading-tight">
                      {doc.original_name}
                    </p>
                    <div className="flex items-center gap-2 mt-1">
                      <span className="text-[10px] text-muted-foreground">
                        {formatSize(doc.file_size)}
                      </span>
                      {isReady && (
                        <>
                          <span className="text-[10px] text-muted-foreground">•</span>
                          <span className="text-[10px] text-muted-foreground">
                            {doc.page_count} pg
                          </span>
                          <span className="text-[10px] text-muted-foreground">•</span>
                          <span className="text-[10px] text-muted-foreground">
                            {doc.chunk_count} chunks
                          </span>
                        </>
                      )}
                      {doc.status === "processing" && (
                        <Badge variant="secondary" className="text-[9px] h-4 px-1.5">
                          Processing
                        </Badge>
                      )}
                      {doc.status === "failed" && (
                        <Badge variant="destructive" className="text-[9px] h-4 px-1.5">
                          Failed
                        </Badge>
                      )}
                    </div>
                  </div>
                </button>
                <Button
                  type="button"
                  variant="ghost"
                  size="icon"
                  aria-label={`Delete ${doc.original_name}`}
                  // Hidden until the row is hovered, but revealed on keyboard
                  // focus so the control is never focused while invisible.
                  className="h-6 w-6 mt-2.5 mr-2.5 opacity-0 group-hover:opacity-100 focus-visible:opacity-100 transition-opacity shrink-0"
                  onClick={(e) => handleDelete(doc.id, e)}
                  disabled={deleting === doc.id}
                >
                  {deleting === doc.id ? (
                    <Loader2 className="w-3 h-3 animate-spin" />
                  ) : (
                    <Trash2 className="w-3 h-3 text-destructive" />
                  )}
                </Button>
              </div>
            );
          })}
        </div>
      )}
    </ScrollArea>
  </div>
);
|
| 209 |
+
}
|