MHamdan commited on
Commit
76c3b0a
·
1 Parent(s): 4d295a8

Enhance SPARKNET for TTO automation with new scenarios and security features

Browse files

- Update frontend with TTO branding and coverage dashboard
- Add 5 core scenarios: Patent Wake-Up, Agreement Safety, Partner Matching, License Compliance, Award Identification
- Add CriticAgent validation visibility and confidence scoring
- Create License Compliance Monitoring module (scenario3)
- Create Award Identification module (scenario4)
- Add .env.example with comprehensive API key template
- Enhance secrets.toml.example for Streamlit Cloud
- Add SECURITY.md with GDPR compliance documentation
- Add human-in-the-loop decision points and source verification

.env.example ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ============================================================================
2
+ # SPARKNET Environment Configuration
3
+ # ============================================================================
4
+ # Copy this file to .env and fill in your API keys
5
+ # NEVER commit .env to version control!
6
+ #
7
+ # For Streamlit Cloud deployment, add these to .streamlit/secrets.toml instead
8
+ # ============================================================================
9
+
10
+ # ============================================================================
11
+ # LLM Provider API Keys (Configure at least one for AI features)
12
+ # ============================================================================
13
+
14
+ # Groq - Fastest inference, 14,400 requests/day free
15
+ # Get key: https://console.groq.com/keys
16
+ GROQ_API_KEY=
17
+
18
+ # Google Gemini/AI Studio - 15 requests/min free
19
+ # Get key: https://aistudio.google.com/apikey
20
+ GOOGLE_API_KEY=
21
+
22
+ # OpenRouter - Access to many free models with single API
23
+ # Get key: https://openrouter.ai/keys
24
+ OPENROUTER_API_KEY=
25
+
26
+ # GitHub Models - Free GPT-4o, Llama 3.1 access
27
+ # Get token: https://github.com/settings/tokens (enable 'models' scope)
28
+ GITHUB_TOKEN=
29
+
30
+ # HuggingFace - Thousands of free models, embeddings
31
+ # Get token: https://huggingface.co/settings/tokens
32
+ HF_TOKEN=
33
+
34
+ # Together AI - $25 free credits
35
+ # Get key: https://www.together.ai/
36
+ TOGETHER_API_KEY=
37
+
38
+ # Mistral AI - Free experiment plan
39
+ # Get key: https://console.mistral.ai/
40
+ MISTRAL_API_KEY=
41
+
42
+ # ============================================================================
43
+ # Premium/Paid Providers (Optional)
44
+ # ============================================================================
45
+
46
+ # OpenAI - For GPT-4, embeddings (paid)
47
+ # Get key: https://platform.openai.com/api-keys
48
+ OPENAI_API_KEY=
49
+
50
+ # Anthropic Claude - For Claude models (paid)
51
+ # Get key: https://console.anthropic.com/
52
+ ANTHROPIC_API_KEY=
53
+
54
+ # ============================================================================
55
+ # Local Inference (Ollama)
56
+ # ============================================================================
57
+
58
+ # Ollama server configuration (default: http://localhost:11434)
59
+ OLLAMA_HOST=http://localhost:11434
60
+ OLLAMA_DEFAULT_MODEL=llama3.2:latest
61
+
62
+ # ============================================================================
63
+ # Vector Store / Database Configuration
64
+ # ============================================================================
65
+
66
+ # ChromaDB settings (local by default)
67
+ CHROMA_PERSIST_DIR=./data/chroma
68
+
69
+ # PostgreSQL (for production deployments)
70
+ # DATABASE_URL=postgresql://user:password@localhost:5432/sparknet
71
+
72
+ # ============================================================================
73
+ # Security & Authentication
74
+ # ============================================================================
75
+
76
+ # Application secret key (generate with: python -c "import secrets; print(secrets.token_hex(32))")
77
+ SECRET_KEY=
78
+
79
+ # Demo authentication password (for Streamlit demo)
80
+ # For production, use proper authentication system
81
+ DEMO_PASSWORD=
82
+
83
+ # ============================================================================
84
+ # GDPR & Data Privacy Configuration
85
+ # ============================================================================
86
+ #
87
+ # IMPORTANT: For EU deployments, ensure compliance with:
88
+ # - GDPR (General Data Protection Regulation)
89
+ # - Law 25 (Quebec privacy law) if applicable
90
+ # - Local data residency requirements
91
+ #
92
+ # Options for private/on-premise deployment:
93
+ # 1. Use Ollama for 100% local inference (no data leaves your network)
94
+ # 2. Configure data retention policies in your database
95
+ # 3. Enable audit logging for data access tracking
96
+ # 4. Implement data anonymization for sensitive documents
97
+ #
98
+ # See SECURITY.md for detailed deployment guidelines
99
+ # ============================================================================
100
+
101
+ # Enable audit logging
102
+ AUDIT_LOG_ENABLED=false
103
+ AUDIT_LOG_PATH=./logs/audit.log
104
+
105
+ # Data retention (days, 0 = indefinite)
106
+ DATA_RETENTION_DAYS=0
107
+
108
+ # Enable PII detection and masking
109
+ PII_DETECTION_ENABLED=false
110
+
111
+ # ============================================================================
112
+ # Feature Flags
113
+ # ============================================================================
114
+
115
+ # Enable experimental features
116
+ ENABLE_EXPERIMENTAL=false
117
+
118
+ # Enable GPU acceleration
119
+ ENABLE_GPU=true
120
+
121
+ # Enable caching
122
+ ENABLE_CACHE=true
123
+ CACHE_TTL_SECONDS=3600
124
+
125
+ # ============================================================================
126
+ # Logging & Monitoring
127
+ # ============================================================================
128
+
129
+ # Log level: DEBUG, INFO, WARNING, ERROR
130
+ LOG_LEVEL=INFO
131
+
132
+ # Sentry DSN for error tracking (optional)
133
+ # SENTRY_DSN=
.streamlit/secrets.toml.example CHANGED
@@ -1,14 +1,139 @@
1
- # SPARKNET Secrets Configuration
2
- # Copy this to secrets.toml (DO NOT commit secrets.toml!)
 
 
 
 
 
 
3
 
4
- # Authentication - set your password
 
 
5
  [auth]
6
- password = "your-secure-password-here"
 
7
 
8
- # Or for multiple users:
9
- # [auth]
10
- # passwords = { admin = "admin123", viewer = "viewer456" }
 
 
11
 
12
- # API Keys (free tiers)
13
- GROQ_API_KEY = "your-groq-key"
14
- HF_TOKEN = "your-huggingface-token"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ============================================================================
2
+ # SPARKNET - Streamlit Secrets Configuration
3
+ # ============================================================================
4
+ # Copy this file to secrets.toml (DO NOT commit secrets.toml!)
5
+ # For Streamlit Cloud: Add these via the Streamlit Cloud dashboard
6
+ #
7
+ # VISTA/Horizon EU Project - Technology Transfer Office Automation
8
+ # ============================================================================
9
 
10
+ # ============================================================================
11
+ # Authentication (Required)
12
+ # ============================================================================
13
  [auth]
14
+ # Single user mode
15
+ password = "your-secure-password"
16
 
17
+ # Multi-user mode (uncomment to use):
18
+ # [auth.users]
19
+ # admin = "admin-password-here"
20
+ # viewer = "viewer-password-here"
21
+ # analyst = "analyst-password-here"
22
 
23
+ # ============================================================================
24
+ # LLM Provider API Keys
25
+ # ============================================================================
26
+ # Add only the providers you want to use - system auto-selects best available
27
+ # Priority: Groq > Gemini > OpenRouter > GitHub > Together > Mistral > HuggingFace > Offline
28
+
29
+ # Groq - Fastest inference, 14,400 requests/day free
30
+ # Get key: https://console.groq.com/keys
31
+ GROQ_API_KEY = ""
32
+
33
+ # Google Gemini/AI Studio - 15 requests/min free
34
+ # Get key: https://aistudio.google.com/apikey
35
+ GOOGLE_API_KEY = ""
36
+
37
+ # OpenRouter - Access to many free models with single API
38
+ # Get key: https://openrouter.ai/keys
39
+ OPENROUTER_API_KEY = ""
40
+
41
+ # GitHub Models - Free GPT-4o, Llama 3.1 access
42
+ # Get token: https://github.com/settings/tokens (enable 'models' scope)
43
+ GITHUB_TOKEN = ""
44
+
45
+ # HuggingFace - Thousands of free models, embeddings
46
+ # Get token: https://huggingface.co/settings/tokens
47
+ HF_TOKEN = ""
48
+
49
+ # Together AI - $25 free credits
50
+ # Get key: https://www.together.ai/
51
+ TOGETHER_API_KEY = ""
52
+
53
+ # Mistral AI - Free experiment plan
54
+ # Get key: https://console.mistral.ai/
55
+ MISTRAL_API_KEY = ""
56
+
57
+ # ============================================================================
58
+ # Premium/Paid Providers (Optional)
59
+ # ============================================================================
60
+
61
+ # OpenAI - GPT-4, embeddings (paid)
62
+ # Get key: https://platform.openai.com/api-keys
63
+ OPENAI_API_KEY = ""
64
+
65
+ # Anthropic Claude (paid)
66
+ # Get key: https://console.anthropic.com/
67
+ ANTHROPIC_API_KEY = ""
68
+
69
+ # ============================================================================
70
+ # Database Configuration (Optional - for production)
71
+ # ============================================================================
72
+ [database]
73
+ # PostgreSQL connection (uncomment for production)
74
+ # url = "postgresql://user:password@host:5432/sparknet"
75
+
76
+ # ChromaDB persistence directory
77
+ chroma_persist_dir = "./data/chroma"
78
+
79
+ # ============================================================================
80
+ # Security Configuration
81
+ # ============================================================================
82
+ [security]
83
+ # Secret key for session management (generate with: python -c "import secrets; print(secrets.token_hex(32))")
84
+ secret_key = ""
85
+
86
+ # Enable audit logging
87
+ audit_logging = false
88
+
89
+ # ============================================================================
90
+ # GDPR & Data Privacy
91
+ # ============================================================================
92
+ # IMPORTANT: For EU/VISTA deployments, configure these settings
93
+ [privacy]
94
+ # Data retention in days (0 = indefinite)
95
+ data_retention_days = 0
96
+
97
+ # Enable PII detection and masking
98
+ pii_detection = false
99
+
100
+ # Enable data anonymization for exports
101
+ anonymize_exports = false
102
+
103
+ # ============================================================================
104
+ # Feature Flags
105
+ # ============================================================================
106
+ [features]
107
+ # Enable experimental scenarios
108
+ experimental_scenarios = false
109
+
110
+ # Enable GPU acceleration (requires CUDA)
111
+ gpu_enabled = true
112
+
113
+ # Enable response caching
114
+ caching_enabled = true
115
+ cache_ttl_seconds = 3600
116
+
117
+ # ============================================================================
118
+ # Private Deployment Notes
119
+ # ============================================================================
120
+ # For enterprise/private deployments:
121
+ #
122
+ # 1. LOCAL INFERENCE (Maximum Privacy):
123
+ # - Use Ollama for 100% on-premise inference
124
+ # - No data leaves your network
125
+ # - Set OLLAMA_HOST = "http://localhost:11434"
126
+ #
127
+ # 2. HYBRID DEPLOYMENT:
128
+ # - Use local Ollama for sensitive documents
129
+ # - Use cloud LLMs for non-sensitive queries
130
+ # - Configure document classification rules
131
+ #
132
+ # 3. CLOUD DEPLOYMENT (Streamlit Cloud):
133
+ # - Use secrets management (this file)
134
+ # - Enable audit logging
135
+ # - Configure data retention policies
136
+ # - Review GDPR compliance checklist
137
+ #
138
+ # See DEPLOYMENT.md for detailed instructions
139
+ # ============================================================================
SECURITY.md ADDED
@@ -0,0 +1,342 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # SPARKNET Security Documentation
2
+
3
+ This document outlines security considerations, deployment options, and compliance
4
+ guidelines for the SPARKNET AI-Powered Technology Transfer Office Automation Platform.
5
+
6
+ ## Overview
7
+
8
+ SPARKNET handles sensitive data including:
9
+ - Patent documents and IP information
10
+ - License agreements and financial terms
11
+ - Partner/stakeholder contact information
12
+ - Research data and findings
13
+
14
+ Proper security measures are essential for production deployments.
15
+
16
+ ---
17
+
18
+ ## Deployment Options
19
+
20
+ ### 1. Fully Local Deployment (Maximum Privacy)
21
+
22
+ **Recommended for:** Organizations with strict data sovereignty requirements, classified research, or GDPR Article 17 obligations.
23
+
24
+ ```
25
+ ┌─────────────────────────────────────────────────────────────┐
26
+ │ Your Private Network │
27
+ │ ┌─────────────┐ ┌─────────────┐ ┌─────────────────────┐ │
28
+ │ │ SPARKNET │──│ Ollama │──│ Local Vector Store │ │
29
+ │ │ (Streamlit)│ │ (LLM) │ │ (ChromaDB) │ │
30
+ │ └─────────────┘ └─────────────┘ └─────────────────────┘ │
31
+ │ │ │
32
+ │ ┌─────────────┐ ┌─────────────────────────────────────┐ │
33
+ │ │ PostgreSQL │ │ Document Storage (NFS/S3-compat) │ │
34
+ │ │ (metadata) │ │ │ │
35
+ │ └─────────────┘ └─────────────────────────────────────┘ │
36
+ └─────────────────────────────────────────────────────────────┘
37
+ ```
38
+
39
+ **Configuration:**
40
+ - Set no cloud API keys in `.env`
41
+ - System automatically uses Ollama for all inference
42
+ - All data remains within your network
43
+ - No external API calls for LLM inference
44
+
45
+ **Setup:**
46
+ ```bash
47
+ # Install Ollama
48
+ curl -fsSL https://ollama.com/install.sh | sh
49
+
50
+ # Pull required models
51
+ ollama pull llama3.2:latest
52
+ ollama pull nomic-embed-text
53
+
54
+ # Configure SPARKNET
55
+ cp .env.example .env
56
+ # Leave cloud API keys empty
57
+
58
+ # Run
59
+ streamlit run demo/app.py
60
+ ```
61
+
62
+ ### 2. Hybrid Deployment (Balanced)
63
+
64
+ **Recommended for:** Organizations that want cloud LLM capabilities for non-sensitive operations while keeping sensitive data local.
65
+
66
+ ```
67
+ ┌─────────────────────────────────────────────────────────────┐
68
+ │ Your Private Network │
69
+ │ ┌─────────────┐ ┌─────────────┐ ┌─────────────────────┐ │
70
+ │ │ SPARKNET │──│ Ollama │──│ Document Storage │ │
71
+ │ │ (Streamlit)│ │ (Sensitive)│ │ (Encrypted) │ │
72
+ │ └──────┬──────┘ └─────────────┘ └─────────────────────┘ │
73
+ └─────────│───────────────────────────────────────────────────┘
74
+
75
+ │ (Non-sensitive queries only)
76
+
77
+ ┌─────────────────────────────────────────────────────────────┐
78
+ │ Cloud LLM Providers │
79
+ │ ┌─────────┐ ┌─────────┐ ┌─────────────┐ ┌───────────┐ │
80
+ │ │ Groq │ │ Gemini │ │ OpenRouter │ │ GitHub │ │
81
+ │ └─────────┘ └─────────┘ └─────────────┘ └───────────┘ │
82
+ └─────────────────────────────────────────────────────────────┘
83
+ ```
84
+
85
+ **Configuration:**
86
+ - Configure cloud API keys for general queries
87
+ - Use document sensitivity classification
88
+ - Route sensitive documents to local Ollama
89
+ - Implement data anonymization for cloud queries
90
+
91
+ ### 3. Cloud Deployment (Streamlit Cloud)
92
+
93
+ **Recommended for:** Public demos, non-sensitive research, or when local infrastructure is not available.
94
+
95
+ **Configuration:**
96
+ ```toml
97
+ # .streamlit/secrets.toml
98
+ [auth]
99
+ password = "your-secure-password"
100
+
101
+ GROQ_API_KEY = "your-key"
102
+ GOOGLE_API_KEY = "your-key"
103
+ ```
104
+
105
+ **Security Checklist:**
106
+ - [ ] Use secrets management (never commit API keys)
107
+ - [ ] Enable authentication
108
+ - [ ] Review provider data processing policies
109
+ - [ ] Consider data anonymization
110
+ - [ ] Implement session timeouts
111
+
112
+ ---
113
+
114
+ ## GDPR Compliance
115
+
116
+ ### Data Processing Principles
117
+
118
+ SPARKNET is designed to support GDPR compliance:
119
+
120
+ 1. **Lawfulness, Fairness, Transparency**
121
+ - Document all data processing activities
122
+ - Obtain appropriate consent for personal data
123
+ - Provide clear privacy notices
124
+
125
+ 2. **Purpose Limitation**
126
+ - Use data only for stated TTO purposes
127
+ - Do not repurpose data without consent
128
+
129
+ 3. **Data Minimization**
130
+ - Only process necessary data
131
+ - Anonymize data when possible
132
+ - Implement data retention policies
133
+
134
+ 4. **Accuracy**
135
+ - CriticAgent validation helps ensure accuracy
136
+ - Human-in-the-loop for critical decisions
137
+ - Source verification for claims
138
+
139
+ 5. **Storage Limitation**
140
+ - Configure `DATA_RETENTION_DAYS` in `.env`
141
+ - Implement automatic data purging
142
+ - Support data deletion requests
143
+
144
+ 6. **Integrity and Confidentiality**
145
+ - Encrypt data at rest
146
+ - Use TLS for data in transit
147
+ - Implement access controls
148
+
149
+ ### Data Subject Rights
150
+
151
+ Support for GDPR data subject rights:
152
+
153
+ | Right | Implementation |
154
+ |-------|----------------|
155
+ | Access | Export function for user data |
156
+ | Rectification | Edit capabilities in UI |
157
+ | Erasure | Delete user data on request |
158
+ | Portability | JSON/CSV export options |
159
+ | Objection | Opt-out from AI processing |
160
+
161
+ ### Cross-Border Data Transfers
162
+
163
+ When using cloud LLM providers:
164
+
165
+ 1. **EU-US Data Transfers:**
166
+ - Review provider's Data Processing Agreement
167
+ - Ensure Standard Contractual Clauses in place
168
+ - Consider EU-hosted alternatives
169
+
170
+ 2. **Recommended Approach:**
171
+ - Use Ollama for EU data residency
172
+ - Anonymize data before cloud API calls
173
+ - Implement geographic routing
174
+
175
+ ---
176
+
177
+ ## Security Best Practices
178
+
179
+ ### API Key Management
180
+
181
+ ```python
182
+ # GOOD: Load from environment/secrets
183
+ api_key = os.environ.get("GROQ_API_KEY")
184
+ # or
185
+ api_key = st.secrets.get("GROQ_API_KEY")
186
+
187
+ # BAD: Hardcoded keys
188
+ api_key = "gsk_abc123..." # NEVER DO THIS
189
+ ```
190
+
191
+ ### Authentication
192
+
193
+ Configure authentication in `.streamlit/secrets.toml`:
194
+
195
+ ```toml
196
+ [auth]
197
+ # Single user
198
+ password = "strong-password-here"
199
+
200
+ # Multi-user
201
+ [auth.users]
202
+ admin = "admin-password"
203
+ analyst = "analyst-password"
204
+ viewer = "viewer-password"
205
+ ```
206
+
207
+ ### Audit Logging
208
+
209
+ Enable audit logging for compliance:
210
+
211
+ ```env
212
+ AUDIT_LOG_ENABLED=true
213
+ AUDIT_LOG_PATH=./logs/audit.log
214
+ ```
215
+
216
+ Audit log includes:
217
+ - User authentication events
218
+ - Document access
219
+ - AI query/response pairs
220
+ - Decision point approvals
221
+
222
+ ### Network Security
223
+
224
+ For production deployments:
225
+
226
+ 1. **Firewall Rules:**
227
+ - Restrict Ollama to internal network
228
+ - Limit database access to app servers
229
+ - Use VPN for remote access
230
+
231
+ 2. **TLS/SSL:**
232
+ - Enable HTTPS for Streamlit
233
+ - Use encrypted database connections
234
+ - Secure WebSocket connections
235
+
236
+ 3. **Access Control:**
237
+ - Implement role-based access
238
+ - Use IP allowlisting where possible
239
+ - Enable MFA for admin access
240
+
241
+ ---
242
+
243
+ ## Sensitive Data Handling
244
+
245
+ ### Document Classification
246
+
247
+ SPARKNET can classify documents by sensitivity:
248
+
249
+ | Level | Description | Handling |
250
+ |-------|-------------|----------|
251
+ | Public | Non-confidential | Cloud LLM allowed |
252
+ | Internal | Business confidential | Prefer local |
253
+ | Confidential | Sensitive business | Local only |
254
+ | Restricted | Highly sensitive | Local + encryption |
255
+
256
+ ### PII Detection
257
+
258
+ Enable PII detection:
259
+
260
+ ```env
261
+ PII_DETECTION_ENABLED=true
262
+ ```
263
+
264
+ Detected PII types:
265
+ - Names (persons)
266
+ - Email addresses
267
+ - Phone numbers
268
+ - Addresses
269
+ - ID numbers
270
+
271
+ ### Data Anonymization
272
+
273
+ For cloud API calls, implement anonymization:
274
+
275
+ ```python
276
+ # Pseudonymization example
277
+ text = text.replace(real_name, "[PERSON_1]")
278
+ text = text.replace(company_name, "[COMPANY_1]")
279
+ ```
280
+
281
+ ---
282
+
283
+ ## Incident Response
284
+
285
+ ### Security Incident Procedure
286
+
287
+ 1. **Detection:** Monitor audit logs and alerts
288
+ 2. **Containment:** Isolate affected systems
289
+ 3. **Investigation:** Determine scope and impact
290
 + 4. **Notification:** Notify the supervisory authority within 72 hours (GDPR Art. 33) and inform affected data subjects where required (Art. 34)
291
+ 5. **Recovery:** Restore from clean backups
292
+ 6. **Lessons Learned:** Update security measures
293
+
294
+ ### Contact
295
+
296
+ For security issues:
297
+ - Review issue privately before public disclosure
298
+ - Report to project maintainers
299
+ - Follow responsible disclosure practices
300
+
301
+ ---
302
+
303
+ ## Compliance Checklist
304
+
305
+ ### Pre-Deployment
306
+
307
+ - [ ] API keys stored in secrets management
308
+ - [ ] Authentication configured
309
+ - [ ] Audit logging enabled
310
+ - [ ] Data retention policy defined
311
+ - [ ] Backup strategy implemented
312
+ - [ ] Network security reviewed
313
+
314
+ ### GDPR Compliance
315
+
316
+ - [ ] Data processing register updated
317
+ - [ ] Privacy notice published
318
+ - [ ] Data subject rights procedures in place
319
+ - [ ] Cross-border transfer safeguards
320
+ - [ ] Data Protection Impact Assessment (if required)
321
+
322
+ ### Ongoing
323
+
324
+ - [ ] Regular security audits
325
+ - [ ] Log review and monitoring
326
+ - [ ] Access control review
327
+ - [ ] Incident response testing
328
+ - [ ] Staff security training
329
+
330
+ ---
331
+
332
+ ## Additional Resources
333
+
334
+ - [GDPR Official Text](https://gdpr.eu/)
335
+ - [Ollama Documentation](https://ollama.com/)
336
+ - [Streamlit Security](https://docs.streamlit.io/deploy/streamlit-community-cloud/security)
337
+ - [OWASP Top 10](https://owasp.org/Top10/)
338
+
339
+ ---
340
+
341
+ *SPARKNET - VISTA/Horizon EU Project*
342
+ *Last Updated: 2025*
demo/app.py CHANGED
@@ -1,12 +1,24 @@
1
  """
2
- SPARKNET Demo Application
3
-
4
- A Streamlit-based demo showcasing:
5
- - Document Processing Pipeline
6
- - Field Extraction with Evidence
7
- - RAG Search and Q&A
8
- - Document Classification
9
- - Evidence Visualization
 
 
 
 
 
 
 
 
 
 
 
 
10
  """
11
 
12
  import streamlit as st
@@ -23,10 +35,13 @@ sys.path.insert(0, str(PROJECT_ROOT))
23
 
24
  # Page configuration - MUST be first Streamlit command
25
  st.set_page_config(
26
- page_title="SPARKNET Document Intelligence",
27
  page_icon="🔥",
28
  layout="wide",
29
  initial_sidebar_state="expanded",
 
 
 
30
  )
31
 
32
  # Authentication - require login before showing app
@@ -85,6 +100,95 @@ st.markdown("""
85
  background-color: #f0f2f6;
86
  border-radius: 8px;
87
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
  </style>
89
  """, unsafe_allow_html=True)
90
 
@@ -107,14 +211,217 @@ def format_confidence(confidence: float) -> str:
107
  return f'<span class="confidence-low">{confidence:.1%}</span>'
108
 
109
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
  def render_header():
111
- """Render the main header."""
112
  col1, col2 = st.columns([3, 1])
113
  with col1:
114
  st.markdown('<div class="main-header">🔥 SPARKNET</div>', unsafe_allow_html=True)
115
- st.markdown('<div class="sub-header">Agentic Document Intelligence Platform</div>', unsafe_allow_html=True)
 
 
 
 
 
 
 
116
  with col2:
117
- st.image("https://img.shields.io/badge/version-0.1.0-blue", width=100)
 
 
 
 
 
 
 
 
118
 
119
 
120
  def render_sidebar():
@@ -174,31 +481,222 @@ def check_chromadb_status():
174
 
175
 
176
  def render_home_page():
177
- """Render the home page."""
178
- st.markdown("## Welcome to SPARKNET")
179
 
180
  st.markdown("""
181
- SPARKNET is an enterprise-grade **Agentic Document Intelligence Platform** that combines:
182
-
183
- - **📄 Document Processing**: OCR with PaddleOCR/Tesseract, layout detection, semantic chunking
184
- - **🔍 RAG Subsystem**: Vector search with ChromaDB, grounded retrieval with citations
185
- - **🤖 Multi-Agent System**: ReAct-style agents with tool use and validation
186
- - **🏠 Local-First**: Privacy-preserving inference via Ollama
187
- - **📎 Evidence Grounding**: Every extraction includes bbox, page, chunk_id references
188
  """)
189
 
190
  st.markdown("---")
191
 
192
- # Feature cards
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
193
  col1, col2, col3, col4 = st.columns(4)
194
 
195
  with col1:
196
  st.markdown("""
197
  <div style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
198
  border-radius: 12px; padding: 1.5rem; color: white; text-align: center;">
199
- <h3>📄</h3>
200
- <h4>Document Processing</h4>
201
- <p style="font-size: 0.9rem;">OCR, Layout Detection, Chunking</p>
202
  </div>
203
  """, unsafe_allow_html=True)
204
 
@@ -206,9 +704,9 @@ def render_home_page():
206
  st.markdown("""
207
  <div style="background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%);
208
  border-radius: 12px; padding: 1.5rem; color: white; text-align: center;">
209
- <h3>🔍</h3>
210
- <h4>Field Extraction</h4>
211
- <p style="font-size: 0.9rem;">Structured Data with Evidence</p>
212
  </div>
213
  """, unsafe_allow_html=True)
214
 
@@ -216,9 +714,9 @@ def render_home_page():
216
  st.markdown("""
217
  <div style="background: linear-gradient(135deg, #4facfe 0%, #00f2fe 100%);
218
  border-radius: 12px; padding: 1.5rem; color: white; text-align: center;">
219
- <h3>💬</h3>
220
- <h4>RAG Q&A</h4>
221
- <p style="font-size: 0.9rem;">Grounded Answers with Citations</p>
222
  </div>
223
  """, unsafe_allow_html=True)
224
 
@@ -226,29 +724,34 @@ def render_home_page():
226
  st.markdown("""
227
  <div style="background: linear-gradient(135deg, #43e97b 0%, #38f9d7 100%);
228
  border-radius: 12px; padding: 1.5rem; color: white; text-align: center;">
229
- <h3>🏷️</h3>
230
- <h4>Classification</h4>
231
- <p style="font-size: 0.9rem;">Document Type Detection</p>
232
  </div>
233
  """, unsafe_allow_html=True)
234
 
235
  st.markdown("---")
236
 
237
  # Quick start
238
- st.markdown("### Quick Start")
239
 
240
- with st.expander("📚 How to Use This Demo", expanded=True):
241
  st.markdown("""
242
- 1. **Document Processing**: Upload or select a PDF to process with OCR
243
- 2. **Field Extraction**: Define fields to extract with evidence grounding
244
- 3. **RAG Q&A**: Ask questions about indexed documents
245
- 4. **Classification**: Automatically classify document types
246
 
247
- **Sample Documents**: The demo includes real patent documents from major tech companies.
 
 
 
 
 
248
  """)
249
 
250
  # Sample documents preview
251
- st.markdown("### Available Sample Documents")
252
  docs = get_sample_documents()
253
 
254
  if docs:
@@ -624,20 +1127,42 @@ def extract_fields_demo(doc_name, fields, validate, include_evidence):
624
 
625
  st.markdown("")
626
 
627
- # Validation results
628
  if validate:
629
  st.markdown("---")
630
- st.markdown("### Validation Results")
631
 
632
- col1, col2, col3 = st.columns(3)
633
- with col1:
634
- st.metric("Fields Validated", len(fields))
635
- with col2:
636
- st.metric("Valid", len(fields) - 1)
637
- with col3:
638
- st.metric("Uncertain", 1)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
639
 
640
- st.info("💡 Critic validation: All fields have supporting evidence in the document.")
 
641
 
642
 
643
  def render_rag_page():
@@ -942,9 +1467,12 @@ def main():
942
  # Footer
943
  st.markdown("---")
944
  st.markdown(
945
- "<div style='text-align: center; color: #666;'>"
946
- "🔥 SPARKNET Document Intelligence Platform | Built with Streamlit"
947
- "</div>",
 
 
 
948
  unsafe_allow_html=True,
949
  )
950
 
 
1
  """
2
+ SPARKNET - AI-Powered Technology Transfer Office (TTO) Automation Platform
3
+
4
+ A comprehensive Streamlit-based platform for research valorization and IP management:
5
+
6
+ CORE TTO SCENARIOS:
7
+ 1. Patent Wake-Up: Transform dormant patents into commercialization opportunities
8
+ 2. Agreement Safety: AI-assisted legal document review with risk detection
9
+ 3. Partner Matching: Intelligent stakeholder matching for technology transfer
10
+ 4. License Compliance Monitoring: Payment tracking, milestone verification, revenue alerts
11
+ 5. Award Identification: Funding opportunity scanning and nomination assistance
12
+
13
+ FEATURES:
14
+ - Multi-agent AI orchestration with CriticAgent validation
15
+ - Document Intelligence with evidence grounding
16
+ - RAG-powered search and Q&A with source verification
17
+ - Confidence scoring and hallucination mitigation
18
+ - Human-in-the-loop decision points
19
+ - GDPR-compliant data handling options
20
+
21
+ VISTA/Horizon EU Project - Supporting European research valorization
22
  """
23
 
24
  import streamlit as st
 
35
 
36
  # Page configuration - MUST be first Streamlit command
37
  st.set_page_config(
38
+ page_title="SPARKNET - TTO Automation Platform",
39
  page_icon="🔥",
40
  layout="wide",
41
  initial_sidebar_state="expanded",
42
+ menu_items={
43
+ 'About': "SPARKNET: AI-Powered Technology Transfer Office Automation\n\nVISTA/Horizon EU Project"
44
+ }
45
  )
46
 
47
  # Authentication - require login before showing app
 
100
  background-color: #f0f2f6;
101
  border-radius: 8px;
102
  }
103
+ /* Coverage badges */
104
+ .coverage-full {
105
+ background: linear-gradient(135deg, #22c55e 0%, #16a34a 100%);
106
+ color: white;
107
+ padding: 0.3rem 0.8rem;
108
+ border-radius: 20px;
109
+ font-size: 0.75rem;
110
+ font-weight: bold;
111
+ }
112
+ .coverage-partial {
113
+ background: linear-gradient(135deg, #eab308 0%, #ca8a04 100%);
114
+ color: white;
115
+ padding: 0.3rem 0.8rem;
116
+ border-radius: 20px;
117
+ font-size: 0.75rem;
118
+ font-weight: bold;
119
+ }
120
+ .coverage-none {
121
+ background: linear-gradient(135deg, #94a3b8 0%, #64748b 100%);
122
+ color: white;
123
+ padding: 0.3rem 0.8rem;
124
+ border-radius: 20px;
125
+ font-size: 0.75rem;
126
+ font-weight: bold;
127
+ }
128
+ /* EU/VISTA badges */
129
+ .eu-badge {
130
+ background: linear-gradient(135deg, #003399 0%, #0052cc 100%);
131
+ color: #ffcc00;
132
+ padding: 0.4rem 1rem;
133
+ border-radius: 8px;
134
+ font-size: 0.8rem;
135
+ font-weight: bold;
136
+ display: inline-block;
137
+ margin: 0.2rem;
138
+ }
139
+ .vista-badge {
140
+ background: linear-gradient(135deg, #7c3aed 0%, #a855f7 100%);
141
+ color: white;
142
+ padding: 0.4rem 1rem;
143
+ border-radius: 8px;
144
+ font-size: 0.8rem;
145
+ font-weight: bold;
146
+ display: inline-block;
147
+ margin: 0.2rem;
148
+ }
149
+ /* Scenario cards */
150
+ .scenario-card {
151
+ background: white;
152
+ border: 1px solid #e5e7eb;
153
+ border-radius: 12px;
154
+ padding: 1.2rem;
155
+ margin: 0.5rem 0;
156
+ box-shadow: 0 2px 4px rgba(0,0,0,0.05);
157
+ transition: transform 0.2s, box-shadow 0.2s;
158
+ }
159
+ .scenario-card:hover {
160
+ transform: translateY(-2px);
161
+ box-shadow: 0 4px 12px rgba(0,0,0,0.1);
162
+ }
163
+ /* Validation indicator */
164
+ .validation-indicator {
165
+ display: inline-flex;
166
+ align-items: center;
167
+ gap: 0.5rem;
168
+ padding: 0.3rem 0.8rem;
169
+ border-radius: 6px;
170
+ font-size: 0.85rem;
171
+ }
172
+ .validation-pass {
173
+ background: #dcfce7;
174
+ color: #166534;
175
+ }
176
+ .validation-warn {
177
+ background: #fef3c7;
178
+ color: #92400e;
179
+ }
180
+ .validation-fail {
181
+ background: #fecaca;
182
+ color: #991b1b;
183
+ }
184
+ /* Human-in-the-loop button */
185
+ .hitl-prompt {
186
+ background: linear-gradient(135deg, #fef3c7 0%, #fde68a 100%);
187
+ border: 2px solid #f59e0b;
188
+ border-radius: 8px;
189
+ padding: 1rem;
190
+ margin: 1rem 0;
191
+ }
192
  </style>
193
  """, unsafe_allow_html=True)
194
 
 
211
  return f'<span class="confidence-low">{confidence:.1%}</span>'
212
 
213
 
214
+ def render_critic_validation(validation_result: dict) -> None:
215
+ """
216
+ Render CriticAgent validation results in the UI.
217
+
218
+ Displays validation scores, issues, and suggestions
219
+ with clear visual indicators.
220
+ """
221
+ overall_score = validation_result.get("overall_score", 0.0)
222
+ is_valid = validation_result.get("valid", False)
223
+ dimension_scores = validation_result.get("dimension_scores", {})
224
+ issues = validation_result.get("issues", [])
225
+ suggestions = validation_result.get("suggestions", [])
226
+
227
+ # Overall validation status
228
+ if is_valid and overall_score >= 0.85:
229
+ status_class = "validation-pass"
230
+ status_icon = "✓"
231
+ status_text = "Validated"
232
+ elif overall_score >= 0.6:
233
+ status_class = "validation-warn"
234
+ status_icon = "⚠"
235
+ status_text = "Review Recommended"
236
+ else:
237
+ status_class = "validation-fail"
238
+ status_icon = "✗"
239
+ status_text = "Validation Failed"
240
+
241
+ st.markdown(f"""
242
+ <div style="background: #f8fafc; border-radius: 12px; padding: 1rem; margin: 1rem 0; border: 1px solid #e2e8f0;">
243
+ <div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 1rem;">
244
+ <h4 style="margin: 0;">🛡️ CriticAgent Validation</h4>
245
+ <span class="validation-indicator {status_class}">{status_icon} {status_text}</span>
246
+ </div>
247
+ <div style="display: flex; gap: 1rem; flex-wrap: wrap;">
248
+ <div style="flex: 1; min-width: 200px;">
249
+ <strong>Overall Score</strong>
250
+ <div style="font-size: 2rem; font-weight: bold; color: {'#22c55e' if overall_score >= 0.8 else '#eab308' if overall_score >= 0.6 else '#ef4444'};">
251
+ {overall_score:.0%}
252
+ </div>
253
+ </div>
254
+ """, unsafe_allow_html=True)
255
+
256
+ # Dimension scores
257
+ if dimension_scores:
258
+ st.markdown("**Quality Dimensions:**")
259
+ cols = st.columns(len(dimension_scores))
260
+ for i, (dim, score) in enumerate(dimension_scores.items()):
261
+ with cols[i]:
262
+ st.metric(
263
+ dim.replace("_", " ").title(),
264
+ f"{score:.0%}",
265
+ delta=None,
266
+ )
267
+
268
+ # Issues
269
+ if issues:
270
+ with st.expander("⚠️ Issues Found", expanded=len(issues) <= 3):
271
+ for issue in issues:
272
+ st.markdown(f"- {issue}")
273
+
274
+ # Suggestions
275
+ if suggestions:
276
+ with st.expander("💡 Improvement Suggestions"):
277
+ for suggestion in suggestions:
278
+ st.markdown(f"- {suggestion}")
279
+
280
+ st.markdown("</div></div>", unsafe_allow_html=True)
281
+
282
+
283
+ def render_source_verification(sources: list, claim: str = "") -> None:
284
+ """
285
+ Render source verification for hallucination mitigation.
286
+
287
+ Shows the sources used to generate AI responses with
288
+ verification status.
289
+ """
290
+ st.markdown("""
291
+ <div style="background: #f0fdf4; border-radius: 8px; padding: 1rem; border: 1px solid #bbf7d0;">
292
+ <h5 style="margin: 0 0 0.5rem 0;">📎 Source Verification</h5>
293
+ """, unsafe_allow_html=True)
294
+
295
+ if sources:
296
+ verified_count = sum(1 for s in sources if s.get("verified", False))
297
+ total_count = len(sources)
298
+
299
+ st.markdown(f"""
300
+ <div style="margin-bottom: 0.5rem;">
301
+ <span style="color: #166534;">✓ {verified_count}/{total_count} sources verified</span>
302
+ </div>
303
+ """, unsafe_allow_html=True)
304
+
305
+ for i, source in enumerate(sources[:5]): # Show top 5 sources
306
+ verified = source.get("verified", False)
307
+ page = source.get("page", "N/A")
308
+ snippet = source.get("snippet", "")[:100]
309
+ confidence = source.get("confidence", 0.0)
310
+
311
+ st.markdown(f"""
312
+ <div style="background: white; border-radius: 4px; padding: 0.5rem; margin: 0.3rem 0; border-left: 3px solid {'#22c55e' if verified else '#eab308'};">
313
+ <small>
314
+ <strong>[{i+1}]</strong> Page {page} | Confidence: {confidence:.0%}
315
+ {' ✓' if verified else ' ⚠'}
316
+ <br>
317
+ <em>"{snippet}..."</em>
318
+ </small>
319
+ </div>
320
+ """, unsafe_allow_html=True)
321
+ else:
322
+ st.markdown("""
323
+ <p style="color: #666; margin: 0;">No source verification available for this response.</p>
324
+ """, unsafe_allow_html=True)
325
+
326
+ st.markdown("</div>", unsafe_allow_html=True)
327
+
328
+
329
+ def render_human_decision_point(
330
+ question: str,
331
+ options: list,
332
+ ai_recommendation: str = None,
333
+ ai_confidence: float = None,
334
+ ) -> str:
335
+ """
336
+ Render a human-in-the-loop decision point.
337
+
338
+ Shows AI recommendation but requires human approval
339
+ for critical decisions.
340
+
341
+ Returns:
342
+ Selected option from human
343
+ """
344
+ st.markdown("""
345
+ <div class="hitl-prompt">
346
+ <h4 style="margin: 0 0 0.5rem 0;">👤 Human Decision Required</h4>
347
+ """, unsafe_allow_html=True)
348
+
349
+ st.markdown(f"**{question}**")
350
+
351
+ if ai_recommendation and ai_confidence:
352
+ st.markdown(f"""
353
+ <div style="background: white; border-radius: 4px; padding: 0.5rem; margin: 0.5rem 0;">
354
+ <small>
355
+ <strong>AI Recommendation:</strong> {ai_recommendation}
356
+ (Confidence: {ai_confidence:.0%})
357
+ </small>
358
+ </div>
359
+ """, unsafe_allow_html=True)
360
+
361
+ selected = st.radio(
362
+ "Your decision:",
363
+ options,
364
+ label_visibility="collapsed",
365
+ key=f"hitl_{hash(question)}",
366
+ )
367
+
368
+ st.markdown("</div>", unsafe_allow_html=True)
369
+
370
+ return selected
371
+
372
+
373
+ def render_confidence_indicator(confidence: float, label: str = "Confidence") -> None:
374
+ """
375
+ Render a visual confidence indicator.
376
+
377
+ Shows confidence as a progress bar with color coding.
378
+ """
379
+ if confidence >= 0.8:
380
+ color = "#22c55e"
381
+ status = "High"
382
+ elif confidence >= 0.6:
383
+ color = "#eab308"
384
+ status = "Medium"
385
+ else:
386
+ color = "#ef4444"
387
+ status = "Low"
388
+
389
+ st.markdown(f"""
390
+ <div style="margin: 0.5rem 0;">
391
+ <div style="display: flex; justify-content: space-between; margin-bottom: 0.3rem;">
392
+ <small><strong>{label}</strong></small>
393
+ <small style="color: {color};">{status} ({confidence:.0%})</small>
394
+ </div>
395
+ <div style="background: #e5e7eb; border-radius: 4px; height: 8px; overflow: hidden;">
396
+ <div style="background: {color}; width: {confidence*100}%; height: 100%;"></div>
397
+ </div>
398
+ </div>
399
+ """, unsafe_allow_html=True)
400
+
401
+
402
  def render_header():
403
+ """Render the main header with TTO branding and EU badges."""
404
  col1, col2 = st.columns([3, 1])
405
  with col1:
406
  st.markdown('<div class="main-header">🔥 SPARKNET</div>', unsafe_allow_html=True)
407
+ st.markdown('<div class="sub-header">AI-Powered Technology Transfer Office Automation Platform</div>', unsafe_allow_html=True)
408
+ # EU/VISTA alignment badges
409
+ st.markdown('''
410
+ <div style="margin-top: 0.5rem;">
411
+ <span class="vista-badge">VISTA Project</span>
412
+ <span class="eu-badge">Horizon EU</span>
413
+ </div>
414
+ ''', unsafe_allow_html=True)
415
  with col2:
416
+ st.markdown('''
417
+ <div style="text-align: right;">
418
+ <img src="https://img.shields.io/badge/version-1.0.0-blue" style="margin: 2px;">
419
+ <br>
420
+ <img src="https://img.shields.io/badge/scenarios-5-green" style="margin: 2px;">
421
+ <br>
422
+ <img src="https://img.shields.io/badge/status-production-success" style="margin: 2px;">
423
+ </div>
424
+ ''', unsafe_allow_html=True)
425
 
426
 
427
  def render_sidebar():
 
481
 
482
 
483
  def render_home_page():
484
+ """Render the TTO dashboard home page with scenarios and coverage metrics."""
485
+ st.markdown("## Technology Transfer Office Dashboard")
486
 
487
  st.markdown("""
488
+ SPARKNET is a comprehensive **AI-Powered Technology Transfer Office (TTO) Automation Platform**
489
+ designed for research valorization and IP management. Built for the VISTA/Horizon EU project,
490
+ it combines multi-agent AI orchestration with document intelligence to automate key TTO workflows.
 
 
 
 
491
  """)
492
 
493
  st.markdown("---")
494
 
495
+ # =========================================================================
496
+ # COVERAGE METRICS DASHBOARD
497
+ # =========================================================================
498
+ st.markdown("### 📊 TTO Task Coverage Dashboard")
499
+
500
+ col1, col2, col3 = st.columns(3)
501
+
502
+ with col1:
503
+ st.markdown("""
504
+ <div style="background: linear-gradient(135deg, #22c55e 0%, #16a34a 100%);
505
+ border-radius: 12px; padding: 1.5rem; color: white; text-align: center;">
506
+ <h1 style="margin: 0; font-size: 3rem;">3</h1>
507
+ <h4 style="margin: 0.5rem 0;">Fully Covered</h4>
508
+ <p style="font-size: 0.85rem; opacity: 0.9;">Production-ready scenarios</p>
509
+ </div>
510
+ """, unsafe_allow_html=True)
511
+
512
+ with col2:
513
+ st.markdown("""
514
+ <div style="background: linear-gradient(135deg, #eab308 0%, #ca8a04 100%);
515
+ border-radius: 12px; padding: 1.5rem; color: white; text-align: center;">
516
+ <h1 style="margin: 0; font-size: 3rem;">5</h1>
517
+ <h4 style="margin: 0.5rem 0;">Partially Covered</h4>
518
+ <p style="font-size: 0.85rem; opacity: 0.9;">In development</p>
519
+ </div>
520
+ """, unsafe_allow_html=True)
521
+
522
+ with col3:
523
+ st.markdown("""
524
+ <div style="background: linear-gradient(135deg, #94a3b8 0%, #64748b 100%);
525
+ border-radius: 12px; padding: 1.5rem; color: white; text-align: center;">
526
+ <h1 style="margin: 0; font-size: 3rem;">2</h1>
527
+ <h4 style="margin: 0.5rem 0;">Not Covered</h4>
528
+ <p style="font-size: 0.85rem; opacity: 0.9;">Planned for future</p>
529
+ </div>
530
+ """, unsafe_allow_html=True)
531
+
532
+ st.markdown("---")
533
+
534
+ # =========================================================================
535
+ # CORE TTO SCENARIOS
536
+ # =========================================================================
537
+ st.markdown("### 🎯 Core TTO Scenarios")
538
+
539
+ # Fully Covered Scenarios
540
+ st.markdown("#### Fully Implemented")
541
+ col1, col2, col3 = st.columns(3)
542
+
543
+ with col1:
544
+ st.markdown("""
545
+ <div class="scenario-card">
546
+ <div style="display: flex; justify-content: space-between; align-items: start;">
547
+ <h4 style="margin: 0;">💡 Patent Wake-Up</h4>
548
+ <span class="coverage-full">LIVE</span>
549
+ </div>
550
+ <p style="color: #666; margin: 0.5rem 0;">Transform dormant patents into commercialization opportunities</p>
551
+ <hr style="margin: 0.8rem 0; opacity: 0.3;">
552
+ <small>
553
+ <strong>Features:</strong><br>
554
+ • TRL Assessment<br>
555
+ • Market Analysis<br>
556
+ • Partner Matching<br>
557
+ • Valorization Brief Generation
558
+ </small>
559
+ <div style="margin-top: 0.8rem;">
560
+ <span class="vista-badge" style="font-size: 0.7rem; padding: 0.2rem 0.5rem;">VISTA Aligned</span>
561
+ </div>
562
+ </div>
563
+ """, unsafe_allow_html=True)
564
+
565
+ with col2:
566
+ st.markdown("""
567
+ <div class="scenario-card">
568
+ <div style="display: flex; justify-content: space-between; align-items: start;">
569
+ <h4 style="margin: 0;">⚖️ Agreement Safety</h4>
570
+ <span class="coverage-full">LIVE</span>
571
+ </div>
572
+ <p style="color: #666; margin: 0.5rem 0;">AI-assisted legal document review with risk detection</p>
573
+ <hr style="margin: 0.8rem 0; opacity: 0.3;">
574
+ <small>
575
+ <strong>Features:</strong><br>
576
+ • Risk Clause Detection<br>
577
+ • GDPR Compliance Check<br>
578
+ • Law 25 Alignment<br>
579
+ • Remediation Suggestions
580
+ </small>
581
+ <div style="margin-top: 0.8rem;">
582
+ <span class="eu-badge" style="font-size: 0.7rem; padding: 0.2rem 0.5rem;">GDPR Ready</span>
583
+ </div>
584
+ </div>
585
+ """, unsafe_allow_html=True)
586
+
587
+ with col3:
588
+ st.markdown("""
589
+ <div class="scenario-card">
590
+ <div style="display: flex; justify-content: space-between; align-items: start;">
591
+ <h4 style="margin: 0;">🤝 Partner Matching</h4>
592
+ <span class="coverage-full">LIVE</span>
593
+ </div>
594
+ <p style="color: #666; margin: 0.5rem 0;">Intelligent stakeholder matching for technology transfer</p>
595
+ <hr style="margin: 0.8rem 0; opacity: 0.3;">
596
+ <small>
597
+ <strong>Features:</strong><br>
598
+ • Multi-criteria Scoring<br>
599
+ • Geographic Matching<br>
600
+ • Technical Fit Analysis<br>
601
+ • Outreach Recommendations
602
+ </small>
603
+ <div style="margin-top: 0.8rem;">
604
+ <span class="vista-badge" style="font-size: 0.7rem; padding: 0.2rem 0.5rem;">VISTA Aligned</span>
605
+ </div>
606
+ </div>
607
+ """, unsafe_allow_html=True)
608
+
609
+ # Partially Covered Scenarios
610
+ st.markdown("#### In Development")
611
+ col1, col2 = st.columns(2)
612
+
613
+ with col1:
614
+ st.markdown("""
615
+ <div class="scenario-card" style="border-left: 4px solid #eab308;">
616
+ <div style="display: flex; justify-content: space-between; align-items: start;">
617
+ <h4 style="margin: 0;">📋 License Compliance Monitoring</h4>
618
+ <span class="coverage-partial">DEV</span>
619
+ </div>
620
+ <p style="color: #666; margin: 0.5rem 0;">Track license agreements and ensure compliance</p>
621
+ <hr style="margin: 0.8rem 0; opacity: 0.3;">
622
+ <small>
623
+ <strong>Planned Features:</strong><br>
624
+ • Payment Tracking & Alerts<br>
625
+ • Milestone Verification<br>
626
+ • Revenue Monitoring<br>
627
+ • Compliance Reporting
628
+ </small>
629
+ </div>
630
+ """, unsafe_allow_html=True)
631
+
632
+ with col2:
633
+ st.markdown("""
634
+ <div class="scenario-card" style="border-left: 4px solid #eab308;">
635
+ <div style="display: flex; justify-content: space-between; align-items: start;">
636
+ <h4 style="margin: 0;">🏆 Award Identification</h4>
637
+ <span class="coverage-partial">DEV</span>
638
+ </div>
639
+ <p style="color: #666; margin: 0.5rem 0;">Discover funding opportunities and awards</p>
640
+ <hr style="margin: 0.8rem 0; opacity: 0.3;">
641
+ <small>
642
+ <strong>Planned Features:</strong><br>
643
+ • Opportunity Scanning<br>
644
+ • Nomination Assistance<br>
645
+ • Deadline Tracking<br>
646
+ • Application Support
647
+ </small>
648
+ </div>
649
+ """, unsafe_allow_html=True)
650
+
651
+ st.markdown("---")
652
+
653
+ # =========================================================================
654
+ # AI QUALITY ASSURANCE
655
+ # =========================================================================
656
+ st.markdown("### 🛡️ AI Quality Assurance")
657
+
658
+ col1, col2, col3 = st.columns(3)
659
+
660
+ with col1:
661
+ st.markdown("""
662
+ <div style="background: #f0f9ff; border-radius: 8px; padding: 1rem; border: 1px solid #bae6fd;">
663
+ <h4 style="margin: 0 0 0.5rem 0;">🔍 CriticAgent Validation</h4>
664
+ <p style="font-size: 0.9rem; margin: 0;">Every AI output is validated against VISTA quality standards with dimension-based scoring.</p>
665
+ </div>
666
+ """, unsafe_allow_html=True)
667
+
668
+ with col2:
669
+ st.markdown("""
670
+ <div style="background: #f0fdf4; border-radius: 8px; padding: 1rem; border: 1px solid #bbf7d0;">
671
+ <h4 style="margin: 0 0 0.5rem 0;">📊 Confidence Scoring</h4>
672
+ <p style="font-size: 0.9rem; margin: 0;">All extractions include confidence scores with automatic abstention for low-confidence results.</p>
673
+ </div>
674
+ """, unsafe_allow_html=True)
675
+
676
+ with col3:
677
+ st.markdown("""
678
+ <div style="background: #fefce8; border-radius: 8px; padding: 1rem; border: 1px solid #fef08a;">
679
+ <h4 style="margin: 0 0 0.5rem 0;">👤 Human-in-the-Loop</h4>
680
+ <p style="font-size: 0.9rem; margin: 0;">Critical decisions require human approval with clear decision points throughout workflows.</p>
681
+ </div>
682
+ """, unsafe_allow_html=True)
683
+
684
+ st.markdown("---")
685
+
686
+ # =========================================================================
687
+ # PLATFORM CAPABILITIES
688
+ # =========================================================================
689
+ st.markdown("### 🚀 Platform Capabilities")
690
+
691
  col1, col2, col3, col4 = st.columns(4)
692
 
693
  with col1:
694
  st.markdown("""
695
  <div style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
696
  border-radius: 12px; padding: 1.5rem; color: white; text-align: center;">
697
+ <h3 style="margin: 0;">📄</h3>
698
+ <h4 style="margin: 0.5rem 0;">Document Intelligence</h4>
699
+ <p style="font-size: 0.85rem; opacity: 0.9;">OCR, Layout, Chunking</p>
700
  </div>
701
  """, unsafe_allow_html=True)
702
 
 
704
  st.markdown("""
705
  <div style="background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%);
706
  border-radius: 12px; padding: 1.5rem; color: white; text-align: center;">
707
+ <h3 style="margin: 0;">🔍</h3>
708
+ <h4 style="margin: 0.5rem 0;">Evidence Grounding</h4>
709
+ <p style="font-size: 0.85rem; opacity: 0.9;">Source Verification</p>
710
  </div>
711
  """, unsafe_allow_html=True)
712
 
 
714
  st.markdown("""
715
  <div style="background: linear-gradient(135deg, #4facfe 0%, #00f2fe 100%);
716
  border-radius: 12px; padding: 1.5rem; color: white; text-align: center;">
717
+ <h3 style="margin: 0;">💬</h3>
718
+ <h4 style="margin: 0.5rem 0;">RAG Q&A</h4>
719
+ <p style="font-size: 0.85rem; opacity: 0.9;">Grounded Citations</p>
720
  </div>
721
  """, unsafe_allow_html=True)
722
 
 
724
  st.markdown("""
725
  <div style="background: linear-gradient(135deg, #43e97b 0%, #38f9d7 100%);
726
  border-radius: 12px; padding: 1.5rem; color: white; text-align: center;">
727
+ <h3 style="margin: 0;">🤖</h3>
728
+ <h4 style="margin: 0.5rem 0;">Multi-Agent AI</h4>
729
+ <p style="font-size: 0.85rem; opacity: 0.9;">Orchestrated Workflows</p>
730
  </div>
731
  """, unsafe_allow_html=True)
732
 
733
  st.markdown("---")
734
 
735
  # Quick start
736
+ st.markdown("### 📚 Quick Start Guide")
737
 
738
+ with st.expander("Getting Started with SPARKNET", expanded=True):
739
  st.markdown("""
740
+ **For TTO Staff:**
741
+ 1. **Patent Wake-Up**: Upload a dormant patent to generate a valorization roadmap
742
+ 2. **Agreement Safety**: Upload contracts/agreements for AI-assisted risk review
743
+ 3. **Partner Matching**: Find suitable industry partners for your technologies
744
 
745
+ **For Researchers:**
746
+ 1. **Document Processing**: Process research documents with OCR and extraction
747
+ 2. **RAG Q&A**: Ask questions about indexed documents
748
+ 3. **Evidence Viewer**: Verify AI responses with source grounding
749
+
750
+ **Sample Documents**: The demo includes patent documents from major tech companies for testing.
751
  """)
752
 
753
  # Sample documents preview
754
+ st.markdown("### 📁 Sample Documents")
755
  docs = get_sample_documents()
756
 
757
  if docs:
 
1127
 
1128
  st.markdown("")
1129
 
1130
+ # Validation results with CriticAgent visibility
1131
  if validate:
1132
  st.markdown("---")
 
1133
 
1134
+ # Demo validation result from CriticAgent
1135
+ demo_validation = {
1136
+ "valid": True,
1137
+ "overall_score": 0.87,
1138
+ "dimension_scores": {
1139
+ "completeness": 0.92,
1140
+ "clarity": 0.88,
1141
+ "accuracy": 0.85,
1142
+ "actionability": 0.82,
1143
+ },
1144
+ "issues": [
1145
+ "Effective date confidence is below threshold (0.85)",
1146
+ ],
1147
+ "suggestions": [
1148
+ "Consider manual verification of the effective date",
1149
+ "Cross-reference parties with external sources",
1150
+ ],
1151
+ }
1152
+
1153
+ render_critic_validation(demo_validation)
1154
+
1155
+ # Source verification
1156
+ demo_sources = [
1157
+ {"page": 1, "snippet": "PATENT PLEDGE - This Patent Pledge is made by...", "verified": True, "confidence": 0.95},
1158
+ {"page": 1, "snippet": "The company hereby pledges not to assert...", "verified": True, "confidence": 0.91},
1159
+ {"page": 2, "snippet": "Covered Patents means all patents...", "verified": True, "confidence": 0.88},
1160
+ ]
1161
+
1162
+ render_source_verification(demo_sources, "Patent pledge document analysis")
1163
 
1164
+ # Confidence indicator for overall extraction
1165
+ render_confidence_indicator(0.89, "Extraction Confidence")
1166
 
1167
 
1168
  def render_rag_page():
 
1467
  # Footer
1468
  st.markdown("---")
1469
  st.markdown(
1470
+ """<div style='text-align: center; color: #666;'>
1471
+ 🔥 SPARKNET - AI-Powered Technology Transfer Office Automation Platform<br>
1472
+ <small>VISTA/Horizon EU Project | Built with Streamlit</small><br>
1473
+ <span class="vista-badge" style="font-size: 0.7rem; padding: 0.2rem 0.5rem; margin-top: 0.5rem;">VISTA</span>
1474
+ <span class="eu-badge" style="font-size: 0.7rem; padding: 0.2rem 0.5rem; margin-top: 0.5rem;">Horizon EU</span>
1475
+ </div>""",
1476
  unsafe_allow_html=True,
1477
  )
1478
 
demo/auth.py CHANGED
@@ -2,6 +2,48 @@
2
  Simple Password Authentication for SPARKNET
3
 
4
  Provides password-based access control for the Streamlit app.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  """
6
 
7
  import streamlit as st
 
2
  Simple Password Authentication for SPARKNET
3
 
4
  Provides password-based access control for the Streamlit app.
5
+
6
+ SECURITY NOTES:
7
+ ---------------
8
+ This module provides basic password authentication suitable for demos
9
+ and internal deployments. For production use, consider:
10
+
11
+ 1. ENHANCED AUTHENTICATION:
12
+ - Integrate with OAuth/OIDC (Google, Azure AD, Okta)
13
+ - Use Streamlit's built-in OAuth support
14
+ - Implement multi-factor authentication (MFA)
15
+
16
+ 2. SESSION MANAGEMENT:
17
+ - Configure session timeouts (default: browser session)
18
+ - Implement session invalidation on logout
19
+ - Consider IP-based session binding
20
+
21
+ 3. PASSWORD SECURITY:
22
+ - Use strong password requirements
23
+ - Implement account lockout after failed attempts
24
+ - Store passwords hashed with bcrypt (not SHA-256) for production
25
+
26
+ 4. AUDIT LOGGING:
27
+ - Log authentication attempts (success/failure)
28
+ - Track user sessions
29
+ - Monitor for suspicious activity
30
+
31
+ GDPR CONSIDERATIONS:
32
+ -------------------
33
+ - Authentication logs may contain personal data (usernames, IPs)
34
+ - Implement data retention policies for auth logs
35
+ - Support right-to-erasure for user accounts
36
+ - Document authentication processing in GDPR records
37
+
38
+ PRIVATE DEPLOYMENT:
39
+ ------------------
40
+ For enterprise deployments:
41
+ - Integrate with existing identity providers
42
+ - Use LDAP/Active Directory for user management
43
+ - Implement role-based access control (RBAC)
44
+ - Enable single sign-on (SSO)
45
+
46
+ See SECURITY.md for comprehensive security documentation.
47
  """
48
 
49
  import streamlit as st
demo/llm_providers.py CHANGED
@@ -1,18 +1,67 @@
1
  """
2
  Free LLM Providers for SPARKNET
3
 
4
- Supports multiple free-tier LLM providers:
5
- 1. HuggingFace Inference API (free, no payment required)
6
- 2. Groq (free tier - very fast)
7
- 3. Google Gemini (free tier)
8
- 4. Local/Offline mode (simulated responses)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  """
10
 
11
  import os
12
  import requests
13
- from typing import Optional, Tuple, List
14
  from dataclasses import dataclass
15
  from loguru import logger
 
 
16
 
17
  @dataclass
18
  class LLMResponse:
@@ -21,69 +70,346 @@ class LLMResponse:
21
  provider: str
22
  success: bool
23
  error: Optional[str] = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
 
26
  class HuggingFaceProvider:
27
  """
28
- HuggingFace Inference API - FREE tier available.
29
 
30
- Models that work well on free tier:
31
- - microsoft/DialoGPT-medium
32
- - google/flan-t5-base
33
- - mistralai/Mistral-7B-Instruct-v0.2 (may need Pro for heavy use)
34
- - HuggingFaceH4/zephyr-7b-beta
35
  """
36
 
37
  API_URL = "https://api-inference.huggingface.co/models/"
38
 
39
- # Free-tier friendly models
40
  MODELS = {
41
- "chat": "HuggingFaceH4/zephyr-7b-beta",
42
- "chat_small": "microsoft/DialoGPT-medium",
43
- "instruct": "google/flan-t5-large",
 
44
  "embed": "sentence-transformers/all-MiniLM-L6-v2",
45
  }
46
 
47
- def __init__(self, api_token: Optional[str] = None):
48
- """
49
- Initialize HuggingFace provider.
50
-
51
- Args:
52
- api_token: HF token (optional but recommended for higher rate limits)
53
- Get free token at: https://huggingface.co/settings/tokens
54
- """
55
- self.api_token = api_token or os.environ.get("HUGGINGFACE_TOKEN") or os.environ.get("HF_TOKEN")
56
- self.headers = {}
57
- if self.api_token:
58
- self.headers["Authorization"] = f"Bearer {self.api_token}"
59
-
60
- def generate(self, prompt: str, model: Optional[str] = None, max_tokens: int = 500) -> LLMResponse:
61
- """Generate text using HuggingFace Inference API."""
62
- model = model or self.MODELS["chat"]
63
  url = f"{self.API_URL}{model}"
64
 
65
- payload = {
66
- "inputs": prompt,
67
- "parameters": {
68
- "max_new_tokens": max_tokens,
69
- "temperature": 0.7,
70
- "do_sample": True,
71
- "return_full_text": False,
72
- }
73
- }
74
 
75
  try:
76
- response = requests.post(url, headers=self.headers, json=payload, timeout=60)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
 
78
  if response.status_code == 503:
79
- # Model is loading
80
- return LLMResponse(
81
- text="Model is loading, please try again in a moment...",
82
- model=model,
83
- provider="huggingface",
84
- success=False,
85
- error="Model loading"
86
- )
87
 
88
  response.raise_for_status()
89
  result = response.json()
@@ -93,164 +419,200 @@ class HuggingFaceProvider:
93
  else:
94
  text = str(result)
95
 
96
- return LLMResponse(
97
- text=text,
98
- model=model,
99
- provider="huggingface",
100
- success=True
101
- )
102
 
103
  except Exception as e:
104
- logger.error(f"HuggingFace API error: {e}")
105
- return LLMResponse(
106
- text="",
107
- model=model,
108
- provider="huggingface",
109
- success=False,
110
- error=str(e)
111
- )
112
 
113
  def embed(self, texts: List[str], model: Optional[str] = None) -> Tuple[List[List[float]], Optional[str]]:
114
- """Generate embeddings using HuggingFace."""
115
  model = model or self.MODELS["embed"]
116
  url = f"{self.API_URL}{model}"
117
 
118
- payload = {
119
- "inputs": texts,
120
- "options": {"wait_for_model": True}
121
- }
122
 
123
  try:
124
- response = requests.post(url, headers=self.headers, json=payload, timeout=60)
 
 
 
 
 
125
  response.raise_for_status()
126
- embeddings = response.json()
127
- return embeddings, None
128
  except Exception as e:
129
- logger.error(f"HuggingFace embed error: {e}")
130
  return [], str(e)
131
 
132
 
133
- class GroqProvider:
134
  """
135
- Groq - FREE tier with very fast inference.
136
 
137
- Free tier includes:
138
- - 14,400 requests/day for smaller models
139
- - Very fast inference (fastest available)
140
-
141
- Get free API key at: https://console.groq.com/keys
142
  """
143
 
144
- API_URL = "https://api.groq.com/openai/v1/chat/completions"
145
 
146
  MODELS = {
147
- "fast": "llama-3.1-8b-instant", # Fastest
148
- "smart": "llama-3.3-70b-versatile", # Best quality
149
- "small": "gemma2-9b-it", # Good balance
 
150
  }
151
 
152
  def __init__(self, api_key: Optional[str] = None):
153
- self.api_key = api_key or os.environ.get("GROQ_API_KEY")
154
- if not self.api_key:
155
- logger.warning("No Groq API key found. Get free key at: https://console.groq.com/keys")
156
 
157
- def generate(self, prompt: str, model: Optional[str] = None, max_tokens: int = 500) -> LLMResponse:
158
- """Generate text using Groq API."""
159
- if not self.api_key:
160
- return LLMResponse(
161
- text="",
162
- model="",
163
- provider="groq",
164
- success=False,
165
- error="No Groq API key configured"
166
- )
167
 
168
- model = model or self.MODELS["fast"]
 
 
 
169
 
170
- headers = {
171
- "Authorization": f"Bearer {self.api_key}",
172
- "Content-Type": "application/json"
173
- }
174
 
175
- payload = {
176
- "model": model,
177
- "messages": [{"role": "user", "content": prompt}],
178
- "max_tokens": max_tokens,
179
- "temperature": 0.7,
180
- }
181
 
182
  try:
183
- response = requests.post(self.API_URL, headers=headers, json=payload, timeout=30)
 
 
 
 
 
 
 
 
 
 
 
 
 
184
  response.raise_for_status()
185
  result = response.json()
186
 
187
- text = result["choices"][0]["message"]["content"]
188
-
189
  return LLMResponse(
190
- text=text,
191
  model=model,
192
- provider="groq",
193
- success=True
 
194
  )
195
-
196
  except Exception as e:
197
- logger.error(f"Groq API error: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
198
  return LLMResponse(
199
- text="",
200
  model=model,
201
- provider="groq",
202
- success=False,
203
- error=str(e)
204
  )
 
 
205
 
206
 
207
  class OfflineProvider:
208
  """
209
- Offline/Demo mode - no API required.
210
 
211
- Provides simulated responses for demonstration purposes.
212
  """
213
 
214
  def __init__(self):
215
- pass
216
 
217
- def generate(self, prompt: str, context: str = "", **kwargs) -> LLMResponse:
218
- """Generate a simulated response based on context."""
 
219
 
220
- # Extract key information from context if provided
221
  if context:
222
- # Simple extractive response
223
- sentences = context.split('.')
224
- relevant = [s.strip() for s in sentences if len(s.strip()) > 20][:3]
225
-
226
- if relevant:
227
- response = f"Based on the documents, {relevant[0].lower()}."
228
- if len(relevant) > 1:
229
- response += f" Additionally, {relevant[1].lower()}."
230
  else:
231
- response = "Based on the available documents, I found relevant information but cannot generate a detailed response in offline mode."
232
  else:
233
- response = "I'm running in offline demo mode. To get AI-powered responses, please configure a free LLM provider (HuggingFace or Groq)."
 
234
 
235
- return LLMResponse(
236
- text=response,
237
- model="offline",
238
- provider="offline",
239
- success=True
240
- )
241
 
242
  def embed(self, texts: List[str]) -> Tuple[List[List[float]], Optional[str]]:
243
- """Generate simple bag-of-words style embeddings for demo."""
244
  import hashlib
245
-
246
  embeddings = []
247
  for text in texts:
248
- # Create deterministic pseudo-embeddings based on text hash
249
  hash_bytes = hashlib.sha256(text.encode()).digest()
250
- # Convert to 384-dim vector (same as MiniLM)
251
- embedding = [((b % 200) - 100) / 100.0 for b in hash_bytes * 12][:384]
252
  embeddings.append(embedding)
253
-
254
  return embeddings, None
255
 
256
 
@@ -258,74 +620,104 @@ class UnifiedLLMProvider:
258
  """
259
  Unified interface for all LLM providers.
260
 
261
- Automatically selects the best available provider.
 
262
  """
263
 
264
  def __init__(self):
265
- self.providers = {}
266
- self.active_provider = None
267
- self.active_embed_provider = None
268
-
269
- # Try to initialize providers in order of preference
270
  self._init_providers()
271
 
272
  def _init_providers(self):
273
- """Initialize available providers."""
274
-
275
- # Check for Groq (fastest, generous free tier)
276
- groq_key = os.environ.get("GROQ_API_KEY")
277
- if groq_key:
278
- self.providers["groq"] = GroqProvider(groq_key)
279
- self.active_provider = "groq"
280
- logger.info("Using Groq provider (free tier)")
281
-
282
- # Check for HuggingFace (always available, even without token)
283
- hf_token = os.environ.get("HUGGINGFACE_TOKEN") or os.environ.get("HF_TOKEN")
284
- self.providers["huggingface"] = HuggingFaceProvider(hf_token)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
285
  if not self.active_provider:
286
- self.active_provider = "huggingface"
287
- logger.info("Using HuggingFace provider")
288
 
289
- # HuggingFace for embeddings (always free)
290
  self.active_embed_provider = "huggingface"
291
 
292
- # Offline fallback
293
- self.providers["offline"] = OfflineProvider()
 
294
 
295
- logger.info(f"LLM Provider: {self.active_provider}, Embed Provider: {self.active_embed_provider}")
296
-
297
- def generate(self, prompt: str, **kwargs) -> LLMResponse:
298
- """Generate text using the best available provider."""
299
- provider = self.providers.get(self.active_provider)
300
-
301
- if provider:
302
- response = provider.generate(prompt, **kwargs)
303
  if response.success:
304
  return response
 
 
 
 
 
 
 
 
305
 
306
- # Fallback to offline
307
  return self.providers["offline"].generate(prompt, **kwargs)
308
 
309
  def embed(self, texts: List[str]) -> Tuple[List[List[float]], Optional[str]]:
310
- """Generate embeddings using the best available provider."""
311
- if self.active_embed_provider == "huggingface":
312
- embeddings, error = self.providers["huggingface"].embed(texts)
313
- if not error:
314
- return embeddings, None
 
 
315
 
316
- # Fallback to offline embeddings
317
  return self.providers["offline"].embed(texts)
318
 
319
- def get_status(self) -> dict:
320
  """Get status of all providers."""
321
- return {
322
  "active_llm": self.active_provider,
 
323
  "active_embed": self.active_embed_provider,
324
- "available_providers": list(self.providers.keys()),
325
- "groq_configured": "groq" in self.providers and self.providers["groq"].api_key is not None,
326
- "huggingface_configured": self.providers["huggingface"].api_token is not None,
327
  }
328
 
 
 
 
 
 
 
 
 
 
 
 
 
329
 
330
  # Global instance
331
  _llm_provider: Optional[UnifiedLLMProvider] = None
@@ -337,3 +729,39 @@ def get_llm_provider() -> UnifiedLLMProvider:
337
  if _llm_provider is None:
338
  _llm_provider = UnifiedLLMProvider()
339
  return _llm_provider
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  """
2
  Free LLM Providers for SPARKNET
3
 
4
+ Supports multiple FREE-tier LLM providers:
5
+ 1. Groq - Very fast, generous free tier (14,400 req/day)
6
+ 2. Google Gemini - 15 req/min free
7
+ 3. OpenRouter - Access to many free models
8
+ 4. GitHub Models - Free GPT-4o, Llama access
9
+ 5. HuggingFace Inference API - Thousands of free models
10
+ 6. Together AI - $25 free credits
11
+ 7. Mistral AI - Free experiment plan
12
+ 8. Offline mode - No API required
13
+
14
+ SECURITY & PRIVACY CONSIDERATIONS
15
+ ==================================
16
+
17
+ GDPR COMPLIANCE:
18
+ - Cloud LLM providers may process data outside the EU
19
+ - For GDPR-sensitive workloads, use:
20
+ 1. Offline mode with local Ollama
21
+ 2. EU-hosted providers (when available)
22
+ 3. Data anonymization before API calls
23
+ - Consider data processing agreements with LLM providers
24
+ - Implement data minimization - only send necessary context
25
+
26
+ DATA ISOLATION OPTIONS:
27
+ 1. FULLY LOCAL (Maximum Privacy):
28
+ - Use Ollama for 100% on-premise inference
29
+ - No data transmitted to external services
30
+ - Configure: set no cloud API keys, system uses offline mode
31
+
32
+ 2. HYBRID (Balanced):
33
+ - Use local Ollama for sensitive documents
34
+ - Use cloud LLMs for general queries
35
+ - Implement document classification for routing
36
+
37
+ 3. CLOUD-ONLY (Convenience):
38
+ - All inference via cloud providers
39
+ - Suitable for non-sensitive/public data
40
+ - Review provider privacy policies
41
+
42
+ PRIVATE DEPLOYMENT NOTES:
43
+ - For enterprise deployments, configure Ollama on internal network
44
+ - Use VPN/private endpoints for database connections
45
+ - Enable audit logging for all LLM interactions
46
+ - Implement rate limiting and access controls
47
+
48
+ STREAMLIT CLOUD DEPLOYMENT:
49
+ - Store API keys in Streamlit secrets (secrets.toml)
50
+ - Never commit secrets to version control
51
+ - Use environment variables as fallback
52
+ - Enable session-based authentication
53
+
54
+ Author: SPARKNET Team
55
+ Project: VISTA/Horizon EU
56
  """
57
 
58
  import os
59
  import requests
60
+ from typing import Optional, Tuple, List, Dict, Any
61
  from dataclasses import dataclass
62
  from loguru import logger
63
+ import streamlit as st
64
+
65
 
66
  @dataclass
67
  class LLMResponse:
 
70
  provider: str
71
  success: bool
72
  error: Optional[str] = None
73
+ usage: Optional[Dict[str, int]] = None
74
+
75
+
76
def get_secret(key: str, default: Optional[str] = None) -> Optional[str]:
    """Look up a configuration secret.

    Checks Streamlit secrets first (for Streamlit Cloud deployments), then
    falls back to environment variables.

    Args:
        key: Secret name, e.g. "GROQ_API_KEY".
        default: Value returned when the key is configured nowhere.

    Returns:
        The secret value, or ``default`` if not found.
    """
    # Streamlit secrets take precedence. Accessing st.secrets can raise
    # (e.g. no secrets.toml present outside Streamlit), so any failure
    # falls through to the environment lookup. Use `except Exception`
    # rather than a bare `except:` so SystemExit/KeyboardInterrupt are
    # not swallowed.
    try:
        if hasattr(st, 'secrets') and key in st.secrets:
            return st.secrets[key]
    except Exception:
        pass
    # Fall back to environment variables.
    return os.environ.get(key, default)
86
+
87
+
88
class GroqProvider:
    """Groq - FREE tier with very fast inference.

    Free tier: 14,400 requests/day, 300+ tokens/sec
    Get free key: https://console.groq.com/keys
    """

    API_URL = "https://api.groq.com/openai/v1/chat/completions"

    MODELS = {
        "llama-3.3-70b": "llama-3.3-70b-versatile",
        "llama-3.1-8b": "llama-3.1-8b-instant",
        "mixtral": "mixtral-8x7b-32768",
        "gemma2": "gemma2-9b-it",
    }

    def __init__(self, api_key: Optional[str] = None):
        # An explicitly passed key wins; otherwise pull from secrets/env.
        self.api_key = api_key or get_secret("GROQ_API_KEY")
        self.name = "Groq"

    @property
    def is_configured(self) -> bool:
        """True when an API key is available."""
        return bool(self.api_key)

    def generate(self, prompt: str, model: Optional[str] = None, max_tokens: int = 1024,
                 system_prompt: str = None) -> LLMResponse:
        """Run a chat completion against Groq's OpenAI-compatible API."""
        if not self.api_key:
            return LLMResponse("", "", self.name, False, "No Groq API key")

        chosen_model = model or self.MODELS["llama-3.1-8b"]

        chat = [{"role": "user", "content": prompt}]
        if system_prompt:
            chat.insert(0, {"role": "system", "content": system_prompt})

        request_headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }
        body = {
            "model": chosen_model,
            "messages": chat,
            "max_tokens": max_tokens,
            "temperature": 0.7,
        }

        try:
            reply = requests.post(self.API_URL, headers=request_headers, json=body, timeout=30)
            reply.raise_for_status()
            data = reply.json()

            return LLMResponse(
                text=data["choices"][0]["message"]["content"],
                model=chosen_model,
                provider=self.name,
                success=True,
                usage=data.get("usage"),
            )
        except Exception as e:
            return LLMResponse("", chosen_model, self.name, False, str(e))
152
+
153
+
154
class GoogleGeminiProvider:
    """
    Google AI Studio (Gemini) - FREE tier.

    Free tier: ~15 requests/min, Gemini 2.0 Flash & 1.5 Pro
    Get free key: https://aistudio.google.com/apikey
    """

    API_URL = "https://generativelanguage.googleapis.com/v1beta/models"

    MODELS = {
        "gemini-2.0-flash": "gemini-2.0-flash-exp",
        "gemini-1.5-flash": "gemini-1.5-flash",
        "gemini-1.5-pro": "gemini-1.5-pro",
    }

    def __init__(self, api_key: Optional[str] = None):
        # Accept both common env var names for the same key.
        self.api_key = api_key or get_secret("GOOGLE_API_KEY") or get_secret("GEMINI_API_KEY")
        self.name = "Google Gemini"

    @property
    def is_configured(self) -> bool:
        """True when an API key is available."""
        return bool(self.api_key)

    def generate(self, prompt: str, model: Optional[str] = None, max_tokens: int = 1024,
                 system_prompt: Optional[str] = None) -> LLMResponse:
        """Generate text via the Gemini generateContent REST endpoint.

        Args:
            prompt: User prompt.
            model: Model id; defaults to gemini-1.5-flash.
            max_tokens: Cap on generated tokens.
            system_prompt: Optional instruction, emulated as a primed turn.

        Returns:
            LLMResponse with the generated text, or success=False on error.
        """
        if not self.api_key:
            return LLMResponse("", "", self.name, False, "No Google API key")

        model = model or self.MODELS["gemini-1.5-flash"]

        # The v1beta generateContent API has no dedicated system role here;
        # emulate one by priming the conversation with the instruction and a
        # model acknowledgement.
        contents = []
        if system_prompt:
            contents.append({"role": "user", "parts": [{"text": system_prompt}]})
            contents.append({"role": "model", "parts": [{"text": "Understood. I will follow these instructions."}]})
        contents.append({"role": "user", "parts": [{"text": prompt}]})

        try:
            # SECURITY FIX: send the API key in the x-goog-api-key header
            # (officially supported by the Gemini API) instead of a ?key=
            # query parameter, so the key cannot leak into URL logs,
            # proxies, or error messages containing the request URL.
            url = f"{self.API_URL}/{model}:generateContent"
            response = requests.post(
                url,
                headers={"x-goog-api-key": self.api_key},
                json={
                    "contents": contents,
                    "generationConfig": {
                        "maxOutputTokens": max_tokens,
                        "temperature": 0.7,
                    }
                },
                timeout=60
            )
            response.raise_for_status()
            result = response.json()

            text = result["candidates"][0]["content"]["parts"][0]["text"]

            return LLMResponse(
                text=text,
                model=model,
                provider=self.name,
                success=True
            )
        except Exception as e:
            return LLMResponse("", model, self.name, False, str(e))
218
+
219
+
220
class OpenRouterProvider:
    """OpenRouter - one API key for many FREE community models.

    Free models include: Llama, Mistral, Gemma, and more
    Get free key: https://openrouter.ai/keys
    """

    API_URL = "https://openrouter.ai/api/v1/chat/completions"

    # Models with a zero-cost tier on OpenRouter.
    MODELS = {
        "llama-3.1-8b": "meta-llama/llama-3.1-8b-instruct:free",
        "gemma-2-9b": "google/gemma-2-9b-it:free",
        "mistral-7b": "mistralai/mistral-7b-instruct:free",
        "phi-3-mini": "microsoft/phi-3-mini-128k-instruct:free",
        "qwen-2-7b": "qwen/qwen-2-7b-instruct:free",
    }

    def __init__(self, api_key: Optional[str] = None):
        self.name = "OpenRouter"
        # Explicit key wins over secrets/environment lookup.
        self.api_key = api_key or get_secret("OPENROUTER_API_KEY")

    @property
    def is_configured(self) -> bool:
        """True when an API key is available."""
        return bool(self.api_key)

    def generate(self, prompt: str, model: Optional[str] = None, max_tokens: int = 1024,
                 system_prompt: str = None) -> LLMResponse:
        """Run a chat completion through OpenRouter's OpenAI-compatible API."""
        if not self.api_key:
            return LLMResponse("", "", self.name, False, "No OpenRouter API key")

        target = model or self.MODELS["llama-3.1-8b"]

        convo = [{"role": "user", "content": prompt}]
        if system_prompt:
            convo.insert(0, {"role": "system", "content": system_prompt})

        # OpenRouter asks callers to identify their app via these headers.
        hdrs = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
            "HTTP-Referer": "https://sparknet.streamlit.app",
            "X-Title": "SPARKNET",
        }

        try:
            resp = requests.post(
                self.API_URL,
                headers=hdrs,
                json={"model": target, "messages": convo, "max_tokens": max_tokens},
                timeout=60,
            )
            resp.raise_for_status()
            data = resp.json()

            return LLMResponse(
                text=data["choices"][0]["message"]["content"],
                model=target,
                provider=self.name,
                success=True,
                usage=data.get("usage"),
            )
        except Exception as e:
            return LLMResponse("", target, self.name, False, str(e))
287
+
288
+
289
class GitHubModelsProvider:
    """GitHub Models - FREE access to top-tier models.

    Free models: GPT-4o, Llama 3.1, Mistral, and more
    Get token: https://github.com/settings/tokens (with 'models' scope)
    """

    API_URL = "https://models.inference.ai.azure.com/chat/completions"

    MODELS = {
        "gpt-4o": "gpt-4o",
        "gpt-4o-mini": "gpt-4o-mini",
        "llama-3.1-70b": "Meta-Llama-3.1-70B-Instruct",
        "llama-3.1-8b": "Meta-Llama-3.1-8B-Instruct",
        "mistral-large": "Mistral-large",
    }

    def __init__(self, api_key: Optional[str] = None):
        self.name = "GitHub Models"
        # A personal access token with the 'models' scope works here.
        self.api_key = api_key or get_secret("GITHUB_TOKEN") or get_secret("GITHUB_MODELS_TOKEN")

    @property
    def is_configured(self) -> bool:
        """True when a GitHub token is available."""
        return bool(self.api_key)

    def generate(self, prompt: str, model: Optional[str] = None, max_tokens: int = 1024,
                 system_prompt: str = None) -> LLMResponse:
        """Call the Azure-hosted GitHub Models chat completions endpoint."""
        if not self.api_key:
            return LLMResponse("", "", self.name, False, "No GitHub token")

        target = model or self.MODELS["gpt-4o-mini"]

        convo = []
        if system_prompt:
            convo.append({"role": "system", "content": system_prompt})
        convo.append({"role": "user", "content": prompt})

        auth = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }
        payload = {"model": target, "messages": convo, "max_tokens": max_tokens}

        try:
            resp = requests.post(self.API_URL, headers=auth, json=payload, timeout=60)
            resp.raise_for_status()
            data = resp.json()

            return LLMResponse(
                text=data["choices"][0]["message"]["content"],
                model=target,
                provider=self.name,
                success=True,
                usage=data.get("usage"),
            )
        except Exception as e:
            return LLMResponse("", target, self.name, False, str(e))
353
 
354
 
355
  class HuggingFaceProvider:
356
  """
357
+ HuggingFace Inference API - FREE access to thousands of models.
358
 
359
+ Get free token: https://huggingface.co/settings/tokens
 
 
 
 
360
  """
361
 
362
  API_URL = "https://api-inference.huggingface.co/models/"
363
 
 
364
  MODELS = {
365
+ "zephyr-7b": "HuggingFaceH4/zephyr-7b-beta",
366
+ "mistral-7b": "mistralai/Mistral-7B-Instruct-v0.2",
367
+ "llama-2-7b": "meta-llama/Llama-2-7b-chat-hf",
368
+ "flan-t5": "google/flan-t5-large",
369
  "embed": "sentence-transformers/all-MiniLM-L6-v2",
370
  }
371
 
372
+ def __init__(self, api_key: Optional[str] = None):
373
+ self.api_key = api_key or get_secret("HF_TOKEN") or get_secret("HUGGINGFACE_TOKEN")
374
+ self.name = "HuggingFace"
375
+
376
+ @property
377
+ def is_configured(self) -> bool:
378
+ return bool(self.api_key)
379
+
380
+ def generate(self, prompt: str, model: Optional[str] = None, max_tokens: int = 500,
381
+ system_prompt: str = None) -> LLMResponse:
382
+ model = model or self.MODELS["zephyr-7b"]
 
 
 
 
 
383
  url = f"{self.API_URL}{model}"
384
 
385
+ # Format prompt with system instruction
386
+ full_prompt = prompt
387
+ if system_prompt:
388
+ full_prompt = f"{system_prompt}\n\nUser: {prompt}\nAssistant:"
389
+
390
+ headers = {"Content-Type": "application/json"}
391
+ if self.api_key:
392
+ headers["Authorization"] = f"Bearer {self.api_key}"
 
393
 
394
  try:
395
+ response = requests.post(
396
+ url,
397
+ headers=headers,
398
+ json={
399
+ "inputs": full_prompt,
400
+ "parameters": {
401
+ "max_new_tokens": max_tokens,
402
+ "temperature": 0.7,
403
+ "do_sample": True,
404
+ "return_full_text": False,
405
+ },
406
+ "options": {"wait_for_model": True}
407
+ },
408
+ timeout=120
409
+ )
410
 
411
  if response.status_code == 503:
412
+ return LLMResponse("", model, self.name, False, "Model is loading, try again")
 
 
 
 
 
 
 
413
 
414
  response.raise_for_status()
415
  result = response.json()
 
419
  else:
420
  text = str(result)
421
 
422
+ return LLMResponse(text=text, model=model, provider=self.name, success=True)
 
 
 
 
 
423
 
424
  except Exception as e:
425
+ return LLMResponse("", model, self.name, False, str(e))
 
 
 
 
 
 
 
426
 
427
  def embed(self, texts: List[str], model: Optional[str] = None) -> Tuple[List[List[float]], Optional[str]]:
428
+ """Generate embeddings."""
429
  model = model or self.MODELS["embed"]
430
  url = f"{self.API_URL}{model}"
431
 
432
+ headers = {"Content-Type": "application/json"}
433
+ if self.api_key:
434
+ headers["Authorization"] = f"Bearer {self.api_key}"
 
435
 
436
  try:
437
+ response = requests.post(
438
+ url,
439
+ headers=headers,
440
+ json={"inputs": texts, "options": {"wait_for_model": True}},
441
+ timeout=60
442
+ )
443
  response.raise_for_status()
444
+ return response.json(), None
 
445
  except Exception as e:
 
446
  return [], str(e)
447
 
448
 
449
class TogetherAIProvider:
    """Together AI - $25 FREE credits.

    Access to Llama, Mistral, and many other models
    Get free credits: https://www.together.ai/
    """

    API_URL = "https://api.together.xyz/v1/chat/completions"

    MODELS = {
        "llama-3.1-8b": "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
        "llama-3.1-70b": "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
        "mistral-7b": "mistralai/Mistral-7B-Instruct-v0.3",
        "qwen-2-72b": "Qwen/Qwen2-72B-Instruct",
    }

    def __init__(self, api_key: Optional[str] = None):
        self.name = "Together AI"
        # Explicit key wins over secrets/environment lookup.
        self.api_key = api_key or get_secret("TOGETHER_API_KEY")

    @property
    def is_configured(self) -> bool:
        """True when an API key is available."""
        return bool(self.api_key)

    def generate(self, prompt: str, model: Optional[str] = None, max_tokens: int = 1024,
                 system_prompt: str = None) -> LLMResponse:
        """Run a chat completion through Together AI's OpenAI-compatible API."""
        if not self.api_key:
            return LLMResponse("", "", self.name, False, "No Together AI API key")

        target = model or self.MODELS["llama-3.1-8b"]

        convo = [{"role": "user", "content": prompt}]
        if system_prompt:
            convo.insert(0, {"role": "system", "content": system_prompt})

        body = {
            "model": target,
            "messages": convo,
            "max_tokens": max_tokens,
            "temperature": 0.7,
        }

        try:
            resp = requests.post(
                self.API_URL,
                headers={
                    "Authorization": f"Bearer {self.api_key}",
                    "Content-Type": "application/json",
                },
                json=body,
                timeout=60,
            )
            resp.raise_for_status()
            data = resp.json()

            return LLMResponse(
                text=data["choices"][0]["message"]["content"],
                model=target,
                provider=self.name,
                success=True,
                usage=data.get("usage"),
            )
        except Exception as e:
            return LLMResponse("", target, self.name, False, str(e))
513
+
514
+
515
class MistralAIProvider:
    """Mistral AI - FREE "Experiment" plan.

    Get free access: https://console.mistral.ai/
    """

    API_URL = "https://api.mistral.ai/v1/chat/completions"

    MODELS = {
        "mistral-small": "mistral-small-latest",
        "mistral-medium": "mistral-medium-latest",
        "mistral-large": "mistral-large-latest",
        "codestral": "codestral-latest",
    }

    def __init__(self, api_key: Optional[str] = None):
        self.name = "Mistral AI"
        # Explicit key wins over secrets/environment lookup.
        self.api_key = api_key or get_secret("MISTRAL_API_KEY")

    @property
    def is_configured(self) -> bool:
        """True when an API key is available."""
        return bool(self.api_key)

    def generate(self, prompt: str, model: Optional[str] = None, max_tokens: int = 1024,
                 system_prompt: str = None) -> LLMResponse:
        """Run a chat completion against the Mistral chat API."""
        if not self.api_key:
            return LLMResponse("", "", self.name, False, "No Mistral API key")

        target = model or self.MODELS["mistral-small"]

        convo = [{"role": "user", "content": prompt}]
        if system_prompt:
            convo.insert(0, {"role": "system", "content": system_prompt})

        try:
            resp = requests.post(
                self.API_URL,
                headers={
                    "Authorization": f"Bearer {self.api_key}",
                    "Content-Type": "application/json",
                },
                json={"model": target, "messages": convo, "max_tokens": max_tokens},
                timeout=60,
            )
            resp.raise_for_status()
            data = resp.json()

            return LLMResponse(
                text=data["choices"][0]["message"]["content"],
                model=target,
                provider=self.name,
                success=True,
                usage=data.get("usage"),
            )
        except Exception as e:
            return LLMResponse("", target, self.name, False, str(e))
577
 
578
 
579
class OfflineProvider:
    """
    Offline/Demo mode - No API required.

    Provides extractive responses from context for demonstration.
    """

    def __init__(self):
        self.name = "Offline"

    @property
    def is_configured(self) -> bool:
        # Offline mode needs no credentials, so it is always available.
        return True

    def generate(self, prompt: str, context: str = "", **kwargs) -> LLMResponse:
        """Produce an extractive demo answer without calling any API."""
        if not context:
            reply = ("I'm running in offline demo mode. Configure a free LLM provider "
                     "(Groq, Gemini, OpenRouter, etc.) for AI-powered responses.")
            return LLMResponse(text=reply, model="offline", provider=self.name, success=True)

        # Pick up to three substantive sentences from the context.
        picked = [frag.strip() for frag in context.split('.') if len(frag.strip()) > 20][:3]
        if not picked:
            reply = "I found relevant information but cannot generate a detailed response in offline mode."
        else:
            reply = f"Based on the documents: {picked[0]}."
            if len(picked) > 1:
                reply += f" Additionally, {picked[1].lower()}."

        return LLMResponse(text=reply, model="offline", provider=self.name, success=True)

    def embed(self, texts: List[str]) -> Tuple[List[List[float]], Optional[str]]:
        """Generate simple hash-based embeddings for demo."""
        import hashlib

        vectors = []
        for item in texts:
            digest = hashlib.sha256(item.encode()).digest()
            # Repeat the 32-byte digest to cover 384 dims (MiniLM-sized),
            # mapping each byte deterministically into [-1.0, 1.0).
            repeated = digest * 12
            vectors.append([((b % 200) - 100) / 100.0 for b in repeated[:384]])
        return vectors, None
617
 
618
 
 
620
  """
621
  Unified interface for all LLM providers.
622
 
623
+ Automatically selects the best available provider based on configured API keys.
624
+ Priority: Groq > Gemini > OpenRouter > GitHub > Together > Mistral > HuggingFace > Offline
625
  """
626
 
627
  def __init__(self):
628
+ self.providers: Dict[str, Any] = {}
629
+ self.active_provider: Optional[str] = None
630
+ self.active_embed_provider: Optional[str] = None
 
 
631
  self._init_providers()
632
 
633
  def _init_providers(self):
634
+ """Initialize all available providers."""
635
+
636
+ # Initialize providers in priority order
637
+ provider_classes = [
638
+ ("groq", GroqProvider),
639
+ ("gemini", GoogleGeminiProvider),
640
+ ("openrouter", OpenRouterProvider),
641
+ ("github", GitHubModelsProvider),
642
+ ("together", TogetherAIProvider),
643
+ ("mistral", MistralAIProvider),
644
+ ("huggingface", HuggingFaceProvider),
645
+ ("offline", OfflineProvider),
646
+ ]
647
+
648
+ for name, cls in provider_classes:
649
+ try:
650
+ provider = cls()
651
+ self.providers[name] = provider
652
+
653
+ # Set active provider (first configured one)
654
+ if provider.is_configured and not self.active_provider and name != "offline":
655
+ self.active_provider = name
656
+ logger.info(f"Active LLM provider: {provider.name}")
657
+
658
+ except Exception as e:
659
+ logger.warning(f"Failed to init {name}: {e}")
660
+
661
+ # Fallback to offline if nothing configured
662
  if not self.active_provider:
663
+ self.active_provider = "offline"
664
+ logger.warning("No LLM API configured, using offline mode")
665
 
666
+ # HuggingFace for embeddings (works without token too)
667
  self.active_embed_provider = "huggingface"
668
 
669
+ def generate(self, prompt: str, provider: str = None, **kwargs) -> LLMResponse:
670
+ """Generate text using specified or best available provider."""
671
+ provider_name = provider or self.active_provider
672
 
673
+ if provider_name and provider_name in self.providers:
674
+ response = self.providers[provider_name].generate(prompt, **kwargs)
 
 
 
 
 
 
675
  if response.success:
676
  return response
677
+ logger.warning(f"{provider_name} failed: {response.error}")
678
+
679
+ # Fallback chain
680
+ for name in ["groq", "gemini", "openrouter", "huggingface", "offline"]:
681
+ if name in self.providers and name != provider_name:
682
+ response = self.providers[name].generate(prompt, **kwargs)
683
+ if response.success:
684
+ return response
685
 
 
686
  return self.providers["offline"].generate(prompt, **kwargs)
687
 
688
  def embed(self, texts: List[str]) -> Tuple[List[List[float]], Optional[str]]:
689
+ """Generate embeddings."""
690
+ if self.active_embed_provider and self.active_embed_provider in self.providers:
691
+ provider = self.providers[self.active_embed_provider]
692
+ if hasattr(provider, 'embed'):
693
+ result, error = provider.embed(texts)
694
+ if not error:
695
+ return result, None
696
 
697
+ # Fallback to offline
698
  return self.providers["offline"].embed(texts)
699
 
700
+ def get_status(self) -> Dict[str, Any]:
701
  """Get status of all providers."""
702
+ status = {
703
  "active_llm": self.active_provider,
704
+ "active_llm_name": self.providers[self.active_provider].name if self.active_provider else "None",
705
  "active_embed": self.active_embed_provider,
706
+ "providers": {}
 
 
707
  }
708
 
709
+ for name, provider in self.providers.items():
710
+ status["providers"][name] = {
711
+ "name": provider.name,
712
+ "configured": provider.is_configured,
713
+ }
714
+
715
+ return status
716
+
717
+ def list_available(self) -> List[str]:
718
+ """List all configured providers."""
719
+ return [name for name, p in self.providers.items() if p.is_configured and name != "offline"]
720
+
721
 
722
  # Global instance
723
  _llm_provider: Optional[UnifiedLLMProvider] = None
 
729
  if _llm_provider is None:
730
  _llm_provider = UnifiedLLMProvider()
731
  return _llm_provider
732
+
733
+
734
def generate_response(prompt: str, context: str = "", system_prompt: Optional[str] = None) -> Tuple[str, Optional[str]]:
    """
    Convenience function to generate a response.

    Args:
        prompt: User prompt
        context: Optional context from retrieved documents
        system_prompt: Optional system instruction

    Returns:
        Tuple of (response_text, error_message); error_message is None on
        success, and response_text is "" on failure.
    """
    provider = get_llm_provider()

    # Ground the answer in retrieved context when it is available.
    if context:
        full_prompt = f"""Context from documents:
{context}

Question: {prompt}

Please answer based on the context provided. If the answer is not in the context, say so."""
    else:
        full_prompt = prompt

    if not system_prompt:
        system_prompt = "You are a helpful document analysis assistant. Provide accurate, concise answers based on the provided context."

    response = provider.generate(full_prompt, system_prompt=system_prompt)

    # Normalize the provider result into the (text, error) contract.
    if response.success:
        return response.text, None
    return "", response.error
src/agents/scenario3/__init__.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ SPARKNET Scenario 3: License Compliance Monitoring
3
+
4
+ This module provides AI-powered license agreement monitoring and compliance tracking
5
+ for Technology Transfer Offices (TTOs).
6
+
7
+ FEATURES (Planned):
8
+ - Payment Tracking: Monitor royalty payments and fee schedules
9
+ - Milestone Verification: Track contractual milestones and deliverables
10
+ - Revenue Alerts: Automated alerts for payment anomalies and thresholds
11
+ - Compliance Reporting: Generate compliance reports for stakeholders
12
+
13
+ GDPR/PRIVACY CONSIDERATIONS:
14
+ - All license data should be stored with appropriate access controls
15
+ - Payment information requires encryption at rest and in transit
16
+ - Consider data retention policies for completed agreements
17
+ - Audit logging recommended for compliance tracking actions
18
+
19
+ VISTA/HORIZON EU ALIGNMENT:
20
+ - Supports European research valorization objectives
21
+ - Designed for university TTO workflows
22
+ - Integrates with existing agreement safety checks
23
+
24
+ DEPLOYMENT OPTIONS:
25
+ - Cloud: Use with encrypted secrets via Streamlit Cloud
26
+ - Private: On-premise deployment with local database
27
+ - Hybrid: Cloud UI with on-premise data storage
28
+
29
+ Author: SPARKNET Team
30
+ Project: VISTA/Horizon EU
31
+ Status: Placeholder - In Development
32
+ """
33
+
34
+ from .license_compliance_agent import LicenseComplianceAgent
35
+ from .payment_tracker import PaymentTracker
36
+ from .milestone_verifier import MilestoneVerifier
37
+
38
+ __all__ = [
39
+ "LicenseComplianceAgent",
40
+ "PaymentTracker",
41
+ "MilestoneVerifier",
42
+ ]
src/agents/scenario3/license_compliance_agent.py ADDED
@@ -0,0 +1,321 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ License Compliance Agent for SPARKNET
3
+
4
+ AI-powered license agreement monitoring and compliance verification.
5
+ Part of Scenario 3: License Compliance Monitoring.
6
+
7
+ PRIVACY & SECURITY NOTES:
8
+ -------------------------
9
+ This agent handles sensitive financial and contractual data. For production deployments:
10
+
11
+ 1. DATA ISOLATION:
12
+ - License data should be stored in isolated database schemas
13
+ - Implement row-level security for multi-tenant deployments
14
+ - Consider geographic data residency requirements
15
+
16
+ 2. GDPR COMPLIANCE:
17
+ - Implement right-to-erasure for terminated agreements
18
+ - Maintain data processing records
19
+ - Enable data portability exports
20
+
21
+ 3. AUDIT REQUIREMENTS:
22
+ - All compliance checks should be logged
23
+ - Maintain immutable audit trail
24
+ - Enable compliance report generation
25
+
26
+ 4. PRIVATE DEPLOYMENT:
27
+ - Use Ollama for local LLM inference (no data leaves network)
28
+ - Configure local vector store for document embeddings
29
+ - Implement on-premise authentication
30
+
31
+ Author: SPARKNET Team
32
+ Project: VISTA/Horizon EU
33
+ Status: Placeholder - In Development
34
+ """
35
+
36
+ from typing import Optional, Dict, Any, List
37
+ from dataclasses import dataclass
38
+ from datetime import datetime, date
39
+ from enum import Enum
40
+ from loguru import logger
41
+
42
+ # Note: These imports would be implemented when the module is fully developed
43
+ # from ..base_agent import BaseAgent, Task
44
+ # from ...llm.langchain_ollama_client import LangChainOllamaClient
45
+
46
+
47
class ComplianceStatus(str, Enum):
    """License compliance status.

    Inherits from ``str`` so members compare equal to their raw string
    values and serialize cleanly (e.g. ``.value`` used in API payloads).
    """
    COMPLIANT = "compliant"            # all known obligations met
    NON_COMPLIANT = "non_compliant"    # one or more obligations violated
    AT_RISK = "at_risk"                # trending toward non-compliance
    PENDING_REVIEW = "pending_review"  # awaiting assessment (default for stubs)
    EXPIRED = "expired"                # agreement term has ended
54
+
55
+
56
class PaymentStatus(str, Enum):
    """Payment tracking status.

    ``str`` subclass so values serialize directly in reports and APIs.
    """
    PAID = "paid"          # payment received and reconciled
    PENDING = "pending"    # not yet due or awaiting receipt
    OVERDUE = "overdue"    # past due date without payment
    DISPUTED = "disputed"  # payment contested by a party
    WAIVED = "waived"      # obligation formally waived
63
+
64
+
65
@dataclass
class LicenseAgreement:
    """
    License agreement data model.

    GDPR Note: Contains potentially sensitive business information.
    Implement appropriate access controls and retention policies.
    """
    license_id: str                    # unique agreement identifier
    agreement_name: str
    licensee_name: str                 # party receiving the license
    licensor_name: str                 # party granting the license
    technology_name: str
    effective_date: date
    expiration_date: Optional[date]    # None presumably means open-ended — TODO confirm
    status: ComplianceStatus
    total_value: Optional[float]       # total contract value, in `currency`
    currency: str = "EUR"
    payment_schedule: Optional[List[Dict[str, Any]]] = None  # schema not fixed yet
    milestones: Optional[List[Dict[str, Any]]] = None        # schema not fixed yet
    metadata: Optional[Dict[str, Any]] = None
86
+
87
+
88
@dataclass
class PaymentRecord:
    """
    Payment tracking record.

    GDPR Note: Financial data - ensure encryption and access logging.
    """
    payment_id: str             # unique payment identifier
    license_id: str             # agreement this payment belongs to
    amount: float
    currency: str
    due_date: date
    paid_date: Optional[date]   # None while the payment is outstanding
    status: PaymentStatus
    payment_type: str  # royalty, upfront, milestone, etc.
    notes: Optional[str] = None
104
+
105
+
106
@dataclass
class ComplianceAlert:
    """
    Compliance monitoring alert.

    Used for notifying TTO staff of compliance issues.
    """
    alert_id: str
    license_id: str
    alert_type: str  # payment_overdue, milestone_missed, expiring_soon, etc.
    severity: str  # low, medium, high, critical
    message: str
    created_at: datetime
    resolved: bool = False                    # flipped when staff resolve the alert
    resolved_at: Optional[datetime] = None    # set alongside `resolved`
121
+
122
+
123
+ class LicenseComplianceAgent:
124
+ """
125
+ Agent for monitoring license agreement compliance.
126
+
127
+ This agent tracks:
128
+ - Payment schedules and overdue payments
129
+ - Milestone completion and deadlines
130
+ - Agreement expiration dates
131
+ - Compliance violations and alerts
132
+
133
+ DEPLOYMENT CONSIDERATIONS:
134
+ --------------------------
135
+ For private/on-premise deployment:
136
+ 1. Configure local Ollama instance for LLM inference
137
+ 2. Use PostgreSQL with encryption for data storage
138
+ 3. Implement SSO integration for authentication
139
+ 4. Enable audit logging for all operations
140
+
141
+ For cloud deployment (Streamlit Cloud):
142
+ 1. Use secrets management for API keys
143
+ 2. Configure secure database connection
144
+ 3. Enable HTTPS for all communications
145
+ 4. Implement rate limiting for API calls
146
+ """
147
+
148
+ def __init__(
149
+ self,
150
+ llm_client: Optional[Any] = None, # LangChainOllamaClient when implemented
151
+ database_url: Optional[str] = None,
152
+ ):
153
+ """
154
+ Initialize License Compliance Agent.
155
+
156
+ Args:
157
+ llm_client: LangChain LLM client for AI analysis
158
+ database_url: Database connection URL (use secrets management)
159
+ """
160
+ self.llm_client = llm_client
161
+ self.database_url = database_url
162
+ self.name = "LicenseComplianceAgent"
163
+ self.description = "License agreement monitoring and compliance tracking"
164
+
165
+ logger.info(f"Initialized {self.name} (placeholder)")
166
+
167
+ async def check_payment_compliance(
168
+ self,
169
+ license_id: str,
170
+ ) -> Dict[str, Any]:
171
+ """
172
+ Check payment compliance for a license agreement.
173
+
174
+ Args:
175
+ license_id: License agreement identifier
176
+
177
+ Returns:
178
+ Compliance status with payment details
179
+
180
+ TODO: Implement actual payment tracking logic
181
+ """
182
+ logger.info(f"Checking payment compliance for license: {license_id}")
183
+
184
+ # Placeholder response
185
+ return {
186
+ "license_id": license_id,
187
+ "status": ComplianceStatus.PENDING_REVIEW.value,
188
+ "message": "Payment compliance check not yet implemented",
189
+ "payments_due": [],
190
+ "payments_overdue": [],
191
+ "next_payment_date": None,
192
+ "total_outstanding": 0.0,
193
+ }
194
+
195
+ async def verify_milestone(
196
+ self,
197
+ license_id: str,
198
+ milestone_id: str,
199
+ ) -> Dict[str, Any]:
200
+ """
201
+ Verify milestone completion for a license agreement.
202
+
203
+ Args:
204
+ license_id: License agreement identifier
205
+ milestone_id: Milestone identifier
206
+
207
+ Returns:
208
+ Milestone verification result
209
+
210
+ TODO: Implement actual milestone verification logic
211
+ """
212
+ logger.info(f"Verifying milestone {milestone_id} for license: {license_id}")
213
+
214
+ # Placeholder response
215
+ return {
216
+ "license_id": license_id,
217
+ "milestone_id": milestone_id,
218
+ "status": "pending_verification",
219
+ "message": "Milestone verification not yet implemented",
220
+ "evidence_required": True,
221
+ "verification_deadline": None,
222
+ }
223
+
224
+ async def generate_compliance_report(
225
+ self,
226
+ license_ids: Optional[List[str]] = None,
227
+ date_range: Optional[tuple] = None,
228
+ ) -> Dict[str, Any]:
229
+ """
230
+ Generate compliance report for license agreements.
231
+
232
+ Args:
233
+ license_ids: Optional list of specific licenses to report on
234
+ date_range: Optional (start_date, end_date) tuple
235
+
236
+ Returns:
237
+ Compliance report with summary and details
238
+
239
+ TODO: Implement actual report generation logic
240
+ """
241
+ logger.info("Generating compliance report")
242
+
243
+ # Placeholder response
244
+ return {
245
+ "report_id": f"report_{datetime.now().strftime('%Y%m%d_%H%M%S')}",
246
+ "generated_at": datetime.now().isoformat(),
247
+ "status": "placeholder",
248
+ "message": "Compliance report generation not yet implemented",
249
+ "summary": {
250
+ "total_licenses": 0,
251
+ "compliant": 0,
252
+ "non_compliant": 0,
253
+ "at_risk": 0,
254
+ },
255
+ "details": [],
256
+ }
257
+
258
+ async def create_alert(
259
+ self,
260
+ license_id: str,
261
+ alert_type: str,
262
+ severity: str,
263
+ message: str,
264
+ ) -> ComplianceAlert:
265
+ """
266
+ Create a compliance alert for TTO staff notification.
267
+
268
+ Args:
269
+ license_id: License agreement identifier
270
+ alert_type: Type of alert (payment_overdue, milestone_missed, etc.)
271
+ severity: Alert severity (low, medium, high, critical)
272
+ message: Alert message
273
+
274
+ Returns:
275
+ Created compliance alert
276
+
277
+ TODO: Implement actual alert creation and notification logic
278
+ """
279
+ logger.info(f"Creating {severity} alert for license: {license_id}")
280
+
281
+ alert = ComplianceAlert(
282
+ alert_id=f"alert_{datetime.now().strftime('%Y%m%d_%H%M%S')}",
283
+ license_id=license_id,
284
+ alert_type=alert_type,
285
+ severity=severity,
286
+ message=message,
287
+ created_at=datetime.now(),
288
+ )
289
+
290
+ # TODO: Send notification (email, Slack, etc.)
291
+
292
+ return alert
293
+
294
+ def get_vista_quality_criteria(self) -> Dict[str, Any]:
295
+ """
296
+ Get VISTA quality criteria for compliance monitoring.
297
+
298
+ Returns quality thresholds aligned with VISTA project objectives.
299
+ """
300
+ return {
301
+ "payment_tracking": {
302
+ "weight": 0.30,
303
+ "threshold": 0.95,
304
+ "description": "Payment records must be accurate and complete",
305
+ },
306
+ "milestone_verification": {
307
+ "weight": 0.25,
308
+ "threshold": 0.90,
309
+ "description": "Milestone verification must include evidence",
310
+ },
311
+ "alert_timeliness": {
312
+ "weight": 0.25,
313
+ "threshold": 0.95,
314
+ "description": "Alerts must be generated within 24 hours of trigger",
315
+ },
316
+ "report_accuracy": {
317
+ "weight": 0.20,
318
+ "threshold": 0.98,
319
+ "description": "Reports must accurately reflect current state",
320
+ },
321
+ }
src/agents/scenario3/milestone_verifier.py ADDED
@@ -0,0 +1,344 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Milestone Verifier for License Compliance Monitoring
3
+
4
+ Tracks and verifies contractual milestones for license agreements.
5
+
6
+ FEATURES (Planned):
7
+ - Milestone definition and tracking
8
+ - Evidence collection for verification
9
+ - Deadline monitoring and alerts
10
+ - Integration with CriticAgent for validation
11
+
12
+ VISTA/HORIZON EU ALIGNMENT:
13
+ - Supports milestone-based payment structures common in EU research
14
+ - Integrates with project management workflows
15
+ - Provides audit trail for milestone verification
16
+
17
+ Author: SPARKNET Team
18
+ Project: VISTA/Horizon EU
19
+ Status: Placeholder - In Development
20
+ """
21
+
22
+ from typing import Optional, Dict, Any, List
23
+ from dataclasses import dataclass, field
24
+ from datetime import datetime, date
25
+ from enum import Enum
26
+ from loguru import logger
27
+
28
+
29
class MilestoneStatus(str, Enum):
    """Milestone tracking status.

    ``str`` subclass so values serialize directly in reports and APIs.
    """
    PENDING = "pending"          # not yet started
    IN_PROGRESS = "in_progress"  # work underway
    SUBMITTED = "submitted"  # Awaiting verification
    VERIFIED = "verified"        # evidence accepted
    REJECTED = "rejected"        # evidence rejected
    WAIVED = "waived"            # obligation formally waived
    OVERDUE = "overdue"          # past due date without completion
38
+
39
+
40
class MilestoneType(str, Enum):
    """Type of milestone."""
    TECHNICAL = "technical"  # Technical deliverable
    COMMERCIAL = "commercial"  # Commercial target
    REGULATORY = "regulatory"  # Regulatory approval
    FINANCIAL = "financial"  # Financial target
    REPORTING = "reporting"  # Report submission
    OTHER = "other"
48
+
49
+
50
@dataclass
class Milestone:
    """
    License agreement milestone definition.

    Represents a contractual milestone that must be achieved
    for license compliance.
    """
    milestone_id: str
    license_id: str                   # agreement this milestone belongs to
    title: str
    description: str
    milestone_type: MilestoneType
    due_date: date
    status: MilestoneStatus = MilestoneStatus.PENDING
    payment_trigger: bool = False  # If true, triggers milestone payment
    payment_amount: Optional[float] = None    # only meaningful when payment_trigger
    currency: str = "EUR"
    evidence_required: List[str] = field(default_factory=list)           # evidence types demanded
    evidence_submitted: List[Dict[str, Any]] = field(default_factory=list)  # submissions received so far
    verified_by: Optional[str] = None         # reviewer identity, once verified
    verified_at: Optional[datetime] = None
    notes: Optional[str] = None
    metadata: Dict[str, Any] = field(default_factory=dict)
74
+
75
+
76
@dataclass
class VerificationResult:
    """
    Result of milestone verification.

    Includes CriticAgent validation scores when available.
    """
    verification_id: str
    milestone_id: str
    verified: bool                   # True only when verification succeeded
    confidence_score: float  # 0.0 to 1.0
    verification_notes: str
    evidence_review: List[Dict[str, Any]]
    critic_validation: Optional[Dict[str, Any]] = None  # CriticAgent output
    human_review_required: bool = False  # set when confidence/value demands a human
    verified_at: datetime = field(default_factory=datetime.now)
92
+
93
+
94
+ class MilestoneVerifier:
95
+ """
96
+ Verifies milestone completion for license agreements.
97
+
98
+ This component:
99
+ - Tracks milestone deadlines
100
+ - Collects and reviews evidence
101
+ - Integrates with CriticAgent for AI validation
102
+ - Implements human-in-the-loop for critical decisions
103
+
104
+ HUMAN-IN-THE-LOOP CONSIDERATIONS:
105
+ ----------------------------------
106
+ Milestone verification often requires human judgment.
107
+ This component implements:
108
+
109
+ 1. AUTOMATED VERIFICATION:
110
+ - Document completeness checks
111
+ - Format and structure validation
112
+ - Cross-reference with requirements
113
+
114
+ 2. AI-ASSISTED REVIEW:
115
+ - CriticAgent evaluates evidence quality
116
+ - Confidence scoring for verification
117
+ - Anomaly detection in submissions
118
+
119
+ 3. HUMAN DECISION POINTS:
120
+ - Low-confidence verifications flagged for review
121
+ - High-value milestones require approval
122
+ - Rejection decisions need human confirmation
123
+
124
+ 4. AUDIT TRAIL:
125
+ - All decisions logged with reasoning
126
+ - Evidence preserved for compliance
127
+ - Verification history maintained
128
+ """
129
+
130
+ def __init__(
131
+ self,
132
+ llm_client: Optional[Any] = None,
133
+ critic_agent: Optional[Any] = None, # CriticAgent for validation
134
+ database_url: Optional[str] = None,
135
+ ):
136
+ """
137
+ Initialize Milestone Verifier.
138
+
139
+ Args:
140
+ llm_client: LangChain LLM client for AI analysis
141
+ critic_agent: CriticAgent for validation
142
+ database_url: Database connection URL
143
+ """
144
+ self.llm_client = llm_client
145
+ self.critic_agent = critic_agent
146
+ self.database_url = database_url
147
+ self.name = "MilestoneVerifier"
148
+
149
+ # Threshold for requiring human review
150
+ self.human_review_threshold = 0.7
151
+
152
+ logger.info(f"Initialized {self.name} (placeholder)")
153
+
154
+ async def create_milestone(
155
+ self,
156
+ license_id: str,
157
+ title: str,
158
+ description: str,
159
+ milestone_type: MilestoneType,
160
+ due_date: date,
161
+ evidence_required: List[str],
162
+ payment_trigger: bool = False,
163
+ payment_amount: Optional[float] = None,
164
+ ) -> Milestone:
165
+ """
166
+ Create a new milestone for a license agreement.
167
+
168
+ Args:
169
+ license_id: License agreement identifier
170
+ title: Milestone title
171
+ description: Detailed description
172
+ milestone_type: Type of milestone
173
+ due_date: Deadline for completion
174
+ evidence_required: List of required evidence types
175
+ payment_trigger: Whether completion triggers payment
176
+ payment_amount: Payment amount if payment_trigger is True
177
+
178
+ Returns:
179
+ Created milestone
180
+
181
+ TODO: Implement actual milestone creation logic
182
+ """
183
+ logger.info(f"Creating milestone '{title}' for license: {license_id}")
184
+
185
+ return Milestone(
186
+ milestone_id=f"ms_{datetime.now().strftime('%Y%m%d_%H%M%S')}",
187
+ license_id=license_id,
188
+ title=title,
189
+ description=description,
190
+ milestone_type=milestone_type,
191
+ due_date=due_date,
192
+ evidence_required=evidence_required,
193
+ payment_trigger=payment_trigger,
194
+ payment_amount=payment_amount,
195
+ )
196
+
197
+ async def submit_evidence(
198
+ self,
199
+ milestone_id: str,
200
+ evidence_type: str,
201
+ evidence_data: Dict[str, Any],
202
+ submitted_by: str,
203
+ ) -> Dict[str, Any]:
204
+ """
205
+ Submit evidence for milestone verification.
206
+
207
+ Args:
208
+ milestone_id: Milestone identifier
209
+ evidence_type: Type of evidence being submitted
210
+ evidence_data: Evidence data (documents, metrics, etc.)
211
+ submitted_by: User/organization submitting
212
+
213
+ Returns:
214
+ Submission confirmation
215
+
216
+ TODO: Implement actual evidence submission logic
217
+ """
218
+ logger.info(f"Submitting {evidence_type} evidence for milestone: {milestone_id}")
219
+
220
+ # Placeholder response
221
+ return {
222
+ "submission_id": f"sub_{datetime.now().strftime('%Y%m%d_%H%M%S')}",
223
+ "milestone_id": milestone_id,
224
+ "evidence_type": evidence_type,
225
+ "submitted_at": datetime.now().isoformat(),
226
+ "submitted_by": submitted_by,
227
+ "status": "received",
228
+ "message": "Evidence submission not yet fully implemented",
229
+ }
230
+
231
+ async def verify_milestone(
232
+ self,
233
+ milestone_id: str,
234
+ auto_approve: bool = False,
235
+ ) -> VerificationResult:
236
+ """
237
+ Verify milestone completion using AI and human review.
238
+
239
+ This method:
240
+ 1. Checks all required evidence is submitted
241
+ 2. Uses CriticAgent to validate evidence quality
242
+ 3. Calculates confidence score
243
+ 4. Determines if human review is needed
244
+
245
+ Args:
246
+ milestone_id: Milestone to verify
247
+ auto_approve: Whether to auto-approve high-confidence verifications
248
+
249
+ Returns:
250
+ Verification result with confidence score
251
+
252
+ TODO: Implement actual verification logic with CriticAgent
253
+ """
254
+ logger.info(f"Verifying milestone: {milestone_id}")
255
+
256
+ # Placeholder verification result
257
+ result = VerificationResult(
258
+ verification_id=f"ver_{datetime.now().strftime('%Y%m%d_%H%M%S')}",
259
+ milestone_id=milestone_id,
260
+ verified=False,
261
+ confidence_score=0.0,
262
+ verification_notes="Verification not yet implemented",
263
+ evidence_review=[],
264
+ human_review_required=True,
265
+ )
266
+
267
+ return result
268
+
269
+ async def get_overdue_milestones(
270
+ self,
271
+ as_of_date: Optional[date] = None,
272
+ ) -> List[Milestone]:
273
+ """
274
+ Get list of overdue milestones.
275
+
276
+ Args:
277
+ as_of_date: Reference date (defaults to today)
278
+
279
+ Returns:
280
+ List of overdue milestones
281
+
282
+ TODO: Implement actual overdue milestone tracking
283
+ """
284
+ as_of_date = as_of_date or date.today()
285
+ logger.info(f"Checking overdue milestones as of {as_of_date}")
286
+
287
+ # Placeholder response
288
+ return []
289
+
290
+ async def get_upcoming_milestones(
291
+ self,
292
+ days_ahead: int = 30,
293
+ ) -> List[Milestone]:
294
+ """
295
+ Get milestones due in the near future.
296
+
297
+ Args:
298
+ days_ahead: Number of days to look ahead
299
+
300
+ Returns:
301
+ List of upcoming milestones
302
+
303
+ TODO: Implement actual upcoming milestone tracking
304
+ """
305
+ logger.info(f"Getting milestones due in next {days_ahead} days")
306
+
307
+ # Placeholder response
308
+ return []
309
+
310
+ def requires_human_review(
311
+ self,
312
+ confidence_score: float,
313
+ milestone: Milestone,
314
+ ) -> bool:
315
+ """
316
+ Determine if milestone verification requires human review.
317
+
318
+ Human review is required when:
319
+ - Confidence score is below threshold
320
+ - Milestone triggers large payment
321
+ - Milestone type is regulatory
322
+ - Evidence is incomplete or ambiguous
323
+
324
+ Args:
325
+ confidence_score: AI verification confidence
326
+ milestone: Milestone being verified
327
+
328
+ Returns:
329
+ True if human review required
330
+ """
331
+ # Low confidence requires review
332
+ if confidence_score < self.human_review_threshold:
333
+ return True
334
+
335
+ # Large payments require review
336
+ if milestone.payment_trigger and milestone.payment_amount:
337
+ if milestone.payment_amount > 50000: # EUR threshold
338
+ return True
339
+
340
+ # Regulatory milestones always require review
341
+ if milestone.milestone_type == MilestoneType.REGULATORY:
342
+ return True
343
+
344
+ return False
src/agents/scenario3/payment_tracker.py ADDED
@@ -0,0 +1,277 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Payment Tracker for License Compliance Monitoring
3
+
4
+ Tracks royalty payments, fees, and financial obligations for license agreements.
5
+
6
+ SECURITY CONSIDERATIONS:
7
+ ------------------------
8
+ This module handles sensitive financial data. Ensure:
9
+ 1. All payment data is encrypted at rest (AES-256 recommended)
10
+ 2. Access is logged for audit compliance
11
+ 3. PCI-DSS guidelines are followed if processing card data
12
+ 4. Data retention policies are implemented
13
+
14
+ GDPR COMPLIANCE:
15
+ ---------------
16
+ - Payment records may contain personal data of signatories
17
+ - Implement data minimization - only store necessary fields
18
+ - Support data portability and right-to-erasure requests
19
+ - Maintain records of processing activities
20
+
21
+ Author: SPARKNET Team
22
+ Project: VISTA/Horizon EU
23
+ Status: Placeholder - In Development
24
+ """
25
+
26
+ from typing import Optional, Dict, Any, List
27
+ from dataclasses import dataclass, field
28
+ from datetime import datetime, date, timedelta
29
+ from enum import Enum
30
+ from loguru import logger
31
+
32
+
33
class PaymentFrequency(str, Enum):
    """Payment schedule frequency.

    ``str`` subclass so values serialize directly in schedules and APIs.
    """
    ONE_TIME = "one_time"                  # single payment
    MONTHLY = "monthly"
    QUARTERLY = "quarterly"
    SEMI_ANNUAL = "semi_annual"
    ANNUAL = "annual"
    MILESTONE_BASED = "milestone_based"    # due when milestones complete
41
+
42
+
43
class RevenueType(str, Enum):
    """Type of revenue/payment."""
    UPFRONT_FEE = "upfront_fee"              # initial signing fee
    ROYALTY = "royalty"                      # recurring sales-based payment
    MILESTONE_PAYMENT = "milestone_payment"  # tied to milestone completion
    MAINTENANCE_FEE = "maintenance_fee"
    SUBLICENSE_FEE = "sublicense_fee"
    MINIMUM_PAYMENT = "minimum_payment"      # contractual floor payment
51
+
52
+
53
@dataclass
class PaymentSchedule:
    """
    Payment schedule configuration for a license agreement.

    GDPR Note: May reference personal data (contact info).
    Implement appropriate access controls.
    """
    schedule_id: str
    license_id: str                        # agreement this schedule belongs to
    frequency: PaymentFrequency
    revenue_type: RevenueType
    base_amount: Optional[float] = None    # fixed amount per period, if any
    percentage_rate: Optional[float] = None  # For royalties
    currency: str = "EUR"
    start_date: Optional[date] = None
    end_date: Optional[date] = None
    payment_terms_days: int = 30  # Days until payment is due
    late_fee_percentage: float = 0.0       # 0.0 means no late fee
    minimum_payment: Optional[float] = None
    metadata: Dict[str, Any] = field(default_factory=dict)
74
+
75
+
76
@dataclass
class RevenueAlert:
    """
    Revenue monitoring alert configuration.

    Used to notify TTO staff of payment anomalies.
    """
    alert_id: str
    license_id: str                        # license being monitored
    alert_type: str  # threshold_exceeded, payment_overdue, anomaly_detected
    threshold_value: Optional[float] = None
    comparison_operator: str = "greater_than"  # greater_than, less_than, equals
    notification_channels: List[str] = field(default_factory=list)  # email, slack, sms
    enabled: bool = True                   # disabled alerts are kept but not fired
90
+
91
+
92
class PaymentTracker:
    """Tracks payments and revenue for license agreements.

    Responsibilities: recording incoming payments, tracking overdue
    payments, generating revenue alerts, and producing financial reports.
    All coroutines are currently stubs that log and return canned data.

    Private deployment notes
    ------------------------
    On-premise: local PostgreSQL with SSL/TLS, connection pooling,
    backup/disaster recovery, and monitoring for payment processing.
    Enhanced security: HSM-backed encryption keys, IP allowlisting for
    database access, query auditing, intrusion detection.
    """

    def __init__(
        self,
        database_url: Optional[str] = None,
        notification_service: Optional[Any] = None,
    ):
        """Create the tracker.

        Args:
            database_url: Secure database connection URL.
            notification_service: Service for sending alerts.
        """
        self.database_url = database_url
        self.notification_service = notification_service
        self.name = "PaymentTracker"
        logger.info(f"Initialized {self.name} (placeholder)")

    @staticmethod
    def _stamp() -> str:
        """Timestamp suffix used for generated identifiers."""
        return datetime.now().strftime('%Y%m%d_%H%M%S')

    async def record_payment(
        self,
        license_id: str,
        amount: float,
        currency: str,
        payment_date: date,
        revenue_type: RevenueType,
        reference: Optional[str] = None,
    ) -> Dict[str, Any]:
        """Record a payment received for a license agreement.

        Args:
            license_id: License agreement identifier.
            amount: Payment amount.
            currency: Currency code (EUR, USD, etc.).
            payment_date: Date payment was received.
            revenue_type: Type of revenue.
            reference: Payment reference/invoice number.

        Returns:
            Payment record confirmation (stub payload for now).

        TODO: Implement actual payment recording logic.
        """
        logger.info(f"Recording payment of {amount} {currency} for license: {license_id}")
        return {
            "payment_id": f"pmt_{self._stamp()}",
            "license_id": license_id,
            "amount": amount,
            "currency": currency,
            "payment_date": payment_date.isoformat(),
            "revenue_type": revenue_type.value,
            "reference": reference,
            "status": "recorded",
            "message": "Payment recording not yet fully implemented",
        }

    async def get_overdue_payments(
        self,
        as_of_date: Optional[date] = None,
        days_overdue: int = 0,
    ) -> List[Dict[str, Any]]:
        """Return the list of overdue payments.

        Args:
            as_of_date: Reference date (defaults to today).
            days_overdue: Minimum days overdue to include.

        Returns:
            Overdue payment records (stub: always empty).

        TODO: Implement actual overdue payment tracking.
        """
        as_of_date = as_of_date or date.today()
        logger.info(f"Checking overdue payments as of {as_of_date}")
        return []

    async def calculate_revenue_summary(
        self,
        start_date: date,
        end_date: date,
        group_by: str = "month",
    ) -> Dict[str, Any]:
        """Calculate a revenue summary for a date range.

        Args:
            start_date: Start of reporting period.
            end_date: End of reporting period.
            group_by: Grouping period (day, week, month, quarter, year).

        Returns:
            Revenue summary with breakdowns (stub payload for now).

        TODO: Implement actual revenue calculation.
        """
        logger.info(f"Calculating revenue from {start_date} to {end_date}")
        return {
            "period": {
                "start": start_date.isoformat(),
                "end": end_date.isoformat(),
            },
            "total_revenue": 0.0,
            "currency": "EUR",
            "by_period": [],
            "by_revenue_type": {},
            "by_license": {},
            "status": "placeholder",
            "message": "Revenue calculation not yet implemented",
        }

    async def create_revenue_alert(
        self,
        license_id: str,
        alert_type: str,
        threshold: float,
        notification_channels: List[str],
    ) -> RevenueAlert:
        """Create a revenue monitoring alert.

        Args:
            license_id: License to monitor.
            alert_type: Type of alert.
            threshold: Threshold value.
            notification_channels: Where to send alerts.

        Returns:
            The created alert configuration.

        TODO: Implement actual alert creation logic (persistence).
        """
        logger.info(f"Creating revenue alert for license: {license_id}")
        return RevenueAlert(
            alert_id=f"ralert_{self._stamp()}",
            license_id=license_id,
            alert_type=alert_type,
            threshold_value=threshold,
            notification_channels=notification_channels,
        )

    async def check_revenue_thresholds(self) -> List[Dict[str, Any]]:
        """Check all revenue alerts and generate notifications.

        Returns:
            Triggered alerts (stub: always empty).

        TODO: Implement actual threshold checking.
        """
        logger.info("Checking revenue thresholds")
        return []
src/agents/scenario4/__init__.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ SPARKNET Scenario 4: Award Identification
3
+
4
+ This module provides AI-powered funding opportunity discovery and award
5
+ nomination assistance for Technology Transfer Offices (TTOs).
6
+
7
+ FEATURES (Planned):
8
+ - Opportunity Scanning: Automated discovery of funding opportunities
9
+ - Nomination Assistance: AI-assisted award nomination preparation
10
+ - Deadline Tracking: Calendar integration for application deadlines
11
+ - Application Support: Document preparation and review
12
+
13
+ VISTA/HORIZON EU ALIGNMENT:
14
+ - Designed for European research funding landscape
15
+ - Supports Horizon Europe, ERC, and national funding programs
16
+ - Integrates with TTO workflows for commercialization grants
17
+
18
+ DEPLOYMENT OPTIONS:
19
+ - Cloud: Streamlit Cloud with API integrations
20
+ - Private: On-premise with local databases
21
+ - Hybrid: Cloud scanning with on-premise data storage
22
+
23
+ Author: SPARKNET Team
24
+ Project: VISTA/Horizon EU
25
+ Status: Placeholder - In Development
26
+ """
27
+
28
+ from .award_identification_agent import AwardIdentificationAgent
29
+ from .opportunity_scanner import OpportunityScanner
30
+ from .nomination_assistant import NominationAssistant
31
+
32
+ __all__ = [
33
+ "AwardIdentificationAgent",
34
+ "OpportunityScanner",
35
+ "NominationAssistant",
36
+ ]
src/agents/scenario4/award_identification_agent.py ADDED
@@ -0,0 +1,392 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Award Identification Agent for SPARKNET
3
+
4
+ AI-powered funding opportunity discovery and award nomination assistance.
5
+ Part of Scenario 4: Award Identification.
6
+
7
+ FEATURES:
8
+ ---------
9
+ 1. OPPORTUNITY DISCOVERY:
10
+ - Scan funding databases and announcements
11
+ - Match opportunities to research capabilities
12
+ - Track application deadlines
13
+
14
+ 2. NOMINATION ASSISTANCE:
15
+ - Prepare award nomination documents
16
+ - Review and validate submissions
17
+ - Generate supporting materials
18
+
19
+ 3. APPLICATION SUPPORT:
20
+ - Document preparation workflows
21
+ - Compliance checking
22
+ - Reviewer matching
23
+
24
+ INTEGRATIONS (Planned):
25
+ -----------------------
26
+ - Horizon Europe CORDIS database
27
+ - National funding agency APIs
28
+ - ERC portal integration
29
+ - Patent databases for innovation evidence
30
+
31
+ Author: SPARKNET Team
32
+ Project: VISTA/Horizon EU
33
+ Status: Placeholder - In Development
34
+ """
35
+
36
+ from typing import Optional, Dict, Any, List
37
+ from dataclasses import dataclass, field
38
+ from datetime import datetime, date
39
+ from enum import Enum
40
+ from loguru import logger
41
+
42
+
43
class OpportunityType(str, Enum):
    """Category of a funding opportunity tracked by the scanner."""

    GRANT = "grant"              # research / commercialization grants
    AWARD = "award"              # recognition awards
    FELLOWSHIP = "fellowship"    # individual fellowships
    PRIZE = "prize"              # innovation prizes and competitions
    INVESTMENT = "investment"    # equity or investment funding
    PARTNERSHIP = "partnership"  # industry partnership programs
51
+
52
+
53
class OpportunityStatus(str, Enum):
    """Lifecycle state of a tracked opportunity."""

    IDENTIFIED = "identified"  # discovered by a scan, not yet triaged
    EVALUATING = "evaluating"  # under internal assessment
    PREPARING = "preparing"    # application being drafted
    SUBMITTED = "submitted"    # application sent to funder
    AWARDED = "awarded"        # funding granted
    REJECTED = "rejected"      # funder declined
    EXPIRED = "expired"        # deadline passed without submission
62
+
63
+
64
class EligibilityStatus(str, Enum):
    """Outcome of an eligibility assessment."""

    ELIGIBLE = "eligible"      # all criteria satisfied
    INELIGIBLE = "ineligible"  # at least one hard criterion failed
    PARTIAL = "partial"        # some criteria met
    UNKNOWN = "unknown"        # needs (human) review
70
+
71
+
72
@dataclass
class FundingOpportunity:
    """A grant, award, or other funding opportunity found by a scan.

    Instances are produced by the scanning system; optional fields are
    filled in as more information about the call becomes available.
    """

    opportunity_id: str
    title: str
    description: str
    opportunity_type: OpportunityType
    funder: str
    funder_type: str  # government, foundation, corporate, EU, etc.
    amount_min: Optional[float] = None
    amount_max: Optional[float] = None
    currency: str = "EUR"
    deadline: Optional[date] = None
    url: Optional[str] = None
    eligibility_criteria: List[str] = field(default_factory=list)
    keywords: List[str] = field(default_factory=list)
    status: OpportunityStatus = OpportunityStatus.IDENTIFIED
    match_score: Optional[float] = None  # fit against institutional capabilities
    notes: Optional[str] = None
    metadata: Dict[str, Any] = field(default_factory=dict)
97
+
98
+
99
@dataclass
class OpportunityMatch:
    """Alignment between one opportunity and institutional capabilities.

    Links a funding opportunity to a technology and/or research area,
    with a score, rationale, and eligibility assessment.
    """

    match_id: str
    opportunity_id: str
    technology_id: Optional[str] = None
    research_area: Optional[str] = None
    match_score: float = 0.0  # 0.0 to 1.0
    match_rationale: str = ""
    eligibility_status: EligibilityStatus = EligibilityStatus.UNKNOWN
    eligibility_notes: List[str] = field(default_factory=list)
    recommended_action: str = ""
    confidence_score: float = 0.0
117
+
118
+
119
@dataclass
class NominationDocument:
    """Structured content of an award/grant application document."""

    document_id: str
    opportunity_id: str
    document_type: str  # proposal, nomination_letter, cv, budget, etc.
    title: str
    content: str
    version: str = "1.0"
    status: str = "draft"  # draft, review, final
    created_at: datetime = field(default_factory=datetime.now)
    updated_at: datetime = field(default_factory=datetime.now)
    created_by: Optional[str] = None
    reviewer_comments: List[Dict[str, Any]] = field(default_factory=list)
    critic_validation: Optional[Dict[str, Any]] = None  # CriticAgent result, if any
138
+
139
+
140
class AwardIdentificationAgent:
    """Discover funding opportunities and assist with award nominations.

    Placeholder implementation for Scenario 4 (Award Identification):
    every coroutine logs its call and returns an empty or zero-valued
    result. The signatures define the stable interface the production
    implementation will fill in.

    Human-in-the-loop design: scanning, matching, and eligibility
    screening are automated; approval to pursue an opportunity, document
    review, and final submission remain human decision points, with
    CriticAgent validation as quality assurance.
    """

    def __init__(
        self,
        llm_client: Optional[Any] = None,
        critic_agent: Optional[Any] = None,
        database_url: Optional[str] = None,
    ):
        """Store collaborator handles; performs no I/O.

        Args:
            llm_client: LangChain LLM client for AI analysis.
            critic_agent: CriticAgent for document validation.
            database_url: Database connection URL.
        """
        self.llm_client = llm_client
        self.critic_agent = critic_agent
        self.database_url = database_url
        self.name = "AwardIdentificationAgent"
        self.description = "Funding opportunity discovery and nomination assistance"

        logger.info(f"Initialized {self.name} (placeholder)")

    async def scan_opportunities(
        self,
        keywords: Optional[List[str]] = None,
        opportunity_types: Optional[List[OpportunityType]] = None,
        min_amount: Optional[float] = None,
        max_deadline_days: Optional[int] = None,
    ) -> List[FundingOpportunity]:
        """Scan for funding opportunities matching the given criteria.

        Args:
            keywords: Keywords to search for.
            opportunity_types: Types of opportunities to find.
            min_amount: Minimum funding amount.
            max_deadline_days: Maximum days until deadline.

        Returns:
            Matching opportunities; always empty until the funding
            database integration is implemented.

        TODO: Implement actual opportunity scanning.
        """
        logger.info(f"Scanning for opportunities with keywords: {keywords}")
        return []

    async def match_opportunity(
        self,
        opportunity_id: str,
        technology_ids: Optional[List[str]] = None,
        research_areas: Optional[List[str]] = None,
    ) -> OpportunityMatch:
        """Score how well an opportunity fits institutional capabilities.

        Args:
            opportunity_id: Opportunity to evaluate.
            technology_ids: Technologies to consider.
            research_areas: Research areas to consider.

        Returns:
            A zero-score placeholder match flagged for manual review.

        TODO: Implement actual matching logic.
        """
        logger.info(f"Matching opportunity: {opportunity_id}")

        stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        return OpportunityMatch(
            match_id=f"match_{stamp}",
            opportunity_id=opportunity_id,
            match_score=0.0,
            match_rationale="Matching not yet implemented",
            eligibility_status=EligibilityStatus.UNKNOWN,
            recommended_action="Review manually",
            confidence_score=0.0,
        )

    async def check_eligibility(
        self,
        opportunity_id: str,
        applicant_profile: Dict[str, Any],
    ) -> Dict[str, Any]:
        """Assess applicant eligibility for an opportunity.

        Args:
            opportunity_id: Opportunity to check.
            applicant_profile: Profile of the potential applicant.

        Returns:
            Assessment dict with status, per-criterion breakdowns, a
            recommendation, and a confidence value (all placeholders).

        TODO: Implement actual eligibility checking.
        """
        logger.info(f"Checking eligibility for opportunity: {opportunity_id}")

        assessment = {
            "opportunity_id": opportunity_id,
            "status": EligibilityStatus.UNKNOWN.value,
            "criteria_met": [],
            "criteria_not_met": [],
            "criteria_unknown": [],
            "recommendation": "Manual review required",
            "confidence": 0.0,
        }
        return assessment

    async def prepare_nomination(
        self,
        opportunity_id: str,
        document_type: str,
        context: Dict[str, Any],
    ) -> NominationDocument:
        """Draft a nomination/application document.

        Args:
            opportunity_id: Target opportunity.
            document_type: Type of document to prepare.
            context: Context information for document generation.

        Returns:
            A draft document with placeholder content.

        TODO: Implement actual document preparation with the LLM.
        """
        logger.info(f"Preparing {document_type} for opportunity: {opportunity_id}")

        stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        return NominationDocument(
            document_id=f"doc_{stamp}",
            opportunity_id=opportunity_id,
            document_type=document_type,
            title=f"{document_type.replace('_', ' ').title()} - Draft",
            content="[Document content to be generated]",
            status="draft",
        )

    async def validate_document(
        self,
        document: NominationDocument,
    ) -> Dict[str, Any]:
        """Validate a nomination document via CriticAgent.

        Args:
            document: Document to validate.

        Returns:
            Validation dict; currently always invalid and flagged for
            human review.

        TODO: Implement CriticAgent integration.
        """
        logger.info(f"Validating document: {document.document_id}")

        return {
            "document_id": document.document_id,
            "valid": False,
            "overall_score": 0.0,
            "dimension_scores": {},
            "issues": ["Validation not yet implemented"],
            "suggestions": ["Complete document implementation"],
            "human_review_required": True,
        }

    async def get_upcoming_deadlines(
        self,
        days_ahead: int = 30,
    ) -> List[FundingOpportunity]:
        """List opportunities whose deadline falls within the window.

        Args:
            days_ahead: Number of days to look ahead.

        Returns:
            Opportunities with deadlines; empty until implemented.

        TODO: Implement actual deadline tracking.
        """
        logger.info(f"Getting deadlines for next {days_ahead} days")
        return []

    def get_vista_quality_criteria(self) -> Dict[str, Any]:
        """Return VISTA quality criteria for award identification.

        Each entry carries a weight (criteria weights sum to 1.0), a
        minimum acceptance threshold, and a human-readable description.
        """
        return {
            "opportunity_relevance": {
                "weight": 0.30,
                "threshold": 0.75,
                "description": "Opportunities must be relevant to research capabilities",
            },
            "eligibility_accuracy": {
                "weight": 0.25,
                "threshold": 0.90,
                "description": "Eligibility assessments must be accurate",
            },
            "document_quality": {
                "weight": 0.25,
                "threshold": 0.85,
                "description": "Nomination documents must meet quality standards",
            },
            "deadline_tracking": {
                "weight": 0.20,
                "threshold": 0.95,
                "description": "Deadlines must be tracked accurately",
            },
        }
src/agents/scenario4/nomination_assistant.py ADDED
@@ -0,0 +1,371 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Nomination Assistant for Award Identification
3
+
4
+ AI-assisted preparation of award nominations and grant applications.
5
+
6
+ FEATURES (Planned):
7
+ ------------------
8
+ 1. DOCUMENT GENERATION:
9
+ - Executive summaries
10
+ - Project descriptions
11
+ - Budget justifications
12
+ - Team CVs and bios
13
+
14
+ 2. TEMPLATE MATCHING:
15
+ - Match to funder templates
16
+ - Format compliance checking
17
+ - Character/word limit validation
18
+
19
+ 3. QUALITY ASSURANCE:
20
+ - CriticAgent validation
21
+ - Reviewer simulation
22
+ - Gap identification
23
+
24
+ 4. COLLABORATION:
25
+ - Multi-author editing
26
+ - Comment and review workflows
27
+ - Version control
28
+
29
+ HUMAN-IN-THE-LOOP:
30
+ -----------------
31
+ Document preparation requires extensive human input:
32
+ - Initial content drafting
33
+ - Review and revision cycles
34
+ - Final approval before submission
35
+
36
+ This assistant accelerates the process but doesn't replace
37
+ human expertise in grant writing.
38
+
39
+ Author: SPARKNET Team
40
+ Project: VISTA/Horizon EU
41
+ Status: Placeholder - In Development
42
+ """
43
+
44
+ from typing import Optional, Dict, Any, List
45
+ from dataclasses import dataclass, field
46
+ from datetime import datetime
47
+ from enum import Enum
48
+ from loguru import logger
49
+
50
+
51
class DocumentTemplate(str, Enum):
    """Standard funder document templates."""

    HORIZON_PROPOSAL = "horizon_proposal"  # Horizon Europe proposal
    ERC_APPLICATION = "erc_application"    # ERC grant application
    NATIONAL_GRANT = "national_grant"      # national agency grant
    AWARD_NOMINATION = "award_nomination"  # award nomination package
    LETTER_OF_INTENT = "letter_of_intent"
    BUDGET_TEMPLATE = "budget_template"
    CV_EUROPASS = "cv_europass"            # Europass-format CV
    CUSTOM = "custom"                      # institution-specific template
61
+
62
+
63
class ReviewStatus(str, Enum):
    """Review state of a nomination document."""

    DRAFT = "draft"                      # being written
    INTERNAL_REVIEW = "internal_review"  # under institutional review
    REVISION_NEEDED = "revision_needed"  # sent back for changes
    APPROVED = "approved"                # cleared for submission
    SUBMITTED = "submitted"              # delivered to the funder
70
+
71
+
72
@dataclass
class DocumentSection:
    """One structured section of a nomination document."""

    section_id: str
    title: str
    content: str
    word_limit: Optional[int] = None  # funder-imposed limit, if any
    current_words: int = 0
    status: str = "draft"
    ai_generated: bool = False    # True when the content came from the LLM
    human_reviewed: bool = False  # True once a human has signed off
    reviewer_comments: List[str] = field(default_factory=list)
    suggestions: List[str] = field(default_factory=list)
89
+
90
+
91
@dataclass
class DocumentReview:
    """Feedback on a nomination document from an AI or human reviewer."""

    review_id: str
    document_id: str
    reviewer_type: str  # "ai", "human", "external"
    reviewer_name: Optional[str] = None
    overall_score: Optional[float] = None
    section_scores: Dict[str, float] = field(default_factory=dict)
    strengths: List[str] = field(default_factory=list)
    weaknesses: List[str] = field(default_factory=list)
    suggestions: List[str] = field(default_factory=list)
    decision: str = "pending"  # approve, revise, reject
    created_at: datetime = field(default_factory=datetime.now)
109
+
110
+
111
class NominationAssistant:
    """AI assistant for drafting and reviewing nominations/applications.

    Placeholder implementation: every coroutine logs its call and
    returns a fixed draft or "not yet implemented" payload. The
    signatures define the stable interface for the future LLM- and
    CriticAgent-backed version.

    Confidence handling: AI-generated content is meant to carry a
    confidence score; anything below ``confidence_threshold`` -- and any
    critical section regardless of score -- is flagged for mandatory
    human review (see :meth:`requires_human_review`).
    """

    def __init__(
        self,
        llm_client: Optional[Any] = None,
        critic_agent: Optional[Any] = None,
        template_library: Optional[Dict[str, Any]] = None,
    ):
        """Store collaborator handles; performs no I/O.

        Args:
            llm_client: LangChain LLM client for content generation.
            critic_agent: CriticAgent for validation.
            template_library: Library of document templates.
        """
        self.llm_client = llm_client
        self.critic_agent = critic_agent
        self.template_library = template_library or {}
        self.name = "NominationAssistant"

        # Minimum AI confidence below which human review is mandatory.
        self.confidence_threshold = 0.7

        logger.info(f"Initialized {self.name} (placeholder)")

    async def generate_section(
        self,
        document_id: str,
        section_type: str,
        context: Dict[str, Any],
        word_limit: Optional[int] = None,
    ) -> DocumentSection:
        """Generate one document section with the LLM.

        Args:
            document_id: Parent document ID.
            section_type: Type of section to generate.
            context: Context information for generation.
            word_limit: Optional word limit.

        Returns:
            A draft section (placeholder content, flagged AI-generated).

        TODO: Implement actual LLM generation.
        """
        logger.info(f"Generating {section_type} section for document: {document_id}")

        stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        return DocumentSection(
            section_id=f"sec_{stamp}",
            title=section_type.replace("_", " ").title(),
            content="[AI-generated content placeholder]",
            word_limit=word_limit,
            current_words=0,
            status="draft",
            ai_generated=True,
            human_reviewed=False,
            suggestions=["Complete implementation with actual LLM generation"],
        )

    async def check_format_compliance(
        self,
        document_id: str,
        template: DocumentTemplate,
    ) -> Dict[str, Any]:
        """Check a document against a template's format requirements.

        Args:
            document_id: Document to check.
            template: Template to check against.

        Returns:
            Compliance report; currently a non-compliant placeholder
            with a single informational issue.

        TODO: Implement actual compliance checking.
        """
        logger.info(f"Checking format compliance for document: {document_id}")

        report = {
            "document_id": document_id,
            "template": template.value,
            "compliant": False,
            "issues": [
                {
                    "type": "placeholder",
                    "message": "Compliance checking not yet implemented",
                    "severity": "info",
                }
            ],
            "word_counts": {},
            "missing_sections": [],
        }
        return report

    async def simulate_review(
        self,
        document_id: str,
        reviewer_perspective: str = "general",
    ) -> DocumentReview:
        """Simulate grant-reviewer feedback with the LLM.

        Produces feedback from the point of view of a grant reviewer to
        surface weaknesses before submission.

        Args:
            document_id: Document to review.
            reviewer_perspective: Type of reviewer to simulate.

        Returns:
            A placeholder review with zero score and pending decision.

        TODO: Implement actual review simulation.
        """
        logger.info(f"Simulating {reviewer_perspective} review for document: {document_id}")

        stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        return DocumentReview(
            review_id=f"rev_{stamp}",
            document_id=document_id,
            reviewer_type="ai",
            reviewer_name=f"AI ({reviewer_perspective})",
            overall_score=0.0,
            strengths=["Review simulation not yet implemented"],
            weaknesses=["Cannot assess without implementation"],
            suggestions=["Complete the AI review simulation feature"],
            decision="pending",
        )

    async def suggest_improvements(
        self,
        section: DocumentSection,
    ) -> List[str]:
        """Produce actionable improvement suggestions for a section.

        Args:
            section: Section to analyze.

        Returns:
            Improvement suggestions (placeholder list).

        TODO: Implement CriticAgent integration.
        """
        logger.info(f"Generating improvement suggestions for section: {section.section_id}")

        return [
            "Improvement suggestions not yet implemented",
            "Will integrate with CriticAgent for validation",
        ]

    async def validate_with_critic(
        self,
        document_id: str,
    ) -> Dict[str, Any]:
        """Run the full CriticAgent validation on a document.

        Covers content quality, format compliance, logical consistency,
        and citation verification once implemented.

        Args:
            document_id: Document to validate.

        Returns:
            Validation dict; currently always invalid, zero-scored, and
            flagged for human review.

        TODO: Implement CriticAgent integration.
        """
        logger.info(f"Validating document with CriticAgent: {document_id}")

        return {
            "document_id": document_id,
            "valid": False,
            "overall_score": 0.0,
            "dimension_scores": {
                "completeness": 0.0,
                "clarity": 0.0,
                "accuracy": 0.0,
                "compliance": 0.0,
            },
            "issues": ["CriticAgent validation not yet implemented"],
            "suggestions": ["Complete CriticAgent integration"],
            "human_review_required": True,
            "confidence": 0.0,
        }

    def requires_human_review(
        self,
        confidence_score: float,
        section_type: str,
    ) -> bool:
        """Decide whether generated content needs a human pass.

        Review is mandatory when the AI confidence falls below the
        configured threshold, or when the section is critical to the
        application regardless of confidence.

        Args:
            confidence_score: AI confidence score.
            section_type: Type of section.

        Returns:
            True if human review is required.
        """
        # Below-threshold confidence always forces review.
        if confidence_score < self.confidence_threshold:
            return True

        # Critical sections are reviewed no matter how confident the AI is.
        critical = {
            "executive_summary",
            "budget",
            "team_qualifications",
            "methodology",
        }
        return section_type.lower() in critical
src/agents/scenario4/opportunity_scanner.py ADDED
@@ -0,0 +1,278 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Opportunity Scanner for Award Identification
3
+
4
+ Scans funding databases and announcement sources for opportunities.
5
+
6
+ PLANNED DATA SOURCES:
7
+ ---------------------
8
+ - Horizon Europe / CORDIS
9
+ - European Research Council (ERC)
10
+ - National funding agencies (DFG, ANR, UKRI, etc.)
11
+ - Foundation databases
12
+ - Corporate R&D partnerships
13
+ - Innovation prizes and awards
14
+
15
+ SCANNING STRATEGY:
16
+ -----------------
17
+ 1. KEYWORD MATCHING:
18
+ - Technology-specific terms
19
+ - Research area keywords
20
+ - Institution eligibility terms
21
+
22
+ 2. SEMANTIC SEARCH:
23
+ - Vector similarity to capability descriptions
24
+ - Cross-lingual matching for EU opportunities
25
+
26
+ 3. FILTERING:
27
+ - Deadline filtering (exclude expired)
28
+ - Amount thresholds
29
+ - Eligibility pre-screening
30
+
31
+ Author: SPARKNET Team
32
+ Project: VISTA/Horizon EU
33
+ Status: Placeholder - In Development
34
+ """
35
+
36
+ from typing import Optional, Dict, Any, List
37
+ from dataclasses import dataclass, field
38
+ from datetime import datetime, date
39
+ from enum import Enum
40
+ from loguru import logger
41
+
42
+
43
class DataSource(str, Enum):
    """Funding data sources the scanner can query."""

    CORDIS = "cordis"          # Horizon Europe
    ERC = "erc"                # European Research Council
    NATIONAL = "national"      # national funding agencies
    FOUNDATION = "foundation"  # foundation databases
    CORPORATE = "corporate"    # corporate R&D partnerships
    CUSTOM = "custom"          # institution-specific sources
51
+
52
+
53
@dataclass
class ScanConfiguration:
    """What and how to scan for funding opportunities."""

    config_id: str
    name: str
    sources: List[DataSource]
    keywords: List[str]
    research_areas: List[str]
    min_amount: Optional[float] = None
    max_amount: Optional[float] = None
    currency: str = "EUR"
    exclude_expired: bool = True
    include_rolling: bool = True  # include opportunities with no fixed deadline
    scan_frequency_hours: int = 24
    last_scan: Optional[datetime] = None  # None until the first scan runs
    enabled: bool = True
73
+
74
+
75
@dataclass
class ScanResult:
    """Outcome and metadata of one opportunity scan run."""

    scan_id: str
    config_id: str
    started_at: datetime
    completed_at: Optional[datetime] = None  # None while in progress
    sources_scanned: List[str] = field(default_factory=list)
    opportunities_found: int = 0
    new_opportunities: int = 0
    updated_opportunities: int = 0
    errors: List[str] = field(default_factory=list)
    status: str = "in_progress"
92
+
93
+
94
class OpportunityScanner:
    """Scan funding databases for opportunities.

    Placeholder implementation: scan configurations are held in memory
    and every scan coroutine logs its call and returns an empty result.
    Production integrations planned: CORDIS API (Horizon Europe calls),
    national agency feeds (DFG RSS, ANR open data, UKRI Gateway),
    foundation sites/newsletters, and custom institutional sources.
    """

    def __init__(
        self,
        database_url: Optional[str] = None,
        embedding_client: Optional[Any] = None,
    ):
        """Store collaborator handles; performs no I/O.

        Args:
            database_url: Database for storing opportunities.
            embedding_client: Client for semantic-search embeddings.
        """
        self.database_url = database_url
        self.embedding_client = embedding_client
        self.name = "OpportunityScanner"

        # In-memory registry of scan configurations, keyed by config_id.
        self.configurations: Dict[str, ScanConfiguration] = {}

        logger.info(f"Initialized {self.name} (placeholder)")

    async def register_configuration(
        self,
        config: ScanConfiguration,
    ) -> None:
        """Register (or replace) a scan configuration.

        Args:
            config: Scan configuration to register.
        """
        self.configurations[config.config_id] = config
        logger.info(f"Registered scan configuration: {config.name}")

    async def run_scan(
        self,
        config_id: Optional[str] = None,
    ) -> ScanResult:
        """Run an opportunity scan.

        Args:
            config_id: Specific configuration to use (or all if None).

        Returns:
            Scan result; currently an immediately-completed placeholder
            with zero opportunities.

        TODO: Implement actual scanning logic.
        """
        logger.info(f"Running opportunity scan (config: {config_id or 'all'})")

        stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        return ScanResult(
            scan_id=f"scan_{stamp}",
            config_id=config_id or "all",
            started_at=datetime.now(),
            completed_at=datetime.now(),
            sources_scanned=[],
            opportunities_found=0,
            new_opportunities=0,
            updated_opportunities=0,
            status="placeholder",
        )

    async def scan_cordis(
        self,
        keywords: List[str],
    ) -> List[Dict[str, Any]]:
        """Query CORDIS for Horizon Europe call announcements.

        Args:
            keywords: Keywords to search for.

        Returns:
            Opportunities from CORDIS; empty until implemented.

        TODO: Implement CORDIS API integration.
        """
        logger.info(f"Scanning CORDIS with keywords: {keywords}")
        return []

    async def scan_erc(
        self,
        research_areas: List[str],
    ) -> List[Dict[str, Any]]:
        """Query the ERC portal for grant opportunities.

        Args:
            research_areas: Research areas to match.

        Returns:
            ERC opportunities; empty until implemented.

        TODO: Implement ERC portal integration.
        """
        logger.info(f"Scanning ERC for research areas: {research_areas}")
        return []

    async def semantic_search(
        self,
        query: str,
        sources: Optional[List[DataSource]] = None,
        top_k: int = 10,
    ) -> List[Dict[str, Any]]:
        """Find opportunities by vector similarity to a text query.

        Args:
            query: Natural language query.
            sources: Data sources to search.
            top_k: Number of results to return.

        Returns:
            Matching opportunities with scores; empty until implemented.

        TODO: Implement embedding-based search.
        """
        logger.info(f"Semantic search: {query[:50]}...")
        return []

    async def get_scan_history(
        self,
        limit: int = 10,
    ) -> List[ScanResult]:
        """Fetch the most recent scan results.

        Args:
            limit: Maximum number of results.

        Returns:
            Recent scan results; empty until implemented.

        TODO: Implement scan history retrieval.
        """
        logger.info(f"Getting scan history (limit: {limit})")
        return []
src/workflow/langgraph_state.py CHANGED
@@ -13,12 +13,29 @@ from langgraph.graph.message import add_messages
13
 
14
  class ScenarioType(str, Enum):
15
  """
16
- VISTA scenario types.
17
- Each scenario has a dedicated multi-agent workflow.
 
 
 
 
 
18
  """
 
19
  PATENT_WAKEUP = "patent_wakeup" # Scenario 1: Dormant IP valorization
20
  AGREEMENT_SAFETY = "agreement_safety" # Scenario 2: Legal agreement review
21
  PARTNER_MATCHING = "partner_matching" # Scenario 5: Stakeholder matching
 
 
 
 
 
 
 
 
 
 
 
22
  GENERAL = "general" # Custom/general purpose tasks
23
 
24
 
@@ -461,3 +478,217 @@ class ValorizationBrief(BaseModel):
461
  # Metadata
462
  generated_date: str = Field(..., description="Generation date")
463
  version: str = Field(default="1.0", description="Document version")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
class ScenarioType(str, Enum):
    """Scenario types for Technology Transfer Office (TTO) automation.

    Each member selects a dedicated multi-agent workflow aligned with
    VISTA/Horizon EU TTO operations.

    Coverage status:
        - Fully covered (3): Patent Wake-Up, Agreement Safety, Partner Matching
        - Partially covered (5): License Compliance, Award Identification,
          IP Portfolio, Due Diligence, Reporting
        - Not covered (2): Grant Writing, Negotiation Support
    """

    # --- Fully implemented scenarios ---
    PATENT_WAKEUP = "patent_wakeup"        # Scenario 1: dormant IP valorization
    AGREEMENT_SAFETY = "agreement_safety"  # Scenario 2: legal agreement review
    PARTNER_MATCHING = "partner_matching"  # Scenario 5: stakeholder matching

    # --- New scenarios (placeholder, partially implemented) ---
    LICENSE_COMPLIANCE = "license_compliance"      # Scenario 3: license tracking & compliance
    AWARD_IDENTIFICATION = "award_identification"  # Scenario 4: funding & award opportunities

    # --- Future scenarios (not yet implemented) ---
    IP_PORTFOLIO = "ip_portfolio"    # IP portfolio management
    DUE_DILIGENCE = "due_diligence"  # technology due diligence
    REPORTING = "reporting"          # TTO metrics and reporting

    # --- General purpose ---
    GENERAL = "general"  # custom/general purpose tasks
40
 
41
 
 
478
  # Metadata
479
  generated_date: str = Field(..., description="Generation date")
480
  version: str = Field(default="1.0", description="Document version")
481
+
482
+
483
+ # ============================================================================
484
+ # License Compliance Monitoring Models (Scenario 3)
485
+ # ============================================================================
486
+
487
class ComplianceStatus(str, Enum):
    """Possible compliance states for a monitored license agreement (Scenario 3)."""

    COMPLIANT = "compliant"
    NON_COMPLIANT = "non_compliant"
    AT_RISK = "at_risk"
    PENDING_REVIEW = "pending_review"
    EXPIRED = "expired"
494
+
495
+
496
class LicenseComplianceAnalysis(BaseModel):
    """
    License compliance analysis output from LicenseComplianceAgent.

    GDPR Note: This model may contain references to personal data
    (licensee contacts, payment info). Implement appropriate access
    controls and data retention policies.
    """
    # Identity of the agreement under analysis
    license_id: str = Field(..., description="License agreement identifier")
    agreement_name: str = Field(..., description="Name of the agreement")
    licensee: str = Field(..., description="Licensee organization name")

    # Compliance status
    overall_status: ComplianceStatus = Field(..., description="Overall compliance status")
    compliance_score: float = Field(..., ge=0.0, le=1.0, description="Compliance score 0-1")

    # Payment compliance
    payments_current: bool = Field(..., description="All payments up to date")
    payments_overdue: int = Field(default=0, description="Number of overdue payments")
    total_outstanding: float = Field(default=0.0, description="Total outstanding amount")
    currency: str = Field(default="EUR", description="Currency code")

    # Milestone compliance
    # NOTE(review): date fields are plain strings — presumably ISO 8601; confirm with producers.
    milestones_on_track: bool = Field(..., description="All milestones on track")
    milestones_overdue: int = Field(default=0, description="Number of overdue milestones")
    next_milestone_date: Optional[str] = Field(None, description="Next milestone due date")

    # Alerts and issues
    active_alerts: List[str] = Field(default_factory=list, description="Active compliance alerts")
    issues_identified: List[str] = Field(default_factory=list, description="Identified issues")
    recommendations: List[str] = Field(default_factory=list, description="Compliance recommendations")

    # Confidence and validation (human-in-the-loop support)
    confidence_score: float = Field(..., ge=0.0, le=1.0, description="Analysis confidence")
    human_review_required: bool = Field(default=False, description="Requires human review")
    last_reviewed: Optional[str] = Field(None, description="Last human review date")
532
+
533
+
534
class RevenueReport(BaseModel):
    """Revenue report for a license portfolio over one reporting period."""
    report_id: str = Field(..., description="Report identifier")
    # NOTE(review): period boundaries are plain strings — presumably ISO 8601 dates; confirm.
    period_start: str = Field(..., description="Reporting period start")
    period_end: str = Field(..., description="Reporting period end")

    # Revenue summary
    total_revenue: float = Field(..., description="Total revenue in period")
    currency: str = Field(default="EUR", description="Currency code")
    by_license: Dict[str, float] = Field(default_factory=dict, description="Revenue by license")
    by_type: Dict[str, float] = Field(default_factory=dict, description="Revenue by type")

    # Comparisons (percentages; None when no baseline is available)
    vs_previous_period: Optional[float] = Field(None, description="% change vs previous period")
    vs_forecast: Optional[float] = Field(None, description="% vs forecast")

    # Analysis quality
    confidence_score: float = Field(..., ge=0.0, le=1.0, description="Report confidence")
552
+
553
+
554
+ # ============================================================================
555
+ # Award Identification Models (Scenario 4)
556
+ # ============================================================================
557
+
558
class FundingOpportunity(BaseModel):
    """
    Funding opportunity identified by the award scanning system (Scenario 4).

    Represents grants, awards, and other funding opportunities
    matched to research capabilities.
    """
    opportunity_id: str = Field(..., description="Opportunity identifier")
    title: str = Field(..., description="Opportunity title")
    description: str = Field(..., description="Full description")

    # Funder information
    funder: str = Field(..., description="Funding organization name")
    funder_type: str = Field(..., description="Type: government, EU, foundation, corporate")
    program_name: Optional[str] = Field(None, description="Funding program name")

    # Funding details
    amount_min: Optional[float] = Field(None, description="Minimum funding amount")
    amount_max: Optional[float] = Field(None, description="Maximum funding amount")
    currency: str = Field(default="EUR", description="Currency code")
    funding_type: str = Field(..., description="Type: grant, award, prize, fellowship")

    # Timing
    # NOTE(review): dates are plain strings — presumably ISO 8601; confirm with scanner output.
    deadline: Optional[str] = Field(None, description="Application deadline")
    duration_months: Optional[int] = Field(None, description="Funding duration in months")
    decision_date: Optional[str] = Field(None, description="Expected decision date")

    # Matching against institutional research capabilities
    match_score: float = Field(..., ge=0.0, le=1.0, description="Match score with capabilities")
    match_rationale: str = Field(..., description="Why this is a good match")
    eligibility_status: str = Field(..., description="eligible, ineligible, partial, unknown")
    eligibility_notes: List[str] = Field(default_factory=list, description="Eligibility details")

    # Next steps
    recommended_action: str = Field(..., description="Recommended next step")
    application_effort: str = Field(..., description="Low, Medium, High effort required")
    success_likelihood: str = Field(..., description="Low, Medium, High likelihood")

    # Metadata
    url: Optional[str] = Field(None, description="Opportunity URL")
    keywords: List[str] = Field(default_factory=list, description="Relevant keywords")
    research_areas: List[str] = Field(default_factory=list, description="Matching research areas")
    discovered_date: str = Field(..., description="When opportunity was discovered")

    # Quality
    confidence_score: float = Field(..., ge=0.0, le=1.0, description="Analysis confidence")
604
+
605
+
606
class AwardApplicationStatus(BaseModel):
    """Status tracking for award/grant applications."""
    application_id: str = Field(..., description="Application identifier")
    opportunity_id: str = Field(..., description="Target opportunity")

    # Application status
    status: str = Field(..., description="draft, internal_review, submitted, under_review, awarded, rejected")
    submitted_date: Optional[str] = Field(None, description="Submission date")
    decision_date: Optional[str] = Field(None, description="Decision received date")

    # Document completion progress
    documents_completed: int = Field(default=0, description="Completed documents")
    documents_required: int = Field(default=0, description="Total required documents")
    documents_pending_review: int = Field(default=0, description="Documents pending review")

    # Quality and validation (CriticAgent + human-in-the-loop)
    overall_score: Optional[float] = Field(None, ge=0.0, le=1.0, description="Application quality score")
    critic_validation: Optional[Dict[str, Any]] = Field(None, description="CriticAgent validation result")
    human_approved: bool = Field(default=False, description="Human approval received")

    # Notes
    internal_notes: List[str] = Field(default_factory=list, description="Internal notes")
    feedback: Optional[str] = Field(None, description="Feedback from funder if received")
629
+
630
+
631
+ # ============================================================================
632
+ # Human-in-the-Loop Decision Models
633
+ # ============================================================================
634
+
635
class HumanDecisionPoint(BaseModel):
    """
    Human-in-the-loop decision point for workflow orchestration.

    Captures when and why human input is required, and tracks
    the decision made.
    """
    decision_id: str = Field(..., description="Decision point identifier")
    workflow_id: str = Field(..., description="Parent workflow ID")
    scenario: ScenarioType = Field(..., description="Scenario requiring decision")

    # Decision context presented to the human reviewer
    decision_type: str = Field(..., description="Type: approval, selection, verification, override")
    question: str = Field(..., description="Decision question for human")
    context: str = Field(..., description="Context and background for decision")
    options: List[str] = Field(default_factory=list, description="Available options")

    # AI recommendation shown alongside the decision
    ai_recommendation: Optional[str] = Field(None, description="AI recommended option")
    ai_confidence: Optional[float] = Field(None, ge=0.0, le=1.0, description="AI confidence in recommendation")
    ai_rationale: Optional[str] = Field(None, description="Rationale for AI recommendation")

    # Human decision (populated once the reviewer responds)
    human_decision: Optional[str] = Field(None, description="Human selected option")
    human_rationale: Optional[str] = Field(None, description="Human provided rationale")
    decided_by: Optional[str] = Field(None, description="User who made decision")
    decided_at: Optional[str] = Field(None, description="Timestamp of decision")

    # Lifecycle status
    status: str = Field(default="pending", description="pending, decided, expired, skipped")
    expires_at: Optional[str] = Field(None, description="When decision times out")

    # Audit trail
    created_at: str = Field(..., description="When decision point was created")
669
+
670
+
671
class SourceVerification(BaseModel):
    """
    Source verification for hallucination mitigation.

    Tracks sources used by AI agents and their verification status.
    """
    verification_id: str = Field(..., description="Verification identifier")
    claim: str = Field(..., description="AI-generated claim to verify")

    # Sources
    # NOTE(review): the source dict schema is not fixed here — confirm expected keys with producers.
    sources: List[Dict[str, Any]] = Field(default_factory=list, description="Supporting sources")
    # NOTE(review): source_count is stored independently of len(sources) — callers must keep them in sync.
    source_count: int = Field(default=0, description="Number of sources found")

    # Verification outcome
    verified: bool = Field(..., description="Claim is verified by sources")
    verification_score: float = Field(..., ge=0.0, le=1.0, description="Verification confidence")
    verification_method: str = Field(..., description="How verification was performed")

    # Issues found during verification
    discrepancies: List[str] = Field(default_factory=list, description="Discrepancies found")
    warnings: List[str] = Field(default_factory=list, description="Verification warnings")

    # Metadata
    verified_at: str = Field(..., description="When verification was performed")