| """Wiki compiler — uses Claude to integrate raw sources into structured wiki articles.""" |
|
|
| import json |
| import datetime |
| import anthropic |
|
|
# System prompt for the compile step (see compile_source): steers the model to
# merge new source material into the wiki and to reply with a bare JSON object
# matching the schema embedded below — compile_source parses the reply with
# json.loads() after stripping any code fences the model adds anyway.
# NOTE: this is a runtime string sent to the API; edit with care.
COMPILE_SYSTEM_PROMPT: str = """You are a clinical knowledge wiki curator for the Nursing Citizen Development Organisation.
Your job is to integrate new source material into an existing nursing knowledge base (wiki).

The wiki is a collection of markdown articles organised by category. Each article has:
- A title, category, tags, and backlinks to other articles
- Substantive clinical content aligned with NMC Standards of Proficiency (2018), UK law, and NHS frameworks

When given new source material, you must:
1. Identify key nursing concepts, frameworks, guidelines, or clinical information in the source
2. Decide which existing articles should be UPDATED with new information
3. Identify any new articles that should be CREATED for concepts not yet covered
4. Integrate the information accurately and clinically appropriately
5. Add/update backlinks between related articles
6. Always cite the source in any updated/created articles

Return a JSON object with this structure:
{
"summary": "Brief summary of what was integrated and why",
"articles_updated": [
{
"slug": "article_slug",
"title": "Article Title",
"category": "category_name",
"tags": ["tag1", "tag2"],
"content": "Full markdown content of the updated article"
}
],
"articles_created": [
{
"slug": "new_slug",
"title": "New Article Title",
"category": "category_name",
"tags": ["tag1", "tag2"],
"content": "Full markdown content of the new article"
}
],
"index_updates": "Updated one-line entries for the index (markdown format)",
"log_entry": "Log entry text for this compilation"
}

Categories to use: standards, clinical, pharmacology, evidence, frameworks, safety, law, mental_health, research, ethics

Clinical content must:
- Be accurate and evidence-based
- Include NMC proficiency mappings where relevant
- Include UK-specific references (NICE, NMC, NHS, BNF)
- Include the disclaimer: "This tool supports but does not replace clinical judgment."
- Use UK spellings (organisation, anaesthesia, etc.)
"""
|
|
|
|
| CHUNK_SIZE = 7000 |
|
|
|
|
| def _chunk_text(text: str, chunk_size: int = CHUNK_SIZE) -> list[str]: |
| """Split text into chunks at paragraph boundaries.""" |
| if len(text) <= chunk_size: |
| return [text] |
| chunks = [] |
| paragraphs = text.split("\n\n") |
| current = [] |
| current_len = 0 |
| for para in paragraphs: |
| if current_len + len(para) > chunk_size and current: |
| chunks.append("\n\n".join(current)) |
| current = [para] |
| current_len = len(para) |
| else: |
| current.append(para) |
| current_len += len(para) |
| if current: |
| chunks.append("\n\n".join(current)) |
| return chunks |
|
|
|
|
def compile_source(client: anthropic.Anthropic, source_title: str, source_content: str,
                   existing_index: str, existing_articles: dict, model: str = "claude-sonnet-4-6") -> dict:
    """
    Integrate a new source into the wiki.

    Large documents are automatically split into chunks and compiled sequentially,
    with the wiki state updated between chunks so each pass builds on the last.

    Args:
        client: Anthropic API client used for the compile calls.
        source_title: Human-readable title of the source being integrated.
        source_content: Raw text of the source.
        existing_index: Current markdown index of the wiki (shown to the model).
        existing_articles: Mapping of slug -> article dict; mutated in place as
            each chunk's results land, so later chunks see earlier updates.
        model: Model identifier passed to the Messages API.

    Returns:
        A merged dict with "articles_updated", "articles_created", "summary",
        "index_updates" and "log_entry", deduplicated by slug across chunks.

    Raises:
        json.JSONDecodeError: if a model response is not valid JSON after
            stripping optional code fences.
    """
    chunks = _chunk_text(source_content)
    total_chunks = len(chunks)

    # Snapshot the slugs that existed before this compilation so the final
    # dedup pass can tell genuinely new articles from updated ones.
    preexisting_slugs = set(existing_articles)

    merged: dict = {"articles_updated": [], "articles_created": [], "summary": "", "index_updates": "", "log_entry": ""}

    for chunk_num, chunk in enumerate(chunks, 1):
        chunk_label = f"{source_title} (part {chunk_num}/{total_chunks})" if total_chunks > 1 else source_title

        # Rebuilt each pass: earlier chunks may have added or updated articles.
        # Only the first 8 articles are previewed to bound prompt size.
        articles_context = ""
        for art in list(existing_articles.values())[:8]:
            preview = art["content"][:400].replace("\n", " ")
            articles_context += f"\n- **{art['title']}** ({art['category']}): {preview}...\n"

        user_prompt = f"""## Existing Wiki Index
{existing_index}

## Sample of Existing Articles (previews)
{articles_context}

## New Source to Integrate
**Title**: {chunk_label}
{"**(Large document — this is chunk " + str(chunk_num) + " of " + str(total_chunks) + ")**" if total_chunks > 1 else ""}

**Content**:
{chunk}

Please integrate this source into the wiki. Return valid JSON only, no markdown code fences."""

        response = client.messages.create(
            model=model,
            max_tokens=4096,
            system=COMPILE_SYSTEM_PROMPT,
            messages=[{"role": "user", "content": user_prompt}],
        )

        # Strip an optional ``` fence the model sometimes adds despite the
        # instruction not to (first line is the opening fence/language tag).
        raw = response.content[0].text.strip()
        if raw.startswith("```"):
            raw = raw.split("\n", 1)[1]
        if raw.endswith("```"):
            raw = raw.rsplit("```", 1)[0]

        result = json.loads(raw)

        today = datetime.date.today().isoformat()
        for art in result.get("articles_updated", []) + result.get("articles_created", []):
            art["last_updated"] = today
            # BUGFIX: carry forward provenance accumulated by earlier chunks or
            # compilations instead of resetting it each pass, and avoid citing
            # the same source twice.
            sources = existing_articles.get(art["slug"], {}).get("sources", []) + art.get("sources", [])
            if source_title not in sources:
                sources.append(source_title)
            art["sources"] = sources
            # Update the live wiki state so the next chunk builds on this one.
            existing_articles[art["slug"]] = art

        merged["articles_updated"].extend(result.get("articles_updated", []))
        merged["articles_created"].extend(result.get("articles_created", []))
        if result.get("summary"):
            merged["summary"] += f"[Part {chunk_num}] {result['summary']} "
        if result.get("index_updates"):
            # BUGFIX: index_updates was previously never merged and always
            # returned as "". Keep the latest non-empty value.
            merged["index_updates"] = result["index_updates"]
        if result.get("log_entry"):
            merged["log_entry"] = result["log_entry"]

    # Deduplicate by slug across chunks (the last version of an article wins).
    # BUGFIX: preserve the updated/created split — previously every article was
    # collapsed into "articles_updated" and "articles_created" was emptied.
    seen: dict = {}
    for art in merged["articles_updated"] + merged["articles_created"]:
        seen[art["slug"]] = art
    merged["articles_updated"] = [a for a in seen.values() if a["slug"] in preexisting_slugs]
    merged["articles_created"] = [a for a in seen.values() if a["slug"] not in preexisting_slugs]

    return merged
|
|
|
|
def rebuild_index(client: anthropic.Anthropic, articles: dict, model: str = "claude-sonnet-4-6") -> str:
    """Regenerate the wiki index from all articles.

    Args:
        client: Anthropic API client.
        articles: Mapping of slug -> article dict; only each article's title,
            category and (optional) tags are used.
        model: Model identifier passed to the Messages API.

    Returns:
        Markdown index text grouped by category, as produced by the model.
    """
    # Slugs never appear in the listing, so iterate values only; build the
    # listing with a comprehension rather than an append loop.
    article_list = [
        f"- **{art['title']}** ({art['category']}): {', '.join(art.get('tags', []))}"
        for art in articles.values()
    ]

    prompt = f"""Regenerate a well-organised wiki index for these nursing knowledge articles.
Group them by category. Each entry should be a one-line summary.
Format as markdown with category headers (##).

Articles:
{chr(10).join(article_list)}

Return only the markdown index content."""

    response = client.messages.create(
        model=model,
        max_tokens=2048,
        messages=[{"role": "user", "content": prompt}],
    )
    return response.content[0].text.strip()
|
|