| from typing import Any |
|
|
|
|
| |
| |
|
|
def build_retrieved_chunks(
    contexts: list[str],
    chunk_lookup: dict[str, dict[str, Any]],
) -> list[dict[str, Any]]:
    """Turn retrieved context strings into ranked, metadata-annotated records.

    Each context string is used as a key into ``chunk_lookup``; a string with
    no entry there is treated as having empty metadata.

    Args:
        contexts: Retrieved chunk texts, in the order they should be ranked.
        chunk_lookup: Mapping from chunk text to that chunk's metadata dict.

    Returns:
        One dict per context, in input order, with 1-based ``rank``. An empty
        (or otherwise falsy) ``contexts`` yields ``[]``.
    """
    if not contexts:
        return []

    # Keys left out of ``extra_metadata``. Note that "page", "section",
    # the source-type keys and the image keys are NOT in this set, so they
    # also show up inside ``extra_metadata`` when present.
    omitted_keys = {"title", "url", "chunk_index", "text", "technique", "chunking_technique"}
    image_keys = ("image_url", "image", "thumbnail_url", "media_url")

    def describe(rank: int, text: str) -> dict[str, Any]:
        info = chunk_lookup.get(text, {})

        # First truthy image field wins; if none is truthy, the value of the
        # last key looked up is kept (mirrors an ``a or b or c or d`` chain).
        picture = None
        for key in image_keys:
            picture = info.get(key)
            if picture:
                break

        kind = info.get("source_type")
        if not kind:
            kind = info.get("source")

        leftovers = {}
        for key, value in info.items():
            if key in omitted_keys:
                continue
            leftovers[key] = value

        return {
            "rank": rank,
            "text": text,
            # Falsy titles/urls (missing, None, "") fall back to defaults.
            "source_title": info.get("title") or "Untitled",
            "source_url": info.get("url") or "",
            "chunk_index": info.get("chunk_index"),
            "page": info.get("page"),
            "section": info.get("section"),
            "source_type": kind,
            "image_url": picture,
            "extra_metadata": leftovers,
        }

    return [describe(rank, text) for rank, text in enumerate(contexts, start=1)]
|
|