File size: 1,557 Bytes
c7256ee | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 | from typing import Any
# TODO(Qamar): this may need to change to expose additional metadata for
# retrieved chunks, such as title and url.
def build_retrieved_chunks(
    contexts: list[str],
    chunk_lookup: dict[str, dict[str, Any]],
) -> list[dict[str, Any]]:
    """Pair each retrieved context string with its metadata record.

    Args:
        contexts: Retrieved chunk texts, in rank order. An empty or ``None``
            value yields an empty list.
        chunk_lookup: Mapping from chunk text to that chunk's metadata dict.
            A ``None`` table, a missing key, or a key mapped to ``None`` are
            all treated as "no metadata" instead of raising.

    Returns:
        One dict per context, in input order, with the keys ``rank``
        (1-based position), ``text``, ``source_title``, ``source_url``,
        ``chunk_index``, ``page``, ``section``, ``source_type``,
        ``image_url`` and ``extra_metadata``.
    """
    if not contexts:
        return []

    # A None lookup table behaves like an empty one.
    lookup = chunk_lookup or {}

    # Metadata keys left out of ``extra_metadata``. Note that ``page``,
    # ``section`` and the source/image keys are NOT listed, so they appear
    # both as top-level fields and inside ``extra_metadata``.
    excluded_keys = frozenset(
        {"title", "url", "chunk_index", "text", "technique", "chunking_technique"}
    )

    retrieved_chunks: list[dict[str, Any]] = []
    for rank, text in enumerate(contexts, start=1):
        # ``or {}`` also covers a key that is present but mapped to None,
        # which ``.get(text, {})`` would hand back unchanged.
        meta = lookup.get(text) or {}

        # First truthy candidate wins; if none is truthy the last candidate's
        # value (possibly "" or None) is what gets stored.
        source_type = meta.get("source_type") or meta.get("source")
        image_url = (
            meta.get("image_url")
            or meta.get("image")
            or meta.get("thumbnail_url")
            or meta.get("media_url")
        )

        extra_metadata = {
            key: value for key, value in meta.items() if key not in excluded_keys
        }

        retrieved_chunks.append(
            {
                "rank": rank,
                "text": text,
                "source_title": meta.get("title") or "Untitled",
                "source_url": meta.get("url") or "",
                "chunk_index": meta.get("chunk_index"),
                "page": meta.get("page"),
                "section": meta.get("section"),
                "source_type": source_type,
                "image_url": image_url,
                "extra_metadata": extra_metadata,
            }
        )
    return retrieved_chunks
|