File size: 1,557 Bytes
c7256ee
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
from typing import Any


# TODO(Qamar): this function may need changes to surface additional metadata
# for retrieved chunks, such as title and URL.

def build_retrieved_chunks(
    contexts: list[str],
    chunk_lookup: dict[str, dict[str, Any]],
) -> list[dict[str, Any]]:
    """Pair each retrieved context string with its stored metadata.

    Each context is looked up in ``chunk_lookup`` by its exact text. A context
    with no entry (or whose entry is ``None``) still produces a record, filled
    with defaults.

    Args:
        contexts: Retrieved chunk texts, in ranked order.
        chunk_lookup: Mapping from chunk text to that chunk's metadata dict.

    Returns:
        One dict per context, in input order, with the keys ``rank`` (1-based
        position), ``text``, ``source_title`` (``"Untitled"`` when missing or
        falsy), ``source_url`` (``""`` when missing or falsy), ``chunk_index``,
        ``page``, ``section``, ``source_type`` (falls back to the ``source``
        key), ``image_url`` (first truthy of ``image_url``, ``image``,
        ``thumbnail_url``, ``media_url``) and ``extra_metadata``. An empty
        ``contexts`` yields an empty list.
    """
    if not contexts:
        return []

    # Keys left out of ``extra_metadata``. Note that ``page``, ``section``,
    # ``source_type`` and the image keys are NOT in this set, so they appear
    # both as top-level fields and inside ``extra_metadata``.
    excluded_keys = frozenset(
        {"title", "url", "chunk_index", "text", "technique", "chunking_technique"}
    )

    retrieved_chunks: list[dict[str, Any]] = []

    for rank, text in enumerate(contexts, start=1):
        # ``or {}`` also covers a key that is present but mapped to None;
        # ``.get(text, {})`` alone would return None and crash below.
        meta = chunk_lookup.get(text) or {}

        image_url = (
            meta.get("image_url")
            or meta.get("image")
            or meta.get("thumbnail_url")
            or meta.get("media_url")
        )
        extra_metadata = {
            key: value for key, value in meta.items() if key not in excluded_keys
        }

        retrieved_chunks.append(
            {
                "rank": rank,
                "text": text,
                "source_title": meta.get("title") or "Untitled",
                "source_url": meta.get("url") or "",
                "chunk_index": meta.get("chunk_index"),
                "page": meta.get("page"),
                "section": meta.get("section"),
                "source_type": meta.get("source_type") or meta.get("source"),
                "image_url": image_url,
                "extra_metadata": extra_metadata,
            }
        )

    return retrieved_chunks