Spaces:

Qar-Raz
/

NLP-RAG

Running

hf-space: deploy branch without frontend/data/results

c7256ee about 21 hours ago

1.56 kB

	from typing import Any


	# NOTE(@Qamar): This function may need changes to surface additional metadata
	# for retrieved chunks, such as the title and URL.

	def build_retrieved_chunks(
	    contexts: list[str],
	    chunk_lookup: dict[str, dict[str, Any]],
	) -> list[dict[str, Any]]:
	    """Build one ranked result record per retrieved context string.

	    Args:
	        contexts: Retrieved chunk texts, in the order they should be ranked.
	        chunk_lookup: Mapping from chunk text to that chunk's metadata dict.
	            A text that is missing from the mapping, or mapped to ``None``
	            or an empty dict, is treated as having no metadata.

	    Returns:
	        A list with one dict per entry of ``contexts`` (empty when
	        ``contexts`` is empty). Each dict has the keys ``rank`` (1-based
	        position in ``contexts``), ``text``, ``source_title``,
	        ``source_url``, ``chunk_index``, ``page``, ``section``,
	        ``source_type``, ``image_url`` and ``extra_metadata``.
	    """
	    if not contexts:
	        return []

	    # Metadata keys left out of "extra_metadata". Hoisted because the set is
	    # the same for every chunk.
	    excluded_keys = {
	        "title",
	        "url",
	        "chunk_index",
	        "text",
	        "technique",
	        "chunking_technique",
	    }

	    retrieved_chunks: list[dict[str, Any]] = []

	    for idx, text in enumerate(contexts, start=1):
	        # "or {}" also covers a key that is present but maps to None, which
	        # a plain .get(text, {}) would pass through and then crash on.
	        meta = chunk_lookup.get(text) or {}

	        # First truthy value among the accepted image keys wins.
	        image_url = (
	            meta.get("image_url")
	            or meta.get("image")
	            or meta.get("thumbnail_url")
	            or meta.get("media_url")
	        )

	        # Everything not excluded is passed through as-is; this includes keys
	        # such as "page" or "section" that are also surfaced at top level.
	        extra_metadata = {
	            key: value
	            for key, value in meta.items()
	            if key not in excluded_keys
	        }

	        retrieved_chunks.append(
	            {
	                "rank": idx,
	                "text": text,
	                # Missing or empty title/url fall back to placeholders.
	                "source_title": meta.get("title") or "Untitled",
	                "source_url": meta.get("url") or "",
	                "chunk_index": meta.get("chunk_index"),
	                "page": meta.get("page"),
	                "section": meta.get("section"),
	                # "source" is accepted as a fallback key for the source type.
	                "source_type": meta.get("source_type") or meta.get("source"),
	                "image_url": image_url,
	                "extra_metadata": extra_metadata,
	            }
	        )

	    return retrieved_chunks