| from copy import deepcopy |
|
|
| from typing import List, Dict, Optional, Any |
|
|
| from flows.base_flows import AtomicFlow |
|
|
| from flows.utils import logging |
| from .wikipediaAPI import WikipediaAPIWrapper |
|
|
| log = logging.get_logger(__name__) |
|
|
|
|
class WikiSearchAtomicFlow(AtomicFlow):
    """Atomic flow that looks up a search term on Wikipedia.

    Given an input dict containing a ``search_term`` key, returns the page
    content when an exact page match is found; otherwise returns a message
    listing similar page titles.

    Required configuration keys:
        lang: Wikipedia language edition to query (e.g. ``"en"``).
        top_k_results: number of search results the wrapper considers.
        doc_content_chars_max: truncation limit for returned page content.
    """

    REQUIRED_KEYS_CONFIG = ["lang", "top_k_results", "doc_content_chars_max"]
    REQUIRED_KEYS_CONSTRUCTOR = []

    SUPPORTS_CACHING: bool = True

    # Wrapper around the Wikipedia API. Built once in __init__ and reused
    # across run() calls (the original code re-created it per call and never
    # assigned this declared attribute).
    api_wrapper: WikipediaAPIWrapper

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # The config is immutable for the lifetime of the flow, so construct
        # the API wrapper once here instead of on every run() invocation.
        # NOTE(review): relies on super().__init__ populating self.flow_config,
        # which REQUIRED_KEYS_CONFIG validation implies — confirm against the
        # AtomicFlow base class.
        self.api_wrapper = WikipediaAPIWrapper(
            lang=self.flow_config["lang"],
            top_k_results=self.flow_config["top_k_results"],
            doc_content_chars_max=self.flow_config["doc_content_chars_max"],
        )

    def run(self,
            input_data: Dict[str, Any]) -> Dict[str, Any]:
        """Search Wikipedia for ``input_data["search_term"]``.

        Args:
            input_data: dict expected to contain a ``search_term`` key.

        Returns:
            A dict with a single key ``wiki_content`` holding either the
            fetched page content or a "could not find" message that lists
            similar page titles.
        """
        term = input_data.get("search_term", None)

        # Exact page hit: return its content directly.
        if page_content := self.api_wrapper._fetch_page(term):
            observation = page_content
        else:
            # No exact match: fall back to a list of similar page titles.
            page_titles = self.api_wrapper.search_page_titles(term)
            observation = f"Could not find [{term}]. similar: {page_titles}"

        return {"wiki_content": observation}
|
|