| | """Base schema for data structures.""" |
| | from abc import abstractmethod |
| | from dataclasses import dataclass |
| | from typing import Any, Dict, List, Optional |
| |
|
| | from dataclasses_json import DataClassJsonMixin |
| |
|
| |
|
@dataclass
class BaseDocument(DataClassJsonMixin):
    """Base document.

    Generic abstract interface that covers both index structs
    and documents.

    Every field defaults to None; the ``get_*`` accessors raise
    ValueError when the field they return has not been set.

    """

    # Core content fields.
    text: Optional[str] = None
    doc_id: Optional[str] = None
    embedding: Optional[List[float]] = None

    # Arbitrary key/value info attached to the document.
    extra_info: Optional[Dict[str, Any]] = None

    @classmethod
    @abstractmethod
    def get_type(cls) -> str:
        """Return the document type as a string (abstract)."""

    def get_text(self) -> str:
        """Return the text.

        Raises:
            ValueError: If ``text`` is None.

        """
        if self.text is not None:
            return self.text
        raise ValueError("text field not set.")

    def get_doc_id(self) -> str:
        """Return the doc_id.

        Raises:
            ValueError: If ``doc_id`` is None.

        """
        if self.doc_id is not None:
            return self.doc_id
        raise ValueError("doc_id not set.")

    @property
    def is_doc_id_none(self) -> bool:
        """Whether ``doc_id`` is currently None."""
        return self.doc_id is None

    def get_embedding(self) -> List[float]:
        """Return the embedding.

        Raises:
            ValueError: If ``embedding`` is None.

        """
        if self.embedding is not None:
            return self.embedding
        raise ValueError("embedding not set.")

    @property
    def extra_info_str(self) -> Optional[str]:
        """Render ``extra_info`` as newline-separated ``key: value`` lines.

        Returns None when ``extra_info`` is None.

        """
        info = self.extra_info
        if info is None:
            return None

        # One "key: value" line per entry, in the dict's iteration order.
        lines = []
        for key, value in info.items():
            lines.append(f"{key}: {str(value)}")
        return "\n".join(lines)
| |
|