| | |
| |
|
| | from pinecone import Pinecone |
| | from langchain_openai import AzureOpenAIEmbeddings |
| | import uuid |
| | import pandas as pd |
| | import streamlit as st |
| | import os |
| | |
# Pinecone client, authenticated via Streamlit secrets (PC_API_KEY).
pc = Pinecone(api_key=st.secrets["PC_API_KEY"])

# Handle to the serverless index holding the embedded documents.
index = pc.Index("openai-serverless")

# Azure OpenAI configuration for the embedding model.
# NOTE(review): endpoint, deployment name, and API version are hard-coded;
# only the API key comes from Streamlit secrets — consider moving the rest
# into secrets/config as well.
os.environ["AZURE_OPENAI_API_KEY"] = st.secrets["api_key"]
os.environ["AZURE_OPENAI_ENDPOINT"] = "https://davidfearn-gpt4.openai.azure.com/"
os.environ["AZURE_OPENAI_DEPLOYMENT_NAME"] = "text-embedding-ada-002"
os.environ["AZURE_OPENAI_API_VERSION"] = "2024-08-01-preview"

# Embedding model used to vectorize queries. Must be constructed AFTER the
# env vars above are set: AZURE_OPENAI_API_KEY is picked up implicitly by
# the client, and the other values are read back out of os.environ here.
embeddings_model = AzureOpenAIEmbeddings(
    azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
    azure_deployment=os.environ["AZURE_OPENAI_DEPLOYMENT_NAME"],
    openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"],
)
| |
|
def retriever(query, k, namespace="gskRegIntel"):
    """
    Embed a query string and search the Pinecone index for similar entries.

    :param query: The string to embed and search for.
    :param k: Number of top results to retrieve.
    :param namespace: Pinecone namespace to search within. Defaults to
        "gskRegIntel", the namespace previously hard-coded here, so
        existing two-argument callers behave identically.
    :return: List of Pinecone match objects (with metadata and scores),
        or an empty list if the embedding or query call fails.
    """
    try:
        # Vectorize the query with the same embedding model used at
        # indexing time so distances are comparable.
        query_embedding = embeddings_model.embed_query(query)

        # include_metadata=True so callers can read the stored document
        # fields directly off each match.
        results = index.query(
            vector=query_embedding,
            top_k=k,
            namespace=namespace,
            include_metadata=True,
        )
        return results.matches
    except Exception as e:
        # Best-effort for the Streamlit UI: report the failure and return
        # an empty result set instead of crashing the app on a transient
        # API/network error.
        print(f"Error during search: {e}")
        return []