Spaces:

parthtamu
/

rag-code-assistant

Sleeping

0-Parth-D

Initial Commit

dcac338 about 1 month ago

1.61 kB

	from sentence_transformers import SentenceTransformer
	import numpy as np
	import hashlib
	import chromadb

	# Sentence-embedding model shared by indexing and querying, so stored
	# vectors and query vectors come from the same encoder.
	model = SentenceTransformer("all-MiniLM-L6-v2")

	# On-disk Chroma client; data is kept under the "chroma_db" path.
	client = chromadb.PersistentClient(path="chroma_db")

	# Fetch the collection if it exists, otherwise create it. The metadata
	# requests cosine distance for the HNSW index.
	collection = client.get_or_create_collection(
	    "test_collection",
	    metadata={"hnsw:space": "cosine"},
	)

	# Function to embed and store the sentences in the collection
	def embed_and_store(sentences, collection):
	    """Embed *sentences* with the module-level model and upsert them.

	    Each sentence is stored under the hex SHA-256 digest of its UTF-8
	    text, so re-storing the same sentence overwrites the existing entry
	    instead of adding a new one.

	    Args:
	        sentences: Iterable of strings to index.
	        collection: Collection object exposing ``upsert(documents=...,
	            ids=..., embeddings=...)``.

	    Returns:
	        A list with one id per input sentence, in input order. Repeated
	        sentences yield repeated ids.
	    """
	    sentences = list(sentences)
	    ids = [
	        hashlib.sha256(sentence.encode("utf-8")).hexdigest()
	        for sentence in sentences
	    ]

	    # Identical sentences share an id. Collapse them so each id is sent to
	    # the collection only once (an upsert batch must not repeat an id);
	    # dicts preserve first-seen order.
	    unique_docs = dict(zip(ids, sentences))

	    # Nothing to store: skip the model call and the upsert, which does not
	    # accept an empty batch.
	    if not unique_docs:
	        return ids

	    documents = list(unique_docs.values())
	    collection.upsert(
	        documents=documents,
	        ids=list(unique_docs),
	        embeddings=model.encode(documents),
	    )
	    return ids

	# Function to perform semantic search
	def semantic_search(query, model=model, collection=collection, n_results=3):
	    """Return the collection's nearest matches for *query*.

	    Args:
	        query: Text to search for; it is passed to ``model.encode``.
	        model: Encoder used to embed the query. Defaults to the
	            module-level model as bound when this function was defined.
	        collection: Collection to query. Defaults to the module-level
	            collection as bound when this function was defined.
	        n_results: Number of results requested from the collection.

	    Returns:
	        Whatever ``collection.query`` returns for the encoded query.
	    """
	    return collection.query(
	        query_embeddings=model.encode(query),
	        n_results=n_results,
	    )

	# Sample corpus for the demo below.
	sentences = [
	    "How do I read a file in Python?",
	    "Open a file using the open() function",
	    "Python file I/O tutorial",
	    "What is a for loop?",
	    "Iterating over a list in Python",
	    "How to use list comprehensions",
	    "Docker container vs Docker image",
	    "What is a Dockerfile?",
	    "Sort a list in Python",
	    "The sky is blue and the sun is yellow",
	]

	# Index the corpus; the returned ids are kept at module level.
	ids = embed_and_store(sentences, collection)

	# Run one example query against the indexed corpus and print the raw
	# result returned by the collection.
	user_query = "reading and writing files in Python"
	results = semantic_search(
	    user_query,
	    model=model,
	    collection=collection,
	    n_results=3,
	)
	print(results)