from app.core.llm import get_llm
from app.memory.qdrant_memory import search_memory

# Model identifier sent to the chat-completions endpoint when the caller does
# not override it.
_DEFAULT_MODEL = "meta-llama/Llama-3.1-70B-Instruct:scaleway"

# System prompt template; ``{memory_context}`` is filled per request with the
# newline-joined memories retrieved for the user.
_SYSTEM_PROMPT_TEMPLATE = """
You are NeuroFlora, an intelligent plant disease assistant.

Previous user memories:
{memory_context}

Help users with:
- plant diseases
- crop health
- farming guidance
- pesticide awareness
- plant care
"""


async def generate_response(user_id, user_query: str, *, model: str = _DEFAULT_MODEL):
    """Request a streamed chat completion for *user_query*.

    Memories retrieved for the user are embedded in the system prompt, and
    the query is sent as the user message.

    Args:
        user_id: Identifier passed through to ``search_memory``.
        user_query: The user's message; used both as the memory search
            query and as the ``user`` chat message.
        model: Model identifier for the completion request. Defaults to the
            model this function has always used.

    Returns:
        The awaited result of ``client.chat.completions.create`` called with
        ``stream=True`` — presumably an async stream of completion chunks;
        confirm against the client returned by ``get_llm``.
    """
    # NOTE(review): search_memory is called synchronously inside a coroutine.
    # If it performs blocking network I/O it will stall the event loop;
    # consider offloading it to a worker thread once its thread-safety is
    # confirmed.
    memories = search_memory(user_id, user_query)

    # Treat a missing result (None) the same as "no memories" so that join()
    # does not raise TypeError.
    memory_context = "\n".join(memories or [])

    # NOTE(review): memory text is interpolated into the system prompt
    # verbatim; stored content can therefore influence model instructions.
    system_prompt = _SYSTEM_PROMPT_TEMPLATE.format(memory_context=memory_context)

    client = get_llm()
    response = await client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_query},
        ],
        stream=True,
    )
    return response