Mixtral-8x7B-Instruct-FastAPI

Sleeping

App Files Files Community

PinkAlpaca committed on Aug 6, 2024

Commit

a87331a

verified ·

1 Parent(s): c4aa949

Update main.py

Browse files

Files changed (1) hide show

main.py +29 -20

main.py CHANGED Viewed

@@ -2,12 +2,14 @@ import os
 from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
 import uvicorn
-import requests  # Use requests for HTTP calls to the Gemini API
 app = FastAPI()
 # Define the Gemini API endpoint for the primary model
-primary_url = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent"
 # Define the data model for the request body
 class Item(BaseModel):
@@ -22,6 +24,13 @@ class Item(BaseModel):
     repetition_penalty: float = 1.0
     key: str = None
 # Function to generate the response JSON
 def generate_response_json(item, output, tokens, model_name):
     return {
@@ -47,18 +56,19 @@ def generate_response_json(item, output, tokens, model_name):
 # Function to call the Gemini API
 def call_gemini_api(url, input_text, generate_kwargs):
     headers = {
-        "Authorization": f"Bearer {os.getenv('GEMINI_API_KEY')}",  # Ensure the API key is set in the environment
         "Content-Type": "application/json"
     }
     data = {
-        "prompt": input_text,
-        "temperature": generate_kwargs['temperature'],
-        "max_output_tokens": generate_kwargs['max_new_tokens'],
-        "top_p": generate_kwargs['top_p'],
-        "repetition_penalty": generate_kwargs['repetition_penalty'],
-        "do_sample": generate_kwargs['do_sample'],
-        "seed": generate_kwargs['seed'],
     }
     try:
         response = requests.post(url, headers=headers, json=data)
@@ -79,14 +89,14 @@ async def generate_text(item: Item = None):
             raise HTTPException(status_code=400, detail="Parameter `input` or `system prompt` is required.")
         input_ = ""
-        if item.system_prompt != None and item.system_output != None:
             input_ = f"<s>[INST] {item.system_prompt} [/INST] {item.system_output}</s>"
-        elif item.system_prompt != None:
             input_ = f"<s>[INST] {item.system_prompt} [/INST]</s>"
-        elif item.system_output != None:
             input_ = f"<s>{item.system_output}</s>"
-        if item.templates != None:
             for num, template in enumerate(item.templates, start=1):
                 input_ += f"\n<s>[INST] Beginning of archived conversation {num} [/INST]</s>"
                 for i in range(0, len(template), 2):
@@ -95,7 +105,7 @@ async def generate_text(item: Item = None):
                 input_ += f"\n<s>[INST] End of archived conversation {num} [/INST]</s>"
         input_ += f"\n<s>[INST] Beginning of active conversation [/INST]</s>"
-        if item.history != None:
             for input_text, output_text in item.history:
                 input_ += f"\n<s>[INST] {input_text} [/INST]"
                 input_ += f"\n{output_text}"
@@ -117,8 +127,8 @@ async def generate_text(item: Item = None):
         tokens = 0
         response_data = call_gemini_api(primary_url, input_, generate_kwargs)
-        output = response_data['responses'][0]['content']
-        tokens = len(response_data['responses'][0]['tokens'])
         return generate_response_json(item, output, tokens, primary_url)
@@ -128,6 +138,5 @@ async def generate_text(item: Item = None):
     except Exception as e:
         raise HTTPException(status_code=500, detail=f"An error occurred: {e}")
-if "KEY" in os.environ:
-    if item.key != os.environ["KEY"]:
-        raise HTTPException(status_code=401, detail="Valid key is required.")

 from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
 import uvicorn
+import requests
+from google.auth.transport.requests import Request
+from google.oauth2 import id_token
 app = FastAPI()
 # Define the Gemini API endpoint for the primary model
+primary_url = "https://us-east4-aiplatform.googleapis.com/v1/projects/gen-lang-client-0770444709/locations/us-east4/publishers/google/models/gemini-1.5-flash-latest:generateContent"
 # Define the data model for the request body
 class Item(BaseModel):
     repetition_penalty: float = 1.0
     key: str = None
+# Function to obtain the bearer token sent with Gemini API requests.
+# NOTE(review): the token comes from id_token.fetch_id_token, so it is presumably an
+# OpenID Connect ID token rather than an OAuth 2.0 access token — confirm the endpoint accepts it.
+def get_access_token():  # Takes no arguments; returns whatever fetch_id_token yields
+    auth_req = Request()  # Transport object handed to fetch_id_token
+    target_audience = primary_url.split("models")[0]  # Audience = the part of primary_url before the first "models"
+    token = id_token.fetch_id_token(auth_req, target_audience)  # Fetched anew on every call; nothing is cached here
+    return token
 # Function to generate the response JSON
 def generate_response_json(item, output, tokens, model_name):
     return {
 # Function to call the Gemini API
 def call_gemini_api(url, input_text, generate_kwargs):
+    access_token = get_access_token()
     headers = {
+        "Authorization": f"Bearer {access_token}",
         "Content-Type": "application/json"
     }
     data = {
+        "contents": [{"role": "user", "parts": [{"text": input_text}]}],
+        "generationConfig": {
+            "temperature": generate_kwargs['temperature'],
+            "maxOutputTokens": generate_kwargs['max_new_tokens'],
+            "topP": generate_kwargs['top_p'],
+            "repetitionPenalty": generate_kwargs['repetition_penalty'],
+        }
     }
     try:
         response = requests.post(url, headers=headers, json=data)
             raise HTTPException(status_code=400, detail="Parameter `input` or `system prompt` is required.")
         input_ = ""
+        if item.system_prompt is not None and item.system_output is not None:
             input_ = f"<s>[INST] {item.system_prompt} [/INST] {item.system_output}</s>"
+        elif item.system_prompt is not None:
             input_ = f"<s>[INST] {item.system_prompt} [/INST]</s>"
+        elif item.system_output is not None:
             input_ = f"<s>{item.system_output}</s>"
+        if item.templates is not None:
             for num, template in enumerate(item.templates, start=1):
                 input_ += f"\n<s>[INST] Beginning of archived conversation {num} [/INST]</s>"
                 for i in range(0, len(template), 2):
                 input_ += f"\n<s>[INST] End of archived conversation {num} [/INST]</s>"
         input_ += f"\n<s>[INST] Beginning of active conversation [/INST]</s>"
+        if item.history is not None:
             for input_text, output_text in item.history:
                 input_ += f"\n<s>[INST] {input_text} [/INST]"
                 input_ += f"\n{output_text}"
         tokens = 0
         response_data = call_gemini_api(primary_url, input_, generate_kwargs)
+        output = response_data['candidates'][0]['content']['parts'][0]['text']
+        tokens = response_data['usageMetadata']['totalTokenCount']
         return generate_response_json(item, output, tokens, primary_url)
     except Exception as e:
         raise HTTPException(status_code=500, detail=f"An error occurred: {e}")
+if __name__ == "__main__":  # Start the server only when this file is run as a script, not on import
+    uvicorn.run(app, host="0.0.0.0", port=8000)  # Serve the FastAPI app on all interfaces, port 8000