import os

from openai import OpenAI

os.environ['CUDA_VISIBLE_DEVICES'] = '0'


def get_infer_request():
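    """Build the demo request: one user message plus the tool definitions."""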
    messages = [{'role': 'user', 'content': "How's the weather in Beijing today?"}]
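    # Tool schema in flat JSON-Schema form. Note that the strict OpenAI spec
    # wraps each entry as {'type': 'function', 'function': {...}}; whether the
    # flat form is accepted depends on the OpenAI-compatible server you deploy.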
    tools = [{
        'name': 'get_current_weather',
        'description': 'Get the current weather in a given location',
        'parameters': {
            'type': 'object',
            'properties': {
                'location': {
                    'type': 'string',
                    'description': 'The city and state, e.g. San Francisco, CA'
                },
                'unit': {
                    'type': 'string',
                    'enum': ['celsius', 'fahrenheit']
                }
            },
            'required': ['location']
        }
    }]
    return messages, tools


def infer(client, model: str, messages, tools):
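    """Non-streaming demo: one tool-call round, then the final answer."""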
    messages = messages.copy()
    query = messages[0]['content']
    resp = client.chat.completions.create(model=model, messages=messages, tools=tools, max_tokens=512, temperature=0)
    response = resp.choices[0].message.content
    print(f'query: {query}')
    print(f'response: {response}')
    print(f'tool_calls: {resp.choices[0].message.tool_calls}')
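
    # Mock tool execution: feed a hard-coded result back as the tool message.
    # A strictly OpenAI-compatible server also expects the assistant message to
    # carry `tool_calls` and the tool message a matching `tool_call_id`; this
    # simplified form assumes the deployed server tolerates their absence.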
    tool = '{"temperature": 32, "condition": "Sunny", "humidity": 50}'
    print(f'tool_response: {tool}')
    messages += [{'role': 'assistant', 'content': response}, {'role': 'tool', 'content': tool}]
    resp = client.chat.completions.create(model=model, messages=messages, tools=tools, max_tokens=512, temperature=0)
    response2 = resp.choices[0].message.content
    print(f'response2: {response2}')


def infer_stream(client, model: str, messages, tools):
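    """Streaming variant of infer(): prints tokens as they arrive."""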
    messages = messages.copy()
    query = messages[0]['content']
    gen = client.chat.completions.create(
        model=model, messages=messages, tools=tools, max_tokens=512, temperature=0, stream=True)
    response = ''
    print(f'query: {query}\nresponse: ', end='')
    for chunk in gen:
        # `delta.content` can be None (e.g. on tool-call chunks), so guard it.
        delta = chunk.choices[0].delta.content or ''
        response += delta
        print(delta, end='', flush=True)
    print()
    # Assumes the server emits the tool calls on the last streamed chunk.
    print(f'tool_calls: {chunk.choices[0].delta.tool_calls}')
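
    # Second round: same mocked tool result as in infer().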
    tool = '{"temperature": 32, "condition": "Sunny", "humidity": 50}'
    print(f'tool_response: {tool}')
    messages += [{'role': 'assistant', 'content': response}, {'role': 'tool', 'content': tool}]
    gen = client.chat.completions.create(
        model=model, messages=messages, tools=tools, max_tokens=512, temperature=0, stream=True)
    print(f'query: {query}\nresponse2: ', end='')
    for chunk in gen:
        print(chunk.choices[0].delta.content or '', end='', flush=True)
    print()


if __name__ == '__main__':
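    # Assumes an OpenAI-compatible server is already listening at
    # http://127.0.0.1:8000/v1; 'EMPTY' is a placeholder api_key for a local
    # deployment that does not check credentials.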
    host: str = '127.0.0.1'
    port: int = 8000
    client = OpenAI(
        api_key='EMPTY',
        base_url=f'http://{host}:{port}/v1',
    )
    model = client.models.list().data[0].id  # use the first deployed model
    print(f'model: {model}')
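
    # Run the non-streaming and streaming variants on the same request.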
    messages, tools = get_infer_request()
    infer(client, model, messages, tools)
    infer_stream(client, model, messages, tools)