| | from swift.llm import InferClient, InferRequest, RequestConfig |
| |
|
| |
|
def infer_multilora(engine: InferClient, infer_request: InferRequest):
    """Send the same request through several deployed model/LoRA adapters and print each reply.

    The deployed server is expected to expose at least three model names
    (the base model plus two LoRA adapters); they are queried in the fixed
    order lora1 -> base -> lora2 to mirror the original script's output.

    Args:
        engine: Client already connected to a running `swift deploy` server.
        infer_request: The chat request to replay against every adapter.

    Raises:
        ValueError: If the server exposes fewer than three model names.
    """
    models = engine.models
    print(f'models: {models}')
    # NOTE(review): indices below assume models == [base, lora1, lora2] as
    # registered at deploy time — confirm against the server configuration.
    if len(models) < 3:
        raise ValueError(f'Expected at least 3 deployed models, got {len(models)}: {models}')
    # temperature=0 keeps the comparison across adapters deterministic.
    request_config = RequestConfig(max_tokens=512, temperature=0)

    # (model index, output label) pairs; order and labels match the original script.
    for index, label in ((1, 'lora1-response'), (0, 'response'), (2, 'lora2-response')):
        resp_list = engine.infer([infer_request], request_config, model=models[index])
        response = resp_list[0].choices[0].message.content
        print(f'{label}: {response}')
| |
|
| |
|
if __name__ == '__main__':
    # Connect to a locally running `swift deploy` server and replay one
    # chat prompt through every deployed adapter.
    client = InferClient(host='127.0.0.1', port=8000)
    request = InferRequest(messages=[{'role': 'user', 'content': 'who are you?'}])
    infer_multilora(client, request)
| |
|