import os

# Pin the process to the first GPU; must happen before any CUDA-aware
# library (torch / vllm) is imported so device enumeration picks it up.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

# Inference backend shared by the eval tests below.
infer_backend = 'vllm'
|
def test_eval_native():
    """Run a small ARC evaluation (10 samples) through the Native eval backend."""
    # Imported lazily so merely importing this module stays cheap.
    from swift.llm import EvalArguments, eval_main

    args = EvalArguments(
        model='Qwen/Qwen2.5-0.5B-Instruct',
        eval_dataset='arc',
        infer_backend=infer_backend,
        eval_backend='Native',
        eval_limit=10,  # keep the smoke test fast
        eval_generation_config={
            'max_new_tokens': 128,
            'temperature': 0.1,
        },
        extra_eval_args={
            'stream': True,
            'ignore_errors': True,
        },
    )
    eval_main(args)
|
def test_eval_llm():
    """Run a small ARC-C evaluation (10 samples) through the OpenCompass backend."""
    # Imported lazily so merely importing this module stays cheap.
    from swift.llm import EvalArguments, eval_main

    args = EvalArguments(
        model='Qwen/Qwen2-7B-Instruct',
        eval_dataset='arc_c',
        infer_backend=infer_backend,
        eval_backend='OpenCompass',
        eval_limit=10,  # keep the smoke test fast
    )
    eval_main(args)
|
def test_eval_mllm():
    """Run a small multimodal evaluation (realWorldQA) through VLMEvalKit.

    Uses the 'pt' (PyTorch) inference backend rather than the module-level
    default, since this exercises a vision-language model.
    """
    # Imported lazily so merely importing this module stays cheap.
    from swift.llm import EvalArguments, eval_main

    args = EvalArguments(
        model='Qwen/Qwen2.5-VL-3B-Instruct',
        eval_dataset=['realWorldQA'],
        infer_backend='pt',
        eval_backend='VLMEvalKit',
        eval_limit=10,  # keep the smoke test fast
        eval_generation_config={
            'max_new_tokens': 128,
            'temperature': 0.1,
        },
    )
    eval_main(args)
|
def test_eval_url():
    """Deploy a model locally, then evaluate it via the served OpenAI-style URL."""
    # Imported lazily so merely importing this module stays cheap.
    from swift.llm import DeployArguments, EvalArguments, eval_main, run_deploy

    deploy_args = DeployArguments(
        model='Qwen/Qwen2-VL-7B-Instruct',
        infer_backend=infer_backend,
        verbose=False,
    )
    # run_deploy tears the server down when the context exits.
    with run_deploy(deploy_args, return_url=True) as url:
        eval_args = EvalArguments(
            model='Qwen2-VL-7B-Instruct',
            eval_url=url,
            eval_dataset=['arc_c'],
        )
        eval_main(eval_args)
|
if __name__ == '__main__':
    # Only the multimodal smoke test is run by default; invoke the
    # other test_* functions manually as needed.
    test_eval_mllm()