import os


# Pin all work in this script to the first visible GPU.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'


# Default inference backend shared by the tests below; individual tests
# may override it (e.g. test_eval_mllm uses 'pt').
infer_backend = 'vllm'
|
|
|
|
def test_eval_native():
    """Smoke-test the Native eval backend on the 'arc' dataset (10 samples)."""
    from swift.llm import EvalArguments, eval_main

    # Build the argument object up front instead of inlining it into the call.
    args = EvalArguments(
        model='Qwen/Qwen2.5-0.5B-Instruct',
        eval_dataset='arc',
        infer_backend=infer_backend,
        eval_backend='Native',
        eval_limit=10,
        eval_generation_config={'max_new_tokens': 128, 'temperature': 0.1},
        extra_eval_args={'stream': True, 'ignore_errors': True},
    )
    eval_main(args)
|
|
|
|
def test_eval_llm():
    """Smoke-test the OpenCompass eval backend on 'arc_c' (10 samples)."""
    from swift.llm import EvalArguments, eval_main

    eval_args = EvalArguments(
        model='Qwen/Qwen2-7B-Instruct',
        eval_dataset='arc_c',
        infer_backend=infer_backend,
        eval_backend='OpenCompass',
        eval_limit=10,
    )
    eval_main(eval_args)
|
|
|
|
def test_eval_mllm():
    """Smoke-test the VLMEvalKit backend with a multimodal model (10 samples)."""
    from swift.llm import EvalArguments, eval_main

    # 'pt' backend here rather than the module-level default.
    mllm_args = EvalArguments(
        model='Qwen/Qwen2.5-VL-3B-Instruct',
        eval_dataset=['realWorldQA'],
        infer_backend='pt',
        eval_backend='VLMEvalKit',
        eval_limit=10,
        eval_generation_config={'max_new_tokens': 128, 'temperature': 0.1},
    )
    eval_main(mllm_args)
|
|
|
|
def test_eval_url():
    """Deploy a model locally, then evaluate it through the served URL."""
    from swift.llm import DeployArguments, EvalArguments, eval_main, run_deploy

    server_args = DeployArguments(
        model='Qwen/Qwen2-VL-7B-Instruct',
        infer_backend=infer_backend,
        verbose=False,
    )

    with run_deploy(server_args, return_url=True) as url:
        # NOTE(review): with eval_url set, `model` appears to be the served
        # model name (no org prefix), not a checkpoint path — confirm against
        # the deploy naming convention.
        eval_main(EvalArguments(model='Qwen2-VL-7B-Instruct', eval_url=url, eval_dataset=['arc_c']))
|
|
|
|
if __name__ == '__main__':

    # Only the multimodal smoke test runs by default; invoke the others manually.
    test_eval_mllm()
| |
| |
|
|