| | def check_math_result_and_give_tips(inputs): |
| | from .orm import MathAccuracy |
| | acc = MathAccuracy() |
| | |
| | prompt = 'But wait... It seems I made a mistake,' |
| | contents = [input['messages'][-1]['content'] for input in inputs] |
| | rewards = acc(contents, [input['solution'] for input in inputs]) |
| | for reward, input in zip(rewards, inputs): |
| | content = input['messages'][-1]['content'] |
| | if reward < 1 and prompt not in content: |
| | if '<answer>' in content: |
| | content = content[:content.index('<answer>')] |
| | if '</think>' in content: |
| | content = content[:content.index('</think>')] |
| | content += prompt |
| | input['messages'][-1]['content'] = content |
| | input['finished'] = False |
| | else: |
| | input['finished'] = True |
| | return inputs |
| |
|
| |
|
| | def check_math_result_and_give_tips_multi_turn(inputs): |
| | from .orm import MathAccuracy |
| | acc = MathAccuracy() |
| | prompt = 'The answer is not correct, It seems You made a mistake, you need to recheck very carefully.' |
| | contents = [input['messages'][-1]['content'] for input in inputs] |
| | rewards = acc(contents, [input['solution'] for input in inputs]) |
| | for reward, input in zip(rewards, inputs): |
| | content = input['messages'][-2]['content'] |
| | if reward < 1 and prompt not in content: |
| | input['messages'].append({'role': 'user', 'content': prompt}) |
| | input['finished'] = False |
| | else: |
| | input['finished'] = True |
| | return inputs |
| |
|
| |
|
| | multi_turns = { |
| | 'math_tip_trick': check_math_result_and_give_tips, |
| | 'math_tip_trick_multi_turn': check_math_result_and_give_tips_multi_turn, |
| | } |
| |
|