| import re |
| from services.model_visitor import ModelVisitor |
|
|
|
|
class IbmExtractCodeblock(ModelVisitor):
    """Visitor that extracts code snippets from a block of text.

    The text is presumably a raw model response (the pattern looks for an
    ``<|endoftext|>`` marker) -- confirm against the callers.
    """

    # Two alternatives are tried at each position in the text:
    #   1. ``### Output: `` followed by any characters, newlines included
    #      (captured lazily as group 1), up to the first ``<|endoftext|>``
    #      marker or, failing that, the end of the string (``\Z``).
    #   2. A fenced block: three backticks, an optional run of word
    #      characters (e.g. a language tag), a newline, the content
    #      (captured lazily as group 2), a newline, three closing backticks.
    # re.DOTALL lets ``.`` in group 2 span multiple lines.
    _CODE_BLOCK_PATTERN = re.compile(
        r'(?:### Output: ([\s\S]*?))(?:\<\|endoftext\|\>|\Z)|```(?:\w+)?\n(.*?)\n```',
        re.DOTALL,
    )

    def visit(self, _, data):
        """Return the code extracted from ``data``.

        Args:
            _: Ignored by this visitor.
            data (str): Text to search for code snippets.

        Returns:
            str: The extracted code; see ``_get_code_block``.
        """
        return self._get_code_block(data)

    def _get_code_block(self, data):
        """Extract every code snippet from ``data`` and return them as one string.

        A snippet is either the text following ``### Output: `` (up to
        ``<|endoftext|>`` or the end of the input) or the content of a
        triple-backtick fence. Each snippet is stripped of surrounding
        whitespace; snippets that are empty after stripping are dropped.

        Args:
            data (str): The input string containing text blocks.

        Returns:
            str: The snippets in order of appearance, separated by a single
            newline so that the last line of one snippet is never fused with
            the first line of the next. An empty string is returned when
            ``data`` is empty/``None`` or contains no snippet.
        """
        if not data:
            # Nothing to search; also avoids a TypeError on None.
            return ''

        snippets = []
        for output_text, fenced_text in self._CODE_BLOCK_PATTERN.findall(data):
            # Only one alternative participates in a given match, so at most
            # one of the two groups is non-empty.
            snippet = (output_text or fenced_text).strip()
            if snippet:
                snippets.append(snippet)
        return '\n'.join(snippets)
|
|