Buckets:
MisterAI/LocalAI_Demo_backends / cpu-diffusers.upgrade-tmp /venv /lib /python3.10 /site-packages /lark /reconstruct.py
| """This is an experimental tool for reconstructing text from a shaped tree, based on a Lark grammar. | |
| """ | |
| from typing import Dict, Callable, Iterable, Optional | |
| from .lark import Lark | |
| from .tree import Tree, ParseTree | |
| from .visitors import Transformer_InPlace | |
| from .lexer import Token, PatternStr, TerminalDef | |
| from .grammar import Terminal, NonTerminal, Symbol | |
| from .tree_matcher import TreeMatcher, is_discarded_terminal | |
| from .utils import is_id_continue | |
def is_iter_empty(i):
    """Return True when the iterator ``i`` has nothing left to yield.

    Note that the check is destructive: when ``i`` is not empty, the item
    fetched to find that out is discarded, so the iterator ends up advanced
    by one position.
    """
    # A private sentinel cannot collide with any value the iterator yields
    # (including None or other falsy items).
    exhausted = object()
    return next(i, exhausted) is exhausted
class WriteTokensTransformer(Transformer_InPlace):
    """Inserts discarded tokens into their correct place, according to the rules of grammar.

    For every tree node flagged with ``meta.match_tree``, the node is replaced
    by a flat list that follows ``meta.orig_expansion``: text for each
    discarded terminal is written out, and the node's children fill the
    remaining positions in order.
    """

    # Terminal definitions, keyed by terminal name.
    tokens: Dict[str, TerminalDef]
    # User overrides, keyed by terminal name: each callable receives the
    # symbol and returns the text to emit for it.
    term_subs: Dict[str, Callable[[Symbol], str]]

    def __init__(self, tokens: Dict[str, TerminalDef], term_subs: Dict[str, Callable[[Symbol], str]]) -> None:
        self.tokens = tokens
        self.term_subs = term_subs

    def __default__(self, data, children, meta):
        """Rebuild the token sequence of one node.

        Returns:
            ``Tree(data, children)`` unchanged when ``meta`` has no truthy
            ``match_tree`` attribute; otherwise a list of the items to write
            (substituted text, tokens, and not-yet-expanded subtrees).

        Raises:
            NotImplementedError: a discarded terminal has no entry in
                ``term_subs`` and its pattern is not a plain string.
        """
        if not getattr(meta, 'match_tree', False):
            return Tree(data, children)

        iter_args = iter(children)
        to_write = []
        for sym in meta.orig_expansion:
            if is_discarded_terminal(sym):
                # Only the dictionary lookup is guarded: a KeyError raised
                # *inside* a user-supplied substitution callback must surface
                # to the caller instead of silently falling back to the
                # grammar's own terminal text.
                try:
                    substitute = self.term_subs[sym.name]
                except KeyError:
                    t = self.tokens[sym.name]
                    if not isinstance(t.pattern, PatternStr):
                        raise NotImplementedError("Reconstructing regexps not supported yet: %s" % t)
                    v = t.pattern.value
                else:
                    v = substitute(sym)
                to_write.append(v)
            else:
                x = next(iter_args)
                if isinstance(x, list):
                    # A child that was already flattened by this transformer:
                    # splice its items in rather than nesting the list.
                    to_write += x
                else:
                    # Sanity checks: the child must correspond to the symbol
                    # at this position of the expansion.
                    if isinstance(x, Token):
                        assert Terminal(x.type) == sym, x
                    else:
                        assert NonTerminal(x.data) == sym, (sym, x)
                    to_write.append(x)

        # Every child must have been matched against a symbol.
        assert is_iter_empty(iter_args)
        return to_write
class Reconstructor(TreeMatcher):
    """
    A Reconstructor that will, given a full parse Tree, generate source code.

    Note:
        The reconstructor cannot generate values from regexps. If you need to produce discarded
        regexes, such as newlines, use `term_subs` and provide default values for them.

    Parameters:
        parser: a Lark instance
        term_subs: a dictionary of [Terminal name as str] to [callable that receives the
            terminal's Symbol and returns the output text as str]
    """

    write_tokens: WriteTokensTransformer

    def __init__(self, parser: Lark, term_subs: Optional[Dict[str, Callable[[Symbol], str]]]=None) -> None:
        TreeMatcher.__init__(self, parser)

        # self.tokens is not assigned in this class; presumably it is set up
        # by TreeMatcher.__init__ -- TODO confirm.
        self.write_tokens = WriteTokensTransformer({t.name: t for t in self.tokens}, term_subs or {})

    def _reconstruct(self, tree):
        """Lazily yield the text items of ``tree``, expanding nested subtrees depth-first."""
        unreduced_tree = self.match_tree(tree, tree.data)

        res = self.write_tokens.transform(unreduced_tree)
        for item in res:
            if isinstance(item, Tree):
                # TODO use orig_expansion.rulename to support templates
                yield from self._reconstruct(item)
            else:
                yield item

    def reconstruct(self, tree: ParseTree, postproc: Optional[Callable[[Iterable[str]], Iterable[str]]]=None, insert_spaces: bool=True) -> str:
        """Return the source text reconstructed from ``tree``.

        Parameters:
            tree: the parse tree to turn back into text
            postproc: optional callable that receives the iterable of text items
                and returns the iterable to actually join
            insert_spaces: when true, a single space is inserted between two
                adjacent items whose touching characters both satisfy
                ``is_id_continue``, so that they do not fuse into one word
        """
        x = self._reconstruct(tree)
        if postproc:
            x = postproc(x)

        y = []
        prev_item = ''
        for item in x:
            if item:
                if insert_spaces and prev_item and is_id_continue(prev_item[-1]) and is_id_continue(item[0]):
                    y.append(' ')
                # Only non-empty items become the new left neighbour: an empty
                # item contributes no characters, so it must not hide the
                # previous text from the spacing check of the following item.
                prev_item = item
            y.append(item)
        return ''.join(y)
Xet Storage Details
- Size: 3.76 kB
- Xet hash: a4fd41b21d78d5e8a6bca982d1caa68660d7a499b9c305aadf94970b07fa6c95
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.