Instructions to use Sefaria/en_subref_ner with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- spaCy
How to use Sefaria/en_subref_ner with spaCy:
```python
!pip install https://huggingface.co/Sefaria/en_subref_ner/resolve/main/en_subref_ner-any-py3-none-any.whl

# Using spacy.load().
import spacy
nlp = spacy.load("en_subref_ner")

# Importing as module.
import en_subref_ner
nlp = en_subref_ner.load()
```
- Notebooks
- Google Colab
- Kaggle
| import spacy, re | |
| from spacy.tokenizer import Tokenizer | |
| """ | |
| python -m spacy package ref_he packages -c /Users/nss/sefaria/project/sefaria/spacy_function_registry.py -b wheel,sdist -n ref_ner -v 1.0.0 | |
| python -m spacy huggingface-hub push packages/he_ref_ner-1.0.0/dist/he_ref_ner-1.0.0-py3-none-any.whl -o Sefaria | |
| """ | |
def inner_punct_tokenizer_factory():
    """Return a callback that builds a tokenizer splitting on inner punctuation.

    The returned callable takes an ``nlp`` object and constructs a spaCy
    ``Tokenizer`` from ``nlp.vocab``. Prefix and suffix rules are compiled
    from ``nlp.Defaults``; infix splitting uses a hand-written punctuation
    character class instead of the language's default infix rules.

    Returns:
        A function ``inner_punct_tokenizer(nlp)`` that returns a ``Tokenizer``.
    """
    # NOTE(review): no registry decorator is visible on this factory; if it is
    # meant to be referenced by name from a spaCy config, confirm that it is
    # registered somewhere.

    def inner_punct_tokenizer(nlp):
        """Build the ``Tokenizer`` for *nlp* with the custom infix pattern."""
        # Used in place of
        # spacy.util.compile_infix_regex(nlp.Defaults.infixes): any single
        # punctuation character listed here is an infix split point.
        #
        # The non-ASCII members were mis-encoded in this copy of the file.
        # They are written as escapes so they survive re-encoding:
        #   \u2026 ellipsis, \u2018 \u2019 single curly quotes,
        #   \u201c \u201d double curly quotes, \u2013 en dash.
        # The ellipsis is unambiguous; the quotes and the dash are
        # reconstructed from position -- TODO confirm against upstream.
        infix_re = re.compile(
            r'''[.,?!:;\u2026\u2018\u2019`\u201c\u201d"'~\u2013\-/()<>]'''
        )
        prefix_re = spacy.util.compile_prefix_regex(nlp.Defaults.prefixes)
        suffix_re = spacy.util.compile_suffix_regex(nlp.Defaults.suffixes)
        return Tokenizer(
            nlp.vocab,
            prefix_search=prefix_re.search,
            suffix_search=suffix_re.search,
            infix_finditer=infix_re.finditer,
            token_match=None,
        )

    return inner_punct_tokenizer