| | --- |
| | license: apache-2.0 |
| | --- |
| | from transformers import AutoTokenizer, AutoModelForTokenClassification |
| | from transformers import pipeline |
| | tokenizer = AutoTokenizer.from_pretrained("xlm-roberta-large-finetuned-conll03-english") |
| | model = AutoModelForTokenClassification.from_pretrained("xlm-roberta-large-finetuned-conll03-english") |
| | classifier = pipeline("ner", model=model, tokenizer=tokenizer) |
| | classifier("Alya told Jasmine that Andrew could pay with cash..") |
| | [{'end': 2, |
| | 'entity': 'I-PER', |
| | 'index': 1, |
| | 'score': 0.9997861, |
| | 'start': 0, |
| | 'word': '▁Al'}, |
| | {'end': 4, |
| | 'entity': 'I-PER', |
| | 'index': 2, |
| | 'score': 0.9998591, |
| | 'start': 2, |
| | 'word': 'ya'}, |
| | {'end': 16, |
| | 'entity': 'I-PER', |
| | 'index': 4, |
| | 'score': 0.99995816, |
| | 'start': 10, |
| | 'word': '▁Jasmin'}, |
| | {'end': 17, |
| | 'entity': 'I-PER', |
| | 'index': 5, |
| | 'score': 0.9999584, |
| | 'start': 16, |
| | 'word': 'e'}, |
| | {'end': 29, |
| | 'entity': 'I-PER', |
| | 'index': 7, |
| | 'score': 0.99998057, |
| | 'start': 23, |
| | 'word': '▁Andrew'}] |
| |
|
| | Recommendations |
| | Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. |
| |
|
| | Training |
| | See the following resources for training data and training procedure details: |
| |
|
| | XLM-RoBERTa-large model card |
| | CoNLL-2003 data card |
| | Associated paper |
| | Evaluation |
| | See the associated paper for evaluation details. |
| |
|
| | Environmental Impact |
| | Carbon emissions can be estimated using the Machine Learning Impact calculator presented in Lacoste et al. (2019). |
| |
|
| | Hardware Type: 500 32GB Nvidia V100 GPUs (from the associated paper) |
| | Hours used: More information needed |
| | Cloud Provider: More information needed |
| | Compute Region: More information needed |
| | Carbon Emitted: More information needed |
| | Technical Specifications |
| | See the associated paper for further details. |
| |
|
| | Citation |
| | BibTeX: |
| |
|
| | @article{conneau2019unsupervised, |
| | title={Unsupervised Cross-lingual Representation Learning at Scale}, |
| | author={Conneau, Alexis and Khandelwal, Kartikay and Goyal, Naman and Chaudhary, Vishrav and Wenzek, Guillaume and Guzm{\'a}n, Francisco and Grave, Edouard and Ott, Myle and Zettlemoyer, Luke and Stoyanov, Veselin}, |
| | journal={arXiv preprint arXiv:1911.02116}, |
| | year={2019} |
| | } |
| |
|
| | APA: |
| |
|
| | Conneau, A., Khandelwal, K., Goyal, N., Chaudhary, V., Wenzek, G., Guzmán, F., ... & Stoyanov, V. (2019). Unsupervised cross-lingual representation learning at scale. arXiv preprint arXiv:1911.02116. |
| | Model Card Authors |
| | This model card was written by the team at Hugging Face. |
| |
|
| | How to Get Started with the Model |
| | Use the code below to get started with the model. You can use this model directly within a pipeline for NER. |
| |
|
| | Click to expand |
| | from transformers import AutoTokenizer, AutoModelForTokenClassification |
| | from transformers import pipeline |
| | tokenizer = AutoTokenizer.from_pretrained("xlm-roberta-large-finetuned-conll03-english") |
| | model = AutoModelForTokenClassification.from_pretrained("xlm-roberta-large-finetuned-conll03-english") |
| | classifier = pipeline("ner", model=model, tokenizer=tokenizer) |
| | classifier("Hello I'm Omar and I live in Zürich.") |
| |
|
| | [{'end': 14, |
| | 'entity': 'I-PER', |
| | 'index': 5, |
| | 'score': 0.9999175, |
| | 'start': 10, |
| | 'word': '▁Omar'}, |
| | {'end': 35, |
| | 'entity': 'I-LOC', |
| | 'index': 10, |
| | 'score': 0.9999906, |
| | 'start': 29, |
| | 'word': '▁Zürich'}] |
| | from transformers import AutoTokenizer, AutoModelForTokenClassification |
| | from transformers import pipeline |
| | tokenizer = AutoTokenizer.from_pretrained("xlm-roberta-large-finetuned-conll03-english") |
| | model = AutoModelForTokenClassification.from_pretrained("xlm-roberta-large-finetuned-conll03-english") |
| | classifier = pipeline("ner", model=model, tokenizer=tokenizer) |
| | classifier("Alya told Jasmine that Andrew could pay with cash..") |
| | [{'end': 2, |
| | 'entity': 'I-PER', |
| | 'index': 1, |
| | 'score': 0.9997861, |
| | 'start': 0, |
| | 'word': '▁Al'}, |
| | {'end': 4, |
| | 'entity': 'I-PER', |
| | 'index': 2, |
| | 'score': 0.9998591, |
| | 'start': 2, |
| | 'word': 'ya'}, |
| | {'end': 16, |
| | 'entity': 'I-PER', |
| | 'index': 4, |
| | 'score': 0.99995816, |
| | 'start': 10, |
| | 'word': '▁Jasmin'}, |
| | {'end': 17, |
| | 'entity': 'I-PER', |
| | 'index': 5, |
| | 'score': 0.9999584, |
| | 'start': 16, |
| | 'word': 'e'}, |
| | {'end': 29, |
| | 'entity': 'I-PER', |
| | 'index': 7, |
| | 'score': 0.99998057, |
| | 'start': 23, |
| | 'word': '▁Andrew'}] |
| |
|
| | Recommendations |
| | Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. |
| |
|
| | Training |
| | See the following resources for training data and training procedure details: |
| |
|
| | XLM-RoBERTa-large model card |
| | CoNLL-2003 data card |
| | Associated paper |
| | Evaluation |
| | See the associated paper for evaluation details. |
| |
|
| | Environmental Impact |
| | Carbon emissions can be estimated using the Machine Learning Impact calculator presented in Lacoste et al. (2019). |
| |
|
| | Hardware Type: 500 32GB Nvidia V100 GPUs (from the associated paper) |
| | Hours used: More information needed |
| | Cloud Provider: More information needed |
| | Compute Region: More information needed |
| | Carbon Emitted: More information needed |
| | Technical Specifications |
| | See the associated paper for further details. |
| |
|
| | Citation |
| | BibTeX: |
| |
|
| | @article{conneau2019unsupervised, |
| | title={Unsupervised Cross-lingual Representation Learning at Scale}, |
| | author={Conneau, Alexis and Khandelwal, Kartikay and Goyal, Naman and Chaudhary, Vishrav and Wenzek, Guillaume and Guzm{\'a}n, Francisco and Grave, Edouard and Ott, Myle and Zettlemoyer, Luke and Stoyanov, Veselin}, |
| | journal={arXiv preprint arXiv:1911.02116}, |
| | year={2019} |
| | } |
| |
|
| | APA: |
| |
|
| | Conneau, A., Khandelwal, K., Goyal, N., Chaudhary, V., Wenzek, G., Guzmán, F., ... & Stoyanov, V. (2019). Unsupervised cross-lingual representation learning at scale. arXiv preprint arXiv:1911.02116. |
| | Model Card Authors |
| | This model card was written by the team at Hugging Face. |
| |
|
| | How to Get Started with the Model |
| | Use the code below to get started with the model. You can use this model directly within a pipeline for NER. |
| |
|
| | Click to expand |
| | from transformers import AutoTokenizer, AutoModelForTokenClassification |
| | from transformers import pipeline |
| | tokenizer = AutoTokenizer.from_pretrained("xlm-roberta-large-finetuned-conll03-english") |
| | model = AutoModelForTokenClassification.from_pretrained("xlm-roberta-large-finetuned-conll03-english") |
| | classifier = pipeline("ner", model=model, tokenizer=tokenizer) |
| | classifier("Hello I'm Omar and I live in Zürich.") |
| |
|
| | [{'end': 14, |
| | 'entity': 'I-PER', |
| | 'index': 5, |
| | 'score': 0.9999175, |
| | 'start': 10, |
| | 'word': '▁Omar'}, |
| | {'end': 35, |
| | 'entity': 'I-LOC', |
| | 'index': 10, |
| | 'score': 0.9999906, |
| | 'start': 29, |
| | 'word': '▁Zürich'}] |
| | from datasets import load_dataset |
| | |
| | dataset = load_dataset("debatelab/deepa2") |