|
--- |
|
license: apache-2.0 |
|
datasets: debatelab/deepa2 |
|
--- |
|
from transformers import AutoTokenizer, AutoModelForTokenClassification |
|
from transformers import pipeline |
|
tokenizer = AutoTokenizer.from_pretrained("xlm-roberta-large-finetuned-conll03-english") |
|
model = AutoModelForTokenClassification.from_pretrained("xlm-roberta-large-finetuned-conll03-english") |
|
classifier = pipeline("ner", model=model, tokenizer=tokenizer) |
|
classifier("Alya told Jasmine that Andrew could pay with cash..") |
|
[{'end': 2, |
|
'entity': 'I-PER', |
|
'index': 1, |
|
'score': 0.9997861, |
|
'start': 0, |
|
'word': '▁Al'}, |
|
{'end': 4, |
|
'entity': 'I-PER', |
|
'index': 2, |
|
'score': 0.9998591, |
|
'start': 2, |
|
'word': 'ya'}, |
|
{'end': 16, |
|
'entity': 'I-PER', |
|
'index': 4, |
|
'score': 0.99995816, |
|
'start': 10, |
|
'word': '▁Jasmin'}, |
|
{'end': 17, |
|
'entity': 'I-PER', |
|
'index': 5, |
|
'score': 0.9999584, |
|
'start': 16, |
|
'word': 'e'}, |
|
{'end': 29, |
|
'entity': 'I-PER', |
|
'index': 7, |
|
'score': 0.99998057, |
|
'start': 23, |
|
'word': '▁Andrew'}] |
|
|
|
Recommendations |
|
Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. |
|
|
|
Training |
|
See the following resources for training data and training procedure details: |
|
|
|
XLM-RoBERTa-large model card |
|
CoNLL-2003 data card |
|
Associated paper |
|
Evaluation |
|
See the associated paper for evaluation details. |
|
|
|
Environmental Impact |
|
Carbon emissions can be estimated using the Machine Learning Impact calculator presented in Lacoste et al. (2019). |
|
|
|
Hardware Type: 500 32GB Nvidia V100 GPUs (from the associated paper) |
|
Hours used: More information needed |
|
Cloud Provider: More information needed |
|
Compute Region: More information needed |
|
Carbon Emitted: More information needed |
|
Technical Specifications |
|
See the associated paper for further details. |
|
|
|
Citation |
|
BibTeX: |
|
|
|
@article{conneau2019unsupervised, |
|
title={Unsupervised Cross-lingual Representation Learning at Scale}, |
|
author={Conneau, Alexis and Khandelwal, Kartikay and Goyal, Naman and Chaudhary, Vishrav and Wenzek, Guillaume and Guzm{\'a}n, Francisco and Grave, Edouard and Ott, Myle and Zettlemoyer, Luke and Stoyanov, Veselin}, |
|
journal={arXiv preprint arXiv:1911.02116}, |
|
year={2019} |
|
} |
|
|
|
APA: |
|
|
|
Conneau, A., Khandelwal, K., Goyal, N., Chaudhary, V., Wenzek, G., Guzmán, F., ... & Stoyanov, V. (2019). Unsupervised cross-lingual representation learning at scale. arXiv preprint arXiv:1911.02116. |
|
Model Card Authors |
|
This model card was written by the team at Hugging Face. |
|
|
|
How to Get Started with the Model |
|
Use the code below to get started with the model. You can use this model directly within a pipeline for NER. |
|
|
|
Click to expand |
|
from transformers import AutoTokenizer, AutoModelForTokenClassification |
|
from transformers import pipeline |
|
tokenizer = AutoTokenizer.from_pretrained("xlm-roberta-large-finetuned-conll03-english") |
|
model = AutoModelForTokenClassification.from_pretrained("xlm-roberta-large-finetuned-conll03-english") |
|
classifier = pipeline("ner", model=model, tokenizer=tokenizer) |
|
classifier("Hello I'm Omar and I live in Zürich.") |
|
|
|
[{'end': 14, |
|
'entity': 'I-PER', |
|
'index': 5, |
|
'score': 0.9999175, |
|
'start': 10, |
|
'word': '▁Omar'}, |
|
{'end': 35, |
|
'entity': 'I-LOC', |
|
'index': 10, |
|
'score': 0.9999906, |
|
'start': 29, |
|
'word': '▁Zürich'}] |
|
from transformers import AutoTokenizer, AutoModelForTokenClassification |
|
from transformers import pipeline |
|
tokenizer = AutoTokenizer.from_pretrained("xlm-roberta-large-finetuned-conll03-english") |
|
model = AutoModelForTokenClassification.from_pretrained("xlm-roberta-large-finetuned-conll03-english") |
|
classifier = pipeline("ner", model=model, tokenizer=tokenizer) |
|
classifier("Alya told Jasmine that Andrew could pay with cash..") |
|
[{'end': 2, |
|
'entity': 'I-PER', |
|
'index': 1, |
|
'score': 0.9997861, |
|
'start': 0, |
|
'word': '▁Al'}, |
|
{'end': 4, |
|
'entity': 'I-PER', |
|
'index': 2, |
|
'score': 0.9998591, |
|
'start': 2, |
|
'word': 'ya'}, |
|
{'end': 16, |
|
'entity': 'I-PER', |
|
'index': 4, |
|
'score': 0.99995816, |
|
'start': 10, |
|
'word': '▁Jasmin'}, |
|
{'end': 17, |
|
'entity': 'I-PER', |
|
'index': 5, |
|
'score': 0.9999584, |
|
'start': 16, |
|
'word': 'e'}, |
|
{'end': 29, |
|
'entity': 'I-PER', |
|
'index': 7, |
|
'score': 0.99998057, |
|
'start': 23, |
|
'word': '▁Andrew'}] |
|
|
|
Recommendations |
|
Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. |
|
|
|
Training |
|
See the following resources for training data and training procedure details: |
|
|
|
XLM-RoBERTa-large model card |
|
CoNLL-2003 data card |
|
Associated paper |
|
Evaluation |
|
See the associated paper for evaluation details. |
|
|
|
Environmental Impact |
|
Carbon emissions can be estimated using the Machine Learning Impact calculator presented in Lacoste et al. (2019). |
|
|
|
Hardware Type: 500 32GB Nvidia V100 GPUs (from the associated paper) |
|
Hours used: More information needed |
|
Cloud Provider: More information needed |
|
Compute Region: More information needed |
|
Carbon Emitted: More information needed |
|
Technical Specifications |
|
See the associated paper for further details. |
|
|
|
Citation |
|
BibTeX: |
|
|
|
@article{conneau2019unsupervised, |
|
title={Unsupervised Cross-lingual Representation Learning at Scale}, |
|
author={Conneau, Alexis and Khandelwal, Kartikay and Goyal, Naman and Chaudhary, Vishrav and Wenzek, Guillaume and Guzm{\'a}n, Francisco and Grave, Edouard and Ott, Myle and Zettlemoyer, Luke and Stoyanov, Veselin}, |
|
journal={arXiv preprint arXiv:1911.02116}, |
|
year={2019} |
|
} |
|
|
|
APA: |
|
|
|
Conneau, A., Khandelwal, K., Goyal, N., Chaudhary, V., Wenzek, G., Guzmán, F., ... & Stoyanov, V. (2019). Unsupervised cross-lingual representation learning at scale. arXiv preprint arXiv:1911.02116. |
|
Model Card Authors |
|
This model card was written by the team at Hugging Face. |
|
|
|
How to Get Started with the Model |
|
Use the code below to get started with the model. You can use this model directly within a pipeline for NER. |
|
|
|
Click to expand |
|
from transformers import AutoTokenizer, AutoModelForTokenClassification |
|
from transformers import pipeline |
|
tokenizer = AutoTokenizer.from_pretrained("xlm-roberta-large-finetuned-conll03-english") |
|
model = AutoModelForTokenClassification.from_pretrained("xlm-roberta-large-finetuned-conll03-english") |
|
classifier = pipeline("ner", model=model, tokenizer=tokenizer) |
|
classifier("Hello I'm Omar and I live in Zürich.") |
|
|
|
[{'end': 14, |
|
'entity': 'I-PER', |
|
'index': 5, |
|
'score': 0.9999175, |
|
'start': 10, |
|
'word': '▁Omar'}, |
|
{'end': 35, |
|
'entity': 'I-LOC', |
|
'index': 10, |
|
'score': 0.9999906, |
|
'start': 29, |
|
'word': '▁Zürich'} |
|
] |
|
from datasets import load_dataset |
|
|
|
dataset = load_dataset("debatelab/deepa2") |