TiME
Collection
The TiME collection gathers monolingual BERT-style encoders for 16 languages, each available in three sizes (xs, s, m). Each model outputs embeddings distilled from XLM-R large.
•
49 items
•
Updated
Monolingual BERT-style encoder that outputs embeddings for Irish. Distilled from FacebookAI/xlm-roberta-large.
from transformers import AutoTokenizer, AutoModel
import torch
# Load the Irish ("ga") TiME encoder and its tokenizer from the Hugging Face Hub.
repo = "dschulmeist/TiME-ga-s"
tok = AutoTokenizer.from_pretrained(repo)
mdl = AutoModel.from_pretrained(repo)
# The snippet only runs inference: switch to eval mode so dropout is
# disabled and repeated encodings of the same text are deterministic.
mdl.eval()
def mean_pool(last_hidden_state, attention_mask):
    """Return sentence embeddings by averaging token vectors over valid positions.

    Padding tokens (attention_mask == 0) contribute nothing to the sum, and
    the divisor is clamped so an all-padding row cannot divide by zero.
    """
    weights = attention_mask.unsqueeze(-1).type_as(last_hidden_state)
    summed = (last_hidden_state * weights).sum(dim=1)
    counts = weights.sum(dim=1).clamp(min=1e-9)
    return summed / counts
# Tokenize a batch of one sentence; padding/truncation keep the call safe for
# arbitrary batches and over-long inputs.
inputs = tok(["example sentence"], padding=True, truncation=True, return_tensors="pt")
# Pure inference: disable autograd so no activation graph is built,
# cutting memory use and speeding up the forward pass.
with torch.no_grad():
    outputs = mdl(**inputs)
emb = mean_pool(outputs.last_hidden_state, inputs['attention_mask'])
print(emb.shape)  # (batch_size, hidden_dim)