# Custom "TunBERT-classifier" pipeline for the HuggingFace transformers library.
from transformers import Pipeline, AutoModelForSequenceClassification,AutoTokenizer
import torch
from transformers.pipelines import PIPELINE_REGISTRY
class TBCP(Pipeline):
    """Text-classification pipeline for TunBERT.

    For a single input text, returns a dict with a synthetic label name
    (``"Label_<i>"`` for the argmax class) and the raw logits as a list.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # NOTE(review): assumes kwargs["tokenizer"] is a model id / path string
        # accepted by AutoTokenizer.from_pretrained; this raises KeyError if no
        # "tokenizer" kwarg is given and fails if an already-instantiated
        # tokenizer object is passed — confirm against callers.
        self.tokenizer = AutoTokenizer.from_pretrained(kwargs["tokenizer"])

    def _sanitize_parameters(self, **kwargs):
        """Split pipeline kwargs into (preprocess, forward, postprocess) kwargs.

        Only ``top_k`` is recognized; it is routed to ``postprocess``.
        """
        postprocess_kwargs = {}
        # Bug fix: the original tested `"text_pair" in kwargs` but then read
        # kwargs["top_k"], raising KeyError when text_pair was supplied without
        # top_k and dropping top_k when it was supplied alone.
        if "top_k" in kwargs:
            postprocess_kwargs["top_k"] = kwargs["top_k"]
        return {}, {}, postprocess_kwargs

    def preprocess(self, text):
        """Tokenize the input text into PyTorch tensors."""
        return self.tokenizer(text, return_tensors="pt")

    def _forward(self, model_inputs):
        """Run the underlying sequence-classification model on tokenized inputs."""
        return self.model(**model_inputs)

    def postprocess(self, model_outputs, top_k=None):
        """Convert raw model output into a ``{"label", "logits"}`` dict.

        ``top_k`` is accepted for interface compatibility but is currently
        unused — TODO: return the top-k classes when it is set.
        """
        logits = model_outputs.logits
        probabilities = torch.nn.functional.softmax(logits, dim=-1)
        best_class = probabilities.argmax().item()
        # Synthetic label name; the model config's id2label mapping is not consulted.
        label = f"Label_{best_class}"
        logits = logits.squeeze().tolist()
        return {"label": label, "logits": logits}
# Register the custom pipeline so it can be instantiated via
# `pipeline("TunBERT-classifier", ...)`.  (Fix: removed a stray trailing `|`
# scrape artifact that made this statement a syntax error.)
PIPELINE_REGISTRY.register_pipeline(
    "TunBERT-classifier",
    pipeline_class=TBCP,
    pt_model=AutoModelForSequenceClassification,
    # Default (repo id, revision) checkpoint used when no model is supplied.
    default={"pt": ("not-lain/TunBERT", "main")},
    type="text",  # current support type: text, audio, image, multimodal
)