Spaces:
Runtime error
Runtime error
feat(app.py): update app.py
Browse files
- app.py +13 -7
- models/bert/__init__.py +0 -1
- models/bert/configuration_bert.py +0 -51
- models/bert/model_bert.py +0 -41
- models/crf/__init__.py +0 -1
- models/crf/model_crf.py +0 -166
- pipelines/__init__.py +0 -1
- pipelines/ner_pipeline.py +0 -114
- register.py +0 -8
app.py
CHANGED
@@ -1,18 +1,24 @@
|
|
1 |
-
from transformers import
|
2 |
from transformers.pipelines import pipeline
|
3 |
-
from register import register
|
4 |
import gradio as gr
|
5 |
from huggingface_hub import login
|
6 |
import os
|
7 |
-
register()
|
8 |
login(os.environ["HF_Token"])
|
9 |
-
|
10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
ner_predictor = pipeline(
|
12 |
-
"
|
13 |
model=model,
|
|
|
14 |
tokenizer=tokenizer,
|
15 |
-
device="cpu"
|
|
|
16 |
)
|
17 |
|
18 |
def ner_predictor_gradio(input):
|
|
|
1 |
+
from transformers import AutoTokenizer,AutoModel,BertTokenizer
|
2 |
from transformers.pipelines import pipeline
|
|
|
3 |
import gradio as gr
|
4 |
from huggingface_hub import login
|
5 |
import os
|
|
|
6 |
# Authenticate with the Hugging Face Hub using the token stored in the
# HF_Token environment variable (a missing variable raises KeyError).
hub_token = os.environ["HF_Token"]
login(hub_token)

# The model repository ships its own modelling code, hence
# trust_remote_code; use_auth_token reuses the login performed above.
model = AutoModel.from_pretrained(
    "minskiter/resume-token-classification",
    use_auth_token=True,
    trust_remote_code=True,
)

# The tokenizer is loaded from a separate repository.
tokenizer = AutoTokenizer.from_pretrained("hfl/chinese-bert-wwm")

# Build the "nerpipe" pipeline on CPU from the loaded model and tokenizer.
pipeline_options = dict(
    task="nerpipe",
    model=model,
    config=model.config,
    tokenizer=tokenizer,
    device="cpu",
    trust_remote_code=True,
)
ner_predictor = pipeline(**pipeline_options)
|
23 |
|
24 |
def ner_predictor_gradio(input):
|
models/bert/__init__.py
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
from .model_bert import BertCrfModel,BertCrfConfig
|
|
|
|
models/bert/configuration_bert.py
DELETED
@@ -1,51 +0,0 @@
|
|
1 |
-
from transformers import PretrainedConfig
|
2 |
-
|
3 |
-
class BertCrfConfig(PretrainedConfig):
    """Configuration for the BERT + LSTM + CRF model (``model_type="bert_crf"``).

    Besides the BERT-style hyper-parameters, which are stored unchanged as
    attributes of the same name, it carries:

    Args:
        lstm_hidden_state: hidden size of the LSTM (stored as-is).
        num_tags: number of tags.
        tag2id: mapping from tag name to tag id. Defaults to
            ``{"O": 0, "I": 1}`` when omitted.
        id2tag: mapping from tag id (as a string key) to tag name. Defaults
            to ``{"0": "O", "1": "I"}`` when omitted.
        **kwargs: forwarded to ``PretrainedConfig.__init__`` together with
            ``pad_token_id``.
    """

    model_type = "bert_crf"

    def __init__(
        self,
        vocab_size=30522,
        hidden_size=768,
        num_hidden_layers=12,
        num_attention_heads=12,
        intermediate_size=3072,
        hidden_act="gelu",
        hidden_dropout_prob=0.1,
        attention_probs_dropout_prob=0.1,
        max_position_embeddings=512,
        type_vocab_size=2,
        initializer_range=0.02,
        layer_norm_eps=1e-12,
        pad_token_id=0,
        position_embedding_type="absolute",
        use_cache=True,
        classifier_dropout=None,
        lstm_hidden_state=300,
        num_tags=2,
        tag2id=None,
        id2tag=None,
        **kwargs
    ):
        super().__init__(pad_token_id=pad_token_id, **kwargs)
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.intermediate_size = intermediate_size
        self.hidden_act = hidden_act
        self.hidden_dropout_prob = hidden_dropout_prob
        self.attention_probs_dropout_prob = attention_probs_dropout_prob
        self.max_position_embeddings = max_position_embeddings
        self.type_vocab_size = type_vocab_size
        self.initializer_range = initializer_range
        self.layer_norm_eps = layer_norm_eps
        self.position_embedding_type = position_embedding_type
        self.use_cache = use_cache
        self.classifier_dropout = classifier_dropout
        self.lstm_hidden_state = lstm_hidden_state
        self.num_tags = num_tags
        # Build the default mappings per instance: a dict literal in the
        # signature would be one object shared (and mutable) across all
        # configs created without these arguments.
        self.tag2id = {"O": 0, "I": 1} if tag2id is None else tag2id
        self.id2tag = {"0": "O", "1": "I"} if id2tag is None else id2tag
|
51 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
models/bert/model_bert.py
DELETED
@@ -1,41 +0,0 @@
|
|
1 |
-
from transformers import PreTrainedModel,BertModel
|
2 |
-
from torch import nn
|
3 |
-
from transformers.configuration_utils import PretrainedConfig
|
4 |
-
from ..crf import CRF
|
5 |
-
from .configuration_bert import BertCrfConfig
|
6 |
-
|
7 |
-
class BertCrfModel(PreTrainedModel):
    """BERT encoder followed by a bidirectional LSTM, a linear layer and a CRF.

    ``forward`` returns ``(loss, (best_paths, labels))``:

    * ``loss`` is the CRF loss when ``labels`` is given, otherwise ``None``.
    * ``best_paths`` is the decoded tag path of every sequence with its
      first and last element removed.
    * ``labels`` is the input ``labels`` moved to CPU, or ``None``.
    """
    config_class = BertCrfConfig

    def __init__(self, config, num_tags = None):
        """Build the sub-modules; an explicit ``num_tags`` overrides ``config.num_tags``."""
        super().__init__(config)
        if num_tags is not None:
            config.num_tags = num_tags
        tag_count = config.num_tags
        lstm_width = config.lstm_hidden_state
        # Module creation order is kept (bert, lstm, crf, fc).
        self.bert = BertModel(config=config, add_pooling_layer=False)
        self.lstm = nn.LSTM(config.hidden_size, lstm_width, 1, batch_first=True, bidirectional=True)
        self.crf = CRF(tag_count)
        # The LSTM is bidirectional, so its output width is doubled.
        self.fc = nn.Linear(lstm_width*2, tag_count)

    def forward(self, input_ids, attention_mask, token_type_ids, input_mask, labels=None):
        """Encode the batch, score every position and decode with the CRF.

        Positions where ``input_mask == 0`` are the ones handed to the CRF
        as active; ``attention_mask`` and ``token_type_ids`` go to BERT.
        """
        encoder_out = self.bert(
            input_ids = input_ids,
            attention_mask = attention_mask,
            token_type_ids = token_type_ids
        )
        sequence_states = encoder_out[0]
        recurrent_states = self.lstm(sequence_states)[0]
        emissions = self.fc(recurrent_states)

        if labels is None:
            loss = None
        else:
            loss = self.crf.loss(emissions, labels, input_mask==0)

        _, decoded = self.crf(emissions, input_mask==0)
        # Drop the first and last position of every decoded path.
        trimmed_paths = [path[1:-1] for path in decoded]
        gold = None if labels is None else labels.cpu()
        return loss, (trimmed_paths, gold)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
models/crf/__init__.py
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
from .model_crf import CRF
|
|
|
|
models/crf/model_crf.py
DELETED
@@ -1,166 +0,0 @@
|
|
1 |
-
import torch
|
2 |
-
import torch.nn as nn
|
3 |
-
|
4 |
-
|
5 |
-
def log_sum_exp(x):
    """Return ``log(sum(exp(x)))`` reduced over the last dimension.

    Delegates to ``torch.logsumexp``, which applies the same max-shift
    stabilisation as the manual formula
    ``max(x) + log(sum(exp(x - max(x))))`` but also yields ``-inf``
    (instead of NaN from ``-inf - -inf``) when a row is entirely ``-inf``.

    :param x: tensor of scores, reduced along ``dim=-1``
    :return: tensor with the last dimension removed
    """
    return torch.logsumexp(x, dim=-1)
|
10 |
-
|
11 |
-
|
12 |
-
# Large negative score used in place of -inf for forbidden transitions/emissions.
IMPOSSIBLE = -1e4


class CRF(nn.Module):
    """Linear-chain CRF over pre-computed emission scores.

    The module only holds the transition matrix; emission scores for the
    ``num_tags`` real tags are supplied by the caller. Two extra tags, START
    and STOP, are appended internally.

    :param num_tags: number of tags. DO NOT include START, STOP tags, they are added internally.
    """

    def __init__(self, num_tags):
        super(CRF, self).__init__()

        # Internal tag count includes START (second to last) and STOP (last).
        self.num_tags = num_tags + 2
        self.start_idx = self.num_tags - 2
        self.stop_idx = self.num_tags - 1

        # transition factor, Tij mean transition from j to i
        self.transitions = nn.Parameter(torch.randn(self.num_tags, self.num_tags), requires_grad=True)
        with torch.no_grad():
            # Nothing may transition into START, nothing may leave STOP.
            self.transitions[self.start_idx, :] = IMPOSSIBLE
            self.transitions[:, self.stop_idx] = IMPOSSIBLE

    def __get_emission_score(self, features):
        """Append IMPOSSIBLE emission columns for the START and STOP tags.

        :param features: [B, L, C] emission scores for the real tags
        :return: [B, L, C + 2]
        """
        batch, seq_len, _ = features.size()
        # new_full keeps the filler on the same device and dtype as features.
        filler = features.new_full((batch, seq_len, 2), IMPOSSIBLE)
        return torch.cat([features, filler], dim=-1)

    def forward(self, features, masks):
        """decode tags
        :param features: [B, L, C], batch of unary scores
        :param masks: [B, L] masks, non-zero at positions to decode
        :return: (best_score, best_paths)
            best_score: [B]
            best_paths: list of B tag-id lists, one entry per unmasked position
        """
        features = self.__get_emission_score(features)  # [B,L,C] => [B,L,T]
        return self.__viterbi_decode(features, masks[:, :features.size(1)].float())

    def loss(self, features, ys, masks):
        """negative log likelihood loss
        B: batch size, L: sequence length, D: dimension
        :param features: [B, L, D]
        :param ys: tags, [B, L]; negative ids are replaced by 0 on a copy
        :param masks: masks for padding, [B, L]
        :return: loss, averaged over the batch
        """
        features = self.__get_emission_score(features)  # [B,L,C] => [B,L,T]

        L = features.size(1)
        masks_ = masks[:, :L].float()
        forward_score = self.__forward_algorithm(features, masks_)
        ys = ys.clone().detach()
        # Negative ids cannot be used as gather indices.
        # NOTE(review): presumably these are ignore labels at masked positions - confirm.
        ys[ys < 0] = 0
        gold_score = self.__score_sentence(features, ys[:, :L].long(), masks_)
        loss = (forward_score - gold_score).mean()
        return loss

    def __score_sentence(self, features, tags, masks):
        """Gives the score of a provided tag sequence
        :param features: [B, L, C]
        :param tags: [B, L]
        :param masks: [B, L]
        :return: [B] score in the log space
        """
        B, L, C = features.shape

        # emission score
        emit_scores = features.gather(dim=2, index=tags.unsqueeze(-1)).squeeze(-1)

        # transition score
        start_tag = torch.full((B, 1), self.start_idx, dtype=torch.long, device=tags.device)
        tags = torch.cat([start_tag, tags], dim=1)  # [B, L+1]
        trans_scores = self.transitions[tags[:, 1:], tags[:, :-1]]

        # last transition score to STOP tag; with START prepended, index
        # masks.sum(1) is the tag at the last counted position.
        # NOTE(review): assumes the unmasked positions form a prefix - confirm.
        last_tag = tags.gather(dim=1, index=masks.sum(1).long().unsqueeze(1)).squeeze(1)  # [B]
        last_score = self.transitions[self.stop_idx, last_tag]

        score = ((trans_scores + emit_scores) * masks).sum(1) + last_score
        return score

    def __viterbi_decode(self, features, masks):
        """decode to tags using viterbi algorithm
        :param features: [B, L, C], batch of unary scores
        :param masks: [B, L] masks
        :return: (best_score, best_paths)
            best_score: [B]
            best_paths: list of B lists of int tag ids
        """
        B, L, C = features.shape

        bps = torch.zeros(B, L, C, dtype=torch.long, device=features.device)  # back pointers

        # Initialize the viterbi variables in log space
        max_score = features.new_full((B, C), IMPOSSIBLE)  # [B, C]
        max_score[:, self.start_idx] = 0

        for t in range(L):
            mask_t = masks[:, t].unsqueeze(1)  # [B, 1]
            emit_score_t = features[:, t]  # [B, C]

            # [B, 1, C] + [C, C]
            acc_score_t = max_score.unsqueeze(1) + self.transitions  # [B, C, C]
            acc_score_t, bps[:, t, :] = acc_score_t.max(dim=-1)
            acc_score_t = acc_score_t + emit_score_t
            # Masked positions keep the previous scores.
            max_score = acc_score_t * mask_t + max_score * (1 - mask_t)

        # Transition to STOP_TAG
        max_score = max_score + self.transitions[self.stop_idx]
        best_score, best_tag = max_score.max(dim=-1)

        # Follow the back pointers to decode the best path.
        best_paths = []
        bps = bps.tolist()  # nested lists of plain Python ints
        for b in range(B):
            best_tag_b = best_tag[b].item()
            seq_len = int(masks[b, :].sum().item())

            best_path = [best_tag_b]
            for bps_t in reversed(bps[b][:seq_len]):
                best_tag_b = bps_t[best_tag_b]
                best_path.append(best_tag_b)
            # drop the last tag (START) and reverse the rest
            best_paths.append(best_path[-2::-1])

        return best_score, best_paths

    def __forward_algorithm(self, features, masks):
        """calculate the partition function with forward algorithm.
        TRICK: log_sum_exp([x1, x2, x3, x4, ...]) = log_sum_exp([log_sum_exp([x1, x2]), log_sum_exp([x3, x4]), ...])
        :param features: features. [B, L, C]
        :param masks: [B, L] masks
        :return: [B], score in the log space
        """
        B, L, C = features.shape

        scores = features.new_full((B, C), IMPOSSIBLE)  # [B, C]
        scores[:, self.start_idx] = 0.
        trans = self.transitions.unsqueeze(0)  # [1, C, C]

        # Iterate through the sentence
        for t in range(L):
            emit_score_t = features[:, t].unsqueeze(2)  # [B, C, 1]
            score_t = scores.unsqueeze(1) + trans + emit_score_t  # [B, 1, C] + [1, C, C] + [B, C, 1] => [B, C, C]
            score_t = log_sum_exp(score_t)  # [B, C]

            mask_t = masks[:, t].unsqueeze(1)  # [B, 1]
            scores = score_t * mask_t + scores * (1 - mask_t)
        scores = log_sum_exp(scores + self.transitions[self.stop_idx])
        return scores
|
165 |
-
|
166 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
pipelines/__init__.py
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
from .ner_pipeline import NERPredictorPipe
|
|
|
|
pipelines/ner_pipeline.py
DELETED
@@ -1,114 +0,0 @@
|
|
1 |
-
from transformers import Pipeline
|
2 |
-
from typing import Dict, Any, Union
|
3 |
-
from transformers.pipelines.base import GenericTensor
|
4 |
-
from transformers.modeling_outputs import ModelOutput
|
5 |
-
import torch
|
6 |
-
|
7 |
-
class NERPredictorPipe(Pipeline):
    """Pipeline that tokenizes text, runs the model and groups tags into entity spans.

    The result of ``postprocess`` is, per input sequence, a list of dicts
    with the keys ``text``, ``start``, ``end`` and ``label``.
    """

    def _sanitize_parameters(self, **kwargs):
        """Accept no call-time parameters: all three stage kwargs are empty."""
        return {}, {}, {}

    def __token_preprocess(self, input, tokenizer, max_length=512):
        """Tokenize ``input`` into PyTorch tensors padded/truncated to ``max_length``."""
        tokenized = tokenizer(
            input,
            padding="max_length",
            max_length=max_length,
            truncation=True,
            return_tensors="pt"
        )
        return tokenized

    def preprocess(self, sentence: Union[str, list], max_length=512) -> Dict[str, GenericTensor]:
        """Tokenize ``sentence`` and add the ``input_mask`` tensor the model expects.

        ``input_mask`` is 1 where ``input_ids`` is not greater than 0 and 0
        elsewhere. All tensors are moved to the pipeline device.
        """
        input_tensors = self.__token_preprocess(
            sentence,
            self.tokenizer,
            max_length=max_length
        )
        input_tensors["input_mask"] = (~(input_tensors["input_ids"] > 0)).long()
        for key in input_tensors:
            if input_tensors[key] is not None:
                input_tensors[key] = input_tensors[key].to(self.device)
        return input_tensors

    def _forward(self, input_tensors: Dict[str, GenericTensor]) -> ModelOutput:
        """Run the model in eval mode without gradients.

        Returns a tuple ``(input_ids as nested lists, decoded tag paths)``.
        """
        self.model.eval()
        with torch.no_grad():
            _, (best_path, _) = self.model(**input_tensors)
        return (input_tensors["input_ids"].tolist(), best_path)

    def __format_output(self, start, end, text, label):
        """Build the dict describing one entity span."""
        return {
            "text": text,
            "start": start,
            "end": end,
            "label": label
        }

    def __append_entity(self, slices, input_ids, start, end, label):
        """Append the span ``start..end`` (inclusive) to ``slices`` if one is open.

        A span is open when neither ``start`` nor ``end`` is -1. Label index
        ``i`` maps to ``input_ids[i + 1]``.
        NOTE(review): the +1 offset presumably skips a leading special token
        that the model removes from the decoded path - confirm.
        """
        if start != -1 and end != -1:
            tokens = self.tokenizer.convert_ids_to_tokens(input_ids[start + 1:end + 2])
            slices.append(self.__format_output(start, end, ''.join(tokens), label))

    def postprocess(self, model_outputs: ModelOutput) -> Any:
        """Convert decoded tag ids into entity spans for every sequence.

        ``model_outputs`` is the ``(input_ids, tag paths)`` tuple returned by
        ``_forward``. Tags are looked up in ``self.model.config.id2tag`` and
        interpreted by prefix: ``B-`` opens a span, ``I-``/``M-``/``E-``
        extend it (or open a new one when the entity type changes) and
        ``S-`` is a single-token entity.

        NOTE(review): any other label (e.g. "O") neither closes nor extends
        the open span, so a later ``I-`` of the same type extends it across
        the gap - confirm this is intended.
        """
        batch_slices = []
        input_ids_list = model_outputs[0]
        label_ids_list = model_outputs[1]
        for input_ids, label_ids in zip(input_ids_list, label_ids_list):
            slices = []
            labels = [self.model.config.id2tag[str(label_id)] for label_id in label_ids]
            # State of the span being built: entity type and inclusive bounds.
            past = "O"
            start = -1
            end = -1
            for i, label in enumerate(labels):
                if label.startswith("B-"):
                    # Close whatever was open, then start a new span here.
                    self.__append_entity(slices, input_ids, start, end, past)
                    start = i
                    end = i
                    past = "-".join(label.split("-")[1:])
                elif label.startswith(("I-", "M-", "E-")):
                    cur = "-".join(label.split("-")[1:])
                    if cur != past:
                        # cut and skip to next entity
                        self.__append_entity(slices, input_ids, start, end, past)
                        start = i
                        past = cur
                    end = i
                elif label.startswith("S-"):
                    self.__append_entity(slices, input_ids, start, end, past)
                    # Label the single-token entity with its own type rather
                    # than the type of the previously open span.
                    single = "-".join(label.split("-")[1:])
                    self.__append_entity(slices, input_ids, i, i, single)
                    start = -1
                    end = -1
                    past = "O"
            # Flush the span still open at the end of the sequence.
            self.__append_entity(slices, input_ids, start, end, past)
            batch_slices.append(slices)
        return batch_slices
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
register.py
DELETED
@@ -1,8 +0,0 @@
|
|
1 |
-
from transformers.pipelines import PIPELINE_REGISTRY,AutoModel,AutoConfig
|
2 |
-
from models.bert import BertCrfModel,BertCrfConfig
|
3 |
-
from pipelines import NERPredictorPipe
|
4 |
-
|
5 |
-
def register():
    """Register the custom pipeline, config and model with transformers.

    Registers ``NERPredictorPipe`` under the task name "ner_predictor",
    ``BertCrfConfig`` under the model type "bert_crf", and ``BertCrfModel``
    as the ``AutoModel`` class for ``BertCrfConfig``.
    """
    PIPELINE_REGISTRY.register_pipeline(
        "ner_predictor",
        pipeline_class=NERPredictorPipe,
    )
    AutoConfig.register("bert_crf", BertCrfConfig)
    AutoModel.register(BertCrfConfig, BertCrfModel)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|