Spaces:

alex6095
/

SanctiMolyOH_Cpu

Runtime error

App Files Files Community

alex6095 committed on Dec 13, 2021

Commit

355910d

1 Parent(s): f8d7d27

Create app.py

Browse files

Files changed (1) hide show

app.py +113 -0

app.py ADDED Viewed

	@@ -0,0 +1,113 @@

+import torch
+import torch.nn as nn
+import re
+import streamlit as st
+from transformers import DistilBertModel
+from tokenization_kobert import KoBertTokenizer
+class SanctiMoly(nn.Module):
+    """ Holy Moly News BERT """
+    def __init__(self, freeze_bert = True):
+        super(SanctiMoly, self).__init__()
+        self.encoder = bert_model
+        # FC-BN-Tanh
+        self.linear = nn.Sequential(nn.Linear(768, 1024),
+                                    nn.BatchNorm1d(1024),
+                                    nn.Tanh(),
+                                    nn.Dropout(),
+                                    nn.Linear(1024, 768),
+                                    nn.BatchNorm1d(768),
+                                    nn.Tanh(),
+                                    nn.Dropout(),
+                                    nn.Linear(768, 120)
+                                    )
+        # self.softmax = nn.LogSoftmax(dim=-1)
+        if freeze_bert == True:
+            for param in self.encoder.parameters():
+                param.requires_grad = False
+        else:
+            for param in self.encoder.parameters():
+                param.requires_grad = True
+    def forward(self, input_ids, input_length):
+        # calculate attention mask
+        attn_mask = torch.arange(input_ids.size(1)).to(device)
+        attn_mask = attn_mask[None, :] < input_length[:, None]
+        enc_o = self.encoder(input_ids, attn_mask)
+        output = self.linear(enc_o.last_hidden_state[:, 0, :])
+        # print(output.shape)
+        return output
+@st.cache(allow_output_mutation=True)
+def get_model():
+    bert_model = DistilBertModel.from_pretrained('monologg/distilkobert')
+    tokenizer = KoBertTokenizer.from_pretrained('monologg/distilkobert')
+    model = SanctiMoly(freeze_bert=False)
+    checkpoint = torch.load("./model.pt", map_location=device)
+    model.load_state_dict(checkpoint['model_state_dict'])
+    return model, tokenizer
+model, tokenizer = get_model()
+class RegexSubstitution(object):
+    """Regex substitution class for transform"""
+    def __init__(self, regex, sub=''):
+        if isinstance(regex, re.Pattern):
+            self.regex = regex
+        else:
+            self.regex = re.compile(regex)
+        self.sub = sub
+    def __call__(self, target):
+        if isinstance(target, list):
+            return [self.regex.sub(self.sub, self.regex.sub(self.sub, string)) for string in target]
+        else:
+            return self.regex.sub(self.sub, self.regex.sub(self.sub, target))
+def i2ym(fl):
+    return (str(fl // 12 + 2009), str(fl % 12 + 1))
+# Sample Korean news article used to pre-fill the input text area.
+default_text = '''질병관리청은 23일 지방자치단체가 보건당국과 협의 없이 단독으로 인플루엔자(독감) 백신 접종 중단을 결정해서는 안 된다는 입장을 밝혔다.
    질병청은 이날 참고자료를 배포하고 “향후 전체 국가 예방접종사업이 차질 없이 진행되도록 지자체가 자체적으로 접종 유보 여부를 결정하지 않도록 안내를 했다”고 설명했다.
    독감백신을 접종한 후 고령층을 중심으로 전국에서 사망자가 잇따르자 서울 영등포구보건소는 전날, 경북 포항시는 이날 관내 의료기관에 접종을 보류해달라는 공문을 내려보냈다. 이는 예방접종과 사망 간 직접적 연관성이 낮아 접종을 중단할 상황은 아니라는 질병청의 판단과는 다른 것이다.
    질병청은 지난 21일 전문가 등이 참여한 ‘예방접종 피해조사반’의 분석 결과를 바탕으로 독감 예방접종 사업을 일정대로 진행하기로 했다. 특히 고령 어르신과 어린이, 임신부 등 독감 고위험군은 백신을 접종하지 않았을 때 합병증 피해가 클 수 있다면서 접종을 독려했다. 하지만 접종사업 유지 발표 이후에도 사망 보고가 잇따르자 질병청은 이날 ‘예방접종 피해조사반 회의’와 ‘예방접종 전문위원회’를 개최해 독감백신과 사망 간 관련성, 접종사업 유지 여부 등에 대해 다시 결론 내리기로 했다. 회의 결과는 이날 오후 7시 넘어 발표될 예정이다.
+'''
+st.title("Date prediction")
+text = st.text_area("Input news :", value=default_text)
+st.markdown("## Original News Data")
+st.write(text)
+st.markdown("## Predict Date")
+col1, col2 = st.columns(2)
+if text:
+    with st.spinner('processing..'):
+        text = RegexSubstitution(r'\([^()]+\)|[<>\'"△▲□■]')(text)
+        encoded_dict = tokenizer(
+            text=[text],
+            add_special_tokens=True,
+            max_length=512,
+            truncation=True,
+            return_tensors='pt',
+            return_length=True
+        )
+        input_ids = encoded_dict['input_ids']
+        input_ids_len = encoded_dict['length']
+        pred = model(input_ids, input_ids_len)
+    _, indices = torch.topk(pred, 3)
+    pred_print = []
+    for i in indices.squeeze(0):
+        year, month = i2ym(i.item()))
+        pred_print.append(year+"-"+month)
+    st.write(", ".join(pred_print))