Update README.md
README.md (CHANGED)
@@ -38,4 +38,133 @@ output ="""
- The agent checks the customer's card number and balance, then explains additional available benefits
- The customer expresses interest in various benefits such as travel discounts, mileage, and hotel discounts
"""
```

To use this model, the following classes are required.
```
# Imports assumed by this snippet (they are not shown in the README itself);
# LongformerSelfAttention here refers to the Hugging Face implementation.
from typing import Optional, Tuple

import torch
import torch.nn as nn
from transformers.models.longformer.modeling_longformer import LongformerSelfAttention


class LongformerSelfAttentionForBart(nn.Module):
    def __init__(self, config, layer_id):
        super().__init__()
        self.embed_dim = config.d_model
        self.longformer_self_attn = LongformerSelfAttention(config, layer_id=layer_id)
        self.output = nn.Linear(self.embed_dim, self.embed_dim)

    def forward(
        self,
        hidden_states: torch.Tensor,
        key_value_states: Optional[torch.Tensor] = None,
        past_key_value: Optional[Tuple[torch.Tensor]] = None,
        attention_mask: Optional[torch.Tensor] = None,
        layer_head_mask: Optional[torch.Tensor] = None,
        output_attentions: bool = False,
    ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:

        is_cross_attention = key_value_states is not None
        bsz, tgt_len, embed_dim = hidden_states.size()

        # change the mask from bs x seq_len x seq_len to bs x seq_len
        attention_mask = attention_mask.squeeze(dim=1)
        attention_mask = attention_mask[:, 0]

        is_index_masked = attention_mask < 0
        is_index_global_attn = attention_mask > 0
        is_global_attn = is_index_global_attn.flatten().any().item()

        outputs = self.longformer_self_attn(
            hidden_states,
            attention_mask=attention_mask,
            layer_head_mask=None,
            is_index_masked=is_index_masked,
            is_index_global_attn=is_index_global_attn,
            is_global_attn=is_global_attn,
            output_attentions=output_attentions,
        )

        attn_output = self.output(outputs[0])

        return (attn_output,) + outputs[1:] if len(outputs) == 2 else (attn_output, None, None)
```
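
The only BART-specific adjustment above is the mask handling: BART hands each layer an additive mask of shape `(bsz, 1, tgt_len, src_len)`, while `LongformerSelfAttention` works on a per-token `(bsz, seq_len)` mask in which negative values mark padding and positive values mark global-attention tokens. A minimal sketch of just that reshape, with dummy tensors (illustrative only, not part of the original README):
```
import torch

bsz, seq_len = 2, 8
# Additive mask as BART builds it: 0 = attend, large negative = padding.
expanded_mask = torch.zeros(bsz, 1, seq_len, seq_len)
expanded_mask[1, :, :, 6:] = -10000.0  # pretend the last two tokens of sample 1 are padding

mask = expanded_mask.squeeze(dim=1)[:, 0]  # (bsz, 1, S, S) -> (bsz, S, S) -> (bsz, S)
print(mask.shape)        # torch.Size([2, 8])
print((mask < 0).sum())  # 2 positions that Longformer will treat as masked
```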

```
# Assumed imports (not shown in the README):
from transformers import BartForConditionalGeneration
from transformers.models.bart.modeling_bart import BartLearnedPositionalEmbedding


class LongformerEncoderDecoderForConditionalGeneration(BartForConditionalGeneration):
    def __init__(self, config):
        super().__init__(config)

        if config.attention_mode == 'n2':
            pass  # do nothing, use BertSelfAttention instead
        else:
            self.model.encoder.embed_positions = BartLearnedPositionalEmbedding(
                config.max_encoder_position_embeddings,
                config.d_model)

            self.model.decoder.embed_positions = BartLearnedPositionalEmbedding(
                config.max_decoder_position_embeddings,
                config.d_model)

            for i, layer in enumerate(self.model.encoder.layers):
                layer.self_attn = LongformerSelfAttentionForBart(config, layer_id=i)
```

```
# Assumed imports (not shown in the README):
from typing import List

from transformers import BartConfig


class LongformerEncoderDecoderConfig(BartConfig):
    def __init__(self, attention_window: List[int] = None, attention_dilation: List[int] = None,
                 autoregressive: bool = False, attention_mode: str = 'sliding_chunks',
                 gradient_checkpointing: bool = False, **kwargs):
        """
        Args:
            attention_window: list of attention window sizes of length = number of layers.
                window size = number of attention locations on each side.
                For an effective window size of 512, use `attention_window=[256]*num_layers`
                which is 256 on each side.
            attention_dilation: list of attention dilations of length = number of layers.
                attention dilation of `1` means no dilation.
            autoregressive: do autoregressive attention or attend to both sides
            attention_mode: 'n2' for regular n^2 self-attention, 'tvm' for the TVM implementation of
                Longformer self-attention, 'sliding_chunks' for another implementation of Longformer self-attention
        """
        super().__init__(**kwargs)
        self.attention_window = attention_window
        self.attention_dilation = attention_dilation
        self.autoregressive = autoregressive
        self.attention_mode = attention_mode
        self.gradient_checkpointing = gradient_checkpointing
        assert self.attention_mode in ['tvm', 'sliding_chunks', 'n2']
```
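
Following the docstring above, a hedged sketch of how such a config might be instantiated; the layer count and window sizes are illustrative only:
```
# Illustrative values only (not from the original README).
num_layers = 6
config = LongformerEncoderDecoderConfig(
    attention_window=[256] * num_layers,  # effective window of 512 tokens (256 per side)
    attention_dilation=[1] * num_layers,  # no dilation
    autoregressive=False,
    attention_mode='sliding_chunks',
    gradient_checkpointing=False,
)
```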

After loading the model object, you must download the weight file separately and load the weights with `load_state_dict`.
```
# Assumed imports (not shown in the README):
import torch
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("cocoirun/longforemr-kobart-summary-v1")
model = LongformerEncoderDecoderForConditionalGeneration.from_pretrained("cocoirun/longforemr-kobart-summary-v1")
device = torch.device('cuda')
model.load_state_dict(torch.load("summary weight.ckpt"))
model.to(device)
```
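
If you are loading the checkpoint on a machine without a GPU, `torch.load` accepts a `map_location` argument; a hedged variant of the loading step above (same assumed checkpoint filename, CPU device):
```
import torch

device = torch.device('cpu')
state_dict = torch.load("summary weight.ckpt", map_location=device)  # filename from the snippet above
model.load_state_dict(state_dict)
model.to(device)
```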

Model summarization function:
```
def summarize(text, max_len):
    max_seq_len = 4096
    context_tokens = ['<s>'] + tokenizer.tokenize(text) + ['</s>']
    input_ids = tokenizer.convert_tokens_to_ids(context_tokens)

    if len(input_ids) < max_seq_len:
        # Pad the input up to the 4096-token encoder length
        while len(input_ids) < max_seq_len:
            input_ids += [tokenizer.pad_token_id]
    else:
        # Truncate and keep a final EOS token
        input_ids = input_ids[:max_seq_len - 1] + [tokenizer.eos_token_id]

    res_ids = model.generate(torch.tensor([input_ids]).to(device),
                             max_length=max_len,
                             num_beams=5,
                             no_repeat_ngram_size=3,
                             eos_token_id=tokenizer.eos_token_id,
                             bad_words_ids=[[tokenizer.unk_token_id]])

    res = tokenizer.batch_decode(res_ids.tolist(), skip_special_tokens=True)[0]
    res = res.replace("\n\n", "\n")
    return res
```
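
A minimal usage sketch of the function above; the input text and `max_len` value are placeholders, not values from the original README:
```
# Hypothetical call; replace `document` with the long text you want to summarize.
document = "..."  # placeholder
summary = summarize(document, max_len=256)
print(summary)
```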