Spaces:

minseokKoo
/

Auto_Classifier

Sleeping

App Files Files Community

minseokKoo

hyomin committed on Jan 31, 2023

Commit

0964dff

1 Parent(s): cd9810f

Update app.py (#2)

Browse files

- Update app.py (46546cc0d5bf65f9514e81531cbbb5010b117f67)

Co-authored-by: Kim Hyomin <hyomin@users.noreply.huggingface.co>

Files changed (1) hide show

app.py +18 -23

app.py CHANGED Viewed

@@ -3,30 +3,24 @@ import numpy as np
 import re
 import os
 import sys
-import random
 import transformers
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 from transformers import RobertaTokenizer, RobertaForSequenceClassification
 import torch
 import torch.nn.functional as F
-from torch.utils.data import Dataset, DataLoader, RandomSampler, SequentialSampler
 from transformers import T5Tokenizer, T5ForConditionalGeneration
 import gradio as gr
-def greet(co):
-    code_text = []
-    code_text.append(co)
-    code_text = ' '.join(code_text)
     code_text = re.sub('\/\*[\S\s]*\*\/', '', code_text)
     code_text = re.sub('\/\/.*', '', code_text)
     code_text = re.sub('(\\\\n)+', '\\n', code_text)
     # 1. CFA-CodeBERTa-small.pt -> CodeBERTa-small-v1 finetuning model
-    path = os.getcwd() + '/models/CFA-CodeBERTa-small.pt'
     tokenizer = AutoTokenizer.from_pretrained("huggingface/CodeBERTa-small-v1")
     input_ids = tokenizer.encode(
         code_text, max_length=512, truncation=True, padding='max_length')
@@ -38,7 +32,7 @@ def greet(co):
     # model(input_ids)[0].argmax().detach().cpu().numpy().item()
     # 2. CFA-codebert-c.pt -> codebert-c finetuning model
-    path = os.getcwd() + '/models/CFA-codebert-c.pt'
     tokenizer = AutoTokenizer.from_pretrained(path)
     input_ids = tokenizer(code_text, padding=True, max_length=512,
                           truncation=True, return_token_type_ids=True)['input_ids']
@@ -49,7 +43,7 @@ def greet(co):
     pred_2 = model(input_ids)[0].detach().cpu().numpy()[0]
     # 3. CFA-codebert-c-v2.pt -> undersampling + codebert-c finetuning model
-    path = os.getcwd() + '/models/CFA-codebert-c-v2.pt'
     tokenizer = RobertaTokenizer.from_pretrained(path)
     input_ids = tokenizer(code_text, padding=True, max_length=512,
                           truncation=True, return_token_type_ids=True)['input_ids']
@@ -60,7 +54,7 @@ def greet(co):
     pred_3 = model(input_ids)[0].detach().cpu().numpy()
     # 4. codeT5 finetuning model
-    path = os.getcwd() + '/models/CFA-codeT5'
     model_params = {
         # model_type: t5-base/t5-large
         "MODEL": path,
@@ -80,16 +74,14 @@ def greet(co):
     pred_4 = int(pred_4[0])
     # ensemble
-    tot_result = (pred_1 * 0.8 + pred_2 * 0.1 +
-                  pred_3 * 0.1 + pred_4 * 0.1).argmax()
     if tot_result == 0:
         return "false positive !!"
     else:
         return "true positive !!"
 # codeT5
 class YourDataSetClass(Dataset):
@@ -194,18 +186,19 @@ demo.launch(share=True)
 '''
 with gr.Blocks() as demo1:
     gr.Markdown(
-    """
     <h1 align="center">
     False-Alarm-Detector
     </h1>
     """)
     gr.Markdown(
-    """
-    정적 분석기로 오류라고 보고된 코드를 입력하면,
-    오류가 True-positive 인지 False-positive 인지 분류 해 주는 프로그램이다.
     """)
     with gr.Accordion(label='모델에 대한 설명 ( 여기를 클릭 하시오. )',open=False):
         gr.Markdown(
         """
@@ -218,14 +211,16 @@ with gr.Blocks() as demo1:
         - codeT5 설명
         """
         )
     with gr.Row():
         with gr.Column():
-            inputs_1 = gr.Textbox(placeholder="코드를 입력하시오.", label='Code')
             with gr.Row():
                 btn = gr.Button("결과 출력")
         with gr.Column():
-            outputs_1 = gr.Text(label = 'Result')
-    btn.click(fn = greet, inputs = inputs_1, outputs= outputs_1)
 if __name__ == "__main__":
     demo1.launch()

 import re
 import os
 import sys
 import transformers
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 from transformers import RobertaTokenizer, RobertaForSequenceClassification
 import torch
 import torch.nn.functional as F
+from torch.utils.data import Dataset, DataLoader
 from transformers import T5Tokenizer, T5ForConditionalGeneration
 import gradio as gr
+def is_false_alarm(code_text):
     code_text = re.sub('\/\*[\S\s]*\*\/', '', code_text)
     code_text = re.sub('\/\/.*', '', code_text)
     code_text = re.sub('(\\\\n)+', '\\n', code_text)
     # 1. CFA-CodeBERTa-small.pt -> CodeBERTa-small-v1 finetuning model
+    path = os.getcwd() + '\models\CFA-CodeBERTa-small.pt'
     tokenizer = AutoTokenizer.from_pretrained("huggingface/CodeBERTa-small-v1")
     input_ids = tokenizer.encode(
         code_text, max_length=512, truncation=True, padding='max_length')
     # model(input_ids)[0].argmax().detach().cpu().numpy().item()
     # 2. CFA-codebert-c.pt -> codebert-c finetuning model
+    path = os.getcwd() + '\models\CFA-codebert-c.pt'
     tokenizer = AutoTokenizer.from_pretrained(path)
     input_ids = tokenizer(code_text, padding=True, max_length=512,
                           truncation=True, return_token_type_ids=True)['input_ids']
     pred_2 = model(input_ids)[0].detach().cpu().numpy()[0]
     # 3. CFA-codebert-c-v2.pt -> undersampling + codebert-c finetuning model
+    path = os.getcwd() + '\models\CFA-codebert-c-v2.pt'
     tokenizer = RobertaTokenizer.from_pretrained(path)
     input_ids = tokenizer(code_text, padding=True, max_length=512,
                           truncation=True, return_token_type_ids=True)['input_ids']
     pred_3 = model(input_ids)[0].detach().cpu().numpy()
     # 4. codeT5 finetuning model
+    path = os.getcwd() + '\models\CFA-codeT5'
     model_params = {
         # model_type: t5-base/t5-large
         "MODEL": path,
     pred_4 = int(pred_4[0])
     # ensemble
+    tot_result = (pred_1 * 0.1 + pred_2 * 0.1 +
+                  pred_3 * 0.7 + pred_4 * 0.1).argmax()
     if tot_result == 0:
         return "false positive !!"
     else:
         return "true positive !!"
 # codeT5
 class YourDataSetClass(Dataset):
 '''
 with gr.Blocks() as demo1:
     gr.Markdown(
+        """
     <h1 align="center">
     False-Alarm-Detector
     </h1>
     """)
     gr.Markdown(
+        """
+    정적 분석기를 통해 오류라고 보고된 C언어 코드의 함수를 입력하면,
+    오류가 True-positive 인지 False-positive 인지 분류 해 주는 프로그램입니다.
     """)
+    '''
     with gr.Accordion(label='모델에 대한 설명 ( 여기를 클릭 하시오. )',open=False):
         gr.Markdown(
         """
         - codeT5 설명
         """
         )
+    '''
     with gr.Row():
         with gr.Column():
+            inputs = gr.Textbox(
+                lines=10, placeholder="코드를 입력하시오.", label='Code')
             with gr.Row():
                 btn = gr.Button("결과 출력")
         with gr.Column():
+            output = gr.Text(label='Result')
+    btn.click(fn=is_false_alarm, inputs=inputs, outputs=output)
 if __name__ == "__main__":
     demo1.launch()