minseokKoo hyomin committed on
Commit
0964dff
•
1 Parent(s): cd9810f

Update app.py (#2)

Browse files

- Update app.py (46546cc0d5bf65f9514e81531cbbb5010b117f67)


Co-authored-by: Kim Hyomin <hyomin@users.noreply.huggingface.co>

Files changed (1) hide show
  1. app.py +18 -23
app.py CHANGED
@@ -3,30 +3,24 @@ import numpy as np
3
  import re
4
  import os
5
  import sys
6
- import random
7
  import transformers
8
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
9
  from transformers import RobertaTokenizer, RobertaForSequenceClassification
10
  import torch
11
  import torch.nn.functional as F
12
- from torch.utils.data import Dataset, DataLoader, RandomSampler, SequentialSampler
13
  from transformers import T5Tokenizer, T5ForConditionalGeneration
14
  import gradio as gr
15
 
16
 
 
17
 
18
- def greet(co):
19
- code_text = []
20
-
21
- code_text.append(co)
22
-
23
- code_text = ' '.join(code_text)
24
  code_text = re.sub('\/\*[\S\s]*\*\/', '', code_text)
25
  code_text = re.sub('\/\/.*', '', code_text)
26
  code_text = re.sub('(\\\\n)+', '\\n', code_text)
27
 
28
  # 1. CFA-CodeBERTa-small.pt -> CodeBERTa-small-v1 finetuning model
29
- path = os.getcwd() + '/models/CFA-CodeBERTa-small.pt'
30
  tokenizer = AutoTokenizer.from_pretrained("huggingface/CodeBERTa-small-v1")
31
  input_ids = tokenizer.encode(
32
  code_text, max_length=512, truncation=True, padding='max_length')
@@ -38,7 +32,7 @@ def greet(co):
38
  # model(input_ids)[0].argmax().detach().cpu().numpy().item()
39
 
40
  # 2. CFA-codebert-c.pt -> codebert-c finetuning model
41
- path = os.getcwd() + '/models/CFA-codebert-c.pt'
42
  tokenizer = AutoTokenizer.from_pretrained(path)
43
  input_ids = tokenizer(code_text, padding=True, max_length=512,
44
  truncation=True, return_token_type_ids=True)['input_ids']
@@ -49,7 +43,7 @@ def greet(co):
49
  pred_2 = model(input_ids)[0].detach().cpu().numpy()[0]
50
 
51
  # 3. CFA-codebert-c-v2.pt -> undersampling + codebert-c finetuning model
52
- path = os.getcwd() + '/models/CFA-codebert-c-v2.pt'
53
  tokenizer = RobertaTokenizer.from_pretrained(path)
54
  input_ids = tokenizer(code_text, padding=True, max_length=512,
55
  truncation=True, return_token_type_ids=True)['input_ids']
@@ -60,7 +54,7 @@ def greet(co):
60
  pred_3 = model(input_ids)[0].detach().cpu().numpy()
61
 
62
  # 4. codeT5 finetuning model
63
- path = os.getcwd() + '/models/CFA-codeT5'
64
  model_params = {
65
  # model_type: t5-base/t5-large
66
  "MODEL": path,
@@ -80,16 +74,14 @@ def greet(co):
80
  pred_4 = int(pred_4[0])
81
 
82
  # ensemble
83
- tot_result = (pred_1 * 0.8 + pred_2 * 0.1 +
84
- pred_3 * 0.1 + pred_4 * 0.1).argmax()
85
  if tot_result == 0:
86
  return "false positive !!"
87
  else:
88
  return "true positive !!"
89
 
90
 
91
-
92
-
93
  # codeT5
94
  class YourDataSetClass(Dataset):
95
 
@@ -194,18 +186,19 @@ demo.launch(share=True)
194
  '''
195
  with gr.Blocks() as demo1:
196
  gr.Markdown(
197
- """
198
  <h1 align="center">
199
  False-Alarm-Detector
200
  </h1>
201
  """)
202
 
203
  gr.Markdown(
204
- """
205
- ์ •์  ๋ถ„์„๊ธฐ๋กœ ์˜ค๋ฅ˜๋ผ๊ณ  ๋ณด๊ณ ๋œ ์ฝ”๋“œ๋ฅผ ์ž…๋ ฅํ•˜๋ฉด,
206
- ์˜ค๋ฅ˜๊ฐ€ True-positive ์ธ์ง€ False-positive ์ธ์ง€ ๋ถ„๋ฅ˜ ํ•ด ์ฃผ๋Š” ํ”„๋กœ๊ทธ๋žจ์ด๋‹ค.
207
  """)
208
 
 
209
  with gr.Accordion(label='๋ชจ๋ธ์— ๋Œ€ํ•œ ์„ค๋ช… ( ์—ฌ๊ธฐ๋ฅผ ํด๋ฆญ ํ•˜์‹œ์˜ค. )',open=False):
210
  gr.Markdown(
211
  """
@@ -218,14 +211,16 @@ with gr.Blocks() as demo1:
218
  - codeT5 ์„ค๋ช…
219
  """
220
  )
 
221
  with gr.Row():
222
  with gr.Column():
223
- inputs_1 = gr.Textbox(placeholder="์ฝ”๋“œ๋ฅผ ์ž…๋ ฅํ•˜์‹œ์˜ค.", label='Code')
 
224
  with gr.Row():
225
  btn = gr.Button("๊ฒฐ๊ณผ ์ถœ๋ ฅ")
226
  with gr.Column():
227
- outputs_1 = gr.Text(label = 'Result')
228
- btn.click(fn = greet, inputs = inputs_1, outputs= outputs_1)
229
 
230
  if __name__ == "__main__":
231
  demo1.launch()
 
3
  import re
4
  import os
5
  import sys
 
6
  import transformers
7
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
8
  from transformers import RobertaTokenizer, RobertaForSequenceClassification
9
  import torch
10
  import torch.nn.functional as F
11
+ from torch.utils.data import Dataset, DataLoader
12
  from transformers import T5Tokenizer, T5ForConditionalGeneration
13
  import gradio as gr
14
 
15
 
16
+ def is_false_alarm(code_text):
17
 
 
 
 
 
 
 
18
  code_text = re.sub('\/\*[\S\s]*\*\/', '', code_text)
19
  code_text = re.sub('\/\/.*', '', code_text)
20
  code_text = re.sub('(\\\\n)+', '\\n', code_text)
21
 
22
  # 1. CFA-CodeBERTa-small.pt -> CodeBERTa-small-v1 finetuning model
23
+ path = os.getcwd() + '\models\CFA-CodeBERTa-small.pt'
24
  tokenizer = AutoTokenizer.from_pretrained("huggingface/CodeBERTa-small-v1")
25
  input_ids = tokenizer.encode(
26
  code_text, max_length=512, truncation=True, padding='max_length')
 
32
  # model(input_ids)[0].argmax().detach().cpu().numpy().item()
33
 
34
  # 2. CFA-codebert-c.pt -> codebert-c finetuning model
35
+ path = os.getcwd() + '\models\CFA-codebert-c.pt'
36
  tokenizer = AutoTokenizer.from_pretrained(path)
37
  input_ids = tokenizer(code_text, padding=True, max_length=512,
38
  truncation=True, return_token_type_ids=True)['input_ids']
 
43
  pred_2 = model(input_ids)[0].detach().cpu().numpy()[0]
44
 
45
  # 3. CFA-codebert-c-v2.pt -> undersampling + codebert-c finetuning model
46
+ path = os.getcwd() + '\models\CFA-codebert-c-v2.pt'
47
  tokenizer = RobertaTokenizer.from_pretrained(path)
48
  input_ids = tokenizer(code_text, padding=True, max_length=512,
49
  truncation=True, return_token_type_ids=True)['input_ids']
 
54
  pred_3 = model(input_ids)[0].detach().cpu().numpy()
55
 
56
  # 4. codeT5 finetuning model
57
+ path = os.getcwd() + '\models\CFA-codeT5'
58
  model_params = {
59
  # model_type: t5-base/t5-large
60
  "MODEL": path,
 
74
  pred_4 = int(pred_4[0])
75
 
76
  # ensemble
77
+ tot_result = (pred_1 * 0.1 + pred_2 * 0.1 +
78
+ pred_3 * 0.7 + pred_4 * 0.1).argmax()
79
  if tot_result == 0:
80
  return "false positive !!"
81
  else:
82
  return "true positive !!"
83
 
84
 
 
 
85
  # codeT5
86
  class YourDataSetClass(Dataset):
87
 
 
186
  '''
187
  with gr.Blocks() as demo1:
188
  gr.Markdown(
189
+ """
190
  <h1 align="center">
191
  False-Alarm-Detector
192
  </h1>
193
  """)
194
 
195
  gr.Markdown(
196
+ """
197
+ ์ •์  ๋ถ„์„๊ธฐ๋ฅผ ํ†ตํ•ด ์˜ค๋ฅ˜๋ผ๊ณ  ๋ณด๊ณ ๋œ C์–ธ์–ด ์ฝ”๋“œ์˜ ํ•จ์ˆ˜๋ฅผ ์ž…๋ ฅํ•˜๋ฉด,
198
+ ์˜ค๋ฅ˜๊ฐ€ True-positive ์ธ์ง€ False-positive ์ธ์ง€ ๋ถ„๋ฅ˜ ํ•ด ์ฃผ๋Š” ํ”„๋กœ๊ทธ๋žจ์ž…๋‹ˆ๋‹ค.
199
  """)
200
 
201
+ '''
202
  with gr.Accordion(label='๋ชจ๋ธ์— ๋Œ€ํ•œ ์„ค๋ช… ( ์—ฌ๊ธฐ๋ฅผ ํด๋ฆญ ํ•˜์‹œ์˜ค. )',open=False):
203
  gr.Markdown(
204
  """
 
211
  - codeT5 ์„ค๋ช…
212
  """
213
  )
214
+ '''
215
  with gr.Row():
216
  with gr.Column():
217
+ inputs = gr.Textbox(
218
+ lines=10, placeholder="์ฝ”๋“œ๋ฅผ ์ž…๋ ฅํ•˜์‹œ์˜ค.", label='Code')
219
  with gr.Row():
220
  btn = gr.Button("๊ฒฐ๊ณผ ์ถœ๋ ฅ")
221
  with gr.Column():
222
+ output = gr.Text(label='Result')
223
+ btn.click(fn=is_false_alarm, inputs=inputs, outputs=output)
224
 
225
  if __name__ == "__main__":
226
  demo1.launch()