Spaces:
Sleeping
Sleeping
Commit
•
48a61f4
1
Parent(s):
0964dff
Update app.py (#3)
Browse files
- Update app.py (3f6e3a1d72badba0df3deca3b864d08d9f48a42e)
Co-authored-by: Kim Hyomin <hyomin@users.noreply.huggingface.co>
app.py
CHANGED
@@ -20,7 +20,7 @@ def is_false_alarm(code_text):
|
|
20 |
code_text = re.sub('(\\\\n)+', '\\n', code_text)
|
21 |
|
22 |
# 1. CFA-CodeBERTa-small.pt -> CodeBERTa-small-v1 finetuning model
|
23 |
-
path = os.getcwd() + '
|
24 |
tokenizer = AutoTokenizer.from_pretrained("huggingface/CodeBERTa-small-v1")
|
25 |
input_ids = tokenizer.encode(
|
26 |
code_text, max_length=512, truncation=True, padding='max_length')
|
@@ -32,7 +32,7 @@ def is_false_alarm(code_text):
|
|
32 |
# model(input_ids)[0].argmax().detach().cpu().numpy().item()
|
33 |
|
34 |
# 2. CFA-codebert-c.pt -> codebert-c finetuning model
|
35 |
-
path = os.getcwd() + '
|
36 |
tokenizer = AutoTokenizer.from_pretrained(path)
|
37 |
input_ids = tokenizer(code_text, padding=True, max_length=512,
|
38 |
truncation=True, return_token_type_ids=True)['input_ids']
|
@@ -43,7 +43,7 @@ def is_false_alarm(code_text):
|
|
43 |
pred_2 = model(input_ids)[0].detach().cpu().numpy()[0]
|
44 |
|
45 |
# 3. CFA-codebert-c-v2.pt -> undersampling + codebert-c finetuning model
|
46 |
-
path = os.getcwd() + '
|
47 |
tokenizer = RobertaTokenizer.from_pretrained(path)
|
48 |
input_ids = tokenizer(code_text, padding=True, max_length=512,
|
49 |
truncation=True, return_token_type_ids=True)['input_ids']
|
@@ -54,7 +54,7 @@ def is_false_alarm(code_text):
|
|
54 |
pred_3 = model(input_ids)[0].detach().cpu().numpy()
|
55 |
|
56 |
# 4. codeT5 finetuning model
|
57 |
-
path = os.getcwd() + '
|
58 |
model_params = {
|
59 |
# model_type: t5-base/t5-large
|
60 |
"MODEL": path,
|
|
|
20 |
code_text = re.sub('(\\\\n)+', '\\n', code_text)
|
21 |
|
22 |
# 1. CFA-CodeBERTa-small.pt -> CodeBERTa-small-v1 finetuning model
|
23 |
+
path = os.getcwd() + '/models/CFA-CodeBERTa-small.pt'
|
24 |
tokenizer = AutoTokenizer.from_pretrained("huggingface/CodeBERTa-small-v1")
|
25 |
input_ids = tokenizer.encode(
|
26 |
code_text, max_length=512, truncation=True, padding='max_length')
|
|
|
32 |
# model(input_ids)[0].argmax().detach().cpu().numpy().item()
|
33 |
|
34 |
# 2. CFA-codebert-c.pt -> codebert-c finetuning model
|
35 |
+
path = os.getcwd() + '/models/CFA-codebert-c.pt'
|
36 |
tokenizer = AutoTokenizer.from_pretrained(path)
|
37 |
input_ids = tokenizer(code_text, padding=True, max_length=512,
|
38 |
truncation=True, return_token_type_ids=True)['input_ids']
|
|
|
43 |
pred_2 = model(input_ids)[0].detach().cpu().numpy()[0]
|
44 |
|
45 |
# 3. CFA-codebert-c-v2.pt -> undersampling + codebert-c finetuning model
|
46 |
+
path = os.getcwd() + '/models/CFA-codebert-c-v2.pt'
|
47 |
tokenizer = RobertaTokenizer.from_pretrained(path)
|
48 |
input_ids = tokenizer(code_text, padding=True, max_length=512,
|
49 |
truncation=True, return_token_type_ids=True)['input_ids']
|
|
|
54 |
pred_3 = model(input_ids)[0].detach().cpu().numpy()
|
55 |
|
56 |
# 4. codeT5 finetuning model
|
57 |
+
path = os.getcwd() + '/models/CFA-codeT5'
|
58 |
model_params = {
|
59 |
# model_type: t5-base/t5-large
|
60 |
"MODEL": path,
|