Spaces:
Runtime error
Runtime error
Upload 21c11027.py
Browse files- 21c11027.py +77 -0
21c11027.py
ADDED
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# -*- coding: utf-8 -*-
|
2 |
+
"""21C11027.ipynb
|
3 |
+
|
4 |
+
Automatically generated by Colaboratory.
|
5 |
+
|
6 |
+
Original file is located at
|
7 |
+
https://colab.research.google.com/drive/1z_jG4sUgsIhZRoikoXxYMHNAMpiYlWAW
|
8 |
+
|
9 |
+
**KHAI THÁC NGỮ LIỆU VĂN BẢN NÂNG CAO**
|
10 |
+
|
11 |
+
* **Họ và tên:** Huỳnh Viết Thám
|
12 |
+
* **Mã số học viên:** 21C11027
|
13 |
+
|
14 |
+
# Cài đặt thư viện cần thiết
|
15 |
+
"""
|
16 |
+
|
17 |
+
# Third-party dependencies must be installed outside this script, e.g. by
# listing them in requirements.txt:
#   beautifulsoup4
#   google
#   google-search-results
# The notebook-only "!pip install ..." shell magics are not valid Python and
# make a plain .py file fail with a SyntaxError before anything runs.

try:
    publication_name = input("Please input the keyword for searching: ")
except (EOFError, OSError):
    # No usable stdin (non-interactive run): do not crash at import time.
    # The keyword is supplied through the web UI in that case.
    publication_name = ""
|
22 |
+
|
23 |
+
from serpapi import GoogleSearch
|
24 |
+
|
25 |
+
def checkPaper(publication_name):
    """Report whether a GitHub repository seems to exist for a publication.

    Runs a Google search restricted to github.com through ``GoogleSearch``,
    prints every organic result, then inspects the top three results. A
    result is accepted when its link starts with ``https://github.com/`` and
    at least 50% of the query's words occur in its snippet
    (case-insensitive).

    Args:
        publication_name: Title or keywords of the publication. ``None`` or
            a blank string is treated as "nothing to search for".

    Returns:
        A message saying whether source code was found; when found, the
        message ends with the matching GitHub link on its own line.
    """
    import os  # local import so this block does not depend on file-level imports

    not_found = "This paper doesn't have source code in github!"

    # Split on any whitespace so repeated spaces do not produce empty
    # "words" (an empty string is contained in every snippet).
    keywords = [word.casefold() for word in (publication_name or "").split()]
    if not keywords:
        # Nothing to search for; also avoids dividing by zero below.
        return not_found

    params = {
        # NOTE(review): prefer supplying the key via SERPAPI_API_KEY; the
        # literal fallback is kept only for backward compatibility and
        # should be rotated and removed from source control.
        "api_key": os.environ.get(
            "SERPAPI_API_KEY",
            "3fb62919a0e61a6a58cf9815798253799210ab69fbc3c9c9a81785c7cabcc3fa",
        ),
        "engine": "google",
        "q": publication_name,
        "location": "Austin, Texas, United States",
        "google_domain": "google.com",
        "gl": "us",
        "hl": "en",
        "as_sitesearch": "github.com",
    }

    results = GoogleSearch(params).get_dict()
    # Tolerate responses that carry no "organic_results" key (for example an
    # error payload) instead of raising KeyError.
    organic_results = results.get("organic_results", [])

    for result in organic_results:
        print(
            f"Title: {result.get('title', '')}\n"
            f"Summary: {result.get('snippet', '')}\n"
            f"Link: {result.get('link', '')}\n"
        )

    # Minimum fraction of query words that must appear in the snippet.
    threshold = 0.5

    # Only the top three results are considered.
    for result in organic_results[:3]:
        link = result.get("link", "")
        if not link.startswith("https://github.com/"):
            continue
        snippet = result.get("snippet", "").casefold()
        matched = sum(1 for word in keywords if word in snippet)
        # The previous check (count >= count / len) was always true, so the
        # threshold was never enforced.
        if matched / len(keywords) >= threshold:
            return "This paper has source code in github!\n" + link

    return not_found
|
68 |
+
|
69 |
+
# gradio must be installed beforehand (e.g. listed in requirements.txt); the
# notebook-only "!pip install gradio" shell magic is not valid Python.
|
70 |
+
|
71 |
+
import gradio as gr
|
72 |
+
|
73 |
+
def greet(name):
    """Return a greeting of the form ``"Hello <name>!"``.

    Args:
        name: Value to greet; it is formatted with ``str()`` semantics, so
            non-string values are accepted as well.

    Returns:
        The greeting string.
    """
    return f"Hello {name}!"
|
75 |
+
|
76 |
+
# Text-in / text-out web UI around checkPaper.
demo = gr.Interface(fn=checkPaper, inputs="text", outputs="text")

# Start the server only when executed as a script, so importing this module
# does not launch it as a side effect.
if __name__ == "__main__":
    demo.launch()
|