hvtham committed on
Commit
ad103e1
1 Parent(s): 271e8f0

Upload 21c11027.py

Browse files
Files changed (1) hide show
  1. 21c11027.py +77 -0
21c11027.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """21C11027.ipynb
3
+
4
+ Automatically generated by Colaboratory.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/drive/1z_jG4sUgsIhZRoikoXxYMHNAMpiYlWAW
8
+
9
+ **KHAI THÁC NGỮ LIỆU VĂN BẢN NÂNG CAO**
10
+
11
+ * **Họ và tên:** Huỳnh Viết Thám
12
+ * **Mã số học viên:** 21C11027
13
+
14
+ # Cài đặt thư viện cần thiết
15
+ """
16
+
17
+ !pip install beautifulsoup4
18
+ !pip install google
19
+ !pip install google-search-results
20
+
21
+ publication_name = input("Please input the keyword for searching: ")
22
+
23
+ from serpapi import GoogleSearch
24
+
25
def checkPaper(publication_name):
    """Check whether a publication appears to have source code on GitHub.

    Runs a Google search (through SerpApi's ``GoogleSearch`` client) restricted
    to ``github.com`` with ``publication_name`` as the query, prints every
    organic result, and then inspects the top three results. A result is
    accepted when its link contains ``https://github.com/`` and at least 50%
    of the query words occur (case-insensitively) in its snippet.

    Args:
        publication_name: Title or keywords of the publication to look up.

    Returns:
        A message string. When a matching repository is found, the message is
        followed by a newline and the repository link; otherwise a message
        saying that no source code was found. An empty or whitespace-only
        query returns the "not found" message without calling the API.
    """
    # Function-scope import so the block does not depend on a file-level import.
    import os

    no_source_msg = "This paper doesn't have source code in github!"
    # Minimum share of query words that must appear in a result's snippet.
    threshold = 0.5

    # split() without an argument drops the empty strings that split(' ')
    # produces for repeated spaces; an empty string is "in" every snippet and
    # would inflate the match count.
    keywords = [word.casefold() for word in publication_name.split()]
    if not keywords:
        # Nothing to search for; also avoids dividing by zero below.
        return no_source_msg

    params = {
        # NOTE(review): this API key is committed in source. Prefer supplying
        # it through the SERPAPI_API_KEY environment variable, then revoke and
        # remove the literal fallback.
        "api_key": os.environ.get(
            "SERPAPI_API_KEY",
            "3fb62919a0e61a6a58cf9815798253799210ab69fbc3c9c9a81785c7cabcc3fa",
        ),
        "engine": "google",
        "q": publication_name,
        "location": "Austin, Texas, United States",
        "google_domain": "google.com",
        "gl": "us",
        "hl": "en",
        "as_sitesearch": "github.com",
    }

    results = GoogleSearch(params).get_dict()
    # The key is absent when the search yields no hits or returns an error,
    # so fall back to an empty list instead of raising KeyError.
    organic_results = results.get("organic_results", [])

    for result in organic_results:
        print(
            f"Title: {result.get('title', '')}\n"
            f"Summary: {result.get('snippet', '')}\n"
            f"Link: {result.get('link', '')}\n"
        )

    # Only the top three results are considered.
    for result in organic_results[:3]:
        link = result.get("link", "")
        if "https://github.com/" not in link:
            continue
        snippet = result.get("snippet", "").casefold()
        matched = sum(1 for word in keywords if word in snippet)
        # Compare the matched *ratio* against the threshold; the previous
        # check (count >= count / len) was always true.
        if matched / len(keywords) >= threshold:
            return "This paper has source code in github!\n" + link

    return no_source_msg
68
+
69
+ !pip install gradio
70
+
71
+ import gradio as gr
72
+
73
def greet(name):
    """Return the greeting ``"Hello <name>!"`` for the given name string."""
    pieces = ("Hello ", name, "!")
    return "".join(pieces)
75
+
76
+ demo = gr.Interface(fn=checkPaper, inputs="text", outputs="text")
77
+ demo.launch()