File size: 1,973 Bytes
ad103e1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63fc868
ad103e1
 
63fc868
ad103e1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
# -*- coding: utf-8 -*-
"""21C11027.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1z_jG4sUgsIhZRoikoXxYMHNAMpiYlWAW

**KHAI THÁC NGỮ LIỆU VĂN BẢN NÂNG CAO**

*   **Họ và tên:** Huỳnh Viết Thám
*   **Mã số học viên:** 21C11027

# Cài đặt thư viện cần thiết
"""

# Notebook shell commands (IPython `!` syntax) that install the notebook's
# dependencies; they are not valid in a plain Python interpreter.
# NOTE(review): beautifulsoup4 and google are not imported anywhere in the
# visible code — confirm whether they are still needed.
!pip install beautifulsoup4
!pip install google
# NOTE(review): presumably the distribution that provides the `serpapi`
# module imported below — confirm.
!pip install google-search-results

# Prompt for a search keyword on the console.
# NOTE(review): nothing in the visible code reads this module-level value;
# checkPaper receives its own `publication_name` argument from the caller.
publication_name = input("Please input the keyword for searching: ")

from serpapi import GoogleSearch

# "SECRET_TOKEN" below is a placeholder: substitute a real SerpApi API key.
def checkPaper(publication_name):
    """Report whether a GitHub repository appears to match a publication.

    Runs a Google search restricted to github.com through SerpApi, prints
    every organic result, then inspects the top three. A result is accepted
    when its link points at GitHub and at least 50% of the
    whitespace-separated query words occur (case-sensitively) in its snippet.

    Args:
        publication_name: Paper title or keywords to search for.

    Returns:
        A message saying whether matching source code was found; on success
        the message ends with the link of the first accepted result. If the
        response has no organic results but carries an ``error`` entry, a
        "Search failed" message containing that error is returned instead.
    """
    params = {
        "api_key": "SECRET_TOKEN",
        "engine": "google",
        "q": publication_name,
        "location": "Austin, Texas, United States",
        "google_domain": "google.com",
        "gl": "us",
        "hl": "en",
        "as_sitesearch": "github.com",
    }

    results = GoogleSearch(params).get_dict()

    # An error payload (bad key, no hits, ...) has no "organic_results" key;
    # surface it instead of crashing with KeyError.
    organic_results = results.get("organic_results") or []
    if not organic_results and results.get("error"):
        return f"Search failed: {results['error']}"

    # Not every organic result carries a snippet, so read fields defensively.
    for result in organic_results:
        print(
            f"Title: {result.get('title', '')}\n"
            f"Summary: {result.get('snippet', '')}\n"
            f"Link: {result.get('link', '')}\n"
        )

    # Minimum fraction of query words that must appear in a snippet.
    threshold = 0.5
    # split() (no argument) drops empty tokens from repeated spaces; an empty
    # token would otherwise "match" every snippet.
    keywords = publication_name.split()
    if not keywords:
        return "This paper doesn't have source code in github!"

    # Only the top three results are considered.
    for result in organic_results[:3]:
        link = result.get("link", "")
        if "https://github.com/" not in link:
            continue
        snippet = result.get("snippet", "")
        matched = sum(word in snippet for word in keywords)
        # Compare the matched *ratio* with the threshold. The previous check
        # (count >= count / len) was always true, so the threshold was ignored.
        if matched / len(keywords) >= threshold:
            return "This paper has source code in github!\n" + link

    return "This paper doesn't have source code in github!"

# Notebook shell command (IPython `!` syntax): install Gradio, which is
# imported below as `gr`.
!pip install gradio

import gradio as gr

def greet(name):
    """Return the greeting ``"Hello <name>!"`` for the given name string."""
    greeting = "Hello " + name
    return greeting + "!"

# Wrap checkPaper in a Gradio interface with one text input and one text
# output: the submitted text becomes `publication_name`, and the returned
# message is displayed as the result.
demo = gr.Interface(fn=checkPaper, inputs="text", outputs="text")
# Start serving the interface.
demo.launch()