Spaces:
Runtime error
Runtime error
# -*- coding: utf-8 -*-
"""21C11027.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1z_jG4sUgsIhZRoikoXxYMHNAMpiYlWAW

**ADVANCED TEXT CORPUS MINING**

* **Full name:** Huỳnh Viết Thám
* **Student ID:** 21C11027

# Install the required libraries
"""
# Dependencies -- install these outside of this script (for example with
# `pip install <name>` in a shell, or by listing them in requirements.txt).
# The notebook-only `!pip install ...` shell magic is not valid Python and
# makes a plain `python` run fail with a SyntaxError before anything executes:
#   beautifulsoup4
#   google
#   google-search-results
from serpapi import GoogleSearch

# Notebook-style prompt, kept for interactive runs. input() raises EOFError
# when no stdin is attached, so fall back to an empty keyword instead of
# aborting the whole script at import time.
try:
    publication_name = input("Please input the keyword for searching: ")
except EOFError:
    publication_name = ""

# SECRET_TOKEN: replace it with your own API key obtained from SerpApi.
def checkPaper(publication_name):
    """Report whether a GitHub repository seems to exist for a publication.

    Runs a Google search through SerpApi restricted to ``github.com``
    (``as_sitesearch``), prints every organic result, then inspects the top
    three results. A result is accepted when its link starts with
    ``https://github.com/`` and at least 50% of the words of
    ``publication_name`` occur (case-insensitively) in its snippet.

    Args:
        publication_name: Title or keywords of the paper to look up.

    Returns:
        A human-readable message: a prompt to enter a keyword when the input
        is empty, a "has source code" message followed by the repository
        link, or a "doesn't have source code" message.
    """
    import os  # local import so the block does not rely on file-level imports

    # split() without an argument drops the empty tokens produced by
    # leading/trailing/repeated spaces, which would otherwise "match" every
    # snippet ('' is a substring of any string).
    keywords = (publication_name or "").split()
    if not keywords:
        # Nothing to search for; avoid a pointless (and billable) API call.
        return "Please enter a keyword to search for."

    params = {
        # Prefer a key supplied through the environment so that no real
        # secret has to be committed; "SECRET_TOKEN" remains the placeholder
        # to replace by hand.
        "api_key": os.environ.get("SERPAPI_API_KEY", "SECRET_TOKEN"),
        "engine": "google",
        "q": publication_name,
        "location": "Austin, Texas, United States",
        "google_domain": "google.com",
        "gl": "us",
        "hl": "en",
        "as_sitesearch": "github.com",
    }
    results = GoogleSearch(params).get_dict()

    # A failed or empty search may come back without "organic_results";
    # treat that as "no results" rather than raising KeyError.
    organic_results = results.get("organic_results", [])
    for result in organic_results:
        title = result.get("title", "")
        snippet = result.get("snippet", "")
        link = result.get("link", "")
        print(f"Title: {title}\nSummary: {snippet}\nLink: {link}\n")

    # Share of keywords that must appear in the snippet for a match.
    threshold = 0.5
    # Only the top three results are considered.
    for result in organic_results[:3]:
        link = result.get("link", "")
        if not link.startswith("https://github.com/"):
            continue
        snippet = result.get("snippet", "").casefold()
        hits = sum(1 for word in keywords if word.casefold() in snippet)
        # The original compared `count >= count / len(words)`, which is
        # always true; compare the matched ratio against the threshold.
        if hits / len(keywords) >= threshold:
            return "This paper has source code in github!\n" + link

    return "This paper doesn't have source code in github!"
# Dependency: gradio -- install it outside of this script (e.g. via
# requirements.txt). The notebook-only `!pip install gradio` shell magic is
# not valid Python and raises a SyntaxError in a plain `python` run.
import gradio as gr
def greet(name):
    """Return the greeting ``"Hello <name>!"`` for the given name string."""
    greeting = "Hello " + name
    return greeting + "!"
# Web UI: a single text input is passed to checkPaper and the returned
# message is shown as text output.
demo = gr.Interface(
    fn=checkPaper,
    inputs="text",
    outputs="text",
)
# Start the Gradio server for the interface defined above.
demo.launch()