Spaces:

hvtham
/

text_mining_21C11027

Runtime error

App Files Files Community

text_mining_21C11027 / 21c11027.py

hvtham

update SECRET_TOKEN

63fc868 verified 10 months ago

raw

history blame

1.97 kB

	# -*- coding: utf-8 -*-
	"""21C11027.ipynb

	Automatically generated by Colaboratory.

	Original file is located at
	https://colab.research.google.com/drive/1z_jG4sUgsIhZRoikoXxYMHNAMpiYlWAW

	KHAI THÁC NGỮ LIỆU VĂN BẢN NÂNG CAO

	* Họ và tên: Huỳnh Viết Thám
	* Mã số học viên: 21C11027

	# Cài đặt thư viện cần thiết
	"""

	# NOTE: "!pip install ..." is IPython/Colab shell syntax and a SyntaxError in a
	# plain .py file, so the install commands are kept below as comments only.
	# Install these packages before running the script:
	#   pip install beautifulsoup4
	#   pip install google
	#   pip install google-search-results

	# Ask once on the console for a search keyword. Nothing visible in this file
	# reads this module-level value (checkPaper receives its own argument), so a
	# missing or unreadable stdin must not stop the rest of the script.
	try:
	    publication_name = input("Please input the keyword for searching: ")
	except (EOFError, OSError):
	    # No interactive stdin is available: continue with an empty keyword.
	    publication_name = ""

	from serpapi import GoogleSearch

	# SECRET_TOKEN is a placeholder: replace it with an API key obtained from SerpApi.
	def checkPaper(publication_name):
	    """Look for a GitHub repository matching a publication.

	    Runs a Google search through SerpApi restricted to github.com, prints
	    every organic result, then inspects the top three. A result is accepted
	    when its link contains "https://github.com/" and at least half of the
	    query words occur in its snippet (compared case-insensitively).

	    Args:
	        publication_name: Title or keywords of the publication.

	    Returns:
	        A message saying whether source code was found on GitHub. When it
	        was, the message ends with the link of the first accepted result.
	    """
	    not_found = "This paper doesn't have source code in github!"

	    # split() without an argument drops the empty strings that split(' ')
	    # produces for repeated spaces; an empty string is "in" every snippet
	    # and would inflate the match count.
	    words = [word.casefold() for word in (publication_name or "").split()]
	    if not words:
	        # Nothing to search for: skip the API call entirely.
	        return not_found

	    params = {
	        "api_key": "SECRET_TOKEN",  # placeholder for the SerpApi key
	        "engine": "google",
	        "q": publication_name,
	        "location": "Austin, Texas, United States",
	        "google_domain": "google.com",
	        "gl": "us",
	        "hl": "en",
	        "as_sitesearch": "github.com",
	    }

	    search = GoogleSearch(params)
	    results = search.get_dict()

	    # The response may lack "organic_results" (no hits, or an error reply),
	    # so treat a missing key as an empty result list instead of crashing.
	    organic_results = results.get("organic_results", [])

	    for result in organic_results:
	        print(
	            f"Title: {result.get('title', '')}\n"
	            f"Summary: {result.get('snippet', '')}\n"
	            f"Link: {result.get('link', '')}\n"
	        )

	    # Minimum share of query words that must appear in a snippet.
	    threshold = 0.5

	    # Only the top three results are considered.
	    for result in organic_results[:3]:
	        link = result.get("link", "")
	        if "https://github.com/" not in link:
	            continue
	        snippet = result.get("snippet", "").casefold()
	        matched = sum(word in snippet for word in words)
	        # Compare the matched fraction with the threshold; the previous
	        # check (count >= count / len) was true for every result.
	        if matched / len(words) >= threshold:
	            return "This paper has source code in github!\n" + link

	    return not_found

	# NOTE: "!pip install gradio" is IPython/Colab shell syntax and a SyntaxError
	# in a plain .py file, so it is kept as a comment. Install gradio beforehand:
	#   pip install gradio

	import gradio as gr

	def greet(name):
	    """Return the greeting "Hello <name>!" for the given name."""
	    prefix = "Hello "
	    suffix = "!"
	    return prefix + name + suffix

	# Wrap checkPaper in a Gradio interface with one text input and one text
	# output, then start serving it.
	demo = gr.Interface(
	    fn=checkPaper,
	    inputs="text",
	    outputs="text",
	)
	demo.launch()