# -*- coding: utf-8 -*-
"""21C11027.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1z_jG4sUgsIhZRoikoXxYMHNAMpiYlWAW

**ADVANCED TEXT CORPUS MINING**

*   **Full name:** Huỳnh Viết Thám
*   **Student ID:** 21C11027

# Install the required libraries
"""

# Notebook shell magics are not valid Python syntax in a .py file, so the
# installs are kept as comments. Run them in a notebook cell (with the leading
# "!") or in a shell (without it) before using this script:
# !pip install beautifulsoup4
# !pip install google
# !pip install google-search-results
# !pip install gradio

import os

# NOTE(security): this key is committed in plain text and should be treated as
# leaked. It is kept only as a fallback so the script keeps working; rotate it
# and provide the real key through the SERPAPI_API_KEY environment variable.
_FALLBACK_API_KEY = "3fb62919a0e61a6a58cf9815798253799210ab69fbc3c9c9a81785c7cabcc3fa"

# A result is accepted when at least this fraction of the query keywords
# appears in its snippet.
MATCH_THRESHOLD = 0.5

# Only the first few organic results are considered as candidates.
TOP_RESULTS = 3

GITHUB_PREFIX = "https://github.com/"


def _keyword_match_ratio(keywords, snippet):
    """Return the fraction of *keywords* found in *snippet*.

    Matching is a case-insensitive substring test. An empty keyword list
    yields 0.0 instead of dividing by zero.
    """
    if not keywords:
        return 0.0
    haystack = snippet.casefold()
    hits = sum(1 for word in keywords if word.casefold() in haystack)
    return hits / len(keywords)


def _search_github(publication_name):
    """Run a Google search (through SerpApi) restricted to github.com.

    Returns the response dictionary produced by ``GoogleSearch.get_dict()``.
    """
    # Imported here so the module can be imported (and the matching logic
    # reused) on machines where the SerpApi client is not installed.
    from serpapi import GoogleSearch

    params = {
        "api_key": os.environ.get("SERPAPI_API_KEY", _FALLBACK_API_KEY),
        "engine": "google",
        "q": publication_name,
        "location": "Austin, Texas, United States",
        "google_domain": "google.com",
        "gl": "us",
        "hl": "en",
        "as_sitesearch": "github.com",
    }
    return GoogleSearch(params).get_dict()


def checkPaper(publication_name, results=None):
    """Report whether a publication appears to have source code on GitHub.

    Every organic result is printed (title, summary, link). Then the first
    ``TOP_RESULTS`` results are scanned; the first one whose link starts with
    ``https://github.com/`` and whose snippet contains at least
    ``MATCH_THRESHOLD`` of the query keywords is reported.

    Args:
        publication_name: Search keywords, typically the paper title.
        results: Optional search response dictionary (with an
            ``"organic_results"`` list of dicts carrying ``title``,
            ``snippet`` and ``link``). When omitted, a live search is made.

    Returns:
        A message saying whether source code was found; on success the
        message is followed by a newline and the matching link.
    """
    if results is None:
        results = _search_github(publication_name)

    # The key is absent when the search has no hits or returned an error.
    organic_results = results.get("organic_results") or []

    for result in organic_results:
        print(
            f"Title: {result.get('title', '')}\n"
            f"Summary: {result.get('snippet', '')}\n"
            f"Link: {result.get('link', '')}\n"
        )

    # split() without arguments drops empty tokens, so a blank query cannot
    # produce an empty keyword that would match every snippet.
    keywords = publication_name.split()

    for result in organic_results[:TOP_RESULTS]:
        link = result.get("link", "")
        if not link.startswith(GITHUB_PREFIX):
            continue
        ratio = _keyword_match_ratio(keywords, result.get("snippet", ""))
        if ratio >= MATCH_THRESHOLD:
            return "This paper has source code in github!\n" + link

    return "This paper doesn't have source code in github!"


def greet(name):
    """Return a greeting for *name* (sample function, not wired to the UI)."""
    return "Hello " + name + "!"


if __name__ == "__main__":
    import gradio as gr

    # Kept from the notebook: the value is stored but not passed to the
    # interface below, which collects its own text input.
    publication_name = input("Please input the keyword for searching: ")

    demo = gr.Interface(fn=checkPaper, inputs="text", outputs="text")
    demo.launch()