Spaces:
Runtime error
Runtime error
File size: 4,019 Bytes
72837e0 289f685 db9914d 289f685 72837e0 db9914d 2e9acdc db9914d 2e9acdc db9914d 2e9acdc db9914d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 |
import logging
import os

import gradio as gr
import pandas as pd

from scrapper.main import ArxivPaper
from config import *
from db.db_functions import get_correct_author_name, insert_papers_data, fetch_papers_data
from utils import compare_paper_ids
"""
author_obj = ArxivPaper("Andrew Ng")
paper_links = author_obj.get_results_google(number_of_results=25)
paper_ids = author_obj.get_paper_id(paper_links)
author_obj.get_paper_details_batch(paper_ids=paper_ids, path="./data/papers")
"""
def _read_saved_papers(authors_name: str) -> list:
    """Return the text content of every file saved locally for *authors_name*.

    Files are read from ``<cwd>/data/papers/<name>``, where ``<name>`` is
    *authors_name* with spaces replaced by underscores.
    """
    local_saved_papers = os.path.join(
        os.getcwd(), "data", "papers", authors_name.replace(" ", "_")
    )
    # NOTE(review): every file in the directory is read, not only the ones
    # downloaded by the current call; files left from earlier runs are handed
    # to insert_papers_data again -- confirm the insert de-duplicates.
    saved_papers = []
    for paper in os.listdir(local_saved_papers):
        paper_path = os.path.join(local_saved_papers, paper)
        with open(paper_path, "r") as f:
            saved_papers.append(f.read())
    return saved_papers


def plagiarism_checker(authors_name: str, number_of_results=5, progress=gr.Progress()):
    """Download an author's papers and save them to the database.

    Paper links are collected with ``ArxivPaper.get_results_google`` and turned
    into paper ids with ``get_paper_id``. When ``get_correct_author_name``
    returns ``None`` all of those ids are downloaded; otherwise only the ids
    returned by ``compare_paper_ids`` (presumably the ones not stored yet --
    confirm against ``utils``) are downloaded. The locally saved paper files
    are then read and passed to ``insert_papers_data``.

    Args:
        authors_name: Author name typed by the user.
        number_of_results: Forwarded unchanged to ``get_results_google``.
        progress: Gradio progress tracker, updated at each stage.

    Returns:
        The status string ``"Fetched Latest Papers"``.
    """
    progress(0.2, desc="Collecting Links")
    author_obj = ArxivPaper(authors_name)
    db_author_name = get_correct_author_name(authors_name)
    paper_links = author_obj.get_results_google(number_of_results=number_of_results)
    paper_ids = author_obj.get_paper_id(paper_links)

    progress(0.4, desc="Collecting Papers")
    if db_author_name is None:
        print("No similar author found in the database")
        author_obj.get_paper_details_batch(paper_ids=paper_ids, path="./data/papers")
        progress(0.6, desc="Making summary")
        data_to_save = _read_saved_papers(authors_name)
    else:
        print(f"Found similar author in the database: {db_author_name}")
        data = fetch_papers_data(db_author_name)
        remaining_paper_ids = compare_paper_ids(data, paper_ids)
        progress(0.6, desc="Making summary")
        # Truthiness (rather than ``!= []``) also treats an empty tuple/set or
        # None as "nothing left to download".
        if remaining_paper_ids:
            author_obj.get_paper_details_batch(
                paper_ids=remaining_paper_ids, path="./data/papers"
            )
            data_to_save = _read_saved_papers(authors_name)
        else:
            print("All papers already present in the database")
            data_to_save = []

    progress(0.8, desc="Saving to Database")
    # NOTE(review): rows are stored under the name the user typed, not under
    # db_author_name -- confirm this is intended when a similar author exists.
    insert_papers_data(data_to_save, authors_name)
    return "Fetched Latest Papers"
def fetch_papers_data_df(authors_name: str, progress=gr.Progress()):
    """Load the stored papers of an author into a pandas DataFrame.

    Calls ``fetch_papers_data(authors_name, all=True)`` and builds the frame
    from element ``[1]`` of its result (presumably the paper rows -- confirm
    against ``db.db_functions``).

    Args:
        authors_name: Author name to look up.
        progress: Gradio progress tracker, updated before and after the fetch.

    Returns:
        A ``pandas.DataFrame`` built from the fetched data.
    """
    progress(0.2, desc="Fetching Papers")
    query_result = fetch_papers_data(authors_name, all=True)

    progress(0.8, desc="Making DataFrame")
    paper_rows = query_result[1]
    papers_frame = pd.DataFrame(paper_rows)
    return papers_frame
# Gradio UI: three tabs. Every component gets its own variable name so that an
# event handler can only be wired to the inputs of the tab its button lives in.
with gr.Blocks() as demo:
    # Tab 1: show the papers already stored for an author as a table.
    with gr.Tab("Get Papers Data"):
        with gr.Row():
            authors_name_paper = gr.Textbox(label="Enter Author's Name")
            submit_button_tab_2 = gr.Button("Start")
        with gr.Row():
            dataframe_output = gr.Dataframe(
                headers=[
                    'doi_no', 'title', 'summary', 'authors', 'year', 'pdf_link',
                    'references', 'categories', 'comment', 'journal_ref', 'source',
                    'primary_category', 'published', 'author_name',
                ]
            )

    # Tab 2: download an author's papers and save them to the database.
    with gr.Tab("Arxiv Plagiarism Fetcher & Save to DB"):
        with gr.Row():
            authors_name = gr.Textbox(label="Enter Author's Name")
            # value=5 matches plagiarism_checker's default; precision=0 makes
            # Gradio hand the callback an int instead of a float.
            number_of_results = gr.Number(
                label="Number of results - Min - 5", value=5, precision=0
            )
            submit_button_tab_1 = gr.Button("Start")
        with gr.Row():
            completed = gr.Textbox(label="Completed")

    # Tab 3: plagiarism checker. No handler is attached to its button yet.
    # These components previously reused the names `authors_name` and
    # `number_of_results`, which silently redirected tab 2's button to read
    # this tab's inputs; they now have their own names.
    with gr.Tab("Arxiv Plagiarism Checker"):
        with gr.Row():
            checker_authors_name = gr.Textbox(label="Enter Author's Name")
            checker_number_of_results = gr.Number(
                label="Number of results - Min - 5", value=5, precision=0
            )
            submit_button = gr.Button("Start")

    # Event wiring: each button reads the inputs of its own tab.
    submit_button_tab_1.click(
        fn=plagiarism_checker,
        inputs=[authors_name, number_of_results],
        outputs=completed,
    )
    submit_button_tab_2.click(
        fn=fetch_papers_data_df,
        inputs=[authors_name_paper],
        outputs=dataframe_output,
    )

demo.launch()