"""Gradio front end for fetching an author's arXiv papers and storing them in the database."""

import logging
import os

import gradio as gr
import pandas as pd

from scrapper.main import ArxivPaper
from config import *
from db.db_functions import get_correct_author_name, insert_papers_data, fetch_papers_data
from utils import compare_paper_ids

# Example of driving the scraper directly, without the UI:
#
#     author_obj = ArxivPaper("Andrew Ng")
#     paper_links = author_obj.get_results_google(number_of_results=25)
#     paper_ids = author_obj.get_paper_id(paper_links)
#     author_obj.get_paper_details_batch(paper_ids=paper_ids, path="./data/papers")


def _read_saved_papers(authors_name: str) -> list:
    """Return the text content of every file in the author's local papers directory.

    The directory is ``<cwd>/data/papers/<authors_name with spaces replaced by "_">``.

    Raises:
        FileNotFoundError: if that directory does not exist.
    """
    papers_dir = os.path.join(
        os.getcwd(), "data", "papers", authors_name.replace(" ", "_")
    )
    contents = []
    for paper in os.listdir(papers_dir):
        with open(os.path.join(papers_dir, paper), "r") as f:
            contents.append(f.read())
    return contents


def plagiarism_checker(authors_name: str, number_of_results=5, progress=gr.Progress()):
    """Collect paper data for an author and pass it to ``insert_papers_data``.

    Paper links are looked up through ``ArxivPaper.get_results_google`` and
    converted to paper ids. If ``get_correct_author_name`` finds no matching
    author, every id is downloaded; otherwise only the ids returned by
    ``compare_paper_ids`` against the stored data are downloaded. The files
    found in the author's local papers directory are then read and handed to
    ``insert_papers_data``.

    Args:
        authors_name: Author name as typed by the user.
        number_of_results: Number of search results to request. The Gradio
            number widget supplies a float, or ``None`` when left empty; the
            value is converted to ``int`` and ``None`` falls back to 5.
        progress: Gradio progress tracker, injected by Gradio.

    Returns:
        The status string ``"Fetched Latest Papers"``.
    """
    # Normalise the widget value before it reaches the scraper.
    number_of_results = 5 if number_of_results is None else int(number_of_results)

    progress(0.2, desc="Collecting Links")
    author_obj = ArxivPaper(authors_name)
    db_author_name = get_correct_author_name(authors_name)
    paper_links = author_obj.get_results_google(number_of_results=number_of_results)
    paper_ids = author_obj.get_paper_id(paper_links)

    progress(0.4, desc="Collecting Papers")
    if db_author_name is None:
        print("No similar author found in the database")
        author_obj.get_paper_details_batch(paper_ids=paper_ids, path="./data/papers")
        progress(0.6, desc="Making summary")
        data_to_save = _read_saved_papers(authors_name)
    else:
        print(f"Found similar author in the database: {db_author_name}")
        data = fetch_papers_data(db_author_name)
        remaining_paper_ids = compare_paper_ids(data, paper_ids)
        progress(0.6, desc="Making summary")
        data_to_save = []
        if remaining_paper_ids:
            author_obj.get_paper_details_batch(
                paper_ids=remaining_paper_ids, path="./data/papers"
            )
            data_to_save = _read_saved_papers(authors_name)
        else:
            print("All papers already present in the database")

    progress(0.8, desc="Saving to Database")
    insert_papers_data(data_to_save, authors_name)
    return "Fetched Latest Papers"


def fetch_papers_data_df(authors_name: str, progress=gr.Progress()):
    """Fetch the stored papers for an author and return them as a DataFrame.

    Args:
        authors_name: Author name passed to ``fetch_papers_data``.
        progress: Gradio progress tracker, injected by Gradio.

    Returns:
        A ``pandas.DataFrame`` built from element 1 of the value returned by
        ``fetch_papers_data(authors_name, all=True)``.
    """
    progress(0.2, desc="Fetching Papers")
    fetched_data = fetch_papers_data(authors_name, all=True)
    progress(0.8, desc="Making DataFrame")
    return pd.DataFrame(fetched_data[1])


with gr.Blocks() as demo:
    with gr.Tab("Get Papers Data"):
        with gr.Row():
            authors_name_paper = gr.Textbox(label="Enter Author's Name")
            submit_button_tab_2 = gr.Button("Start")
        with gr.Row():
            dataframe_output = gr.Dataframe(
                headers=[
                    'doi_no', 'title', 'summary', 'authors', 'year', 'pdf_link',
                    'references', 'categories', 'comment', 'journal_ref',
                    'source', 'primary_category', 'published', 'author_name',
                ]
            )

    with gr.Tab("Arxiv Plagiarism Fetcher & Save to DB"):
        with gr.Row():
            # Each tab gets its own variable names: reusing one name across
            # tabs would leave the click handler below bound to whichever
            # widget was created last.
            fetcher_authors_name = gr.Textbox(label="Enter Author's Name")
            fetcher_number_of_results = gr.Number(label="Number of results - Min - 5")
            submit_button_tab_1 = gr.Button("Start")
        with gr.Row():
            completed = gr.Textbox(label="Completed")

    with gr.Tab("Arxiv Plagiarism Checker"):
        with gr.Row():
            # No click handler is attached to this tab's button.
            checker_authors_name = gr.Textbox(label="Enter Author's Name")
            checker_number_of_results = gr.Number(label="Number of results - Min - 5")
            submit_button = gr.Button("Start")

    submit_button_tab_1.click(
        fn=plagiarism_checker,
        inputs=[fetcher_authors_name, fetcher_number_of_results],
        outputs=completed,
    )
    submit_button_tab_2.click(
        fn=fetch_papers_data_df,
        inputs=[authors_name_paper],
        outputs=dataframe_output,
    )

demo.launch()