gamingflexer committed on
Commit
d1e24cf
·
1 Parent(s): 452e8f6

Tested & debugged

Browse files
Files changed (1) hide show
  1. src/app.py +21 -19
src/app.py CHANGED
@@ -13,17 +13,19 @@ paper_ids = author_obj.get_paper_id(paper_links)
13
  author_obj.get_paper_details_batch(paper_ids=paper_ids, path="./data/papers")
14
  """
15
 
16
- def plagiarism_checker(authors_name: str,number_of_results=5, progress=gr.Progress()):
 
 
17
  progress(0.2, desc="Collecting Links")
18
- author_obj = ArxivPaper(authors_name)
19
- db_author_name = get_correct_author_name(authors_name)
20
- paper_links = author_obj.get_results_google(number_of_results=number_of_results)
21
  paper_ids = author_obj.get_paper_id(paper_links)
22
  progress(0.4, desc="Collecting Papers")
23
  if db_author_name is None:
24
  print("No similar author found in the database")
25
  author_obj.get_paper_details_batch(paper_ids=paper_ids, path="./data/papers")
26
- local_saved_papers = os.path.join(os.getcwd(), "data", "papers", authors_name.replace(" ", "_"))
27
  progress(0.6, desc="Making summary")
28
  data_to_save = []
29
  for paper in os.listdir(local_saved_papers):
@@ -33,12 +35,12 @@ def plagiarism_checker(authors_name: str,number_of_results=5, progress=gr.Progre
33
  else:
34
  print(f"Found similar author in the database: {db_author_name}")
35
  data = fetch_papers_data(db_author_name)
36
- reamining_paper_ids = compare_paper_ids(data,paper_ids)
37
  progress(0.6, desc="Making summary")
38
  data_to_save = []
39
- if reamining_paper_ids != []:
40
- author_obj.get_paper_details_batch(paper_ids=reamining_paper_ids, path="./data/papers")
41
- local_saved_papers = os.path.join(os.getcwd(), "data", "papers", authors_name.replace(" ", "_"))
42
  for paper in os.listdir(local_saved_papers):
43
  paper_path = os.path.join(local_saved_papers, paper)
44
  with open(paper_path, "r") as f:
@@ -47,14 +49,16 @@ def plagiarism_checker(authors_name: str,number_of_results=5, progress=gr.Progre
47
  print("All papers already present in the database")
48
 
49
  progress(0.8, desc="Saving to Database")
50
- insert_papers_data(data_to_save, authors_name)
51
  return "Fetched Latest Papers"
52
 
53
  def fetch_papers_data_df(authors_name: str, progress=gr.Progress()):
54
  progress(0.2, desc="Fetching Papers")
55
- fetched_data = fetch_papers_data(authors_name,all=True)
 
 
56
  progress(0.8, desc="Making DataFrame")
57
- return pd.DataFrame(fetched_data[1])
58
 
59
  with gr.Blocks() as demo:
60
 
@@ -63,18 +67,17 @@ with gr.Blocks() as demo:
63
  authors_name_paper = gr.Textbox(label="Enter Author's Name")
64
  submit_button_tab_2 = gr.Button("Start")
65
  with gr.Row():
66
- dataframe_output = gr.Dataframe(headers=['doi_no', 'title', 'summary', 'authors', 'year', 'pdf_link',
67
  'references', 'categories', 'comment', 'journal_ref', 'source',
68
- 'primary_category', 'published','author_name'])
69
 
70
  with gr.Tab("Arxiv Plagiarism Fetcher & Save to DB"):
71
  with gr.Row():
72
- authors_name = gr.Textbox(label="Enter Author's Name")
73
- number_of_results = gr.Number(label="Number of results - Min - 5")
74
  submit_button_tab_1 = gr.Button("Start")
75
  with gr.Row():
76
  completed = gr.Textbox(label="Completed")
77
-
78
 
79
  with gr.Tab("Arxiv Plagiarism Checker"):
80
  with gr.Row():
@@ -82,8 +85,7 @@ with gr.Blocks() as demo:
82
  number_of_results = gr.Number(label="Number of results - Min - 5")
83
  submit_button = gr.Button("Start")
84
 
85
-
86
- submit_button_tab_1.click(fn=plagiarism_checker,inputs=[authors_name, number_of_results] ,outputs= completed)
87
  submit_button_tab_2.click(fn=fetch_papers_data_df,inputs=[authors_name_paper] ,outputs=dataframe_output)
88
 
89
  demo.launch()
 
13
  author_obj.get_paper_details_batch(paper_ids=paper_ids, path="./data/papers")
14
  """
15
 
16
+ def plagiarism_checker(authors_name_fetch,number_of_results_fetch, progress=gr.Progress()):
17
+ number_of_results_fetch = int(number_of_results_fetch)
18
+ print(authors_name_fetch,number_of_results_fetch,type(number_of_results_fetch))
19
  progress(0.2, desc="Collecting Links")
20
+ author_obj = ArxivPaper(authors_name_fetch)
21
+ db_author_name = get_correct_author_name(authors_name_fetch)
22
+ paper_links = author_obj.get_results_google(number_of_results=number_of_results_fetch)
23
  paper_ids = author_obj.get_paper_id(paper_links)
24
  progress(0.4, desc="Collecting Papers")
25
  if db_author_name is None:
26
  print("No similar author found in the database")
27
  author_obj.get_paper_details_batch(paper_ids=paper_ids, path="./data/papers")
28
+ local_saved_papers = os.path.join(os.getcwd(), "data", "papers", authors_name_fetch.replace(" ", "_"))
29
  progress(0.6, desc="Making summary")
30
  data_to_save = []
31
  for paper in os.listdir(local_saved_papers):
 
35
  else:
36
  print(f"Found similar author in the database: {db_author_name}")
37
  data = fetch_papers_data(db_author_name)
38
+ remaining_paper_ids = compare_paper_ids(data,paper_ids)
39
  progress(0.6, desc="Making summary")
40
  data_to_save = []
41
+ if remaining_paper_ids != []:
42
+ author_obj.get_paper_details_batch(paper_ids=remaining_paper_ids, path="./data/papers")
43
+ local_saved_papers = os.path.join(os.getcwd(), "data", "papers", authors_name_fetch.replace(" ", "_"))
44
  for paper in os.listdir(local_saved_papers):
45
  paper_path = os.path.join(local_saved_papers, paper)
46
  with open(paper_path, "r") as f:
 
49
  print("All papers already present in the database")
50
 
51
  progress(0.8, desc="Saving to Database")
52
+ insert_papers_data(data_to_save, authors_name_fetch)
53
  return "Fetched Latest Papers"
54
 
55
  def fetch_papers_data_df(authors_name: str, progress=gr.Progress()):
56
  progress(0.2, desc="Fetching Papers")
57
+ fetched_data = fetch_papers_data(authors_name,fields_to_query=['doi_no', 'author_name', 'title', 'authors', 'year', 'pdf_link',
58
+ 'references', 'categories', 'comment', 'journal_ref', 'source',
59
+ 'summary', 'published'])
60
  progress(0.8, desc="Making DataFrame")
61
+ return pd.DataFrame(fetched_data)
62
 
63
  with gr.Blocks() as demo:
64
 
 
67
  authors_name_paper = gr.Textbox(label="Enter Author's Name")
68
  submit_button_tab_2 = gr.Button("Start")
69
  with gr.Row():
70
+ dataframe_output = gr.Dataframe(headers=['doi_no', 'author_name', 'title', 'authors', 'year', 'pdf_link',
71
  'references', 'categories', 'comment', 'journal_ref', 'source',
72
+ 'summary', 'published'])
73
 
74
  with gr.Tab("Arxiv Plagiarism Fetcher & Save to DB"):
75
  with gr.Row():
76
+ authors_name_fetch = gr.Textbox(label="Enter Author's Name")
77
+ number_of_results_fetch = gr.Textbox(label="Number of results - Min - 5")
78
  submit_button_tab_1 = gr.Button("Start")
79
  with gr.Row():
80
  completed = gr.Textbox(label="Completed")
 
81
 
82
  with gr.Tab("Arxiv Plagiarism Checker"):
83
  with gr.Row():
 
85
  number_of_results = gr.Number(label="Number of results - Min - 5")
86
  submit_button = gr.Button("Start")
87
 
88
+ submit_button_tab_1.click(fn=plagiarism_checker,inputs=[authors_name_fetch, number_of_results_fetch] ,outputs= completed)
 
89
  submit_button_tab_2.click(fn=fetch_papers_data_df,inputs=[authors_name_paper] ,outputs=dataframe_output)
90
 
91
  demo.launch()