gamingflexer committed on
Commit
80ed9e0
·
1 Parent(s): dc3eeb1

dummy file updated & .gitignore

Browse files
.gitignore CHANGED
@@ -162,4 +162,6 @@ src/flagged/log.csv
162
  .vscode/PythonImportHelper-v2-Completion.json
163
  notebooks/*.pdf
164
  notebooks/notebooks/papers/*.jsonl
165
- src/data/papers/*.jsonl
 
 
 
162
  .vscode/PythonImportHelper-v2-Completion.json
163
  notebooks/*.pdf
164
  notebooks/notebooks/papers/*.jsonl
165
+ src/data/papers/*.jsonl
166
+ src/data/papers/*
167
+ src/notebooks/papers/*.jsonl
src/app.py CHANGED
@@ -6,8 +6,6 @@ from config import *
6
  from db.db_functions import get_correct_author_name, insert_papers_data, fetch_papers_data, get_unquine_authors
7
  from utils import compare_paper_ids
8
 
9
- unique_authors_df = get_unquine_authors()
10
-
11
  def plagiarism_checker(authors_name_fetch,number_of_results_fetch, progress=gr.Progress()):
12
  number_of_results_fetch = int(number_of_results_fetch)
13
  print(authors_name_fetch,number_of_results_fetch,type(number_of_results_fetch))
@@ -69,7 +67,7 @@ with gr.Blocks() as demo:
69
  'references', 'categories', 'comment', 'journal_ref', 'source',
70
  'summary', 'published'])
71
  with gr.Row():
72
- unquine_authors_output = gr.Dataframe(headers=["author_name"],value=unique_authors_df, label=" Authors Currently in our DB")
73
 
74
 
75
  with gr.Tab("Arxiv Plagiarism Fetcher & Save to DB"):
 
6
  from db.db_functions import get_correct_author_name, insert_papers_data, fetch_papers_data, get_unquine_authors
7
  from utils import compare_paper_ids
8
 
 
 
9
  def plagiarism_checker(authors_name_fetch,number_of_results_fetch, progress=gr.Progress()):
10
  number_of_results_fetch = int(number_of_results_fetch)
11
  print(authors_name_fetch,number_of_results_fetch,type(number_of_results_fetch))
 
67
  'references', 'categories', 'comment', 'journal_ref', 'source',
68
  'summary', 'published'])
69
  with gr.Row():
70
+ unquine_authors_output = gr.Dataframe(headers=["author_name"],value=get_unquine_authors(), label=" Authors Currently in our DB")
71
 
72
 
73
  with gr.Tab("Arxiv Plagiarism Fetcher & Save to DB"):
src/db/vector_fucntions.py ADDED
File without changes
src/scrapper/articles.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ from langchain_community.document_loaders import AsyncChromiumLoader
2
+ from langchain_community.document_transformers import BeautifulSoupTransformer
3
+
4
+ class GetSimilarArticles:
5
+
6
+ def __init__(self, paper_title_name: str):
7
+ self.paper_title_name = paper_title_name
8
+