eljanmahammadli committed on
Commit
03fd59b
·
1 Parent(s): c45480c
.gitattributes CHANGED
File without changes
.gitignore CHANGED
@@ -1 +1,3 @@
1
- _pycache_/
 
 
 
1
+ _pycache_
2
+ .env
3
+ nohup.out
README.md CHANGED
File without changes
__pycache__/ai_generate.cpython-310.pyc DELETED
Binary file (1.87 kB)
 
__pycache__/ai_generate.cpython-39.pyc CHANGED
Binary files a/__pycache__/ai_generate.cpython-39.pyc and b/__pycache__/ai_generate.cpython-39.pyc differ
 
__pycache__/app.cpython-39.pyc ADDED
Binary file (19.2 kB). View file
 
__pycache__/gptzero_free.cpython-310.pyc DELETED
Binary file (3.58 kB)
 
__pycache__/gptzero_free.cpython-39.pyc DELETED
Binary file (3.58 kB)
 
__pycache__/humanize.cpython-310.pyc DELETED
Binary file (2.46 kB)
 
__pycache__/humanize.cpython-39.pyc CHANGED
Binary files a/__pycache__/humanize.cpython-39.pyc and b/__pycache__/humanize.cpython-39.pyc differ
 
ai_generate.py CHANGED
@@ -3,13 +3,57 @@ from openai import OpenAI
3
  import os
4
  from transformers import pipeline
5
  from groq import Groq
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
  groq_client = Groq(
8
- api_key=os.environ.get("groq_key"),
9
  )
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
- def generate_groq(text, model):
 
13
  completion = groq_client.chat.completions.create(
14
  model=model,
15
  messages=[
@@ -31,6 +75,12 @@ def generate_groq(text, model):
31
  response += chunk.choices[0].delta.content or ""
32
  return response
33
 
 
 
 
 
 
 
34
 
35
  def generate_openai(text, model, openai_client):
36
  message = [{"role": "user", "content": text}]
@@ -40,15 +90,15 @@ def generate_openai(text, model, openai_client):
40
  return response.choices[0].message.content
41
 
42
 
43
- def generate(text, model, api):
44
  if model == "Llama 3":
45
- return generate_groq(text, "llama3-70b-8192")
46
  elif model == "Groq":
47
- return generate_groq(text, "llama3-groq-70b-8192-tool-use-preview")
48
  elif model == "Mistral":
49
- return generate_groq(text, "mixtral-8x7b-32768")
50
  elif model == "Gemma":
51
- return generate_groq(text, "gemma2-9b-it")
52
  elif model == "OpenAI GPT 3.5":
53
  try:
54
  openai_client = OpenAI(api_key=api)
 
3
  import os
4
  from transformers import pipeline
5
  from groq import Groq
6
+ from langchain_community.document_loaders import PyMuPDFLoader
7
+ from langchain_community.document_loaders import TextLoader
8
+ from langchain_community.embeddings.sentence_transformer import (
9
+ SentenceTransformerEmbeddings,
10
+ )
11
+ from langchain_community.vectorstores import Chroma
12
+ from langchain_text_splitters import CharacterTextSplitter
13
+ from langchain import hub
14
+ from langchain_core.output_parsers import StrOutputParser
15
+ from langchain_core.runnables import RunnablePassthrough
16
+ from langchain.chains import RetrievalQA
17
+ from langchain_groq import ChatGroq
18
+ from dotenv import load_dotenv
19
+ load_dotenv()
20
 
21
  groq_client = Groq(
22
+ api_key=os.environ.get("GROQ_API_KEY"),
23
  )
24
 
25
+ def create_db_with_langchain(path):
26
+ loader = PyMuPDFLoader(path)
27
+ data = loader.load()
28
+ # split it into chunks
29
+ text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
30
+ docs = text_splitter.split_documents(data)
31
+
32
+ # create the open-source embedding function
33
+ embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
34
+
35
+ # load it into Chroma
36
+ db = Chroma.from_documents(docs, embedding_function)
37
+ return db
38
+
39
+
40
+ def generate_groq_rag(text, model, path):
41
+ llm = ChatGroq(
42
+ temperature=0,
43
+ model_name=model,
44
+ )
45
+ db = create_db_with_langchain(path)
46
+ retriever = db.as_retriever(search_type="mmr", search_kwargs={"k": 4, "fetch_k": 20})
47
+ prompt = hub.pull("rlm/rag-prompt")
48
+
49
+ def format_docs(docs):
50
+ return "\n\n".join(doc.page_content for doc in docs)
51
+
52
+ rag_chain = {"context": retriever | format_docs, "question": RunnablePassthrough()} | prompt | llm
53
+ return rag_chain.invoke(text).content
54
 
55
+
56
+ def generate_groq_base(text, model):
57
  completion = groq_client.chat.completions.create(
58
  model=model,
59
  messages=[
 
75
  response += chunk.choices[0].delta.content or ""
76
  return response
77
 
78
+ def generate_groq(text, model, path):
79
+ if path:
80
+ return generate_groq_rag(text, model, path)
81
+ else:
82
+ return generate_groq_base(text, model)
83
+
84
 
85
  def generate_openai(text, model, openai_client):
86
  message = [{"role": "user", "content": text}]
 
90
  return response.choices[0].message.content
91
 
92
 
93
+ def generate(text, model, path, api):
94
  if model == "Llama 3":
95
+ return generate_groq(text, "llama3-70b-8192", path)
96
  elif model == "Groq":
97
+ return generate_groq(text, "llama3-groq-70b-8192-tool-use-preview", path)
98
  elif model == "Mistral":
99
+ return generate_groq(text, "mixtral-8x7b-32768", path)
100
  elif model == "Gemma":
101
+ return generate_groq(text, "gemma2-9b-it", path)
102
  elif model == "OpenAI GPT 3.5":
103
  try:
104
  openai_client = OpenAI(api_key=api)
app.py CHANGED
@@ -5,7 +5,7 @@ import re
5
  from humanize import paraphrase_text
6
  from ai_generate import generate
7
  import requests
8
- import language_tool_python
9
  import torch
10
  from gradio_client import Client
11
  from transformers import GPT2LMHeadModel, GPT2TokenizerFast
@@ -287,6 +287,7 @@ def generate_article(
287
  ai_model: str,
288
  content_string: str,
289
  api_key: str = None,
 
290
  generated_article: str = None,
291
  user_comments: str = None,
292
  ) -> str:
@@ -332,7 +333,7 @@ def generate_article(
332
  )
333
  article = response.choices[0].message.content.strip()
334
  else:
335
- article = generate(prompt, ai_model, api_key)
336
 
337
  return clean_text(article)
338
 
@@ -414,6 +415,7 @@ def generate_and_format(
414
  month_to,
415
  day_to,
416
  domains_to_include,
 
417
  generated_article: str = None,
418
  user_comments: str = None,
419
  ):
@@ -444,6 +446,7 @@ def generate_and_format(
444
  ai_model,
445
  content_string,
446
  api_key,
 
447
  generated_article,
448
  user_comments,
449
  )
@@ -589,11 +592,9 @@ def create_interface():
589
  elem_classes="input-highlight-turquoise",
590
  )
591
  gr.Markdown("# Search Options", elem_classes="text-center text-3xl mb-6")
592
- with gr.Group():
593
- with gr.Row():
594
- google_search_check = gr.Checkbox(
595
- label="Enable Google Search For Recent Sources", value=True
596
- )
597
  with gr.Row():
598
  month_from = gr.Dropdown(
599
  choices=months,
@@ -621,6 +622,8 @@ def create_interface():
621
  multiselect=True,
622
  label="Domains To Include",
623
  )
 
 
624
 
625
  with gr.Group():
626
  gr.Markdown("## AI Model Configuration", elem_classes="text-xl mb-4")
@@ -681,7 +684,7 @@ def create_interface():
681
  humanized_output = gr.Markdown(label="Humanized Article", value="\n\n\n\n", render=True)
682
  copy_to_input_btn = gr.Button("Copy to Input for AI Check")
683
 
684
- def become_visible(text):
685
  if text:
686
  return gr.update(visible=True)
687
  else:
@@ -693,9 +696,16 @@ def create_interface():
693
  else:
694
  return gr.update(visible=False)
695
 
 
 
 
 
 
 
 
696
  ai_detector_dropdown.change(highlight_visible, inputs=ai_detector_dropdown, outputs=highlighted_text)
697
- output_article.change(become_visible, inputs=output_article, outputs=ai_comments)
698
- ai_comments.change(become_visible, inputs=output_article, outputs=regenerate_btn)
699
  ai_check_btn.click(highlight_visible, inputs=ai_detector_dropdown, outputs=highlighted_text)
700
 
701
  generate_btn.click(
@@ -724,6 +734,7 @@ def create_interface():
724
  month_to,
725
  day_to,
726
  domains_to_include,
 
727
  ],
728
  outputs=[output_article],
729
  )
@@ -754,6 +765,7 @@ def create_interface():
754
  month_to,
755
  day_to,
756
  domains_to_include,
 
757
  output_article,
758
  ai_comments,
759
  ],
@@ -779,6 +791,12 @@ def create_interface():
779
  outputs=[humanized_output],
780
  )
781
 
 
 
 
 
 
 
782
  copy_to_input_btn.click(
783
  fn=copy_to_input,
784
  inputs=[humanized_output],
@@ -790,5 +808,5 @@ def create_interface():
790
 
791
  if __name__ == "__main__":
792
  demo = create_interface()
793
- demo.launch(server_name="0.0.0.0", share=True, server_port=7890)
794
- # demo.launch(server_name="0.0.0.0")
 
5
  from humanize import paraphrase_text
6
  from ai_generate import generate
7
  import requests
8
+ import language_tool_python
9
  import torch
10
  from gradio_client import Client
11
  from transformers import GPT2LMHeadModel, GPT2TokenizerFast
 
287
  ai_model: str,
288
  content_string: str,
289
  api_key: str = None,
290
+ pdf_file_input=None,
291
  generated_article: str = None,
292
  user_comments: str = None,
293
  ) -> str:
 
333
  )
334
  article = response.choices[0].message.content.strip()
335
  else:
336
+ article = generate(prompt, ai_model, pdf_file_input, api_key)
337
 
338
  return clean_text(article)
339
 
 
415
  month_to,
416
  day_to,
417
  domains_to_include,
418
+ pdf_file_input,
419
  generated_article: str = None,
420
  user_comments: str = None,
421
  ):
 
446
  ai_model,
447
  content_string,
448
  api_key,
449
+ pdf_file_input,
450
  generated_article,
451
  user_comments,
452
  )
 
592
  elem_classes="input-highlight-turquoise",
593
  )
594
  gr.Markdown("# Search Options", elem_classes="text-center text-3xl mb-6")
595
+ with gr.Row():
596
+ google_search_check = gr.Checkbox(label="Enable Google Search For Recent Sources", value=True)
597
+ with gr.Group(visible=False) as search_options:
 
 
598
  with gr.Row():
599
  month_from = gr.Dropdown(
600
  choices=months,
 
622
  multiselect=True,
623
  label="Domains To Include",
624
  )
625
+ gr.Markdown("# Add Optional PDF File with Information", elem_classes="text-center text-3xl mb-6")
626
+ pdf_file_input = gr.File(label="Upload PDF")
627
 
628
  with gr.Group():
629
  gr.Markdown("## AI Model Configuration", elem_classes="text-xl mb-4")
 
684
  humanized_output = gr.Markdown(label="Humanized Article", value="\n\n\n\n", render=True)
685
  copy_to_input_btn = gr.Button("Copy to Input for AI Check")
686
 
687
+ def regenerate_visible(text):
688
  if text:
689
  return gr.update(visible=True)
690
  else:
 
696
  else:
697
  return gr.update(visible=False)
698
 
699
+ def search_visible(toggle):
700
+ if toggle:
701
+ return gr.update(visible=True)
702
+ else:
703
+ return gr.update(visible=False)
704
+
705
+ google_search_check.change(search_visible, inputs=google_search_check, outputs=search_options)
706
  ai_detector_dropdown.change(highlight_visible, inputs=ai_detector_dropdown, outputs=highlighted_text)
707
+ output_article.change(regenerate_visible, inputs=output_article, outputs=ai_comments)
708
+ ai_comments.change(regenerate_visible, inputs=output_article, outputs=regenerate_btn)
709
  ai_check_btn.click(highlight_visible, inputs=ai_detector_dropdown, outputs=highlighted_text)
710
 
711
  generate_btn.click(
 
734
  month_to,
735
  day_to,
736
  domains_to_include,
737
+ pdf_file_input,
738
  ],
739
  outputs=[output_article],
740
  )
 
765
  month_to,
766
  day_to,
767
  domains_to_include,
768
+ pdf_file_input,
769
  output_article,
770
  ai_comments,
771
  ],
 
791
  outputs=[humanized_output],
792
  )
793
 
794
+
795
+
796
+
797
+
798
+
799
+
800
  copy_to_input_btn.click(
801
  fn=copy_to_input,
802
  inputs=[humanized_output],
 
808
 
809
  if __name__ == "__main__":
810
  demo = create_interface()
811
+ # demo.launch(server_name="0.0.0.0", share=True, server_port=7890)
812
+ demo.launch(server_name="0.0.0.0")
gptzero_free.py CHANGED
File without changes
humanize.py CHANGED
@@ -18,6 +18,8 @@ else:
18
  print("CUDA is not available. Using CPU instead.")
19
  device = torch.device("cpu")
20
 
 
 
21
  # Configuration for models and their adapters
22
  model_config = {
23
  "Base Model": "polygraf-ai/poly-humanizer-base",
 
18
  print("CUDA is not available. Using CPU instead.")
19
  device = torch.device("cpu")
20
 
21
+
22
+
23
  # Configuration for models and their adapters
24
  model_config = {
25
  "Base Model": "polygraf-ai/poly-humanizer-base",
nohup.out CHANGED
The diff for this file is too large to render. See raw diff
 
packages.txt CHANGED
File without changes
plagiarism.py CHANGED
File without changes
requirements.txt CHANGED
@@ -12,4 +12,11 @@ Unidecode
12
  BeautifulSoup4
13
  google-api-python-client
14
  newspaper3k
15
- jusText
 
 
 
 
 
 
 
 
12
  BeautifulSoup4
13
  google-api-python-client
14
  newspaper3k
15
+ jusText
16
+ langchain-groq
17
+ langchainhub
18
+ sentence-transformers
19
+ langchain-community
20
+ pymupdf
21
+ chromadb
22
+ language-tool-python
test.py CHANGED
File without changes
utils.py CHANGED
File without changes