Spaces: Runtime error

eljanmahammadli committed · Commit 03fd59b · 1 Parent: c45480c

added RAG
Files changed:
- .gitattributes +0 -0
- .gitignore +3 -1
- README.md +0 -0
- __pycache__/ai_generate.cpython-310.pyc +0 -0
- __pycache__/ai_generate.cpython-39.pyc +0 -0
- __pycache__/app.cpython-39.pyc +0 -0
- __pycache__/gptzero_free.cpython-310.pyc +0 -0
- __pycache__/gptzero_free.cpython-39.pyc +0 -0
- __pycache__/humanize.cpython-310.pyc +0 -0
- __pycache__/humanize.cpython-39.pyc +0 -0
- ai_generate.py +57 -7
- app.py +30 -12
- gptzero_free.py +0 -0
- humanize.py +2 -0
- nohup.out +0 -0
- packages.txt +0 -0
- plagiarism.py +0 -0
- requirements.txt +8 -1
- test.py +0 -0
- utils.py +0 -0
.gitattributes CHANGED
File without changes
.gitignore CHANGED
@@ -1 +1,3 @@
-__pycache__
+__pycache__
+.env
+nohup.out
README.md CHANGED
File without changes
__pycache__/ai_generate.cpython-310.pyc DELETED
Binary file (1.87 kB)

__pycache__/ai_generate.cpython-39.pyc CHANGED
Binary files a/__pycache__/ai_generate.cpython-39.pyc and b/__pycache__/ai_generate.cpython-39.pyc differ

__pycache__/app.cpython-39.pyc ADDED
Binary file (19.2 kB)

__pycache__/gptzero_free.cpython-310.pyc DELETED
Binary file (3.58 kB)

__pycache__/gptzero_free.cpython-39.pyc DELETED
Binary file (3.58 kB)

__pycache__/humanize.cpython-310.pyc DELETED
Binary file (2.46 kB)

__pycache__/humanize.cpython-39.pyc CHANGED
Binary files a/__pycache__/humanize.cpython-39.pyc and b/__pycache__/humanize.cpython-39.pyc differ
ai_generate.py CHANGED
@@ -3,13 +3,57 @@ from openai import OpenAI
 import os
 from transformers import pipeline
 from groq import Groq
+from langchain_community.document_loaders import PyMuPDFLoader
+from langchain_community.document_loaders import TextLoader
+from langchain_community.embeddings.sentence_transformer import (
+    SentenceTransformerEmbeddings,
+)
+from langchain_community.vectorstores import Chroma
+from langchain_text_splitters import CharacterTextSplitter
+from langchain import hub
+from langchain_core.output_parsers import StrOutputParser
+from langchain_core.runnables import RunnablePassthrough
+from langchain.chains import RetrievalQA
+from langchain_groq import ChatGroq
+from dotenv import load_dotenv
+load_dotenv()

 groq_client = Groq(
-    api_key=os.environ.get("
+    api_key=os.environ.get("GROQ_API_KEY"),
 )

+def create_db_with_langchain(path):
+    loader = PyMuPDFLoader(path)
+    data = loader.load()
+    # split it into chunks
+    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
+    docs = text_splitter.split_documents(data)
+
+    # create the open-source embedding function
+    embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
+
+    # load it into Chroma
+    db = Chroma.from_documents(docs, embedding_function)
+    return db
+
+
+def generate_groq_rag(text, model, path):
+    llm = ChatGroq(
+        temperature=0,
+        model_name=model,
+    )
+    db = create_db_with_langchain(path)
+    retriever = db.as_retriever(search_type="mmr", search_kwargs={"k": 4, "fetch_k": 20})
+    prompt = hub.pull("rlm/rag-prompt")
+
+    def format_docs(docs):
+        return "\n\n".join(doc.page_content for doc in docs)
+
+    rag_chain = {"context": retriever | format_docs, "question": RunnablePassthrough()} | prompt | llm
+    return rag_chain.invoke(text).content

-def generate_groq(text, model):
+
+def generate_groq_base(text, model):
     completion = groq_client.chat.completions.create(
         model=model,
         messages=[
@@ -31,6 +75,12 @@ def generate_groq(text, model):
         response += chunk.choices[0].delta.content or ""
     return response

+def generate_groq(text, model, path):
+    if path:
+        return generate_groq_rag(text, model, path)
+    else:
+        return generate_groq_base(text, model)
+

 def generate_openai(text, model, openai_client):
     message = [{"role": "user", "content": text}]
@@ -40,15 +90,15 @@ def generate_openai(text, model, openai_client):
     return response.choices[0].message.content


-def generate(text, model, api):
+def generate(text, model, path, api):
     if model == "Llama 3":
-        return generate_groq(text, "llama3-70b-8192")
+        return generate_groq(text, "llama3-70b-8192", path)
     elif model == "Groq":
-        return generate_groq(text, "llama3-groq-70b-8192-tool-use-preview")
+        return generate_groq(text, "llama3-groq-70b-8192-tool-use-preview", path)
     elif model == "Mistral":
-        return generate_groq(text, "mixtral-8x7b-32768")
+        return generate_groq(text, "mixtral-8x7b-32768", path)
     elif model == "Gemma":
-        return generate_groq(text, "gemma2-9b-it")
+        return generate_groq(text, "gemma2-9b-it", path)
     elif model == "OpenAI GPT 3.5":
         try:
             openai_client = OpenAI(api_key=api)
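Taken together, the ai_generate.py changes make RAG opt-in per call: generate() now threads a document path down to generate_groq(), which picks the RAG chain when a path is present and the plain chat completion otherwise. A minimal usage sketch (the prompts and "paper.pdf" are hypothetical placeholders; GROQ_API_KEY must be set in the environment):

from ai_generate import generate

# No path: generate_groq() falls through to generate_groq_base(), a plain Groq chat completion.
plain = generate("Explain retrieval-augmented generation.", "Llama 3", None, None)

# With a path: generate_groq_rag() indexes the PDF into Chroma with MiniLM embeddings,
# retrieves 4 chunks via MMR, and answers through the rlm/rag-prompt chain.
grounded = generate("Summarize the methods section.", "Llama 3", "paper.pdf", None)
print(grounded)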
app.py CHANGED
@@ -5,7 +5,7 @@ import re
 from humanize import paraphrase_text
 from ai_generate import generate
 import requests
-import language_tool_python
+import language_tool_python
 import torch
 from gradio_client import Client
 from transformers import GPT2LMHeadModel, GPT2TokenizerFast
@@ -287,6 +287,7 @@ def generate_article(
     ai_model: str,
     content_string: str,
     api_key: str = None,
+    pdf_file_input=None,
     generated_article: str = None,
     user_comments: str = None,
 ) -> str:
@@ -332,7 +333,7 @@ def generate_article(
         )
         article = response.choices[0].message.content.strip()
     else:
-        article = generate(prompt, ai_model, api_key)
+        article = generate(prompt, ai_model, pdf_file_input, api_key)

     return clean_text(article)

@@ -414,6 +415,7 @@ def generate_and_format(
     month_to,
     day_to,
     domains_to_include,
+    pdf_file_input,
     generated_article: str = None,
     user_comments: str = None,
 ):
@@ -444,6 +446,7 @@ def generate_and_format(
         ai_model,
         content_string,
         api_key,
+        pdf_file_input,
         generated_article,
         user_comments,
     )
@@ -589,11 +592,9 @@ def create_interface():
             elem_classes="input-highlight-turquoise",
         )
         gr.Markdown("# Search Options", elem_classes="text-center text-3xl mb-6")
-        with gr.Row():
-            google_search_check = gr.Checkbox(
-                label="Enable Google Search For Recent Sources", value=True
-            )
+        with gr.Row():
+            google_search_check = gr.Checkbox(label="Enable Google Search For Recent Sources", value=True)
+        with gr.Group(visible=False) as search_options:
         with gr.Row():
             month_from = gr.Dropdown(
                 choices=months,
@@ -621,6 +622,8 @@ def create_interface():
             multiselect=True,
             label="Domains To Include",
         )
+        gr.Markdown("# Add Optional PDF File with Information", elem_classes="text-center text-3xl mb-6")
+        pdf_file_input = gr.File(label="Upload PDF")

         with gr.Group():
             gr.Markdown("## AI Model Configuration", elem_classes="text-xl mb-4")
@@ -681,7 +684,7 @@ def create_interface():
         humanized_output = gr.Markdown(label="Humanized Article", value="\n\n\n\n", render=True)
         copy_to_input_btn = gr.Button("Copy to Input for AI Check")

-        def
+        def regenerate_visible(text):
             if text:
                 return gr.update(visible=True)
             else:
@@ -693,9 +696,16 @@ def create_interface():
             else:
                 return gr.update(visible=False)

+        def search_visible(toggle):
+            if toggle:
+                return gr.update(visible=True)
+            else:
+                return gr.update(visible=False)
+
+        google_search_check.change(search_visible, inputs=google_search_check, outputs=search_options)
         ai_detector_dropdown.change(highlight_visible, inputs=ai_detector_dropdown, outputs=highlighted_text)
-        output_article.change(
-        ai_comments.change(
+        output_article.change(regenerate_visible, inputs=output_article, outputs=ai_comments)
+        ai_comments.change(regenerate_visible, inputs=output_article, outputs=regenerate_btn)
         ai_check_btn.click(highlight_visible, inputs=ai_detector_dropdown, outputs=highlighted_text)

         generate_btn.click(
@@ -724,6 +734,7 @@ def create_interface():
                 month_to,
                 day_to,
                 domains_to_include,
+                pdf_file_input,
             ],
             outputs=[output_article],
         )
@@ -754,6 +765,7 @@ def create_interface():
                 month_to,
                 day_to,
                 domains_to_include,
+                pdf_file_input,
                 output_article,
                 ai_comments,
             ],
@@ -779,6 +791,12 @@ def create_interface():
             outputs=[humanized_output],
         )

+
+
+
+
+
+
         copy_to_input_btn.click(
             fn=copy_to_input,
             inputs=[humanized_output],
@@ -790,5 +808,5 @@ def create_interface():

 if __name__ == "__main__":
     demo = create_interface()
-    demo.launch(server_name="0.0.0.0", share=True, server_port=7890)
-
+    # demo.launch(server_name="0.0.0.0", share=True, server_port=7890)
+    demo.launch(server_name="0.0.0.0")
gptzero_free.py CHANGED
File without changes
humanize.py CHANGED
@@ -18,6 +18,8 @@ else:
     print("CUDA is not available. Using CPU instead.")
     device = torch.device("cpu")

+
+
 # Configuration for models and their adapters
 model_config = {
     "Base Model": "polygraf-ai/poly-humanizer-base",
nohup.out CHANGED
The diff for this file is too large to render.
packages.txt CHANGED
File without changes

plagiarism.py CHANGED
File without changes
requirements.txt CHANGED
@@ -12,4 +12,11 @@ Unidecode
 BeautifulSoup4
 google-api-python-client
 newspaper3k
-jusText
+jusText
+langchain-groq
+langchainhub
+sentence-transformers
+langchain-community
+pymupdf
+chromadb
+language-tool-python
test.py CHANGED
File without changes

utils.py CHANGED
File without changes