Spaces: Runtime error

eljanmahammadli committed · Commit 03fd59b · 1 Parent: c45480c

added RAG
Files changed:
- .gitattributes +0 -0
- .gitignore +3 -1
- README.md +0 -0
- __pycache__/ai_generate.cpython-310.pyc +0 -0
- __pycache__/ai_generate.cpython-39.pyc +0 -0
- __pycache__/app.cpython-39.pyc +0 -0
- __pycache__/gptzero_free.cpython-310.pyc +0 -0
- __pycache__/gptzero_free.cpython-39.pyc +0 -0
- __pycache__/humanize.cpython-310.pyc +0 -0
- __pycache__/humanize.cpython-39.pyc +0 -0
- ai_generate.py +57 -7
- app.py +30 -12
- gptzero_free.py +0 -0
- humanize.py +2 -0
- nohup.out +0 -0
- packages.txt +0 -0
- plagiarism.py +0 -0
- requirements.txt +8 -1
- test.py +0 -0
- utils.py +0 -0
.gitattributes CHANGED
File without changes
.gitignore CHANGED
@@ -1 +1,3 @@
-__pycache__
+__pycache__
+.env
+nohup.out
README.md CHANGED
File without changes
__pycache__/ai_generate.cpython-310.pyc DELETED
Binary file (1.87 kB)

__pycache__/ai_generate.cpython-39.pyc CHANGED
Binary files a/__pycache__/ai_generate.cpython-39.pyc and b/__pycache__/ai_generate.cpython-39.pyc differ

__pycache__/app.cpython-39.pyc ADDED
Binary file (19.2 kB)

__pycache__/gptzero_free.cpython-310.pyc DELETED
Binary file (3.58 kB)

__pycache__/gptzero_free.cpython-39.pyc DELETED
Binary file (3.58 kB)

__pycache__/humanize.cpython-310.pyc DELETED
Binary file (2.46 kB)

__pycache__/humanize.cpython-39.pyc CHANGED
Binary files a/__pycache__/humanize.cpython-39.pyc and b/__pycache__/humanize.cpython-39.pyc differ
ai_generate.py CHANGED
@@ -3,13 +3,57 @@ from openai import OpenAI
 import os
 from transformers import pipeline
 from groq import Groq
+from langchain_community.document_loaders import PyMuPDFLoader
+from langchain_community.document_loaders import TextLoader
+from langchain_community.embeddings.sentence_transformer import (
+    SentenceTransformerEmbeddings,
+)
+from langchain_community.vectorstores import Chroma
+from langchain_text_splitters import CharacterTextSplitter
+from langchain import hub
+from langchain_core.output_parsers import StrOutputParser
+from langchain_core.runnables import RunnablePassthrough
+from langchain.chains import RetrievalQA
+from langchain_groq import ChatGroq
+from dotenv import load_dotenv
+load_dotenv()

 groq_client = Groq(
-    api_key=os.environ.get("
+    api_key=os.environ.get("GROQ_API_KEY"),
 )

+def create_db_with_langchain(path):
+    loader = PyMuPDFLoader(path)
+    data = loader.load()
+    # split it into chunks
+    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
+    docs = text_splitter.split_documents(data)
+
+    # create the open-source embedding function
+    embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
+
+    # load it into Chroma
+    db = Chroma.from_documents(docs, embedding_function)
+    return db
+
+
+def generate_groq_rag(text, model, path):
+    llm = ChatGroq(
+        temperature=0,
+        model_name=model,
+    )
+    db = create_db_with_langchain(path)
+    retriever = db.as_retriever(search_type="mmr", search_kwargs={"k": 4, "fetch_k": 20})
+    prompt = hub.pull("rlm/rag-prompt")
+
+    def format_docs(docs):
+        return "\n\n".join(doc.page_content for doc in docs)
+
+    rag_chain = {"context": retriever | format_docs, "question": RunnablePassthrough()} | prompt | llm
+    return rag_chain.invoke(text).content

-def generate_groq(text, model):
+
+def generate_groq_base(text, model):
     completion = groq_client.chat.completions.create(
         model=model,
         messages=[
@@ -31,6 +75,12 @@ def generate_groq(text, model):
         response += chunk.choices[0].delta.content or ""
     return response

+def generate_groq(text, model, path):
+    if path:
+        return generate_groq_rag(text, model, path)
+    else:
+        return generate_groq_base(text, model)
+

 def generate_openai(text, model, openai_client):
     message = [{"role": "user", "content": text}]
@@ -40,15 +90,15 @@ def generate_openai(text, model, openai_client):
     return response.choices[0].message.content


-def generate(text, model, api):
+def generate(text, model, path, api):
     if model == "Llama 3":
-        return generate_groq(text, "llama3-70b-8192")
+        return generate_groq(text, "llama3-70b-8192", path)
     elif model == "Groq":
-        return generate_groq(text, "llama3-groq-70b-8192-tool-use-preview")
+        return generate_groq(text, "llama3-groq-70b-8192-tool-use-preview", path)
     elif model == "Mistral":
-        return generate_groq(text, "mixtral-8x7b-32768")
+        return generate_groq(text, "mixtral-8x7b-32768", path)
     elif model == "Gemma":
-        return generate_groq(text, "gemma2-9b-it")
+        return generate_groq(text, "gemma2-9b-it", path)
     elif model == "OpenAI GPT 3.5":
         try:
             openai_client = OpenAI(api_key=api)
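Taken together, the ai_generate.py changes make RAG opt-in per call: generate() now threads a document path down to generate_groq(), which picks the RAG chain when a path is present and the plain chat completion otherwise. A minimal usage sketch (the prompts and "paper.pdf" are hypothetical placeholders; GROQ_API_KEY must be set in the environment):

from ai_generate import generate

# No path: generate_groq() falls through to generate_groq_base(), a plain Groq chat completion.
plain = generate("Explain retrieval-augmented generation.", "Llama 3", None, None)

# With a path: generate_groq_rag() indexes the PDF into Chroma with MiniLM embeddings,
# retrieves 4 chunks via MMR, and answers through the rlm/rag-prompt chain.
grounded = generate("Summarize the methods section.", "Llama 3", "paper.pdf", None)
print(grounded)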
app.py CHANGED
@@ -5,7 +5,7 @@ import re
 from humanize import paraphrase_text
 from ai_generate import generate
 import requests
-import language_tool_python
+import language_tool_python
 import torch
 from gradio_client import Client
 from transformers import GPT2LMHeadModel, GPT2TokenizerFast
@@ -287,6 +287,7 @@ def generate_article(
     ai_model: str,
     content_string: str,
     api_key: str = None,
+    pdf_file_input=None,
     generated_article: str = None,
     user_comments: str = None,
 ) -> str:
@@ -332,7 +333,7 @@ def generate_article(
         )
         article = response.choices[0].message.content.strip()
     else:
-        article = generate(prompt, ai_model, api_key)
+        article = generate(prompt, ai_model, pdf_file_input, api_key)

     return clean_text(article)

@@ -414,6 +415,7 @@ def generate_and_format(
     month_to,
     day_to,
     domains_to_include,
+    pdf_file_input,
     generated_article: str = None,
     user_comments: str = None,
 ):
@@ -444,6 +446,7 @@ def generate_and_format(
         ai_model,
         content_string,
         api_key,
+        pdf_file_input,
         generated_article,
         user_comments,
     )
@@ -589,11 +592,9 @@ def create_interface():
             elem_classes="input-highlight-turquoise",
         )
         gr.Markdown("# Search Options", elem_classes="text-center text-3xl mb-6")
-        with gr.Row():
-            google_search_check = gr.Checkbox(
-                label="Enable Google Search For Recent Sources", value=True
-            )
+        with gr.Row():
+            google_search_check = gr.Checkbox(label="Enable Google Search For Recent Sources", value=True)
+        with gr.Group(visible=False) as search_options:
         with gr.Row():
             month_from = gr.Dropdown(
                 choices=months,
@@ -621,6 +622,8 @@ def create_interface():
             multiselect=True,
             label="Domains To Include",
         )
+        gr.Markdown("# Add Optional PDF File with Information", elem_classes="text-center text-3xl mb-6")
+        pdf_file_input = gr.File(label="Upload PDF")

         with gr.Group():
             gr.Markdown("## AI Model Configuration", elem_classes="text-xl mb-4")
@@ -681,7 +684,7 @@ def create_interface():
         humanized_output = gr.Markdown(label="Humanized Article", value="\n\n\n\n", render=True)
         copy_to_input_btn = gr.Button("Copy to Input for AI Check")

-        def
+        def regenerate_visible(text):
             if text:
                 return gr.update(visible=True)
             else:
@@ -693,9 +696,16 @@ def create_interface():
             else:
                 return gr.update(visible=False)

+        def search_visible(toggle):
+            if toggle:
+                return gr.update(visible=True)
+            else:
+                return gr.update(visible=False)
+
+        google_search_check.change(search_visible, inputs=google_search_check, outputs=search_options)
         ai_detector_dropdown.change(highlight_visible, inputs=ai_detector_dropdown, outputs=highlighted_text)
-        output_article.change(
-        ai_comments.change(
+        output_article.change(regenerate_visible, inputs=output_article, outputs=ai_comments)
+        ai_comments.change(regenerate_visible, inputs=output_article, outputs=regenerate_btn)
         ai_check_btn.click(highlight_visible, inputs=ai_detector_dropdown, outputs=highlighted_text)

         generate_btn.click(
@@ -724,6 +734,7 @@ def create_interface():
                 month_to,
                 day_to,
                 domains_to_include,
+                pdf_file_input,
             ],
             outputs=[output_article],
         )
@@ -754,6 +765,7 @@ def create_interface():
                 month_to,
                 day_to,
                 domains_to_include,
+                pdf_file_input,
                 output_article,
                 ai_comments,
             ],
@@ -779,6 +791,12 @@ def create_interface():
             outputs=[humanized_output],
         )

+
+
+
+
+
+
         copy_to_input_btn.click(
             fn=copy_to_input,
             inputs=[humanized_output],
@@ -790,5 +808,5 @@ def create_interface():

 if __name__ == "__main__":
     demo = create_interface()
-    demo.launch(server_name="0.0.0.0", share=True, server_port=7890)
-
+    # demo.launch(server_name="0.0.0.0", share=True, server_port=7890)
+    demo.launch(server_name="0.0.0.0")
gptzero_free.py CHANGED
File without changes
humanize.py CHANGED
@@ -18,6 +18,8 @@ else:
     print("CUDA is not available. Using CPU instead.")
     device = torch.device("cpu")

+
+
 # Configuration for models and their adapters
 model_config = {
     "Base Model": "polygraf-ai/poly-humanizer-base",
nohup.out CHANGED
The diff for this file is too large to render.
packages.txt CHANGED
File without changes

plagiarism.py CHANGED
File without changes
requirements.txt CHANGED
@@ -12,4 +12,11 @@ Unidecode
 BeautifulSoup4
 google-api-python-client
 newspaper3k
-jusText
+jusText
+langchain-groq
+langchainhub
+sentence-transformers
+langchain-community
+pymupdf
+chromadb
+language-tool-python
test.py CHANGED
File without changes

utils.py CHANGED
File without changes