from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.llms import HuggingFaceHub
import os
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
import gradio as gr
import re
import shutil
import glob
def delete_files_except(filename, folder_path):
    """Delete every entry directly inside ``folder_path`` except ``filename``.

    Sub-directories are left untouched because ``os.remove`` cannot delete
    them and would otherwise abort the cleanup half-way through.

    Args:
        filename: Base name of the entry that must be kept.
        folder_path: Directory whose direct children are examined. Entries
            whose names start with a dot are not matched by the ``*`` glob
            and are therefore never removed.

    Returns:
        The base name of the last entry removed (entries are processed in
        sorted order, so this is deterministic), or ``None`` when nothing
        was removed.
    """
    delfile = None
    # Escape the folder so glob metacharacters in its name are taken literally.
    pattern = os.path.join(glob.escape(folder_path), '*')
    # Sorted so the returned "last deleted" name does not depend on the
    # arbitrary order in which the filesystem lists entries.
    for file in sorted(glob.glob(pattern)):
        name = os.path.basename(file)
        if name == filename:
            continue
        if os.path.isdir(file) and not os.path.islink(file):
            # Real directories cannot be removed with os.remove; skip them.
            continue
        os.remove(file)
        delfile = name
    return delfile
def MOP(path: str) -> str:
    """Ask an LLM whether the uploaded document covers the template's checks.

    The PDFs found under ``path`` are loaded, split into overlapping chunks,
    embedded and indexed in an in-memory FAISS store. Every file in ``path``
    other than the template is then deleted, and the name of the deleted
    file is used in the prompt as the "Content Document". The cleanup runs
    even if loading or embedding fails, so a broken upload cannot linger in
    the folder and be mixed into the next comparison.

    Args:
        path: Directory containing the template PDF and the document to check.

    Returns:
        The text following ``"Helpful Answer:\\n\\n"`` in the chain output if
        that marker is present, otherwise the raw chain output.

    Raises:
        ValueError: If no text chunks could be produced from the PDFs.
    """
    template_name = 'Dummy_standard MoP_template_new.pdf'
    loader = PyPDFDirectoryLoader(path)
    try:
        docs = loader.load()
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=500)
        text_chunks = text_splitter.split_documents(docs)
        if not text_chunks:
            # Fail with a clear message instead of an opaque error from the
            # vector store when there is nothing to index.
            raise ValueError(f"No text could be extracted from the PDF files in {path!r}")
        embeddings = HuggingFaceEmbeddings(model_name="thenlper/gte-base")
        vector_store = FAISS.from_documents(text_chunks, embedding=embeddings)
    finally:
        # Always remove the uploaded file(s); the template stays in place.
        prompt_file = delete_files_except(template_name, path)
    repo_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
    llm = HuggingFaceHub(repo_id=repo_id, model_kwargs={"temperature": 0.1, "max_new_tokens": 2048})
    # k equals the number of chunks, so every chunk is retrieved and
    # "stuffed" into the prompt.
    retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": len(text_chunks)})
    qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever, verbose=True)
    message = f"""<s> [INST] You have two documents:

Template Document: "{template_name}"
Content Document: "{prompt_file}"

Your task is to determine whether the Content Document effectively incorporates the context and instructions of the Pre-Check Procedures and Post-Check Procedures specified in the Template Document, without the requirement for them to be under specified labels. The focus should be on the presence of the context and instructions rather than their exact placement.

Provide a "Yes" or "No" response indicating whether the Content Document accurately integrates the context and instructions of the Pre-Check Procedures and Post-Check Procedures as outlined in the Template Document.

Additionally, identify any missing elements related to the context and instructions of the Pre-Check Procedures and Post-Check Procedures if present, regardless of their placement within the Content Document.

Instructions:

Review the context and instructions of the Pre-Check Procedures and Post-Check Procedures detailed in the Template Document ("{template_name}").
Assess whether the Content Document ("{prompt_file}") includes the necessary context and instructions for the Pre-Check Procedures and Post-Check Procedures, regardless of their specific placement or labeling.
Provide a "Yes" if the Content Document adequately integrates the context and instructions of the Pre-Check Procedures and Post-Check Procedures, or "No" if there are significant gaps or omissions.
If the answer is "No," specify any missing elements related to the context and instructions of the Pre-Check Procedures and Post-Check Procedures, emphasizing their importance in the Content Document.
Ensure careful consideration of the context and instructions provided in the Template Document ("{template_name}") while evaluating the alignment of the Content Document ("{prompt_file}"). [/INST] </s>"""
    result = qa.run(message)
    # Keep only the text after the "Helpful Answer:" marker when the output
    # contains it.
    match = re.search(r"Helpful Answer:\n\n(.*)", result, re.DOTALL)
    if match:
        return match.group(1)
    return result
 
def process_file(fileobj):
    """Copy an uploaded file into the ``data`` folder and run the MoP check.

    Args:
        fileobj: The upload handed over by the UI. Either a path
            (``str`` / ``os.PathLike``) or a file-like object exposing the
            path of the uploaded file through its ``name`` attribute.

    Returns:
        Whatever ``MOP`` returns for the ``data`` folder.

    Raises:
        ValueError: If no file was provided.
    """
    if fileobj is None:
        raise ValueError("No file was uploaded.")
    destination_folder = "data"
    # Resolve the source path once so the base name and the copy source
    # always agree, whichever kind of upload object is received.
    if isinstance(fileobj, (str, os.PathLike)):
        source_path = os.fspath(fileobj)
    else:
        source_path = fileobj.name
    file_name = os.path.basename(source_path)
    destination_path = os.path.join(destination_folder, file_name)
    shutil.copyfile(source_path, destination_path)
    return MOP(destination_folder)

 
# Web UI: a single file upload is passed to process_file and the returned
# string is displayed as plain text.
demo = gr.Interface(fn=process_file, inputs=["file"], outputs="text")
demo.launch()