from langchain.chains import ConversationalRetrievalChain, RetrievalQA
from langchain.chains.question_answering import load_qa_chain
from langchain.memory import ConversationBufferMemory, ConversationTokenBufferMemory
from langchain.llms import HuggingFacePipeline, HuggingFaceHub, CTransformers
from langchain.prompts import PromptTemplate
from langchain.embeddings import HuggingFaceEmbeddings, HuggingFaceBgeEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.vectorstores import Chroma
from langchain.document_loaders import (
    CSVLoader,
    DirectoryLoader,
    GitLoader,
    NotebookLoader,
    OnlinePDFLoader,
    PythonLoader,
    TextLoader,
    UnstructuredFileLoader,
    UnstructuredHTMLLoader,
    UnstructuredPDFLoader,
    UnstructuredWordDocumentLoader,
    WebBaseLoader,
    PyPDFLoader,
    UnstructuredMarkdownLoader,
    UnstructuredEPubLoader,
    UnstructuredPowerPointLoader,
    UnstructuredODTLoader,
)
from langchain.document_loaders.base import BaseLoader
from langchain.schema import Document
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    StoppingCriteria,
    StoppingCriteriaList,
    pipeline,
    GenerationConfig,
    TextStreamer,
    BitsAndBytesConfig,
)
import torch
import os
import streamlit as st
import gradio as gr
import tempfile
import timeit

# Map each supported file extension to its document loader class and loader kwargs.
FILE_LOADER_MAPPING = {
    "csv": (CSVLoader, {"encoding": "utf-8"}),
    "doc": (UnstructuredWordDocumentLoader, {}),
    "docx": (UnstructuredWordDocumentLoader, {}),
    "epub": (UnstructuredEPubLoader, {}),
    "html": (UnstructuredHTMLLoader, {}),
    "md": (UnstructuredMarkdownLoader, {}),
    "odt": (UnstructuredODTLoader, {}),
    "pdf": (PyPDFLoader, {}),
    "ppt": (UnstructuredPowerPointLoader, {}),
    "pptx": (UnstructuredPowerPointLoader, {}),
    "txt": (TextLoader, {"encoding": "utf8"}),
    "ipynb": (NotebookLoader, {}),
    "py": (PythonLoader, {}),
    # Add more mappings for other file extensions and loaders as needed
}


def load_model():
    """Load the quantized Zephyr-7B-beta GGUF model on CPU via CTransformers."""
    config = {
        "max_new_tokens": 1024,
        "repetition_penalty": 1.1,
        "temperature": 0.1,
        "top_k": 50,
        "top_p": 0.9,
        "stream": True,
        "threads": int(os.cpu_count() / 2),
    }
    llm = CTransformers(
        model="TheBloke/zephyr-7B-beta-GGUF",
        model_file="zephyr-7b-beta.Q4_0.gguf",
        callbacks=[StreamingStdOutCallbackHandler()],
        lib="avx2",  # for CPU use
        **config,
    )
    return llm
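
# A minimal sketch (not part of the original script) showing one way
# FILE_LOADER_MAPPING and load_model() could be wired into a RetrievalQA
# pipeline. The helper names `load_documents` and `build_qa_chain`, the
# BGE embedding model name, and the chunking/retriever settings are
# assumptions for illustration, not taken from the original code.

def load_documents(file_path: str):
    """Pick a loader from FILE_LOADER_MAPPING based on the file extension."""
    ext = os.path.splitext(file_path)[1][1:].lower()
    if ext not in FILE_LOADER_MAPPING:
        raise ValueError(f"Unsupported file extension: {ext}")
    loader_class, loader_args = FILE_LOADER_MAPPING[ext]
    return loader_class(file_path, **loader_args).load()


def build_qa_chain(file_path: str):
    """Split the documents, embed them into Chroma, and return a RetrievalQA chain."""
    docs = load_documents(file_path)
    chunks = RecursiveCharacterTextSplitter(
        chunk_size=1000, chunk_overlap=100  # assumed chunking settings
    ).split_documents(docs)
    embeddings = HuggingFaceBgeEmbeddings(
        model_name="BAAI/bge-base-en-v1.5",  # assumed embedding model
        encode_kwargs={"normalize_embeddings": True},
    )
    db = Chroma.from_documents(chunks, embeddings)
    return RetrievalQA.from_chain_type(
        llm=load_model(),
        chain_type="stuff",
        retriever=db.as_retriever(search_kwargs={"k": 3}),
    )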