# rise_ai / test.py: basic chatbot
#import json
from flask import Flask, request
from dotenv import load_dotenv
from langchain.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from transformers import AutoTokenizer, AutoModelForQuestionAnswering, pipeline
from langchain import HuggingFacePipeline
from langchain.chains import RetrievalQA
# Initialize the Flask app
app = Flask(__name__)
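# Load environment variables from a local .env file (e.g. API tokens), if one is present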
load_dotenv()
@app.route("/train/faq", methods=['GET','POST'])
def embeddings_faqs():
    # Load the FAQ page that will be used as the knowledge base
    data = WebBaseLoader("https://rise.mmu.ac.uk/what-is-rise/").load()

    # Create a RecursiveCharacterTextSplitter that splits the text into
    # chunks of 1000 characters with a 150-character overlap
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)

    # Split the loaded documents into chunks using the text splitter
    docs = text_splitter.split_documents(data)

    # Path of the pre-trained embedding model to use
    modelPath = "sentence-transformers/all-MiniLM-l6-v2"

    # Model configuration options: run the model on the CPU
    model_kwargs = {'device': 'cpu'}

    # Encoding options: do not normalize the embeddings
    encode_kwargs = {'normalize_embeddings': False}

    # Initialize an instance of HuggingFaceEmbeddings with the specified parameters
    embeddings = HuggingFaceEmbeddings(
        model_name=modelPath,        # Provide the pre-trained model's path
        model_kwargs=model_kwargs,   # Pass the model configuration options
        encode_kwargs=encode_kwargs  # Pass the encoding options
    )

    # Create vectors for every chunk
    vectorstore = FAISS.from_documents(docs, embeddings)

    # Persist the vectors locally on disk
    vectorstore.save_local("_rise_faq_db")

    return {"trained": "success"}
@app.route('/ask', methods=['GET','POST'])
def ask():
    # Specify the model name you want to use
    model_name = "Intel/dynamic_tinybert"

    # Load the tokenizer associated with the specified model
    tokenizer = AutoTokenizer.from_pretrained(model_name, padding=True, truncation=True, max_length=512)

    # Define a question-answering pipeline using the model and tokenizer
    question_answerer = pipeline(
        "question-answering",
        model=model_name,
        tokenizer=tokenizer,
        return_tensors='pt'
    )

    # Wrap the question-answering pipeline in a HuggingFacePipeline,
    # with additional model-specific arguments (temperature and max_length)
    llm = HuggingFacePipeline(
        pipeline=question_answerer,
        model_kwargs={"temperature": 0.7, "max_length": 512},
    )

    # Path of the pre-trained embedding model to use (must match the model used by /train/faq)
    modelPath = "sentence-transformers/all-MiniLM-l6-v2"

    # Model configuration options: run the model on the CPU
    model_kwargs = {'device': 'cpu'}

    # Encoding options: do not normalize the embeddings
    encode_kwargs = {'normalize_embeddings': False}

    # Initialize an instance of HuggingFaceEmbeddings with the specified parameters
    embeddings = HuggingFaceEmbeddings(
        model_name=modelPath,        # Provide the pre-trained model's path
        model_kwargs=model_kwargs,   # Pass the model configuration options
        encode_kwargs=encode_kwargs  # Pass the encoding options
    )

    # Load the FAISS index that /train/faq persisted to disk
    persisted_vectorstore = FAISS.load_local("_rise_faq_db", embeddings)

    # Create a retriever from the vector store; it returns the chunks most relevant to a query
    retriever = persisted_vectorstore.as_retriever()
    docs = retriever.get_relevant_documents("What are the benefits?")
    print(docs[0].page_content)
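
    # A possible next step (sketch, not wired in): take the question from the request,
    # retrieve the most relevant chunks, and let the extractive QA pipeline pick the answer.
    # The "question" parameter name below is an assumption for illustration.
    #
    #   question = request.values.get("question", "What are the benefits?")
    #   context = " ".join(d.page_content for d in retriever.get_relevant_documents(question))
    #   result = question_answerer(question=question, context=context)
    #   return {"answer": result["answer"], "score": result["score"]}
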
return "uip"
@app.route('/', methods=['GET','POST'])
def index():
return {"response":"just some junk response"}