markpeace committed
Commit 76c5345
0 Parent(s)

first commit
.gitattributes ADDED
@@ -0,0 +1,35 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,9 @@
+ .env
+ __pycache__/
+ /__pycache__/
+
+ /_rise_faq_db/
+ _rise_faq_db/
+
+ /_rise_product_db/
+ _rise_product_db/
Dockerfile ADDED
@@ -0,0 +1,22 @@
+ FROM python:3.10
+
+ RUN apt-get update -y && apt-get install -y build-essential
+
+ WORKDIR /app
+
+ # Run as a non-root user (required by Hugging Face Spaces)
+ RUN useradd -m -u 1000 user
+ USER user
+ ENV HOME=/home/user \
+     PATH=/home/user/.local/bin:$PATH
+
+ WORKDIR $HOME/app
+
+ COPY --chown=user . $HOME/app
+
+ # (redundant: the COPY above already includes app.py)
+ COPY app.py app.py
+
+ RUN pip install -r requirements.txt
+ #RUN pip3 install torch torchvision --index-url https://download.pytorch.org/whl/cpu
+
+ CMD ["gunicorn", "-b", "0.0.0.0:7860", "app:app", "--timeout", "950"]
agent/__init__.py ADDED
(empty file)
agent/_create.py ADDED
@@ -0,0 +1,41 @@
+ # BUILD AND RUN THE AGENT EXECUTOR
+
+ def agent_executor():
+
+     from dotenv import load_dotenv
+     load_dotenv()
+
+     from agent.toolset import tools
+     from agent.prompt import prompt
+
+     from langchain_openai import ChatOpenAI
+     llm = ChatOpenAI(model="gpt-4", temperature=0)
+
+     # Expose the tools to the model as OpenAI function definitions
+     from langchain_community.tools.convert_to_openai import format_tool_to_openai_function
+     llm_with_tools = llm.bind(functions=[format_tool_to_openai_function(t) for t in tools])
+
+     from langchain.agents.format_scratchpad import format_to_openai_function_messages
+     from langchain.agents.output_parsers import OpenAIFunctionsAgentOutputParser
+
+     agent = (
+         {
+             "input": lambda x: x["input"],
+             "agent_scratchpad": lambda x: format_to_openai_function_messages(
+                 x["intermediate_steps"]
+             ),
+         }
+         | prompt
+         | llm_with_tools
+         | OpenAIFunctionsAgentOutputParser()
+     )
+
+     from langchain.agents import AgentExecutor
+     executor = AgentExecutor(agent=agent, tools=tools, verbose=True)
+
+     # The question is hard-coded for now; app.py simply returns this answer
+     return executor.invoke({"input": "What is Rise?"})["output"]
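A minimal usage sketch (not part of the commit; `ask` and its wiring are hypothetical). Since agent_executor() hard-codes its question, a parametrised tail would let app.py forward a real user enquiry:

# Hypothetical replacement for the last lines of agent_executor():
# accept the question as an argument instead of hard-coding it.
def ask(question: str) -> str:
    executor = AgentExecutor(agent=agent, tools=tools, verbose=True)
    return executor.invoke({"input": question})["output"]

# ask("How do I claim Rise points?")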
agent/datastructures.py ADDED
@@ -0,0 +1,25 @@
+ from typing import List
+ from langchain_core.output_parsers import JsonOutputParser
+ from langchain_core.pydantic_v1 import BaseModel, Field
+ from langchain.output_parsers import PydanticOutputParser
+
+ ## DEFINE INPUT FRAMEWORK
+ class InputSchema(BaseModel):
+     """Expect the input from the frontend to be a JSON object with this structure"""
+     question: str = Field(description="The enquiry that is passed from the user")
+
+ # Define the desired output data structure.
+ class FrontEndActions(BaseModel):
+     """Structure to pass actions back to the frontend"""
+     text: str = Field(description="The text to display on the button")
+     type: str = Field(description="A string identifying the type of action. It can be one of: SuggestGoal, SuggestRiseActivity")
+
+ class ResponseSchema(BaseModel):
+     """Final response to the question being asked"""
+     message: str = Field(description="Final answer to respond to the user")
+     #characters: str = Field(description="Number of characters in the answer")
+     #actions: List[FrontEndActions] = Field(description="List of suggested actions to pass back to the frontend to display. The user will click these to enact them.")
+     #tokens: int = Field(description="Count of the tokens used to produce the response")
+
+ parser = PydanticOutputParser(pydantic_object=ResponseSchema)
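A quick sanity check (assumed usage, not in the commit; the sample JSON is made up): PydanticOutputParser can inject its format instructions into a prompt and validate a model reply against ResponseSchema:

from agent.datastructures import parser

print(parser.get_format_instructions())   # schema text to embed in a prompt
reply = parser.parse('{"message": "Rise is our co-curricular programme."}')
print(reply.message)                      # a validated ResponseSchema instance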
agent/prompt.py ADDED
@@ -0,0 +1,31 @@
+ from langchain_core.prompts import PromptTemplate, ChatPromptTemplate, MessagesPlaceholder
+
+ prompt = ChatPromptTemplate.from_messages(
+     [
+         (
+             "system",
+             """
+             You support the core proposition to students of an English university: that higher education is a time to find purpose and to gain the skills, knowledge and experience to realise ambitions. We encourage our students to form personal action plans (Future me plans) and offer an extensive extended curriculum for learning beyond their courses (Rise).
+
+             If students have questions about our offer (Rise or Futureme), use the FAQ tool provided to answer them. Base your answers only on the facts it contains; if you cannot answer, advise students to contact rise@mmu.ac.uk.
+
+             You should coach students to navigate our offer. Support them to better understand and articulate their ambitions (the things they want to achieve in their futures) and to break them into goals (things they need or want to achieve during their time at university). When you suggest these, pass them as actions in your output (rather than in the message field), with the command 'addAmbition' or 'addGoal'.
+
+             You should help students to identify activities that support these goals. These activities might be related to their course, but you should also recommend co-curricular learning opportunities from our Rise portfolio (a tool is provided to help you identify these). If no activities are relevant, suggest ways in which students can organise their own relevant activities, and encourage them to claim points for these.
+
+             Our students are diverse, and many come from non-traditional backgrounds and minority ethnic groups. Some may have ambitions for particular careers; others may not, and many may not be confident or have the social and financial advantages to reach their goals. If a student has a sense of what they want to achieve, help them to create SMART targets. If they don't, reassure them that it's OK not to have clear goals yet, and help them to reflect and form some ambitions. These could be career-oriented, or they could be about succeeding in, and making the most of, their university experience.
+
+             You should always output a well-formatted JSON object.
+
+             Any content you wish to communicate to students should be output in a 'message' field. Please make this as concise as possible.
+             A 'tokens' field should report how many tokens were used to generate the answer.
+             A 'cost' field should report how much the API calls have cost (output n/a if you can't do this).
+             If relevant, you should also output an array of 'actions', which will be displayed as buttons on the front end. Each action can have a label (the content displayed to the student) and a command (the function you want the front end to execute if the user accepts the action). The commands you are allowed to use are all defined in this prompt; you cannot use any not mentioned here. If content is included in an action label, do not repeat it in the message; just refer to the actions.
+             """,
+         ),
+         ("user", "{input}"),
+         MessagesPlaceholder(variable_name="agent_scratchpad"),
+     ]
+ )
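To see what the template actually sends, a hedged check (not in the commit) renders it with an empty scratchpad:

from agent.prompt import prompt

messages = prompt.format_messages(input="What is Rise?", agent_scratchpad=[])
for m in messages:
    print(type(m).__name__, m.content[:60])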
agent/toolset.py ADDED
@@ -0,0 +1,39 @@
+ from langchain.agents import tool
+ from langchain_openai import OpenAIEmbeddings
+ from langchain_community.vectorstores.faiss import FAISS
+ from langchain.chains import RetrievalQA
+ from langchain_openai import OpenAI
+
+
+ @tool
+ def FAQ(input: str):
+     """Provides answers to questions that students might have about Rise and Futureme. Please add ### to the beginning of your answer"""
+
+     # Load the FAQ index from local storage
+     embeddings = OpenAIEmbeddings()
+     persisted_vectorstore = FAISS.load_local("_rise_faq_db", embeddings)
+
+     # Use a RetrievalQA chain for orchestration
+     qa = RetrievalQA.from_chain_type(llm=OpenAI(), chain_type="stuff", retriever=persisted_vectorstore.as_retriever())
+     result = qa.invoke(input)
+     return result
+
+ @tool
+ def recommend_activity(question: str):
+     """Recommends an activity from the Rise catalogue."""
+
+     # Load the product index from local storage
+     embeddings = OpenAIEmbeddings()
+     persisted_vectorstore = FAISS.load_local("_rise_product_db", embeddings)
+
+     # Use a RetrievalQA chain for orchestration
+     qa = RetrievalQA.from_chain_type(llm=OpenAI(), chain_type="stuff", retriever=persisted_vectorstore.as_retriever())
+     result = qa.invoke(question)  # fixed: was qa.invoke(input), which referenced the builtin
+     return result
+
+ @tool
+ def placeholder_tool():
+     """This is just a placeholder function"""
+     return "placeholder"
+
+ tools = [placeholder_tool, FAQ]
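Each @tool can be exercised on its own; a sketch (assumes OPENAI_API_KEY is set and _rise_faq_db was built by faq_train.py):

from agent.toolset import FAQ

print(FAQ.invoke("What is Rise?"))  # runs the RetrievalQA chain directly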
app.py ADDED
@@ -0,0 +1,12 @@
+ # ESTABLISH THE SERVER
+ from flask import Flask, request
+ from dotenv import load_dotenv
+
+ # Initialise the Flask app
+ app = Flask(__name__)
+ load_dotenv()
+
+ @app.route("/", methods=['GET', 'POST'])
+ def index():
+     from agent._create import agent_executor
+     return agent_executor()
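A hedged client-side check (not in the commit), assuming the container built from the Dockerfile is running locally on port 7860:

import requests

print(requests.get("http://localhost:7860/").text)  # returns the hard-coded agent answer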
faq_train.py ADDED
@@ -0,0 +1,20 @@
+ from langchain.text_splitter import CharacterTextSplitter
+ from langchain_openai import OpenAIEmbeddings
+ from langchain_community.vectorstores.faiss import FAISS
+ from dotenv import load_dotenv
+ from langchain.document_loaders import WebBaseLoader
+
+ load_dotenv()
+
+ documents = WebBaseLoader("https://rise.mmu.ac.uk/what-is-rise/").load()
+
+ # Split the document into chunks
+ text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=30)
+ docs = text_splitter.split_documents(documents=documents)
+
+ embeddings = OpenAIEmbeddings()
+ # Create vectors
+ vectorstore = FAISS.from_documents(docs, embeddings)
+ # Persist the vectors locally on disk
+ vectorstore.save_local("_rise_faq_db")
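A quick sanity check of the persisted index (assumed usage, not in the commit):

from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores.faiss import FAISS

# Newer langchain versions also require allow_dangerous_deserialization=True here
db = FAISS.load_local("_rise_faq_db", OpenAIEmbeddings())
for doc in db.similarity_search("What is Rise?", k=2):
    print(doc.page_content[:80])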
hf.py ADDED
@@ -0,0 +1,46 @@
+ from dotenv import load_dotenv
+ load_dotenv()
+
+ from langchain_community.document_loaders import WebBaseLoader
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+
+ # LOAD AND SPLIT DATA
+ data = WebBaseLoader("https://rise.mmu.ac.uk/what-is-rise/").load()
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
+ docs = text_splitter.split_documents(data)
+
+ # CREATE EMBEDDINGS AND PUSH TO VECTORSTORE
+ from langchain_community.embeddings import HuggingFaceEmbeddings
+ from langchain_community.vectorstores import FAISS
+ embeddings = HuggingFaceEmbeddings()
+ db = FAISS.from_documents(docs, embeddings)
+ db.save_local("_rise_faq_db")
+
+ # ASK A QUESTION
+ from langchain_community.llms import HuggingFaceHub
+ from langchain.prompts import PromptTemplate
+
+ question = "Who won the FIFA World Cup in the year 1994?"
+
+ template = """Question: {question}
+
+ Answer: Let's think step by step."""
+
+ prompt = PromptTemplate(template=template, input_variables=["question"])
+
+ repo_id = "google/flan-t5-xxl"
+ llm = HuggingFaceHub(
+     repo_id=repo_id, model_kwargs={"temperature": 0.5, "max_length": 64}
+ )
+
+ # Note: this loads the product index, not the FAQ index saved above
+ db = FAISS.load_local("_rise_product_db", embeddings)
+ from langchain.chains import RetrievalQA
+
+ # Use a RetrievalQA chain for orchestration
+ qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=db.as_retriever())
+ result = qa.run(question)
+ #return {"response": result}
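One failure mode worth guarding against (a hedged sketch, not in the commit): HuggingFaceHub reads HUGGINGFACEHUB_API_TOKEN from the environment, so checking it early gives a clearer error than a failed API call:

import os

# Fail fast if the token the .env file should provide is missing
assert os.getenv("HUGGINGFACEHUB_API_TOKEN"), "set HUGGINGFACEHUB_API_TOKEN in .env"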
joke.py ADDED
@@ -0,0 +1,32 @@
+ from dotenv import load_dotenv
+ load_dotenv()
+
+ from langchain_community.utils.openai_functions import (
+     convert_pydantic_to_openai_function,
+ )
+ from langchain_core.prompts import ChatPromptTemplate
+ from langchain_core.pydantic_v1 import BaseModel, Field
+ from langchain_openai import ChatOpenAI
+
+ class Joke(BaseModel):
+     """Joke to tell user."""
+
+     setup: str = Field(description="question to set up a joke")
+     punchline: str = Field(description="answer to resolve the joke")
+
+
+ openai_functions = [convert_pydantic_to_openai_function(Joke)]
+ model = ChatOpenAI(temperature=0)
+
+ prompt = ChatPromptTemplate.from_messages(
+     [("system", "You are a helpful assistant"), ("user", "{input}")]
+ )
+
+ from langchain.output_parsers.openai_functions import JsonOutputFunctionsParser
+
+ # Bind the function schema to the model and parse the structured reply as JSON
+ parser = JsonOutputFunctionsParser()
+ chain = prompt | model.bind(functions=openai_functions) | parser
+ print(chain.invoke({"input": "tell me a joke"}))
products_recommend.py ADDED
@@ -0,0 +1,20 @@
+ from langchain_openai import OpenAIEmbeddings
+ from langchain_community.vectorstores.faiss import FAISS
+ from langchain.chains import RetrievalQA
+ from langchain_openai import OpenAI
+ from dotenv import load_dotenv
+
+ load_dotenv()
+
+ # The question to ask
+ question = "I would like to be a teacher, can you recommend an activity?"
+
+ # Load the product index from local storage
+ embeddings = OpenAIEmbeddings()
+ persisted_vectorstore = FAISS.load_local("_rise_product_db", embeddings)
+
+ # Use a RetrievalQA chain for orchestration
+ qa = RetrievalQA.from_chain_type(llm=OpenAI(), chain_type="stuff", retriever=persisted_vectorstore.as_retriever())
+ result = qa.invoke(question)
+ print(result)
products_train.py ADDED
@@ -0,0 +1,19 @@
+ from langchain_community.document_loaders.csv_loader import CSVLoader
+ from langchain.text_splitter import CharacterTextSplitter
+ from langchain_openai import OpenAIEmbeddings
+ from langchain_community.vectorstores.faiss import FAISS
+ from dotenv import load_dotenv
+
+ load_dotenv()
+
+ documents = CSVLoader(file_path="posts.csv").load()
+
+ # Split the document into chunks
+ text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=30)
+ docs = text_splitter.split_documents(documents=documents)
+
+ embeddings = OpenAIEmbeddings()
+ # Create vectors
+ vectorstore = FAISS.from_documents(docs, embeddings)
+ # Persist the vectors locally on disk
+ vectorstore.save_local("_rise_product_db")
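A quick check (assumed usage, not in the commit) that posts.csv loads as expected; CSVLoader yields one Document per row:

from langchain_community.document_loaders.csv_loader import CSVLoader

rows = CSVLoader(file_path="posts.csv").load()
print(len(rows), "documents loaded from posts.csv")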
requirements.txt ADDED
@@ -0,0 +1,23 @@
+ #SERVER
+ Flask
+ Flask-cors
+ gunicorn
+ python-dotenv
+
+ #LLM
+ bs4
+ langchain
+ torch
+ transformers
+ sentence-transformers
+ datasets
+ faiss-cpu
structure.py ADDED
@@ -0,0 +1,100 @@
+ from dotenv import load_dotenv
+ load_dotenv()
+
+ from typing import List
+
+ from langchain.utils.openai_functions import convert_pydantic_to_openai_function
+ from pydantic import BaseModel, Field
+
+
+ class FrontendActions(BaseModel):
+     """Commands and options that can be passed to the frontend chatbot to elicit a response or student action"""
+     label: str = Field(description="Label which will appear on the frontend chatbot button. Omit this if you are just making a server request for more information")
+     link: str = Field(description="Link to take the user to a different place.")
+
+
+ class Response(BaseModel):
+     """Final response to the question being asked. This will be passed to the frontend chatbot for processing"""
+
+     message: str = Field(description="The final answer to respond to the user")
+     tokens: int = Field(description="Count of the tokens used to produce the response")
+     #actions: List[FrontendActions] = Field(description="List of actions taken to produce the response.")
+
+
+ import json
+
+ from langchain_core.agents import AgentActionMessageLog, AgentFinish
+
+ def parse(output):
+     # If no function was invoked, return to the user
+     if "function_call" not in output.additional_kwargs:
+         return AgentFinish(return_values={"output": output.content}, log=output.content)
+
+     # Parse out the function call
+     function_call = output.additional_kwargs["function_call"]
+     name = function_call["name"]
+     inputs = json.loads(function_call["arguments"])
+
+     # If the Response function was invoked, return to the user with the function inputs
+     if name == "Response":
+         return AgentFinish(return_values=inputs, log=str(function_call))
+     # Otherwise, return an agent action
+     else:
+         return AgentActionMessageLog(
+             tool=name, tool_input=inputs, log="", message_log=[output]
+         )
+
+ from langchain.agents import tool
+
+
+ @tool
+ def placeholder():
+     """This is just a placeholder function"""
+     return "placeholder"
+
+ tools = [placeholder]
+
+ from langchain.agents import AgentExecutor
+ from langchain.agents.format_scratchpad import format_to_openai_function_messages
+ from langchain_community.tools.convert_to_openai import format_tool_to_openai_function
+ from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
+ from langchain_openai import ChatOpenAI
+
+ prompt = ChatPromptTemplate.from_messages(
+     [
+         ("system", "Please only make two suggestions at a time, and output a JSON object using the response schema provided to you in the associated tool. If you suggest objectives and goals, please make them actions in the schema with the link 'plan'"),
+         ("user", "{input}"),
+         MessagesPlaceholder(variable_name="agent_scratchpad"),
+     ]
+ )
+
+ llm = ChatOpenAI(model="gpt-4", temperature=0)
+
+ llm_with_tools = llm.bind(
+     functions=[
+         # The placeholder tool
+         format_tool_to_openai_function(placeholder),
+         # Response schema, exposed as a function the model "calls" to finish
+         convert_pydantic_to_openai_function(Response),
+     ]
+ )
+
+ agent = (
+     {
+         "input": lambda x: x["input"],
+         # Format the agent scratchpad from intermediate steps
+         "agent_scratchpad": lambda x: format_to_openai_function_messages(
+             x["intermediate_steps"]
+         ),
+     }
+     | prompt
+     | llm_with_tools
+     | parse
+ )
+
+ agent_executor = AgentExecutor(tools=[placeholder], agent=agent, verbose=True)
+
+ agent_executor.invoke(
+     {"input": "Can you suggest to me some actions I could take to become a teacher?"},
+     return_only_outputs=True,
+ )