Spaces:
Sleeping
Sleeping
First version
Browse files- .env +13 -0
- app.py +45 -16
- chat.py +152 -0
- gameload.py +72 -0
- helper.py +13 -0
- prompt_response.txt +12 -0
.env
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# SECURITY: a live API key was committed here — revoke/rotate it immediately and keep real keys out of version control.
OPENAI_API_KEY=your-openai-api-key
|
2 |
+
# SECURITY: a live API key was committed here — revoke/rotate it immediately and keep real keys out of version control.
PINECONE_API_KEY=your-pinecone-api-key
|
3 |
+
PINECONE_REGION=us-east4-gcp
|
4 |
+
PINECONE_INDEX=game-instructions
|
5 |
+
EMBEDDING_ENGINE=text-embedding-ada-002
|
6 |
+
CONVO_LENGTH_TO_FETCH=1000
|
7 |
+
LLM_MODEL=gpt-4
|
8 |
+
DEBUG=True
|
9 |
+
GAME_DOCS_FOLDER="game_docs"
|
10 |
+
GAME_DOCS_FILE="Sim_AI_Superprompt.docx"
|
11 |
+
GAME_ID_INDEX="game_index.json"
|
12 |
+
HOST=127.0.0.1
|
13 |
+
PORT=5000
|
app.py
CHANGED
@@ -1,18 +1,47 @@
|
|
1 |
-
import
|
|
|
|
|
|
|
|
|
|
|
2 |
|
3 |
-
|
4 |
-
|
|
|
|
|
5 |
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
)
|
18 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Flask entry point for the game chat service: wires HTTP routes to the
# chat/gameload modules and reads server config from .env.
import os

import dotenv
from flask import Flask, jsonify, request

import chat
from gameload import upload_game_docs

app = Flask(__name__)

# Load .env before reading any environment variables.
dotenv.load_dotenv('.env')
host = os.getenv('HOST')  # may be None if HOST is not set
port = os.getenv('PORT')  # string (or None) — convert before use
|
12 |
|
13 |
+
@app.route('/initialize_game', methods=['GET'])
def initialize_game():
    """Run the game's superprompt through the LLM for this user/game pair.

    Query params: user_id, game_id. Returns the assistant's opening message
    as JSON ({'role': 'assistant', 'content': ...}).
    """
    args = request.args.to_dict()
    result = chat.initialize_game(args['user_id'], args['game_id'])
    return jsonify({'role': 'assistant', 'content': result})
|
21 |
+
|
22 |
+
@app.route('/start_game', methods=['GET'])
def start_game():
    """Forward one user turn to the chat engine and return the LLM reply.

    Query params: user_id, game_id, user_input. Returns JSON
    ({'role': 'assistant', 'content': ...}).
    """
    args = request.args.to_dict()
    gpt_output = chat.start_game(args['game_id'], args['user_id'], args['user_input'])
    return jsonify({'role': 'assistant', 'content': gpt_output})
|
31 |
+
|
32 |
+
@app.route('/health_check', methods=['GET'])
def health_check():
    """Liveness probe: always reports the site as healthy."""
    return jsonify({'message': 'Site is healthy'})
|
36 |
+
|
37 |
+
@app.route('/load_game', methods=['GET'])
def load_game():
    """Embed the configured game document and upsert it into the vector DB."""
    upload_game_docs()
    return jsonify({'message': 'Game loaded'})
|
42 |
+
|
43 |
+
if __name__ == '__main__':
    # Bug fix: the original hard-coded host='0.0.0.0' and port=8080 here
    # (with "Replace with your desired..." scaffold comments), silently
    # discarding the HOST/PORT values read from .env at module level.
    # Honor the configured values and fall back to safe local defaults.
    host = host or '127.0.0.1'
    port = int(port) if port else 5000  # app.run expects an int port

    app.run(host=host, port=port)
|
chat.py
ADDED
@@ -0,0 +1,152 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from time import time, sleep
import datetime
import dotenv
import os
import openai
import json
import pinecone
from uuid import uuid4
from helper import open_file, save_file
import re
from langchain.memory import VectorStoreRetrieverMemory  # NOTE(review): unused in this file — confirm before removing

## Read the environment variables
# Loading .env must happen before any os.getenv call below.
dotenv.load_dotenv('.env')
openai.api_key = os.getenv('OPENAI_API_KEY')
embedding_model = os.getenv('EMBEDDING_ENGINE')
# Number of past messages to fetch from the vector store per query.
convo_length = int(os.getenv('CONVO_LENGTH_TO_FETCH'))
llm_model = os.getenv('LLM_MODEL')

# Pinecone connection — note this runs at import time, so importing this
# module requires valid Pinecone credentials in the environment.
pinecone_api_key = os.getenv('PINECONE_API_KEY')
pinecone_env = os.getenv('PINECONE_REGION')
pinecone_index = os.getenv('PINECONE_INDEX')
pinecone.init(
    api_key=pinecone_api_key,
    environment=pinecone_env
)
# Handle to the index used for conversation memory and game documents.
vector_db = pinecone.Index(pinecone_index)
|
28 |
+
|
29 |
+
def timestamp_to_datetime(unix_time):
|
30 |
+
return datetime.datetime.fromtimestamp(unix_time).strftime("%A, %B %d, %Y at %I:%M%p %Z")
|
31 |
+
|
32 |
+
|
33 |
+
def perform_embedding(content):
    """Embed `content` with the configured OpenAI model; return the vector.

    Non-ASCII characters are dropped before the API call.
    """
    cleaned = content.encode(encoding='ASCII', errors='ignore').decode()
    response = openai.Embedding.create(model=embedding_model, input=cleaned)
    return response['data'][0]['embedding']
|
38 |
+
|
39 |
+
def load_conversation(results):
    """Flatten a Pinecone query result into a chronological message block.

    Sorts matches by their 'timestring' metadata (ascending) and joins the
    'text' fields with newlines. Returns '' for an empty match list.
    """
    entries = [
        {'time1': match['metadata']['timestring'], 'text': match['metadata']['text']}
        for match in results['matches']
    ]
    entries.sort(key=lambda entry: entry['time1'])
    return '\n'.join(entry['text'] for entry in entries).strip()
|
47 |
+
|
48 |
+
|
49 |
+
def call_gpt(prompt):
    """Send `prompt` to the configured chat model with retries.

    Retries up to 5 times on any exception, sleeping 1s between attempts.
    On success, normalizes whitespace in the reply, logs the prompt/response
    pair under gpt3_logs/, and returns the full API response object.

    NOTE(review): on persistent failure this returns a plain string
    ("GPT3 error: ..."), but callers dereference .choices[0].message.content
    on the return value — that path would raise AttributeError. Consider
    raising instead; confirm with callers before changing.
    """
    max_retry = 5
    retry = 0
    # Drop non-ASCII characters before sending.
    prompt = prompt.encode(encoding='ASCII',errors='ignore').decode()
    while True:
        try:
            response = openai.ChatCompletion.create(
                model=llm_model,
                temperature=0.9,
                messages=[
                    {"role": "user", "content": prompt}
                ]
            )

            text = response.choices[0].message.content
            # Collapse runs of newlines / horizontal whitespace.
            text = re.sub('[\r\n]+', '\n', text)
            text = re.sub('[\t ]+', ' ', text)
            # Log each exchange to a timestamp-named file for debugging.
            filename = '%s_gpt3.txt' % time()
            if not os.path.exists('gpt3_logs'):
                os.makedirs('gpt3_logs')
            save_file('gpt3_logs/%s' % filename, prompt + '\n\n==========\n\n' + text)
            # Write the normalized text back into the response object.
            response.choices[0].message.content = text
            return response
        except Exception as oops:
            retry += 1
            if retry >= max_retry:
                return "GPT3 error: %s" % oops
            print('Error communicating with OpenAI:', oops)
            sleep(1)
|
78 |
+
|
79 |
+
|
80 |
+
def start_game(game_id, user_id, user_input):
    """Process one user turn: store it, recall context, reply, store reply.

    Embeds the user message, queries the vector DB for this user/game's
    relevant history, fills the prompt template, calls the LLM, and upserts
    both the user message and the bot reply into the vector DB.
    Returns the LLM's reply text.
    """
    payload = list()

    # Get user input, save it, vectorize it and save to pinecone
    timestamp = time()
    timestring = timestamp_to_datetime(timestamp)
    unique_id = str(uuid4())
    vector = perform_embedding(user_input)
    metadata = {'speaker': 'USER', 'user_id': user_id, 'game_id': game_id, 'timestring': timestring, 'text': user_input}
    payload.append((unique_id, vector, metadata))

    # Search for relevant messages scoped to this user AND game.
    results = vector_db.query(vector=vector, top_k=convo_length, include_metadata=True,
        filter={
            "$and": [{ "user_id": { "$eq": user_id } }, { "game_id": { "$eq": game_id } }]
        }
    )
    conversation = load_conversation(results)

    # Populate prompt from the per-user game prompt and the response template.
    prompt_text = open_file(f"prompt_{game_id}_{user_id}.txt")
    prompt = open_file('prompt_response.txt').replace('<<PROMPT_VALUE>>', prompt_text).replace('<<CONVERSATION>>', conversation).replace('<<USER_MSG>>', user_input).replace('<<USER_VAL>>', user_id)

    # Generate response, vectorize
    llm_output_msg = call_gpt(prompt)
    llm_output = llm_output_msg.choices[0].message.content
    timestamp_op = time()
    # Bug fix: was timestamp_to_datetime(timestamp) — reused the USER turn's time.
    timestring_op = timestamp_to_datetime(timestamp_op)
    vector_op = perform_embedding(llm_output)
    # Bug fix: was str(uuid4) (no call) — every BOT row got the function's repr
    # as its id, so successive upserts overwrote each other.
    unique_id_op = str(uuid4())
    # Bug fix: was 'timestring' — stored the USER timestamp on the BOT row.
    metadata_op = {'speaker': 'BOT', 'user_id': user_id, 'game_id': game_id, 'timestring': timestring_op, 'text': llm_output}
    payload.append((unique_id_op, vector_op, metadata_op))

    # Upsert both turns into the vector database
    vector_db.upsert(payload)

    return llm_output
|
119 |
+
|
120 |
+
def populate_prompt(game_id, num_splits=21):
    """Reassemble a game's superprompt from its document chunks.

    Fetches chunks `<game_id>-0` .. `<game_id>-<num_splits-1>` from the
    vector DB and joins their stored text with spaces. `num_splits` was
    previously hard-coded to 21; it is now a backward-compatible parameter
    so games split into a different number of chunks can be loaded.

    NOTE(review): assumes every requested id exists in the index — a game
    with fewer chunks than num_splits would raise KeyError here.
    """
    idlist = [game_id + "-" + str(j) for j in range(num_splits)]

    results = vector_db.fetch(ids=idlist)

    prompt_text = [results['vectors'][ids]["metadata"]["text"] for ids in idlist]

    return ' '.join(prompt_text).strip()
|
134 |
+
|
135 |
+
|
136 |
+
def initialize_game(user_id, game_id):
    """Build the game's superprompt, run it through the LLM once, and persist
    the result as this user's per-game prompt file.

    Returns the LLM's response text (the game's opening output).
    """
    whole_prompt = populate_prompt(game_id)
    llm_prompt_op = call_gpt(whole_prompt)
    #print(llm_prompt_op.choices[0]["message"]["content"])
    content = llm_prompt_op.choices[0]["message"]["content"]
    save_file("prompt_" + game_id + "_" + user_id + ".txt", content)
    return content
|
143 |
+
|
144 |
+
if __name__ == '__main__':
    # Interactive console loop for manual testing without the Flask app.
    user_id = 'user_1'
    game_id = '536e6bc89df5'
    print('\n\nGENESIS: %s' % initialize_game(user_id, game_id))
    while True:
        message = input('\n\n%s: ' % user_id)
        print('\n\nGENESIS: %s' % start_game(game_id, user_id, user_input=message))
|
gameload.py
ADDED
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
import dotenv
import openai
import pinecone
from langchain.document_loaders import Docx2txtLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
import hashlib
from time import sleep
from helper import append_file
import json

## Read the environment variables
# .env must be loaded before the os.getenv calls below.
dotenv.load_dotenv('.env')
openai.api_key = os.getenv('OPENAI_API_KEY')
embedding_model = os.getenv('EMBEDDING_ENGINE')
debug_mode = os.getenv('DEBUG')  # string 'True'/'False', compared literally below
file_path = os.getenv('GAME_DOCS_FOLDER')
file_name = os.getenv('GAME_DOCS_FILE')
game_index = os.getenv('GAME_ID_INDEX')  # JSON index file mapping game ids to files

pinecone_api_key = os.getenv('PINECONE_API_KEY')
pinecone_env = os.getenv('PINECONE_REGION')
pinecone_index = os.getenv('PINECONE_INDEX')
pinecone.init(
    api_key=pinecone_api_key,
    environment=pinecone_env
)
# check if index_name' index already exists (only create index if not)
# NOTE: runs at import time — importing this module may create the index.
# Dimension 1536 matches the embedding model's output size.
if pinecone_index not in pinecone.list_indexes():
    pinecone.create_index(pinecone_index, dimension=1536, metric="cosine", pods=1, pod_type="p1.x1")
    sleep(3)  # give the newly created index a moment before connecting

vector_db = pinecone.Index(pinecone_index)
|
34 |
+
|
35 |
+
|
36 |
+
def perform_embedding(doclist):
    """Embed each document split and build the Pinecone upsert payload.

    Derives a 12-hex-char game id from the configured file name (md5 prefix),
    records the id->file mapping in the game index file, then embeds every
    split. Returns a list of (id, vector, metadata) tuples where ids are
    '<game_id>-<split_index>'.
    """
    # Convert the file name to a short, stable game id.
    digest = hashlib.md5()
    digest.update(file_name.encode('utf-8'))
    game_id = digest.hexdigest()[:12]
    json_val = {"game_id": game_id, "game_file": file_name}
    append_file(f"{file_path}/{game_index}", json.dumps(json_val))

    payload = list()
    for split_count, doc in enumerate(doclist):
        unique_id = game_id + "-" + str(split_count)
        # Drop non-ASCII characters before calling the embedding API.
        content = doc.page_content.encode(encoding='ASCII', errors='ignore').decode()
        response = openai.Embedding.create(model=embedding_model, input=content)
        payload.append((
            unique_id,
            response['data'][0]['embedding'],
            {'game_id': game_id, 'split_count': split_count, 'text': content},
        ))

    return payload
|
55 |
+
|
56 |
+
|
57 |
+
def load_split_document():
    """Load the configured .docx game document and split it into chunks.

    Uses a recursive character splitter with 1000-char chunks and no
    overlap. Returns the list of document splits.
    """
    word_doc_data = Docx2txtLoader(file_path + "/" + file_name).load()
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    docs = splitter.split_documents(word_doc_data)
    if debug_mode == 'True':
        print("Total count of splits created: " + str(len(docs)))
    return docs
|
65 |
+
|
66 |
+
def upload_game_docs():
    """Full pipeline: load & split the game doc, embed the splits, upsert."""
    vector_db.upsert(perform_embedding(load_split_document()))
|
70 |
+
|
71 |
+
if __name__ == '__main__':
    # Allow running this module directly to (re)load the game document.
    upload_game_docs()
|
helper.py
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
|
3 |
+
def open_file(filepath):
    """Read a UTF-8 text file and return its entire contents as a string."""
    with open(filepath, mode='r', encoding='utf-8') as handle:
        return handle.read()
|
6 |
+
|
7 |
+
def save_file(filepath, content):
    """Write `content` to `filepath` as UTF-8, replacing any existing file."""
    with open(filepath, mode='w', encoding='utf-8') as handle:
        handle.write(content)
|
10 |
+
|
11 |
+
def append_file(filepath, content):
    """Append `content` to `filepath` (UTF-8), creating the file if needed.

    Bug fix: this previously opened the file with mode 'w', which truncated
    it on every call — so the game index built up by gameload was overwritten
    instead of appended to. Note: callers must include any separator (e.g.
    newline) in `content`; none is added here.
    """
    with open(filepath, 'a', encoding='utf-8') as outfile:
        outfile.write(content)
|
prompt_response.txt
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<<PROMPT_VALUE>>
|
2 |
+
|
3 |
+
|
4 |
+
The following are the most relevant messages in the conversation:
|
5 |
+
<<CONVERSATION>>
|
6 |
+
|
7 |
+
<<USER_VAL>>:
|
8 |
+
<<USER_MSG>>
|
9 |
+
|
10 |
+
|
11 |
+
You will now provide a response, followed by a question:
|
12 |
+
GENESIS:
|