Daniel Marques committed on
Commit
dc8d635
·
1 Parent(s): b2d865e

fix: add console trupple

Browse files
Files changed (4) hide show
  1. .flake8 +0 -4
  2. load_models.py +8 -1
  3. main.py +2 -11
  4. requirements.txt +0 -1
.flake8 DELETED
@@ -1,4 +0,0 @@
1
- [flake8]
2
- exclude = docs
3
- max-line-length = 119
4
- extend-ignore = E203
 
 
 
 
 
load_models.py CHANGED
@@ -204,6 +204,8 @@ def load_model(device_type, model_id, model_basename=None, LOGGING=logging, stre
204
 
205
  streamer = TextStreamer(tokenizer, skip_prompt=True)
206
 
 
 
207
  pipe = pipeline(
208
  "text-generation",
209
  model=model,
@@ -220,4 +222,9 @@ def load_model(device_type, model_id, model_basename=None, LOGGING=logging, stre
220
  local_llm = HuggingFacePipeline(pipeline=pipe)
221
  logging.info("Local LLM Loaded")
222
 
223
- return local_llm, streamer
 
 
 
 
 
 
204
 
205
  streamer = TextStreamer(tokenizer, skip_prompt=True)
206
 
207
+ logging.info(streamer)
208
+
209
  pipe = pipeline(
210
  "text-generation",
211
  model=model,
 
222
  local_llm = HuggingFacePipeline(pipeline=pipe)
223
  logging.info("Local LLM Loaded")
224
 
225
+ generated_text = ""
226
+ for new_text in streamer:
227
+ generated_text += new_text
228
+ print(generated_text)
229
+
230
+ return local_llm
main.py CHANGED
@@ -1,7 +1,6 @@
1
  from fastapi import FastAPI, HTTPException, UploadFile, WebSocket
2
  from fastapi.staticfiles import StaticFiles
3
 
4
-
5
  from pydantic import BaseModel
6
  import os
7
  import glob
@@ -14,7 +13,6 @@ from langchain.embeddings import HuggingFaceInstructEmbeddings
14
  from langchain.prompts import PromptTemplate
15
  from langchain.memory import ConversationBufferMemory
16
 
17
-
18
  # from langchain.embeddings import HuggingFaceEmbeddings
19
  from load_models import load_model
20
 
@@ -44,11 +42,7 @@ DB = Chroma(
44
 
45
  RETRIEVER = DB.as_retriever()
46
 
47
- models = load_model(device_type=DEVICE_TYPE, model_id=MODEL_ID, model_basename=MODEL_BASENAME, stream=False)
48
-
49
- print(models)
50
-
51
- LLM, STREAMER = models
52
 
53
  template = """Your name is Katara and you are a helpful, respectful and honest assistant. You should only use the source documents provided to answer the questions.
54
  You should only respond only topics that contains in documents use to training.
@@ -186,10 +180,7 @@ async def predict(data: Predict):
186
  )
187
 
188
 
189
- generated_text = ""
190
- for new_text in STREAMER:
191
- generated_text += new_text
192
- print(generated_text)
193
 
194
  return {"response": prompt_response_dict}
195
  else:
 
1
  from fastapi import FastAPI, HTTPException, UploadFile, WebSocket
2
  from fastapi.staticfiles import StaticFiles
3
 
 
4
  from pydantic import BaseModel
5
  import os
6
  import glob
 
13
  from langchain.prompts import PromptTemplate
14
  from langchain.memory import ConversationBufferMemory
15
 
 
16
  # from langchain.embeddings import HuggingFaceEmbeddings
17
  from load_models import load_model
18
 
 
42
 
43
  RETRIEVER = DB.as_retriever()
44
 
45
+ LLM = load_model(device_type=DEVICE_TYPE, model_id=MODEL_ID, model_basename=MODEL_BASENAME, stream=False)
 
 
 
 
46
 
47
  template = """Your name is Katara and you are a helpful, respectful and honest assistant. You should only use the source documents provided to answer the questions.
48
  You should only respond only topics that contains in documents use to training.
 
180
  )
181
 
182
 
183
+
 
 
 
184
 
185
  return {"response": prompt_response_dict}
186
  else:
requirements.txt CHANGED
@@ -24,7 +24,6 @@ accelerate
24
  bitsandbytes ; sys_platform != 'win32'
25
  bitsandbytes-windows ; sys_platform == 'win32'
26
  click
27
- flask
28
  requests
29
  uvicorn
30
  fastapi
 
24
  bitsandbytes ; sys_platform != 'win32'
25
  bitsandbytes-windows ; sys_platform == 'win32'
26
  click
 
27
  requests
28
  uvicorn
29
  fastapi