dinhquangson committed on
Commit
9a04097
1 Parent(s): 668d00f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -3
app.py CHANGED
@@ -207,7 +207,7 @@ def search(prompt: str):
207
  generator = OpenAIGenerator(
208
  api_key=Secret.from_env_var("OCTOAI_TOKEN"),
209
  api_base_url="https://text.octoai.run/v1",
210
- model="mixtral-8x22b-finetuned",
211
  generation_kwargs = {"max_tokens": 512}
212
  )
213
  metadata_extractor = QueryMetadataExtractor()
@@ -275,6 +275,12 @@ async def download_database():
275
  # Return the zip file as a response for download
276
  return FileResponse(zip_path, media_type='application/zip', filename='database.zip')
277
 
 
 
 
 
 
 
278
  @app.post("/pdf2text/")
279
  async def convert_upload_file(file: UploadFile = File(...)):
280
  import pytesseract
@@ -296,13 +302,13 @@ async def convert_upload_file(file: UploadFile = File(...)):
296
  for image in images:
297
  ocr_text = pytesseract.image_to_string(image,lang='vie')
298
  if first_page=="":
299
- first_page = ocr_text
300
  text=text+ocr_text+'\n'
301
 
302
  client = OctoAI()
303
 
304
  completion = client.text_gen.create_chat_completion(
305
- model="mixtral-8x22b-finetuned",
306
  messages=[
307
  ChatMessage(role="system", content="You are a helpful assistant."),
308
  ChatMessage(role="user", content=first_page),
 
207
  generator = OpenAIGenerator(
208
  api_key=Secret.from_env_var("OCTOAI_TOKEN"),
209
  api_base_url="https://text.octoai.run/v1",
210
+ model="meta-llama-3-8b-instruct",
211
  generation_kwargs = {"max_tokens": 512}
212
  )
213
  metadata_extractor = QueryMetadataExtractor()
 
275
  # Return the zip file as a response for download
276
  return FileResponse(zip_path, media_type='application/zip', filename='database.zip')
277
 
278
def truncate_text(text: str, max_chars: int = 3000) -> str:
    """Return *text* cut down to at most *max_chars* characters.

    Text that is already within the limit is returned unchanged.

    Args:
        text: The string to shorten.
        max_chars: Maximum number of characters to keep. Defaults to 3000,
            the limit this helper originally hard-coded.

    Returns:
        The leading ``max_chars`` characters of ``text`` (or all of ``text``
        when it is shorter than that).

    Raises:
        ValueError: If ``max_chars`` is negative.
    """
    if max_chars < 0:
        # A negative bound would make the slice drop characters from the end
        # instead of limiting the length, which is never what a caller wants.
        raise ValueError("max_chars must be non-negative")
    # Slicing clamps to the string length, so no explicit length check is
    # needed for short inputs.
    return text[:max_chars]
283
+
284
  @app.post("/pdf2text/")
285
  async def convert_upload_file(file: UploadFile = File(...)):
286
  import pytesseract
 
302
  for image in images:
303
  ocr_text = pytesseract.image_to_string(image,lang='vie')
304
  if first_page=="":
305
+ first_page = truncate_text(ocr_text)
306
  text=text+ocr_text+'\n'
307
 
308
  client = OctoAI()
309
 
310
  completion = client.text_gen.create_chat_completion(
311
+ model="meta-llama-3-8b-instruct",
312
  messages=[
313
  ChatMessage(role="system", content="You are a helpful assistant."),
314
  ChatMessage(role="user", content=first_page),