QDrantRAG9

Sleeping

dinhquangson committed on Jun 28

Commit

9a04097

•

1 Parent(s): 668d00f

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -207,7 +207,7 @@ def search(prompt: str):
     generator = OpenAIGenerator(
         api_key=Secret.from_env_var("OCTOAI_TOKEN"),
         api_base_url="https://text.octoai.run/v1",
-        model="mixtral-8x22b-finetuned",
         generation_kwargs = {"max_tokens": 512}
     )
     metadata_extractor = QueryMetadataExtractor()
@@ -275,6 +275,12 @@ async def download_database():
     # Return the zip file as a response for download
     return FileResponse(zip_path, media_type='application/zip', filename='database.zip')
 @app.post("/pdf2text/")
 async def convert_upload_file(file: UploadFile = File(...)):
     import pytesseract
@@ -296,13 +302,13 @@ async def convert_upload_file(file: UploadFile = File(...)):
     for image in images:
         ocr_text = pytesseract.image_to_string(image,lang='vie')
         if first_page=="":
-            first_page = ocr_text
         text=text+ocr_text+'\n'
     client = OctoAI()
     completion = client.text_gen.create_chat_completion(
-        model="mixtral-8x22b-finetuned",
         messages=[
             ChatMessage(role="system", content="You are a helpful assistant."),
             ChatMessage(role="user", content=first_page),

     generator = OpenAIGenerator(
         api_key=Secret.from_env_var("OCTOAI_TOKEN"),
         api_base_url="https://text.octoai.run/v1",
+        model="meta-llama-3-8b-instruct",
         generation_kwargs = {"max_tokens": 512}
     )
     metadata_extractor = QueryMetadataExtractor()
     # Return the zip file as a response for download
     return FileResponse(zip_path, media_type='application/zip', filename='database.zip')
+def truncate_text(text: str) -> str:
+    if len(text) <= 3000:
+        return text
+    else:
+        return text[:3000]
 @app.post("/pdf2text/")
 async def convert_upload_file(file: UploadFile = File(...)):
     import pytesseract
     for image in images:
         ocr_text = pytesseract.image_to_string(image,lang='vie')
         if first_page=="":
+            first_page = truncate_text(ocr_text)
         text=text+ocr_text+'\n'
     client = OctoAI()
     completion = client.text_gen.create_chat_completion(
+        model="meta-llama-3-8b-instruct",
         messages=[
             ChatMessage(role="system", content="You are a helpful assistant."),
             ChatMessage(role="user", content=first_page),