Spaces:
Sleeping
Sleeping
dinhquangson
committed on
Commit
•
9a04097
1
Parent(s):
668d00f
Update app.py
Browse files
app.py
CHANGED
@@ -207,7 +207,7 @@ def search(prompt: str):
|
|
207 |
generator = OpenAIGenerator(
|
208 |
api_key=Secret.from_env_var("OCTOAI_TOKEN"),
|
209 |
api_base_url="https://text.octoai.run/v1",
|
210 |
-
model="
|
211 |
generation_kwargs = {"max_tokens": 512}
|
212 |
)
|
213 |
metadata_extractor = QueryMetadataExtractor()
|
@@ -275,6 +275,12 @@ async def download_database():
|
|
275 |
# Return the zip file as a response for download
|
276 |
return FileResponse(zip_path, media_type='application/zip', filename='database.zip')
|
277 |
|
|
|
|
|
|
|
|
|
|
|
|
|
278 |
@app.post("/pdf2text/")
|
279 |
async def convert_upload_file(file: UploadFile = File(...)):
|
280 |
import pytesseract
|
@@ -296,13 +302,13 @@ async def convert_upload_file(file: UploadFile = File(...)):
|
|
296 |
for image in images:
|
297 |
ocr_text = pytesseract.image_to_string(image,lang='vie')
|
298 |
if first_page=="":
|
299 |
-
first_page = ocr_text
|
300 |
text=text+ocr_text+'\n'
|
301 |
|
302 |
client = OctoAI()
|
303 |
|
304 |
completion = client.text_gen.create_chat_completion(
|
305 |
-
model="
|
306 |
messages=[
|
307 |
ChatMessage(role="system", content="You are a helpful assistant."),
|
308 |
ChatMessage(role="user", content=first_page),
|
|
|
207 |
generator = OpenAIGenerator(
|
208 |
api_key=Secret.from_env_var("OCTOAI_TOKEN"),
|
209 |
api_base_url="https://text.octoai.run/v1",
|
210 |
+
model="meta-llama-3-8b-instruct",
|
211 |
generation_kwargs = {"max_tokens": 512}
|
212 |
)
|
213 |
metadata_extractor = QueryMetadataExtractor()
|
|
|
275 |
# Return the zip file as a response for download
|
276 |
return FileResponse(zip_path, media_type='application/zip', filename='database.zip')
|
277 |
|
278 |
+
def truncate_text(text: str, max_chars: int = 3000) -> str:
    """Return *text* limited to at most *max_chars* characters.

    The OCR text of the first page is passed through this function before
    being sent as the user message of a chat completion request.
    NOTE(review): presumably the cap keeps the prompt within the model's
    context window -- the limit counts characters, not tokens; confirm.

    Args:
        text: The string to shorten.
        max_chars: Maximum number of characters to keep. Defaults to 3000,
            the limit that was previously hard-coded.

    Returns:
        ``text`` unchanged when it is no longer than ``max_chars``,
        otherwise its first ``max_chars`` characters.

    Raises:
        ValueError: If ``max_chars`` is negative.
    """
    if max_chars < 0:
        # A negative bound would make the slice drop characters from the
        # end instead of capping the length, which is never intended here.
        raise ValueError("max_chars must be non-negative")
    # Slicing past the end of a string is safe and returns the whole string,
    # so no explicit length check is needed.
    return text[:max_chars]
|
283 |
+
|
284 |
@app.post("/pdf2text/")
|
285 |
async def convert_upload_file(file: UploadFile = File(...)):
|
286 |
import pytesseract
|
|
|
302 |
for image in images:
|
303 |
ocr_text = pytesseract.image_to_string(image,lang='vie')
|
304 |
if first_page=="":
|
305 |
+
first_page = truncate_text(ocr_text)
|
306 |
text=text+ocr_text+'\n'
|
307 |
|
308 |
client = OctoAI()
|
309 |
|
310 |
completion = client.text_gen.create_chat_completion(
|
311 |
+
model="meta-llama-3-8b-instruct",
|
312 |
messages=[
|
313 |
ChatMessage(role="system", content="You are a helpful assistant."),
|
314 |
ChatMessage(role="user", content=first_page),
|