Spaces:
Sleeping
Sleeping
dinhquangson
committed on
Commit
•
5979d39
1
Parent(s):
ea5868d
Update app.py
Browse files
app.py
CHANGED
@@ -284,27 +284,29 @@ async def convert_upload_file(file: UploadFile = File(...)):
|
|
284 |
images = convert_from_path(file_savePath)
|
285 |
|
286 |
text=""
|
|
|
287 |
|
288 |
# Extract text from images
|
289 |
for image in images:
|
290 |
ocr_text = pytesseract.image_to_string(image,lang='vie')
|
|
|
|
|
291 |
text=text+ocr_text+'\n'
|
292 |
|
293 |
client = OctoAI()
|
294 |
|
295 |
completion = client.text_gen.create_chat_completion(
|
296 |
-
model="
|
297 |
messages=[
|
298 |
ChatMessage(role="system", content="You are a helpful assistant."),
|
299 |
-
ChatMessage(role="user", content=
|
300 |
],
|
301 |
-
max_tokens=
|
302 |
presence_penalty=0,
|
303 |
temperature=0.1,
|
304 |
top_p=0.9,
|
305 |
response_format=ChatCompletionResponseFormat(
|
306 |
type="json_object",
|
307 |
-
max_tokens=65536,
|
308 |
schema=Law.model_json_schema(),
|
309 |
),
|
310 |
)
|
|
|
284 |
images = convert_from_path(file_savePath)
|
285 |
|
286 |
text=""
|
287 |
+
first_page = ""
|
288 |
|
289 |
# Extract text from images
|
290 |
for image in images:
|
291 |
ocr_text = pytesseract.image_to_string(image,lang='vie')
|
292 |
+
if first_page=="":
|
293 |
+
first_page = ocr_text
|
294 |
text=text+ocr_text+'\n'
|
295 |
|
296 |
client = OctoAI()
|
297 |
|
298 |
completion = client.text_gen.create_chat_completion(
|
299 |
+
model="meta-llama-3-8b-instruct",
|
300 |
messages=[
|
301 |
ChatMessage(role="system", content="You are a helpful assistant."),
|
302 |
+
ChatMessage(role="user", content=first_page),
|
303 |
],
|
304 |
+
max_tokens=8192,
|
305 |
presence_penalty=0,
|
306 |
temperature=0.1,
|
307 |
top_p=0.9,
|
308 |
response_format=ChatCompletionResponseFormat(
|
309 |
type="json_object",
|
|
|
310 |
schema=Law.model_json_schema(),
|
311 |
),
|
312 |
)
|