Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
@@ -369,9 +369,35 @@ def create_dataframe(cells_pytess_result: list, max_cols: int, max_rows: int, cs
|
|
369 |
|
370 |
return df
|
371 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
372 |
|
373 |
def process_image(image):
|
374 |
-
TD_THRESHOLD = 0.
|
375 |
TSR_THRESHOLD = 0.8
|
376 |
padd_top = 100
|
377 |
padd_left = 100
|
@@ -424,10 +450,8 @@ def process_image(image):
|
|
424 |
csv_path = "/content/sample_data/table_" + str(idx)
|
425 |
df = create_dataframe(sequential_cell_img_list, max_cols, max_rows, csv_path)
|
426 |
result.append(df)
|
427 |
-
|
428 |
-
|
429 |
-
res = {"items": res.to_json(orient='records')}
|
430 |
-
return res
|
431 |
|
432 |
|
433 |
title = "Interactive demo OCR: microsoft - table-transformer-detection + tesseract"
|
|
|
369 |
|
370 |
return df
|
371 |
|
372 |
+
def postprocess_dataframes(result_tables):
    """Reduce each extracted table to ``name`` / ``amount`` / ``cost_code``.

    For every DataFrame in *result_tables*, columns are matched by header,
    case-insensitively:

    * a header starting with ``"item"`` becomes the ``name`` column;
    * a header starting with ``"total"``, ``"amount"`` or ``"cost"``
      becomes the ``amount`` column.

    When several headers match the same target, the last one wins. If no
    header matches at all, the first column is used as ``name`` and the
    second as ``amount``; a column that does not exist is filled with an
    empty string. An empty ``cost_code`` column is always appended.

    Args:
        result_tables: Iterable of pandas DataFrames, one per table.

    Returns:
        dict mapping ``"Table1" + str(idx)`` (``idx`` is the zero-based
        position of the table) to that table's records serialized with
        ``DataFrame.to_json(orient="records")``.
    """
    res = {}
    # Iterate over the argument, not an outer-scope ``result`` variable.
    for idx, table_df in enumerate(result_tables):
        result_df = pd.DataFrame()
        for col in table_df.columns:
            # Column labels may be non-strings (e.g. default integer labels).
            header = str(col).lower()
            if header.startswith("item"):
                result_df["name"] = table_df[col].copy()
            elif header.startswith(("total", "amount", "cost")):
                result_df["amount"] = table_df[col].copy()
        print(result_df.columns)
        if result_df.columns.empty:
            # No recognizable header: fall back to column positions, but
            # tolerate tables that have fewer than two columns.
            n_cols = table_df.shape[1]
            result_df["name"] = table_df.iloc[:, 0].copy() if n_cols > 0 else ""
            result_df["amount"] = table_df.iloc[:, 1].copy() if n_cols > 1 else ""

        result_df["cost_code"] = ""
        # NOTE(review): the key is the literal "Table1" followed by idx
        # ("Table10", "Table11", ...). Kept unchanged for callers; confirm
        # whether "Table" + str(idx + 1) was intended.
        res["Table1" + str(idx)] = result_df.to_json(orient="records")
    return res
|
397 |
+
|
398 |
|
399 |
def process_image(image):
|
400 |
+
TD_THRESHOLD = 0.7
|
401 |
TSR_THRESHOLD = 0.8
|
402 |
padd_top = 100
|
403 |
padd_left = 100
|
|
|
450 |
csv_path = "/content/sample_data/table_" + str(idx)
|
451 |
df = create_dataframe(sequential_cell_img_list, max_cols, max_rows, csv_path)
|
452 |
result.append(df)
|
453 |
+
output = postprocess_dataframes(result)
|
454 |
+
return output
|
|
|
|
|
455 |
|
456 |
|
457 |
title = "Interactive demo OCR: microsoft - table-transformer-detection + tesseract"
|