sussahoo committed on
Commit
a310b30
·
1 Parent(s): 09cab40

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -5
app.py CHANGED
@@ -369,9 +369,35 @@ def create_dataframe(cells_pytess_result: list, max_cols: int, max_rows: int, cs
369
 
370
  return df
371
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
372
 
373
  def process_image(image):
374
- TD_THRESHOLD = 0.9
375
  TSR_THRESHOLD = 0.8
376
  padd_top = 100
377
  padd_left = 100
@@ -424,10 +450,8 @@ def process_image(image):
424
  csv_path = "/content/sample_data/table_" + str(idx)
425
  df = create_dataframe(sequential_cell_img_list, max_cols, max_rows, csv_path)
426
  result.append(df)
427
- res = result[0].rename(columns={'Item': 'name', 'Total Cost': 'amount'})[["name", "amount"]]
428
- res["cost Code"] = ""
429
- res = {"items": res.to_json(orient='records')}
430
- return res
431
 
432
 
433
  title = "Interactive demo OCR: microsoft - table-transformer-detection + tesseract"
 
369
 
370
  return df
371
 
372
def postprocess_dataframes(result_tables):
    """Normalize each extracted table to ``name`` / ``amount`` / ``cost_code``.

    For every DataFrame in *result_tables* the columns are scanned in order:

    * a column whose label starts with "item" (case-insensitive) becomes
      ``name``;
    * a column whose label starts with "total", "amount" or "cost"
      (case-insensitive) becomes ``amount``.

    If several columns match the same target, the last one wins. When no
    column matches at all, the first and second columns (by position) are
    used as ``name`` and ``amount``; a missing column is filled with an
    empty string instead of raising. An empty ``cost_code`` column is always
    appended.

    Args:
        result_tables: iterable of pandas DataFrames, one per detected table.

    Returns:
        dict mapping ``"Table1" + str(index)`` to the normalized table
        serialized with ``DataFrame.to_json(orient="records")``.
    """
    amount_prefixes = ("total", "amount", "cost")
    res = {}
    # Iterate over the argument itself; the previous code read an undefined
    # name (`result`) and therefore only worked if a global happened to exist.
    for idx, table_df in enumerate(result_tables):
        result_df = pd.DataFrame()
        for col in table_df.columns:
            # Column labels may be non-strings (e.g. integer headers), so
            # coerce before doing the case-insensitive prefix match.
            label = str(col).lower()
            if label.startswith("item"):
                result_df["name"] = table_df[col].copy()
            if label.startswith(amount_prefixes):
                result_df["amount"] = table_df[col].copy()
        print(result_df.columns)
        if len(result_df.columns) == 0:
            # Nothing matched by label: fall back to positional columns,
            # tolerating tables that have fewer than two columns.
            n_cols = table_df.shape[1]
            result_df["name"] = table_df.iloc[:, 0].copy() if n_cols > 0 else ""
            result_df["amount"] = table_df.iloc[:, 1].copy() if n_cols > 1 else ""

        result_df["cost_code"] = ""
        # NOTE(review): the key prefix is the literal "Table1", so keys come
        # out as "Table10", "Table11", ... - kept as-is because consumers may
        # depend on it; confirm whether "Table" + str(idx) was intended.
        res["Table1" + str(idx)] = result_df.to_json(orient="records")
    return res
397
+
398
 
399
  def process_image(image):
400
+ TD_THRESHOLD = 0.7
401
  TSR_THRESHOLD = 0.8
402
  padd_top = 100
403
  padd_left = 100
 
450
  csv_path = "/content/sample_data/table_" + str(idx)
451
  df = create_dataframe(sequential_cell_img_list, max_cols, max_rows, csv_path)
452
  result.append(df)
453
+ output = postprocess_dataframes(result)
454
+ return output
 
 
455
 
456
 
457
  title = "Interactive demo OCR: microsoft - table-transformer-detection + tesseract"