DSatishchandra committed on
Commit
310e3a8
·
verified ·
1 Parent(s): 23d889a

Update parse_toshiba.py

Browse files
Files changed (1) hide show
  1. parse_toshiba.py +6 -2
parse_toshiba.py CHANGED
@@ -14,8 +14,12 @@ def parse_toshiba_pdf(pdf_path):
14
  table = page.extract_table()
15
  if table:
16
  for row in table[1:]: # Skip header row
17
- data.append(row)
 
 
 
 
18
 
19
- # Create a DataFrame
20
  df = pd.DataFrame(data, columns=columns)
21
  return df
 
14
  table = page.extract_table()
15
  if table:
16
  for row in table[1:]: # Skip header row
17
+ # Only add rows that have exactly 13 columns
18
+ if len(row) == 13:
19
+ data.append(row)
20
+ else:
21
+ print(f"Skipping row due to column mismatch: {row}")
22
 
23
+ # Create a DataFrame with the specified columns
24
  df = pd.DataFrame(data, columns=columns)
25
  return df