DSatishchandra committed on
Commit
c4a3be0
·
verified ·
1 Parent(s): 310e3a8

Update parse_bhel.py

Browse files
Files changed (1) hide show
  1. parse_bhel.py +6 -2
parse_bhel.py CHANGED
@@ -13,8 +13,12 @@ def parse_bhel_pdf(pdf_path):
13
  table = page.extract_table()
14
  if table:
15
  for row in table[1:]: # Skip header row
16
- data.append(row)
 
 
 
 
17
 
18
- # Create a DataFrame
19
  df = pd.DataFrame(data, columns=columns)
20
  return df
 
13
  table = page.extract_table()
14
  if table:
15
  for row in table[1:]: # Skip header row
16
+ # Only add rows that have exactly 10 columns
17
+ if len(row) == 10:
18
+ data.append(row)
19
+ else:
20
+ print(f"Skipping row due to column mismatch: {row}")
21
 
22
+ # Create a DataFrame with the specified columns
23
  df = pd.DataFrame(data, columns=columns)
24
  return df