Soufiane committed
Commit 8565879
1 Parent(s): 1ce10d7

initial
Files changed:
- app.py (+57, -0)
- doc.py (+64, -0)
- invoice.py (+116, -0)
app.py (ADDED)

```python
import streamlit as st
from PIL import Image, ImageOps
import pandas as pd
import numpy as np
from invoice import extract_data, extract_tables, INVOICE
import cv2


def process_image(lang, to_be_extracted, input_image):
    data = extract_data(lang, to_be_extracted, input_image)
    return data

def main():
    st.title("Image Processing App")
    st.write("Upload an image and click the 'Process Image' button to process it.")
    uploaded_image = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png", "webp"])

    if uploaded_image is not None:
        # Display the uploaded image
        st.image(uploaded_image, caption="Uploaded Image", use_column_width=True)

        lang = st.selectbox("Select Language", ["french", "english", "arabic"])

        # UI for adding elements to extract list
        st.write("Add elements to extract:")
        extract_input = st.text_input("Add elements")
        extract_list = st.session_state.get("extract_list", INVOICE)
        if extract_input:
            extract_list.append(extract_input.strip())
            st.session_state["extract_list"] = extract_list

        # Display the extract list as chips
        st.write("Elements to extract:")
        for item in extract_list:
            st.write(f"`{item}`", unsafe_allow_html=True)

        if st.button("Extract information"):
            pil_image = Image.open(uploaded_image).convert('RGB')
            numpy_image = np.array(pil_image)
            image_info = process_image(lang, extract_list, numpy_image)

            df = pd.DataFrame(list(image_info.items()), columns=["Field", "Value"])
            st.write("Extracted information:")
            st.dataframe(df)

        if st.button("Extract Tables"):
            df = pd.DataFrame([])
            csv = df.to_csv(index=False, header=False)
            st.download_button(label="Download CSV", data=csv, file_name='data.csv', mime='text/csv')

    else:
        st.session_state['extract_list'] = INVOICE

if __name__ == "__main__":
    main()
```
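Two things stand out in app.py as committed: `st.session_state.get("extract_list", INVOICE)` returns the module-level `INVOICE` list itself, so appending mutates that default and re-appends the text-input value on every Streamlit rerun; and while `extract_tables` is imported, the "Extract Tables" button currently downloads an empty CSV. The snippet below is a minimal sketch, not part of the commit, of how the list handling could avoid both the shared default and the duplicate appends:

```python
# Hypothetical variant of the extract-list handling in app.py.
# Copying INVOICE avoids mutating the module-level default, and the
# membership check avoids re-appending the same value on every rerun.
import streamlit as st
from invoice import INVOICE

extract_list = st.session_state.setdefault("extract_list", list(INVOICE))
extract_input = st.text_input("Add elements")
if extract_input and extract_input.strip() not in extract_list:
    extract_list.append(extract_input.strip())
```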
doc.py (ADDED)

```python
import cv2
import base64
from utils import *

CIN = ["Nom complet", "Date de naissance", "Date de validité", "Lieu de naissance", "Numéro CIN"]


def extract_face(image, scale_factor=1.2):
    # Load the pre-trained face detector
    face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_alt.xml')

    # Convert the image to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Detect faces in the image
    faces = face_cascade.detectMultiScale(gray, scaleFactor=scale_factor, minNeighbors=5, minSize=(30, 30))
    print(len(faces))
    if len(faces) == 0:
        print("No faces found")
        return None

    # Assume only one face in the image for simplicity
    (x, y, w, h) = max(faces, key=lambda x: x[2])

    # Expand the bounding box to include a slightly larger region
    x -= int(0.1 * w)
    y -= int(0.1 * h)
    w += int(0.2 * w)
    h += int(0.2 * h)

    # Ensure the coordinates are within the image boundaries
    x = max(x, 0)
    y = max(y, 0)
    w = min(w, image.shape[1])
    h = min(h, image.shape[0])

    # Extract the face region from the image
    face = image[y:y+h, x:x+w]

    # Convert the face to base64 string
    _, encoded_image = cv2.imencode('.jpg', face)
    encoded_image_str = base64.b64encode(encoded_image).decode('utf-8')

    # Create a dictionary to store image information
    image_info = {
        "width": face.shape[1],
        "height": face.shape[0],
        "data": encoded_image_str
    }

    return image_info


if __name__ == '__main__':
    lang = "french"
    to_be_extracted = CIN
    image_path = "./docs for ocr/CIN 2.png"
    image = cv2.imread(image_path)

    text_data = extract_data(lang, to_be_extracted, image)
    print(text_data)
    face_data = extract_face(image)
```
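`extract_face` returns the crop as a base64-encoded JPEG plus its dimensions. A minimal usage sketch showing how a caller could decode that payload back into an image (the file path is hypothetical, and `doc.py` is assumed importable):

```python
# Round-trip the base64 payload produced by extract_face back into a
# NumPy image, then check it matches the reported dimensions.
import base64
import cv2
import numpy as np
from doc import extract_face

image = cv2.imread("card.png")  # hypothetical ID-card photo
info = extract_face(image)
if info is not None:
    raw = base64.b64decode(info["data"])
    face = cv2.imdecode(np.frombuffer(raw, dtype=np.uint8), cv2.IMREAD_COLOR)
    assert face.shape[:2] == (info["height"], info["width"])
```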
invoice.py (ADDED)

```python
import cv2
from PIL import Image
# from ultralyticsplus import YOLO
# from transformers import pipeline
import pandas as pd
import numpy as np
import easyocr
from utils import *

INVOICE = ["Numéro de facture", "Date", "Numéro de commande", "Echéance", "Total"]

# model = YOLO('keremberke/yolov8s-table-extraction')
# model.overrides['conf'] = 0.25  # NMS confidence threshold
# model.overrides['iou'] = 0.45  # NMS IoU threshold
# model.overrides['agnostic_nms'] = False  # NMS class-agnostic
# model.overrides['max_det'] = 1000  # maximum number of detections per image

# pipe = pipeline("object-detection", model="bilguun/table-transformer-structure-recognition")


def detect_tables(image):
    # image is an np array
    results = model.predict(image)

    result = results[0]
    xyxy = result.boxes.xyxy
    scores = result.boxes.conf
    tables = []
    for i in range(len(scores)):
        if scores[i] >= 0.5:
            table = image[int(xyxy[i, 1]):int(xyxy[i, 3]), int(xyxy[i, 0]):int(xyxy[i, 2])]
            table = Image.fromarray(table)
            tables.append(table)
    return tables

def insert(el, listt, pos):
    if not listt:
        listt.append(el)
    else:
        inserted = False
        for i in range(len(listt)):
            if el[pos] <= listt[i][pos]:
                listt.insert(i, el)
                inserted = True
                break
        if not inserted:
            listt.append(el)

def rec_table(table, reader):
    col_row = pipe(table)
    cols = []
    rows = []
    for el in col_row:
        if el["label"] == 'table column':
            insert(el["box"], cols, pos="xmin")
        elif el["label"] == 'table row':
            insert(el["box"], rows, pos="ymin")

    table = np.array(table)

    csv = []
    for row in rows:
        temp = []
        for col in cols:
            box = intersection(row, col)
            cell = table[box['ymin']:box['ymax'], box['xmin']:box['xmax']]
            res = get_ocr(cell, reader)
            temp.append(get_input(res))
        csv.append(temp)

    df = pd.DataFrame(csv)
    return df


def intersection(box1, box2):
    # Extract coordinates of first bounding box
    x1min, y1min, x1max, y1max = box1['xmin'], box1['ymin'], box1['xmax'], box1['ymax']

    # Extract coordinates of second bounding box
    x2min, y2min, x2max, y2max = box2['xmin'], box2['ymin'], box2['xmax'], box2['ymax']

    # Calculate coordinates of intersection
    xmin = max(x1min, x2min)
    ymin = max(y1min, y2min)
    xmax = min(x1max, x2max)
    ymax = min(y1max, y2max)

    # Check if there is no intersection
    if xmin >= xmax or ymin >= ymax:
        return None

    # Return the coordinates of the intersection
    return {'xmin': xmin, 'ymin': ymin, 'xmax': xmax, 'ymax': ymax}


def extract_tables(lang, image):
    reader = easyocr.Reader([langs[lang]])
    tables = detect_tables(image)

    for i in range(len(tables)):
        df = rec_table(tables[i], reader)
        df.to_excel(f'table_{i+1}.xlsx', index=False, header=False)

if __name__ == '__main__':
    lang = "french"
    to_be_extracted = INVOICE
    image_path = "./docs for ocr/invoices/facture.png"
    image = cv2.imread(image_path)
    print(image.shape)

    text_data = extract_data(lang, to_be_extracted, image)
    print(text_data)

    # extract_tables(lang, image)  # extract tables from the image and download them in excel format to the current directory
```
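Note that as committed, the YOLO `model` and the table-structure `pipe` are left commented out, so calling `detect_tables`, `rec_table`, or `extract_tables` would raise a `NameError` until those lines are restored (and `rec_table` does not guard against `intersection` returning `None` for non-overlapping boxes); `langs`, `get_ocr`, and `get_input` are presumably supplied by the star import from `utils`. The pure helpers can be exercised on their own, though. A quick check with made-up coordinates:

```python
# Exercise the cell-gridding helpers from invoice.py (coordinates invented).
from invoice import insert, intersection

row = {"xmin": 0, "ymin": 50, "xmax": 400, "ymax": 80}    # one table row band
col = {"xmin": 120, "ymin": 0, "xmax": 200, "ymax": 600}  # one table column band
cell = intersection(row, col)
print(cell)  # {'xmin': 120, 'ymin': 50, 'xmax': 200, 'ymax': 80}

boxes = []
for b in (col, row):  # insert keeps boxes sorted by the given key
    insert(b, boxes, pos="xmin")
print([b["xmin"] for b in boxes])  # [0, 120]
```

Each detected row band intersected with each column band yields one cell rectangle, which `rec_table` then OCRs to build the table row by row.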