ITSAIDI committed on
Commit
37ac053
1 Parent(s): a14278e
Files changed (2) hide show
  1. App.py +2 -161
  2. utilitis.py +161 -0
App.py CHANGED
@@ -1,165 +1,6 @@
1
  import streamlit as st
2
- from paddleocr import PaddleOCR
3
- from PIL import Image, ImageDraw, ImageFont
4
- import torch
5
- from transformers import AutoProcessor,LayoutLMv3ForTokenClassification
6
- import numpy as np
7
-
8
-
9
- model_Hugging_path = "Noureddinesa/Output_LayoutLMv3_v2"
10
-
11
-
12
- #############################################################################
13
- #############################################################################
14
- def Labels():
15
- labels = ['InvNum', 'InvDate', 'Fourni', 'TTC', 'TVA', 'TT', 'Autre']
16
- id2label = {v: k for v, k in enumerate(labels)}
17
- label2id = {k: v for v, k in enumerate(labels)}
18
- return id2label, label2id
19
-
20
- #############################################################################
21
- #############################################################################
22
- def Paddle():
23
- ocr = PaddleOCR(use_angle_cls=False,lang='fr',rec=False)
24
- return ocr
25
-
26
- def processbbox(BBOX, width, height):
27
- bbox = []
28
- bbox.append(BBOX[0][0])
29
- bbox.append(BBOX[0][1])
30
- bbox.append(BBOX[2][0])
31
- bbox.append(BBOX[2][1])
32
- #Scaling
33
- bbox[0]= 1000*bbox[0]/width # X1
34
- bbox[1]= 1000*bbox[1]/height # Y1
35
- bbox[2]= 1000*bbox[2]/width # X2
36
- bbox[3]= 1000*bbox[3]/height # Y2
37
- for i in range(4):
38
- bbox[i] = int(bbox[i])
39
- return bbox
40
-
41
-
42
- def Preprocess(image):
43
- image_array = np.array(image)
44
- ocr = Paddle()
45
- width, height = image.size
46
- results = ocr.ocr(image_array, cls=True)
47
- results = results[0]
48
- test_dict = {'image': image ,'tokens':[], "bboxes":[]}
49
- for item in results :
50
- bbox = processbbox(item[0], width, height)
51
- test_dict['tokens'].append(item[1][0])
52
- test_dict['bboxes'].append(bbox)
53
-
54
- print(test_dict['bboxes'])
55
- print(test_dict['tokens'])
56
- return test_dict
57
-
58
- #############################################################################
59
- #############################################################################
60
- def Encode(image):
61
- example = Preprocess(image)
62
- image = example["image"]
63
- words = example["tokens"]
64
- boxes = example["bboxes"]
65
- processor = AutoProcessor.from_pretrained(model_Hugging_path, apply_ocr=False)
66
- encoding = processor(image, words, boxes=boxes,return_offsets_mapping=True,truncation=True, max_length=512, padding="max_length", return_tensors="pt")
67
- offset_mapping = encoding.pop('offset_mapping')
68
- return encoding, offset_mapping,words
69
-
70
- def unnormalize_box(bbox, width, height):
71
- return [
72
- width * (bbox[0] / 1000),
73
- height * (bbox[1] / 1000),
74
- width * (bbox[2] / 1000),
75
- height * (bbox[3] / 1000),
76
- ]
77
-
78
- def Run_model(image):
79
- encoding,offset_mapping,words = Encode(image)
80
- device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
81
- # load the fine-tuned model from the hub
82
- model = LayoutLMv3ForTokenClassification.from_pretrained(model_Hugging_path)
83
- model.to(device)
84
- # forward pass
85
- outputs = model(**encoding)
86
-
87
- predictions = outputs.logits.argmax(-1).squeeze().tolist()
88
- token_boxes = encoding.bbox.squeeze().tolist()
89
-
90
- width, height = image.size
91
-
92
- id2label, _ = Labels()
93
- is_subword = np.array(offset_mapping.squeeze().tolist())[:,0] != 0
94
- true_predictions = [id2label[pred] for idx, pred in enumerate(predictions) if not is_subword[idx]]
95
- true_boxes = [unnormalize_box(box, width, height) for idx, box in enumerate(token_boxes) if not is_subword[idx]]
96
- return true_predictions,true_boxes,words
97
-
98
-
99
- def Get_Json(true_predictions,words):
100
- Results = {}
101
- i = 0
102
- for prd in true_predictions:
103
- if prd in ['InvNum','Fourni', 'InvDate','TT','TTC','TVA']:
104
- #print(i,prd,words[i-1])
105
- Results[prd] = words[i-1]
106
- i+=1
107
- return Results
108
-
109
-
110
- def Draw(image):
111
- true_predictions, true_boxes,words = Run_model(image)
112
- draw = ImageDraw.Draw(image)
113
-
114
- label2color = {
115
- 'InvNum': 'blue',
116
- 'InvDate': 'green',
117
- 'Fourni': 'orange',
118
- 'TTC':'purple',
119
- 'TVA': 'magenta',
120
- 'TT': 'red',
121
- 'Autre': 'black'
122
- }
123
-
124
- # Adjust the thickness of the rectangle outline and label text position
125
- rectangle_thickness = 4
126
- label_x_offset = 20
127
- label_y_offset = -40
128
-
129
- # Custom font size
130
- custom_font_size = 25
131
-
132
- # Load a font with the custom size
133
- font_path = "arial.ttf" # Specify the path to your font file
134
- custom_font = ImageFont.truetype(font_path, custom_font_size)
135
-
136
- for prediction, box in zip(true_predictions, true_boxes):
137
- predicted_label = prediction
138
- # Check if the predicted label exists in the label2color dictionary
139
- if predicted_label in label2color:
140
- color = label2color[predicted_label]
141
- else:
142
- color = 'black' # Default color if label is not found
143
- if predicted_label != "Autre":
144
- draw.rectangle(box, outline=color, width=rectangle_thickness)
145
- # Draw text using the custom font and size
146
-
147
- draw.rectangle((box[0], box[1]+ label_y_offset,box[2],box[3]+ label_y_offset), fill=color)
148
- draw.text((box[0] + label_x_offset, box[1] + label_y_offset), text=predicted_label, fill='white', font=custom_font)
149
-
150
- # Get the Results Json File
151
- Results = Get_Json(true_predictions,words)
152
-
153
- return image,Results
154
-
155
-
156
- def Add_Results(data):
157
- # Render the table
158
- for key, value in data.items():
159
- data[key] = st.sidebar.text_input(key, value)
160
-
161
- #############################################################################
162
- #############################################################################
163
 
164
  st.markdown("### Drag and Drop Images Here:")
165
  st.write("(PNG, JPG, JPEG)")
 
1
  import streamlit as st
2
+ from utilitis import Draw,Add_Results
3
+ from PIL import Image
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
  st.markdown("### Drag and Drop Images Here:")
6
  st.write("(PNG, JPG, JPEG)")
utilitis.py ADDED
@@ -0,0 +1,161 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from paddleocr import PaddleOCR
3
+ from PIL import ImageDraw, ImageFont
4
+ import torch
5
+ from transformers import AutoProcessor,LayoutLMv3ForTokenClassification
6
+ import numpy as np
7
+
8
+ model_Hugging_path = "Noureddinesa/Output_LayoutLMv3_v2"
9
+
10
+
11
+ #############################################################################
12
+ #############################################################################
13
def Labels():
    """Build the two lookup tables for the token-classification tags.

    Returns:
        tuple: ``(id2label, label2id)``. ``id2label`` maps each integer
        class index to its tag name; ``label2id`` is the inverse mapping.
        Indices follow the order of the tag list below.
    """
    tag_names = ['InvNum', 'InvDate', 'Fourni', 'TTC', 'TVA', 'TT', 'Autre']
    id2label = dict(enumerate(tag_names))
    label2id = {name: index for index, name in id2label.items()}
    return id2label, label2id
18
+
19
+ #############################################################################
20
+ #############################################################################
21
def Paddle():
    """Create the PaddleOCR engine used to read text from an image.

    The engine is built for French (``lang='fr'``) with
    ``use_angle_cls=False``; ``rec=False`` is forwarded to the
    constructor unchanged.

    Returns:
        PaddleOCR: a newly constructed OCR engine (one per call).
    """
    return PaddleOCR(use_angle_cls=False, lang='fr', rec=False)
24
+
25
def processbbox(BBOX, width, height):
    """Convert an OCR quadrilateral into a normalized ``[x1, y1, x2, y2]`` box.

    The box is the axis-aligned envelope of every corner in ``BBOX``,
    rescaled from pixel units to the 0-1000 integer grid and clamped to
    that range. Taking min/max over all corners (instead of trusting
    corners 0 and 2) keeps ``x1 <= x2`` and ``y1 <= y2`` even when the
    quadrilateral is rotated or its corners arrive in another order;
    clamping protects against detections that overshoot the image edge.
    For an axis-aligned, in-bounds quadrilateral the result is the same
    as scaling corners 0 and 2.

    Args:
        BBOX: sequence of ``[x, y]`` corner points in pixels.
        width: image width in pixels.
        height: image height in pixels.

    Returns:
        list[int]: ``[x1, y1, x2, y2]`` with every value in 0..1000
        (fractions are truncated, as before).

    Raises:
        ZeroDivisionError: if ``width`` or ``height`` is 0.
    """
    xs = [point[0] for point in BBOX]
    ys = [point[1] for point in BBOX]
    # Pair each envelope coordinate with the image extent it is scaled by.
    corners = (
        (min(xs), width),   # X1
        (min(ys), height),  # Y1
        (max(xs), width),   # X2
        (max(ys), height),  # Y2
    )
    return [
        int(min(max(1000 * value / extent, 0), 1000))
        for value, extent in corners
    ]
39
+
40
+
41
def Preprocess(image):
    """Run OCR on ``image`` and collect the recognized words and their boxes.

    Args:
        image: the image to read; it must expose ``.size`` as
            ``(width, height)`` and be convertible with ``np.array``
            (presumably a PIL image - confirm against the caller).

    Returns:
        dict: ``{'image': image, 'tokens': [...], 'bboxes': [...]}`` where
        ``tokens`` holds the recognized strings and ``bboxes`` the matching
        boxes produced by ``processbbox``. Both lists are empty when the
        OCR engine reports no text.
    """
    image_array = np.array(image)
    ocr = Paddle()
    width, height = image.size
    results = ocr.ocr(image_array, cls=True)
    # Only the first entry of the result list is used (one image was passed).
    # That entry can be None when nothing was detected, so fall back to an
    # empty list instead of crashing on iteration.
    page = results[0] if results else None
    test_dict = {'image': image, 'tokens': [], "bboxes": []}
    for item in page or []:
        # item[0] is the corner list, item[1][0] the recognized text.
        bbox = processbbox(item[0], width, height)
        test_dict['tokens'].append(item[1][0])
        test_dict['bboxes'].append(bbox)

    # Console trace of what the OCR stage produced.
    print(test_dict['bboxes'])
    print(test_dict['tokens'])
    return test_dict
56
+
57
+ #############################################################################
58
+ #############################################################################
59
def Encode(image):
    """Turn an image into model inputs via OCR and the pretrained processor.

    The image is first passed through ``Preprocess``; its words and boxes
    are then encoded by the processor loaded from ``model_Hugging_path``
    (with the processor's own OCR disabled), truncated/padded to 512
    positions and returned as PyTorch tensors.

    Args:
        image: the image to encode (forwarded to ``Preprocess``).

    Returns:
        tuple: ``(encoding, offset_mapping, words)`` - the processor output
        with ``offset_mapping`` removed, the removed offset mapping, and
        the list of OCR words.
    """
    sample = Preprocess(image)
    words = sample["tokens"]
    processor = AutoProcessor.from_pretrained(model_Hugging_path, apply_ocr=False)
    encoding = processor(
        sample["image"],
        words,
        boxes=sample["bboxes"],
        return_offsets_mapping=True,
        truncation=True,
        max_length=512,
        padding="max_length",
        return_tensors="pt",
    )
    # The offsets are not a model input; hand them back separately.
    offset_mapping = encoding.pop('offset_mapping')
    return encoding, offset_mapping, words
68
+
69
def unnormalize_box(bbox, width, height):
    """Scale a 0-1000 normalized box back to pixel coordinates.

    Args:
        bbox: ``[x1, y1, x2, y2]`` on the 0-1000 grid.
        width: image width in pixels.
        height: image height in pixels.

    Returns:
        list[float]: ``[x1, y1, x2, y2]`` in pixels.
    """
    left, top, right, bottom = bbox[0], bbox[1], bbox[2], bbox[3]
    x_start = width * (left / 1000)
    y_start = height * (top / 1000)
    x_end = width * (right / 1000)
    y_end = height * (bottom / 1000)
    return [x_start, y_start, x_end, y_end]
76
+
77
def Run_model(image):
    """Classify the OCR words of ``image`` with the fine-tuned LayoutLMv3 model.

    Args:
        image: the image to analyse; it must expose ``.size`` as
            ``(width, height)`` and is forwarded to ``Encode``.

    Returns:
        tuple: ``(true_predictions, true_boxes, words)`` - the predicted
        tag name and the pixel-space box for every token position whose
        offset starts at 0, plus the list of OCR words.
    """
    encoding, offset_mapping, words = Encode(image)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # load the fine-tuned model from the hub
    model = LayoutLMv3ForTokenClassification.from_pretrained(model_Hugging_path)
    model.to(device)
    model.eval()
    # The inputs must live on the same device as the model, otherwise the
    # forward pass fails with a device mismatch whenever CUDA is selected.
    encoding = encoding.to(device)
    # forward pass (inference only, so skip gradient tracking)
    with torch.no_grad():
        outputs = model(**encoding)

    predictions = outputs.logits.argmax(-1).squeeze().tolist()
    token_boxes = encoding.bbox.squeeze().tolist()

    width, height = image.size

    id2label, _ = Labels()
    # Positions with a non-zero start offset are dropped; presumably these
    # are continuation pieces of a word - confirm against the tokenizer.
    is_subword = np.array(offset_mapping.squeeze().tolist())[:, 0] != 0
    true_predictions = [
        id2label[pred]
        for idx, pred in enumerate(predictions)
        if not is_subword[idx]
    ]
    true_boxes = [
        unnormalize_box(box, width, height)
        for idx, box in enumerate(token_boxes)
        if not is_subword[idx]
    ]
    return true_predictions, true_boxes, words
96
+
97
+
98
def Get_Json(true_predictions, words):
    """Collect the word predicted for each invoice field.

    Prediction ``i`` is paired with ``words[i - 1]``; the shift by one
    presumably accounts for a leading special-token prediction that has
    no OCR word - confirm against ``Run_model``. Positions that do not
    correspond to a word (index 0, or anything past the end of ``words``)
    are ignored rather than wrapping around to the last word or raising
    ``IndexError``.

    Args:
        true_predictions: sequence of predicted tag names.
        words: list of OCR words.

    Returns:
        dict: tag name -> word. When a tag is predicted for several words,
        the last one wins. ``'Autre'`` is never included.
    """
    wanted = ('InvNum', 'Fourni', 'InvDate', 'TT', 'TTC', 'TVA')
    Results = {}
    for i, prd in enumerate(true_predictions):
        word_index = i - 1
        if prd in wanted and 0 <= word_index < len(words):
            Results[prd] = words[word_index]
    return Results
107
+
108
+
109
def Draw(image):
    """Annotate ``image`` with the predicted invoice fields.

    Every prediction other than ``'Autre'`` gets an outlined rectangle
    plus a filled label strip directly above it carrying the tag name.
    The drawing is done on ``image`` itself.

    Args:
        image: the image to analyse and draw on (forwarded to
            ``Run_model`` and ``ImageDraw.Draw``).

    Returns:
        tuple: ``(image, Results)`` - the annotated image and the
        field -> word dictionary built by ``Get_Json``.
    """
    true_predictions, true_boxes, words = Run_model(image)
    draw = ImageDraw.Draw(image)

    label2color = {
        'InvNum': 'blue',
        'InvDate': 'green',
        'Fourni': 'orange',
        'TTC': 'purple',
        'TVA': 'magenta',
        'TT': 'red',
        'Autre': 'black',
    }

    # Thickness of the rectangle outline and position of the label text.
    rectangle_thickness = 4
    label_x_offset = 20
    label_y_offset = -40

    # Custom font size
    custom_font_size = 25

    # Load a font with the custom size; fall back to Pillow's built-in font
    # when the file is not available so annotation still works.
    font_path = "arial.ttf"  # Specify the path to your font file
    try:
        custom_font = ImageFont.truetype(font_path, custom_font_size)
    except OSError:
        custom_font = ImageFont.load_default()

    for predicted_label, box in zip(true_predictions, true_boxes):
        if predicted_label == "Autre":
            continue
        # Unknown labels fall back to black.
        color = label2color.get(predicted_label, 'black')
        draw.rectangle(box, outline=color, width=rectangle_thickness)

        # Label strip: starts |label_y_offset| pixels above the box and ends
        # at the box's top edge, so it never paints over the field itself.
        label_top = box[1] + label_y_offset
        draw.rectangle((box[0], label_top, box[2], box[1]), fill=color)
        draw.text(
            (box[0] + label_x_offset, label_top),
            text=predicted_label,
            fill='white',
            font=custom_font,
        )

    # Get the Results Json File
    Results = Get_Json(true_predictions, words)

    return image, Results
153
+
154
+
155
def Add_Results(data):
    """Show each extracted field as an editable text input in the sidebar.

    For every key of ``data`` a ``st.sidebar.text_input`` is rendered,
    labelled with the key and pre-filled with the current value; the value
    in ``data`` is then replaced in place by what the widget returns.

    Args:
        data: mapping of field name -> value; modified in place.
    """
    # Only existing keys are reassigned, so the mapping's size never
    # changes while it is being iterated.
    for field in data:
        data[field] = st.sidebar.text_input(field, data[field])
159
+
160
+ #############################################################################
161
+ #############################################################################