Spaces:
Running
Running
fixed bugs in create_csv
Browse files- Layoutlmv3_inference/__pycache__/annotate_image.cpython-311.pyc +0 -0
- Layoutlmv3_inference/__pycache__/inference_handler.cpython-311.pyc +0 -0
- Layoutlmv3_inference/annotate_image.py +1 -1
- app.py +46 -49
- experiment.ipynb +0 -1337
- inferenced/csv_files/Output_0.csv +0 -4
- inferenced/csv_files/Output_1.csv +0 -2
- inferenced/csv_files/Output_2.csv +0 -3
- inferenced/csv_files/Output_3.csv +0 -2
- inferenced/csv_files/Output_4.csv +0 -2
- inferenced/output.csv +0 -9
- inferenced/sample1_711_inference.jpg +0 -0
- inferenced/sample1_grace_inference.jpg +0 -0
- inferenced/sample_711_inference.jpg +0 -0
- inferenced/sample_coop_inference.jpg +0 -0
- inferenced/sample_grace_inference.jpg +0 -0
- log/error_output.log +18 -0
- static/inference/Layoutlmv3_inference/__init__.py +0 -0
- static/inference/Layoutlmv3_inference/__pycache__/__init__.cpython-310.pyc +0 -0
- static/inference/Layoutlmv3_inference/__pycache__/__init__.cpython-311.pyc +0 -0
- static/inference/Layoutlmv3_inference/__pycache__/__init__.cpython-312.pyc +0 -0
- static/inference/Layoutlmv3_inference/__pycache__/annotate_image.cpython-310.pyc +0 -0
- static/inference/Layoutlmv3_inference/__pycache__/annotate_image.cpython-311.pyc +0 -0
- static/inference/Layoutlmv3_inference/__pycache__/inference_handler.cpython-310.pyc +0 -0
- static/inference/Layoutlmv3_inference/__pycache__/inference_handler.cpython-311.pyc +0 -0
- static/inference/Layoutlmv3_inference/__pycache__/ocr.cpython-310.pyc +0 -0
- static/inference/Layoutlmv3_inference/__pycache__/ocr.cpython-311.pyc +0 -0
- static/inference/Layoutlmv3_inference/__pycache__/ocr.cpython-312.pyc +0 -0
- static/inference/Layoutlmv3_inference/__pycache__/utils.cpython-310.pyc +0 -0
- static/inference/Layoutlmv3_inference/__pycache__/utils.cpython-311.pyc +0 -0
- static/inference/Layoutlmv3_inference/annotate_image.py +0 -56
- static/inference/Layoutlmv3_inference/inference_handler.py +0 -199
- static/inference/Layoutlmv3_inference/ocr.py +0 -187
- static/inference/Layoutlmv3_inference/utils.py +0 -50
- static/inference/preprocess.py +0 -206
- static/inference/run_inference.py +0 -27
Layoutlmv3_inference/__pycache__/annotate_image.cpython-311.pyc
CHANGED
Binary files a/Layoutlmv3_inference/__pycache__/annotate_image.cpython-311.pyc and b/Layoutlmv3_inference/__pycache__/annotate_image.cpython-311.pyc differ
|
|
Layoutlmv3_inference/__pycache__/inference_handler.cpython-311.pyc
CHANGED
Binary files a/Layoutlmv3_inference/__pycache__/inference_handler.cpython-311.pyc and b/Layoutlmv3_inference/__pycache__/inference_handler.cpython-311.pyc differ
|
|
Layoutlmv3_inference/annotate_image.py
CHANGED
@@ -50,7 +50,7 @@ def annotate_image(image_path, annotation_object):
|
|
50 |
|
51 |
image_name = os.path.basename(image_path)
|
52 |
image_name = image_name[:image_name.find('.')]
|
53 |
-
output_folder = 'inferenced/'
|
54 |
os.makedirs(output_folder, exist_ok=True)
|
55 |
|
56 |
img.save(os.path.join(output_folder, f'{image_name}_inference.jpg'))
|
|
|
50 |
|
51 |
image_name = os.path.basename(image_path)
|
52 |
image_name = image_name[:image_name.find('.')]
|
53 |
+
output_folder = 'static/temp/inferenced/'
|
54 |
os.makedirs(output_folder, exist_ok=True)
|
55 |
|
56 |
img.save(os.path.join(output_folder, f'{image_name}_inference.jpg'))
|
app.py
CHANGED
@@ -60,7 +60,7 @@ def index():
|
|
60 |
|
61 |
# Source folders
|
62 |
temp_folder = r'static/temp'
|
63 |
-
inferenced_folder = r'inferenced'
|
64 |
|
65 |
# Destination folder path
|
66 |
destination_folder = os.path.join('output_folders', dt_string) # Create a new folder with timestamp
|
@@ -104,8 +104,8 @@ def make_predictions(image_paths):
|
|
104 |
temp = None
|
105 |
try:
|
106 |
# For Windows OS
|
107 |
-
|
108 |
-
|
109 |
|
110 |
model_path = Path(r'model/export')
|
111 |
learner = load_learner(model_path)
|
@@ -129,8 +129,8 @@ def make_predictions(image_paths):
|
|
129 |
except Exception as e:
|
130 |
return {"error in make_predictions": str(e)}
|
131 |
|
132 |
-
|
133 |
-
|
134 |
|
135 |
import copy
|
136 |
@app.route('/predict/<filenames>', methods=['GET', 'POST'])
|
@@ -181,7 +181,7 @@ def predict_files(filenames):
|
|
181 |
@app.route('/get_inference_image')
|
182 |
def get_inference_image():
|
183 |
# Assuming the new image is stored in the 'inferenced' folder with the name 'temp_inference.jpg'
|
184 |
-
inferenced_image = 'inferenced/temp_inference.jpg'
|
185 |
return jsonify(updatedImagePath=inferenced_image), 200 # Return the image path with a 200 status code
|
186 |
|
187 |
|
@@ -231,7 +231,6 @@ def replace_symbols_with_period(value):
|
|
231 |
return value.replace(',', '.')
|
232 |
|
233 |
|
234 |
-
from itertools import zip_longest
|
235 |
|
236 |
@app.route('/create_csv', methods=['GET'])
|
237 |
def create_csv():
|
@@ -240,11 +239,14 @@ def create_csv():
|
|
240 |
json_folder_path = r"static/temp/labeled" # Change this to your folder path
|
241 |
|
242 |
# Path to the output CSV folder
|
243 |
-
output_folder_path = r"inferenced/csv_files"
|
244 |
os.makedirs(output_folder_path, exist_ok=True)
|
245 |
|
246 |
-
|
247 |
-
|
|
|
|
|
|
|
248 |
|
249 |
# Iterate through JSON files in the folder
|
250 |
for filename in os.listdir(json_folder_path):
|
@@ -253,57 +255,39 @@ def create_csv():
|
|
253 |
|
254 |
with open(json_file_path, 'r') as file:
|
255 |
data = json.load(file)
|
256 |
-
all_data.
|
257 |
|
258 |
-
#
|
259 |
label_texts = {}
|
260 |
-
for item in
|
261 |
label = item['label']
|
262 |
text = item['text']
|
263 |
-
|
264 |
-
# Ensure label exists before adding to dictionary
|
265 |
-
if label not in label_texts:
|
266 |
-
label_texts[label] = []
|
267 |
-
label_texts[label].append(text)
|
268 |
-
|
269 |
-
# Order of columns as requested
|
270 |
-
column_order = [
|
271 |
-
'RECEIPTNUMBER', 'MERCHANTNAME', 'MERCHANTADDRESS',
|
272 |
-
'TRANSACTIONDATE', 'TRANSACTIONTIME', 'ITEMS',
|
273 |
-
'PRICE', 'TOTAL', 'VATTAX'
|
274 |
-
]
|
275 |
|
276 |
# Writing data to CSV file with ordered columns
|
277 |
csv_file_path = os.path.join(output_folder_path, os.path.splitext(filename)[0] + '.csv')
|
278 |
with open(csv_file_path, 'w', newline='') as csvfile:
|
279 |
csv_writer = csv.DictWriter(csvfile, fieldnames=column_order, delimiter=",")
|
280 |
-
|
|
|
281 |
|
282 |
-
#
|
283 |
-
|
|
|
|
|
284 |
for i in range(max_length):
|
285 |
-
|
286 |
-
|
287 |
-
|
288 |
-
|
289 |
# Check if items and prices are separated by space
|
290 |
-
if
|
291 |
-
|
292 |
-
|
293 |
-
|
294 |
-
# Create new rows for each combination of items and prices
|
295 |
-
for item, price in zip(item_list, price_list):
|
296 |
-
row_data = {label: replace_symbols_with_period(label_texts[label][i]) if label == 'ITEMS' else replace_symbols_with_period(label_texts[label][i]) for label in column_order}
|
297 |
-
row_data['ITEMS'] = item
|
298 |
-
row_data['PRICE'] = price
|
299 |
-
csv_writer.writerow(row_data)
|
300 |
-
else:
|
301 |
-
# Use get() with default '' to avoid KeyError
|
302 |
-
row_data = {label: replace_symbols_with_period(label_texts.get(label, [])[i]) if i < len(label_texts.get(label, [])) else '' for label in column_order}
|
303 |
-
csv_writer.writerow(row_data)
|
304 |
|
305 |
# Combining contents of CSV files into a single CSV file
|
306 |
-
output_file_path = r"inferenced/output.csv"
|
307 |
with open(output_file_path, 'w', newline='') as combined_csvfile:
|
308 |
combined_csv_writer = csv.DictWriter(combined_csvfile, fieldnames=column_order, delimiter=",")
|
309 |
combined_csv_writer.writeheader()
|
@@ -324,17 +308,30 @@ def create_csv():
|
|
324 |
except Exception as e:
|
325 |
print(f"An error occurred in create_csv: {str(e)}")
|
326 |
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
327 |
|
328 |
@app.route('/get_data')
|
329 |
def get_data():
|
330 |
-
return send_from_directory('inferenced','output.csv', as_attachment=False)
|
331 |
|
332 |
from flask import jsonify
|
333 |
|
334 |
@app.route('/download_csv', methods=['GET'])
|
335 |
def download_csv():
|
336 |
try:
|
337 |
-
output_file_path = r"inferenced/output.csv" # path to output CSV file
|
338 |
# Check if the file exists
|
339 |
if os.path.exists(output_file_path):
|
340 |
return send_file(output_file_path, as_attachment=True, download_name='output.csv')
|
|
|
60 |
|
61 |
# Source folders
|
62 |
temp_folder = r'static/temp'
|
63 |
+
inferenced_folder = r'static/temp/inferenced'
|
64 |
|
65 |
# Destination folder path
|
66 |
destination_folder = os.path.join('output_folders', dt_string) # Create a new folder with timestamp
|
|
|
104 |
temp = None
|
105 |
try:
|
106 |
# For Windows OS
|
107 |
+
temp = pathlib.PosixPath # Save the original state
|
108 |
+
pathlib.PosixPath = pathlib.WindowsPath # Change to WindowsPath temporarily
|
109 |
|
110 |
model_path = Path(r'model/export')
|
111 |
learner = load_learner(model_path)
|
|
|
129 |
except Exception as e:
|
130 |
return {"error in make_predictions": str(e)}
|
131 |
|
132 |
+
finally:
|
133 |
+
pathlib.PosixPath = temp
|
134 |
|
135 |
import copy
|
136 |
@app.route('/predict/<filenames>', methods=['GET', 'POST'])
|
|
|
181 |
@app.route('/get_inference_image')
|
182 |
def get_inference_image():
|
183 |
# Assuming the new image is stored in the 'inferenced' folder with the name 'temp_inference.jpg'
|
184 |
+
inferenced_image = 'static/temp/inferenced/temp_inference.jpg'
|
185 |
return jsonify(updatedImagePath=inferenced_image), 200 # Return the image path with a 200 status code
|
186 |
|
187 |
|
|
|
231 |
return value.replace(',', '.')
|
232 |
|
233 |
|
|
|
234 |
|
235 |
@app.route('/create_csv', methods=['GET'])
|
236 |
def create_csv():
|
|
|
239 |
json_folder_path = r"static/temp/labeled" # Change this to your folder path
|
240 |
|
241 |
# Path to the output CSV folder
|
242 |
+
output_folder_path = r"static/temp/inferenced/csv_files"
|
243 |
os.makedirs(output_folder_path, exist_ok=True)
|
244 |
|
245 |
+
column_order = [
|
246 |
+
'RECEIPTNUMBER', 'MERCHANTNAME', 'MERCHANTADDRESS',
|
247 |
+
'TRANSACTIONDATE', 'TRANSACTIONTIME', 'ITEMS',
|
248 |
+
'PRICE', 'TOTAL', 'VATTAX'
|
249 |
+
]
|
250 |
|
251 |
# Iterate through JSON files in the folder
|
252 |
for filename in os.listdir(json_folder_path):
|
|
|
255 |
|
256 |
with open(json_file_path, 'r') as file:
|
257 |
data = json.load(file)
|
258 |
+
all_data = data.get('output', [])
|
259 |
|
260 |
+
# Initialize a dictionary to store labels and corresponding texts for this JSON file
|
261 |
label_texts = {}
|
262 |
+
for item in all_data:
|
263 |
label = item['label']
|
264 |
text = item['text']
|
265 |
+
label_texts[label] = text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
266 |
|
267 |
# Writing data to CSV file with ordered columns
|
268 |
csv_file_path = os.path.join(output_folder_path, os.path.splitext(filename)[0] + '.csv')
|
269 |
with open(csv_file_path, 'w', newline='') as csvfile:
|
270 |
csv_writer = csv.DictWriter(csvfile, fieldnames=column_order, delimiter=",")
|
271 |
+
if os.path.getsize(csv_file_path) == 0:
|
272 |
+
csv_writer.writeheader()
|
273 |
|
274 |
+
# Constructing rows for the CSV file
|
275 |
+
items = label_texts.get('ITEMS', '').split()
|
276 |
+
prices = label_texts.get('PRICE', '').split()
|
277 |
+
max_length = max(len(items), len(prices))
|
278 |
for i in range(max_length):
|
279 |
+
row_data = {}
|
280 |
+
for label in column_order:
|
281 |
+
# Use get() with default '' to handle missing labels gracefully
|
282 |
+
row_data[label] = label_texts.get(label, '')
|
283 |
# Check if items and prices are separated by space
|
284 |
+
if i < len(items) and i < len(prices):
|
285 |
+
row_data['ITEMS'] = items[i]
|
286 |
+
row_data['PRICE'] = prices[i]
|
287 |
+
csv_writer.writerow(row_data)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
288 |
|
289 |
# Combining contents of CSV files into a single CSV file
|
290 |
+
output_file_path = r"static/temp/inferenced/output.csv"
|
291 |
with open(output_file_path, 'w', newline='') as combined_csvfile:
|
292 |
combined_csv_writer = csv.DictWriter(combined_csvfile, fieldnames=column_order, delimiter=",")
|
293 |
combined_csv_writer.writeheader()
|
|
|
308 |
except Exception as e:
|
309 |
print(f"An error occurred in create_csv: {str(e)}")
|
310 |
return None
|
311 |
+
|
312 |
+
except FileNotFoundError as e:
|
313 |
+
print(f"File not found error: {str(e)}")
|
314 |
+
return jsonify({'error': 'File not found.'}), 404
|
315 |
+
except json.JSONDecodeError as e:
|
316 |
+
print(f"JSON decoding error: {str(e)}")
|
317 |
+
return jsonify({'error': 'JSON decoding error.'}), 500
|
318 |
+
except csv.Error as e:
|
319 |
+
print(f"CSV error: {str(e)}")
|
320 |
+
return jsonify({'error': 'CSV error.'}), 500
|
321 |
+
except Exception as e:
|
322 |
+
print(f"An unexpected error occurred: {str(e)}")
|
323 |
+
return jsonify({'error': 'An unexpected error occurred.'}), 500
|
324 |
|
325 |
@app.route('/get_data')
|
326 |
def get_data():
|
327 |
+
return send_from_directory('static/temp/inferenced','output.csv', as_attachment=False)
|
328 |
|
329 |
from flask import jsonify
|
330 |
|
331 |
@app.route('/download_csv', methods=['GET'])
|
332 |
def download_csv():
|
333 |
try:
|
334 |
+
output_file_path = r"static/temp/inferenced/output.csv" # path to output CSV file
|
335 |
# Check if the file exists
|
336 |
if os.path.exists(output_file_path):
|
337 |
return send_file(output_file_path, as_attachment=True, download_name='output.csv')
|
experiment.ipynb
DELETED
@@ -1,1337 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"cells": [
|
3 |
-
{
|
4 |
-
"cell_type": "code",
|
5 |
-
"execution_count": 17,
|
6 |
-
"metadata": {},
|
7 |
-
"outputs": [],
|
8 |
-
"source": [
|
9 |
-
"# defining inference parameters\n",
|
10 |
-
"model_path = r\"C:\\Users\\Ayoo\\Desktop\\webapp\\model\" # path to Layoutlmv3 model\n",
|
11 |
-
"imag_path = r\"C:\\Users\\Ayoo\\Desktop\\webapp\\predictions\\imgs\" # images folder"
|
12 |
-
]
|
13 |
-
},
|
14 |
-
{
|
15 |
-
"cell_type": "code",
|
16 |
-
"execution_count": 33,
|
17 |
-
"metadata": {},
|
18 |
-
"outputs": [
|
19 |
-
{
|
20 |
-
"name": "stdout",
|
21 |
-
"output_type": "stream",
|
22 |
-
"text": [
|
23 |
-
"^C\n"
|
24 |
-
]
|
25 |
-
},
|
26 |
-
{
|
27 |
-
"name": "stderr",
|
28 |
-
"output_type": "stream",
|
29 |
-
"text": [
|
30 |
-
"2023-12-16 02:35:50.587274: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n",
|
31 |
-
"WARNING:tensorflow:From C:\\Users\\Ayoo\\AppData\\Roaming\\Python\\Python311\\site-packages\\keras\\src\\losses.py:2976: The name tf.losses.sparse_softmax_cross_entropy is deprecated. Please use tf.compat.v1.losses.sparse_softmax_cross_entropy instead.\n",
|
32 |
-
"\n",
|
33 |
-
"c:\\Users\\Ayoo\\anaconda3\\envs\\mlenv\\Lib\\site-packages\\transformers\\modeling_utils.py:881: FutureWarning: The `device` argument is deprecated and will be removed in v5 of Transformers.\n",
|
34 |
-
" warnings.warn(\n"
|
35 |
-
]
|
36 |
-
},
|
37 |
-
{
|
38 |
-
"name": "stdout",
|
39 |
-
"output_type": "stream",
|
40 |
-
"text": [
|
41 |
-
"Preparing for Inference\n",
|
42 |
-
"Starting\n",
|
43 |
-
"Preprocessing\n",
|
44 |
-
"Preprocessing done. Running OCR\n",
|
45 |
-
"JSON file saved\n",
|
46 |
-
"OCR done\n",
|
47 |
-
"Run Done\n",
|
48 |
-
"Cleaned Tesseract output done\n",
|
49 |
-
"Word list done\n",
|
50 |
-
"Box list done\n",
|
51 |
-
"Prepared for Inference Batch\n",
|
52 |
-
"Running Flattened Output\n",
|
53 |
-
"Ready for Annotation\n",
|
54 |
-
"Annotating Images\n"
|
55 |
-
]
|
56 |
-
}
|
57 |
-
],
|
58 |
-
"source": [
|
59 |
-
"! python predictions\\inference\\run_inference.py --model_path {model_path} --images_path {imag_path}"
|
60 |
-
]
|
61 |
-
},
|
62 |
-
{
|
63 |
-
"cell_type": "code",
|
64 |
-
"execution_count": 20,
|
65 |
-
"metadata": {},
|
66 |
-
"outputs": [
|
67 |
-
{
|
68 |
-
"name": "stdout",
|
69 |
-
"output_type": "stream",
|
70 |
-
"text": [
|
71 |
-
"Looking for C:\\Users\\Ayoo\\.keras-ocr\\craft_mlt_25k.h5\n"
|
72 |
-
]
|
73 |
-
},
|
74 |
-
{
|
75 |
-
"ename": "KeyboardInterrupt",
|
76 |
-
"evalue": "",
|
77 |
-
"output_type": "error",
|
78 |
-
"traceback": [
|
79 |
-
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
80 |
-
"\u001b[1;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
|
81 |
-
"Cell \u001b[1;32mIn[20], line 2\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mkeras_ocr\u001b[39;00m\n\u001b[1;32m----> 2\u001b[0m pipeline\u001b[38;5;241m=\u001b[39m\u001b[43mkeras_ocr\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpipeline\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mPipeline\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
|
82 |
-
"File \u001b[1;32mc:\\Users\\Ayoo\\anaconda3\\envs\\mlenv\\Lib\\site-packages\\keras_ocr\\pipeline.py:20\u001b[0m, in \u001b[0;36mPipeline.__init__\u001b[1;34m(self, detector, recognizer, scale, max_size)\u001b[0m\n\u001b[0;32m 18\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__init__\u001b[39m(\u001b[38;5;28mself\u001b[39m, detector\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, recognizer\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, scale\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m2\u001b[39m, max_size\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m2048\u001b[39m):\n\u001b[0;32m 19\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m detector \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m---> 20\u001b[0m detector \u001b[38;5;241m=\u001b[39m \u001b[43mdetection\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mDetector\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 21\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m recognizer \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m 22\u001b[0m recognizer \u001b[38;5;241m=\u001b[39m recognition\u001b[38;5;241m.\u001b[39mRecognizer()\n",
|
83 |
-
"File \u001b[1;32mc:\\Users\\Ayoo\\anaconda3\\envs\\mlenv\\Lib\\site-packages\\keras_ocr\\detection.py:686\u001b[0m, in \u001b[0;36mDetector.__init__\u001b[1;34m(self, weights, load_from_torch, optimizer, backbone_name)\u001b[0m\n\u001b[0;32m 682\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m (\n\u001b[0;32m 683\u001b[0m pretrained_key \u001b[38;5;129;01min\u001b[39;00m PRETRAINED_WEIGHTS\n\u001b[0;32m 684\u001b[0m ), \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSelected weights configuration not found.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 685\u001b[0m weights_config \u001b[38;5;241m=\u001b[39m PRETRAINED_WEIGHTS[pretrained_key]\n\u001b[1;32m--> 686\u001b[0m weights_path \u001b[38;5;241m=\u001b[39m \u001b[43mtools\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdownload_and_verify\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 687\u001b[0m \u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mweights_config\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43murl\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 688\u001b[0m \u001b[43m \u001b[49m\u001b[43mfilename\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mweights_config\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mfilename\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 689\u001b[0m \u001b[43m \u001b[49m\u001b[43msha256\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mweights_config\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43msha256\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 690\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 691\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 692\u001b[0m weights_path \u001b[38;5;241m=\u001b[39m 
\u001b[38;5;28;01mNone\u001b[39;00m\n",
|
84 |
-
"File \u001b[1;32mc:\\Users\\Ayoo\\anaconda3\\envs\\mlenv\\Lib\\site-packages\\keras_ocr\\tools.py:527\u001b[0m, in \u001b[0;36mdownload_and_verify\u001b[1;34m(url, sha256, cache_dir, verbose, filename)\u001b[0m\n\u001b[0;32m 525\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mDownloading \u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m+\u001b[39m filepath)\n\u001b[0;32m 526\u001b[0m urllib\u001b[38;5;241m.\u001b[39mrequest\u001b[38;5;241m.\u001b[39murlretrieve(url, filepath)\n\u001b[1;32m--> 527\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m sha256 \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mor\u001b[39;00m sha256 \u001b[38;5;241m==\u001b[39m \u001b[43msha256sum\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 528\u001b[0m \u001b[43m \u001b[49m\u001b[43mfilepath\u001b[49m\n\u001b[0;32m 529\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mError occurred verifying sha256.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 530\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m filepath\n",
|
85 |
-
"File \u001b[1;32mc:\\Users\\Ayoo\\anaconda3\\envs\\mlenv\\Lib\\site-packages\\keras_ocr\\tools.py:491\u001b[0m, in \u001b[0;36msha256sum\u001b[1;34m(filename)\u001b[0m\n\u001b[0;32m 489\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mopen\u001b[39m(filename, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mrb\u001b[39m\u001b[38;5;124m\"\u001b[39m, buffering\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0\u001b[39m) \u001b[38;5;28;01mas\u001b[39;00m f:\n\u001b[0;32m 490\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m n \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28miter\u001b[39m(\u001b[38;5;28;01mlambda\u001b[39;00m: f\u001b[38;5;241m.\u001b[39mreadinto(mv), \u001b[38;5;241m0\u001b[39m): \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n\u001b[1;32m--> 491\u001b[0m h\u001b[38;5;241m.\u001b[39mupdate(mv[:n])\n\u001b[0;32m 492\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m h\u001b[38;5;241m.\u001b[39mhexdigest()\n",
|
86 |
-
"\u001b[1;31mKeyboardInterrupt\u001b[0m: "
|
87 |
-
]
|
88 |
-
}
|
89 |
-
],
|
90 |
-
"source": [
|
91 |
-
"import keras_ocr\n",
|
92 |
-
"pipeline=keras_ocr.pipeline.Pipeline()"
|
93 |
-
]
|
94 |
-
},
|
95 |
-
{
|
96 |
-
"cell_type": "code",
|
97 |
-
"execution_count": 4,
|
98 |
-
"metadata": {},
|
99 |
-
"outputs": [
|
100 |
-
{
|
101 |
-
"name": "stdout",
|
102 |
-
"output_type": "stream",
|
103 |
-
"text": [
|
104 |
-
"1/1 [==============================] - 34s 34s/step\n",
|
105 |
-
"7/7 [==============================] - 94s 13s/step\n"
|
106 |
-
]
|
107 |
-
},
|
108 |
-
{
|
109 |
-
"data": {
|
110 |
-
"text/plain": [
|
111 |
-
"[[('feleven',\n",
|
112 |
-
" array([[212.58102 , 34.90136 ],\n",
|
113 |
-
" [577.45886 , 34.901367],\n",
|
114 |
-
" [577.45886 , 114.22263 ],\n",
|
115 |
-
" [212.58102 , 114.22263 ]], dtype=float32)),\n",
|
116 |
-
" ('es',\n",
|
117 |
-
" array([[574.28613, 82.49414],\n",
|
118 |
-
" [593.32324, 82.49414],\n",
|
119 |
-
" [593.32324, 107.87695],\n",
|
120 |
-
" [574.28613, 107.87695]], dtype=float32)),\n",
|
121 |
-
" ('store',\n",
|
122 |
-
" array([[453.71777, 203.0625 ],\n",
|
123 |
-
" [567.9404 , 203.0625 ],\n",
|
124 |
-
" [567.9404 , 253.82812],\n",
|
125 |
-
" [453.71777, 253.82812]], dtype=float32)),\n",
|
126 |
-
" ('nahj',\n",
|
127 |
-
" array([[120.56836, 209.4082 ],\n",
|
128 |
-
" [187.19824, 209.4082 ],\n",
|
129 |
-
" [187.19824, 253.82812],\n",
|
130 |
-
" [120.56836, 253.82812]], dtype=float32)),\n",
|
131 |
-
" ('conveni',\n",
|
132 |
-
" array([[203.0625 , 209.4082 ],\n",
|
133 |
-
" [352.18652, 209.4082 ],\n",
|
134 |
-
" [352.18652, 253.82812],\n",
|
135 |
-
" [203.0625 , 253.82812]], dtype=float32)),\n",
|
136 |
-
" ('enco',\n",
|
137 |
-
" array([[352.18652, 209.4082 ],\n",
|
138 |
-
" [441.02637, 209.4082 ],\n",
|
139 |
-
" [441.02637, 253.82812],\n",
|
140 |
-
" [352.18652, 253.82812]], dtype=float32)),\n",
|
141 |
-
" ('qwned',\n",
|
142 |
-
" array([[ 34.901367, 260.17383 ],\n",
|
143 |
-
" [149.12402 , 260.17383 ],\n",
|
144 |
-
" [149.12402 , 304.59375 ],\n",
|
145 |
-
" [ 34.901367, 304.59375 ]], dtype=float32)),\n",
|
146 |
-
" ('operated',\n",
|
147 |
-
" array([[203.0625 , 260.17383],\n",
|
148 |
-
" [377.56934, 260.17383],\n",
|
149 |
-
" [377.56934, 307.7666 ],\n",
|
150 |
-
" [203.0625 , 307.7666 ]], dtype=float32)),\n",
|
151 |
-
" ('nancy',\n",
|
152 |
-
" array([[475.92773, 260.17383],\n",
|
153 |
-
" [586.97754, 260.17383],\n",
|
154 |
-
" [586.97754, 304.59375],\n",
|
155 |
-
" [475.92773, 304.59375]], dtype=float32)),\n",
|
156 |
-
" ('byl',\n",
|
157 |
-
" array([[393.4336 , 263.34668],\n",
|
158 |
-
" [456.89062, 263.34668],\n",
|
159 |
-
" [456.89062, 307.7666 ],\n",
|
160 |
-
" [393.4336 , 307.7666 ]], dtype=float32)),\n",
|
161 |
-
" ('a',\n",
|
162 |
-
" array([[602.8418 , 263.34668],\n",
|
163 |
-
" [634.5703 , 263.34668],\n",
|
164 |
-
" [634.5703 , 301.4209 ],\n",
|
165 |
-
" [602.8418 , 301.4209 ]], dtype=float32)),\n",
|
166 |
-
" ('cl',\n",
|
167 |
-
" array([[244.30957, 314.1123 ],\n",
|
168 |
-
" [288.7295 , 314.1123 ],\n",
|
169 |
-
" [288.7295 , 355.35938],\n",
|
170 |
-
" [244.30957, 355.35938]], dtype=float32)),\n",
|
171 |
-
" ('inacosa',\n",
|
172 |
-
" array([[291.90234, 314.1123 ],\n",
|
173 |
-
" [437.85352, 314.1123 ],\n",
|
174 |
-
" [437.85352, 355.35938],\n",
|
175 |
-
" [291.90234, 355.35938]], dtype=float32)),\n",
|
176 |
-
" ('tregtin',\n",
|
177 |
-
" array([[123.74121, 358.53223],\n",
|
178 |
-
" [276.0381 , 358.53223],\n",
|
179 |
-
" [276.0381 , 406.125 ],\n",
|
180 |
-
" [123.74121, 406.125 ]], dtype=float32)),\n",
|
181 |
-
" ('va',\n",
|
182 |
-
" array([[ 76.14844, 361.70508],\n",
|
183 |
-
" [123.74121, 361.70508],\n",
|
184 |
-
" [123.74121, 406.125 ],\n",
|
185 |
-
" [ 76.14844, 406.125 ]], dtype=float32)),\n",
|
186 |
-
" ('hssysigm',\n",
|
187 |
-
" array([[285.55664, 361.70508],\n",
|
188 |
-
" [485.4463 , 361.70508],\n",
|
189 |
-
" [485.4463 , 406.125 ],\n",
|
190 |
-
" [285.55664, 406.125 ]], dtype=float32)),\n",
|
191 |
-
" ('gbsr0o2',\n",
|
192 |
-
" array([[475.92773, 361.70508],\n",
|
193 |
-
" [631.39746, 361.70508],\n",
|
194 |
-
" [631.39746, 406.125 ],\n",
|
195 |
-
" [475.92773, 406.125 ]], dtype=float32)),\n",
|
196 |
-
" ('pobli',\n",
|
197 |
-
" array([[ 98.3584 , 412.4707 ],\n",
|
198 |
-
" [187.19824, 412.4707 ],\n",
|
199 |
-
" [187.19824, 460.06348],\n",
|
200 |
-
" [ 98.3584 , 460.06348]], dtype=float32)),\n",
|
201 |
-
" ('acii',\n",
|
202 |
-
" array([[180.85254, 415.64355],\n",
|
203 |
-
" [250.65527, 415.64355],\n",
|
204 |
-
" [250.65527, 460.06348],\n",
|
205 |
-
" [180.85254, 460.06348]], dtype=float32)),\n",
|
206 |
-
" ('leons',\n",
|
207 |
-
" array([[326.8037 , 415.64355],\n",
|
208 |
-
" [434.68066, 415.64355],\n",
|
209 |
-
" [434.68066, 460.06348],\n",
|
210 |
-
" [326.8037 , 460.06348]], dtype=float32)),\n",
|
211 |
-
" ('ilulos',\n",
|
212 |
-
" array([[456.89062, 415.64355],\n",
|
213 |
-
" [602.8418 , 415.64355],\n",
|
214 |
-
" [602.8418 , 460.06348],\n",
|
215 |
-
" [456.89062, 460.06348]], dtype=float32)),\n",
|
216 |
-
" ('ors',\n",
|
217 |
-
" array([[241.13672, 418.8164 ],\n",
|
218 |
-
" [304.59375, 418.8164 ],\n",
|
219 |
-
" [304.59375, 456.89062],\n",
|
220 |
-
" [241.13672, 456.89062]], dtype=float32)),\n",
|
221 |
-
" ('fit',\n",
|
222 |
-
" array([[225.27246, 466.40918],\n",
|
223 |
-
" [291.90234, 466.40918],\n",
|
224 |
-
" [291.90234, 510.8291 ],\n",
|
225 |
-
" [225.27246, 510.8291 ]], dtype=float32)),\n",
|
226 |
-
" ('ipp',\n",
|
227 |
-
" array([[314.1123 , 469.58203],\n",
|
228 |
-
" [380.7422 , 469.58203],\n",
|
229 |
-
" [380.7422 , 514.00195],\n",
|
230 |
-
" [314.1123 , 514.00195]], dtype=float32)),\n",
|
231 |
-
" ('ines',\n",
|
232 |
-
" array([[374.39648, 469.58203],\n",
|
233 |
-
" [463.23633, 469.58203],\n",
|
234 |
-
" [463.23633, 510.8291 ],\n",
|
235 |
-
" [374.39648, 510.8291 ]], dtype=float32)),\n",
|
236 |
-
" ('tel',\n",
|
237 |
-
" array([[225.27246, 517.1748 ],\n",
|
238 |
-
" [288.7295 , 517.1748 ],\n",
|
239 |
-
" [288.7295 , 561.5947 ],\n",
|
240 |
-
" [225.27246, 561.5947 ]], dtype=float32)),\n",
|
241 |
-
" ('null',\n",
|
242 |
-
" array([[371.22363, 517.1748 ],\n",
|
243 |
-
" [466.40918, 517.1748 ],\n",
|
244 |
-
" [466.40918, 561.5947 ],\n",
|
245 |
-
" [371.22363, 561.5947 ]], dtype=float32)),\n",
|
246 |
-
" ('h',\n",
|
247 |
-
" array([[307.7666 , 520.34766],\n",
|
248 |
-
" [339.49512, 520.34766],\n",
|
249 |
-
" [339.49512, 558.4219 ],\n",
|
250 |
-
" [307.7666 , 558.4219 ]], dtype=float32)),\n",
|
251 |
-
" ('osd1',\n",
|
252 |
-
" array([[ 98.3584 , 618.70605],\n",
|
253 |
-
" [206.23535, 618.70605],\n",
|
254 |
-
" [206.23535, 663.126 ],\n",
|
255 |
-
" [ 98.3584 , 663.126 ]], dtype=float32)),\n",
|
256 |
-
" ('fzozx',\n",
|
257 |
-
" array([[203.0625 , 618.70605],\n",
|
258 |
-
" [314.1123 , 618.70605],\n",
|
259 |
-
" [314.1123 , 663.126 ],\n",
|
260 |
-
" [203.0625 , 663.126 ]], dtype=float32)),\n",
|
261 |
-
" ('leoost',\n",
|
262 |
-
" array([[434.68066, 618.70605],\n",
|
263 |
-
" [609.1875 , 618.70605],\n",
|
264 |
-
" [609.1875 , 663.126 ],\n",
|
265 |
-
" [434.68066, 663.126 ]], dtype=float32)),\n",
|
266 |
-
" ('smony',\n",
|
267 |
-
" array([[314.1123 , 621.8789 ],\n",
|
268 |
-
" [415.64355, 621.8789 ],\n",
|
269 |
-
" [415.64355, 663.126 ],\n",
|
270 |
-
" [314.1123 , 663.126 ]], dtype=float32)),\n",
|
271 |
-
" ('rcpt',\n",
|
272 |
-
" array([[ 12.691406, 723.41016 ],\n",
|
273 |
-
" [101.53125 , 723.41016 ],\n",
|
274 |
-
" [101.53125 , 767.8301 ],\n",
|
275 |
-
" [ 12.691406, 767.8301 ]], dtype=float32)),\n",
|
276 |
-
" ('h2a81',\n",
|
277 |
-
" array([[117.39551, 723.41016],\n",
|
278 |
-
" [228.44531, 723.41016],\n",
|
279 |
-
" [228.44531, 767.8301 ],\n",
|
280 |
-
" [117.39551, 767.8301 ]], dtype=float32)),\n",
|
281 |
-
" ('3a7',\n",
|
282 |
-
" array([[218.92676, 723.41016],\n",
|
283 |
-
" [291.90234, 723.41016],\n",
|
284 |
-
" [291.90234, 767.8301 ],\n",
|
285 |
-
" [218.92676, 767.8301 ]], dtype=float32)),\n",
|
286 |
-
" ('rcft',\n",
|
287 |
-
" array([[475.92773, 723.41016],\n",
|
288 |
-
" [567.9404 , 723.41016],\n",
|
289 |
-
" [567.9404 , 767.8301 ],\n",
|
290 |
-
" [475.92773, 767.8301 ]], dtype=float32)),\n",
|
291 |
-
" ('cnt',\n",
|
292 |
-
" array([[580.63184, 723.41016],\n",
|
293 |
-
" [647.2617 , 723.41016],\n",
|
294 |
-
" [647.2617 , 764.6572 ],\n",
|
295 |
-
" [580.63184, 764.6572 ]], dtype=float32)),\n",
|
296 |
-
" ('ho',\n",
|
297 |
-
" array([[637.74316, 723.41016],\n",
|
298 |
-
" [694.8545 , 723.41016],\n",
|
299 |
-
" [694.8545 , 767.8301 ],\n",
|
300 |
-
" [637.74316, 767.8301 ]], dtype=float32)),\n",
|
301 |
-
" ('storehsise',\n",
|
302 |
-
" array([[ 12.691406, 774.1758 ],\n",
|
303 |
-
" [231.61816 , 774.1758 ],\n",
|
304 |
-
" [231.61816 , 818.5957 ],\n",
|
305 |
-
" [ 12.691406, 818.5957 ]], dtype=float32)),\n",
|
306 |
-
" ('snit',\n",
|
307 |
-
" array([[434.68066, 774.1758 ],\n",
|
308 |
-
" [504.4834 , 774.1758 ],\n",
|
309 |
-
" [504.4834 , 818.5957 ],\n",
|
310 |
-
" [434.68066, 818.5957 ]], dtype=float32)),\n",
|
311 |
-
" ('xtiakt',\n",
|
312 |
-
" array([[520.34766, 774.1758 ],\n",
|
313 |
-
" [650.4346 , 774.1758 ],\n",
|
314 |
-
" [650.4346 , 818.5957 ],\n",
|
315 |
-
" [520.34766, 818.5957 ]], dtype=float32)),\n",
|
316 |
-
" ('70',\n",
|
317 |
-
" array([[647.2617, 774.1758],\n",
|
318 |
-
" [694.8545, 774.1758],\n",
|
319 |
-
" [694.8545, 818.5957],\n",
|
320 |
-
" [647.2617, 818.5957]], dtype=float32)),\n",
|
321 |
-
" ('091',\n",
|
322 |
-
" array([[326.8037 , 821.76855],\n",
|
323 |
-
" [396.60645, 821.76855],\n",
|
324 |
-
" [396.60645, 869.3613 ],\n",
|
325 |
-
" [326.8037 , 869.3613 ]], dtype=float32)),\n",
|
326 |
-
" ('min',\n",
|
327 |
-
" array([[ 15.864258, 824.9414 ],\n",
|
328 |
-
" [ 85.66699 , 824.9414 ],\n",
|
329 |
-
" [ 85.66699 , 869.3613 ],\n",
|
330 |
-
" [ 15.864258, 869.3613 ]], dtype=float32)),\n",
|
331 |
-
" ('1811201',\n",
|
332 |
-
" array([[161.81543, 824.9414 ],\n",
|
333 |
-
" [310.93945, 824.9414 ],\n",
|
334 |
-
" [310.93945, 869.3613 ],\n",
|
335 |
-
" [161.81543, 869.3613 ]], dtype=float32)),\n",
|
336 |
-
" ('105s1',\n",
|
337 |
-
" array([[437.85352, 824.9414 ],\n",
|
338 |
-
" [520.34766, 824.9414 ],\n",
|
339 |
-
" [520.34766, 869.3613 ],\n",
|
340 |
-
" [437.85352, 869.3613 ]], dtype=float32)),\n",
|
341 |
-
" ('ha',\n",
|
342 |
-
" array([[ 98.3584 , 828.11426],\n",
|
343 |
-
" [139.60547, 828.11426],\n",
|
344 |
-
" [139.60547, 869.3613 ],\n",
|
345 |
-
" [ 98.3584 , 869.3613 ]], dtype=float32)),\n",
|
346 |
-
" ('41',\n",
|
347 |
-
" array([[393.4336 , 828.11426],\n",
|
348 |
-
" [441.02637, 828.11426],\n",
|
349 |
-
" [441.02637, 869.3613 ],\n",
|
350 |
-
" [393.4336 , 869.3613 ]], dtype=float32)),\n",
|
351 |
-
" ('staff',\n",
|
352 |
-
" array([[ 12.691406, 878.8799 ],\n",
|
353 |
-
" [126.91406 , 878.8799 ],\n",
|
354 |
-
" [126.91406 , 923.2998 ],\n",
|
355 |
-
" [ 12.691406, 923.2998 ]], dtype=float32)),\n",
|
356 |
-
" ('angel',\n",
|
357 |
-
" array([[142.77832, 878.8799 ],\n",
|
358 |
-
" [247.48242, 878.8799 ],\n",
|
359 |
-
" [247.48242, 923.2998 ],\n",
|
360 |
-
" [142.77832, 923.2998 ]], dtype=float32)),\n",
|
361 |
-
" ('duantle',\n",
|
362 |
-
" array([[329.97656, 878.8799 ],\n",
|
363 |
-
" [463.23633, 878.8799 ],\n",
|
364 |
-
" [463.23633, 923.2998 ],\n",
|
365 |
-
" [329.97656, 923.2998 ]], dtype=float32)),\n",
|
366 |
-
" ('i',\n",
|
367 |
-
" array([[250.65527, 885.2256 ],\n",
|
368 |
-
" [266.51953, 885.2256 ],\n",
|
369 |
-
" [266.51953, 916.9541 ],\n",
|
370 |
-
" [250.65527, 916.9541 ]], dtype=float32)),\n",
|
371 |
-
" ('ca',\n",
|
372 |
-
" array([[263.34668, 885.2256 ],\n",
|
373 |
-
" [314.1123 , 885.2256 ],\n",
|
374 |
-
" [314.1123 , 923.2998 ],\n",
|
375 |
-
" [263.34668, 923.2998 ]], dtype=float32)),\n",
|
376 |
-
" ('fkoreanbun',\n",
|
377 |
-
" array([[ 15.864258, 980.41113 ],\n",
|
378 |
-
" [ 250.65527 , 980.41113 ],\n",
|
379 |
-
" [ 250.65527 , 1024.831 ],\n",
|
380 |
-
" [ 15.864258, 1024.831 ]], dtype=float32)),\n",
|
381 |
-
" ('s5',\n",
|
382 |
-
" array([[ 561.5947 , 980.41113],\n",
|
383 |
-
" [ 612.36035, 980.41113],\n",
|
384 |
-
" [ 612.36035, 1021.6582 ],\n",
|
385 |
-
" [ 561.5947 , 1021.6582 ]], dtype=float32)),\n",
|
386 |
-
" ('oflj',\n",
|
387 |
-
" array([[ 621.8789 , 980.41113],\n",
|
388 |
-
" [ 694.8545 , 980.41113],\n",
|
389 |
-
" [ 694.8545 , 1021.6582 ],\n",
|
390 |
-
" [ 621.8789 , 1021.6582 ]], dtype=float32)),\n",
|
391 |
-
" ('nis',\n",
|
392 |
-
" array([[ 15.864258, 1031.1768 ],\n",
|
393 |
-
" [ 60.28418 , 1031.1768 ],\n",
|
394 |
-
" [ 60.28418 , 1075.5967 ],\n",
|
395 |
-
" [ 15.864258, 1075.5967 ]], dtype=float32)),\n",
|
396 |
-
" ('inyasabeetig',\n",
|
397 |
-
" array([[ 104.7041 , 1031.1768 ],\n",
|
398 |
-
" [ 377.56934, 1031.1768 ],\n",
|
399 |
-
" [ 377.56934, 1078.7695 ],\n",
|
400 |
-
" [ 104.7041 , 1078.7695 ]], dtype=float32)),\n",
|
401 |
-
" ('40',\n",
|
402 |
-
" array([[ 561.5947, 1031.1768],\n",
|
403 |
-
" [ 615.5332, 1031.1768],\n",
|
404 |
-
" [ 615.5332, 1072.4238],\n",
|
405 |
-
" [ 561.5947, 1072.4238]], dtype=float32)),\n",
|
406 |
-
" ('oov',\n",
|
407 |
-
" array([[ 621.8789, 1031.1768],\n",
|
408 |
-
" [ 694.8545, 1031.1768],\n",
|
409 |
-
" [ 694.8545, 1072.4238],\n",
|
410 |
-
" [ 621.8789, 1072.4238]], dtype=float32)),\n",
|
411 |
-
" ('ss',\n",
|
412 |
-
" array([[ 53.938477, 1034.3496 ],\n",
|
413 |
-
" [ 104.7041 , 1034.3496 ],\n",
|
414 |
-
" [ 104.7041 , 1075.5967 ],\n",
|
415 |
-
" [ 53.938477, 1075.5967 ]], dtype=float32)),\n",
|
416 |
-
" ('behotogcremychees',\n",
|
417 |
-
" array([[ 12.691406, 1081.9424 ],\n",
|
418 |
-
" [ 399.7793 , 1081.9424 ],\n",
|
419 |
-
" [ 399.7793 , 1129.5352 ],\n",
|
420 |
-
" [ 12.691406, 1129.5352 ]], dtype=float32)),\n",
|
421 |
-
" ('19',\n",
|
422 |
-
" array([[ 139.60547, 1132.708 ],\n",
|
423 |
-
" [ 190.3711 , 1132.708 ],\n",
|
424 |
-
" [ 190.3711 , 1177.1279 ],\n",
|
425 |
-
" [ 139.60547, 1177.1279 ]], dtype=float32)),\n",
|
426 |
-
" ('do',\n",
|
427 |
-
" array([[ 203.0625 , 1135.8809 ],\n",
|
428 |
-
" [ 250.65527, 1135.8809 ],\n",
|
429 |
-
" [ 250.65527, 1177.1279 ],\n",
|
430 |
-
" [ 203.0625 , 1177.1279 ]], dtype=float32)),\n",
|
431 |
-
" ('a',\n",
|
432 |
-
" array([[ 266.51953, 1139.0537 ],\n",
|
433 |
-
" [ 288.7295 , 1139.0537 ],\n",
|
434 |
-
" [ 288.7295 , 1173.9551 ],\n",
|
435 |
-
" [ 266.51953, 1173.9551 ]], dtype=float32)),\n",
|
436 |
-
" ('b',\n",
|
437 |
-
" array([[ 368.05078, 1135.8809 ],\n",
|
438 |
-
" [ 396.60645, 1135.8809 ],\n",
|
439 |
-
" [ 396.60645, 1173.9551 ],\n",
|
440 |
-
" [ 368.05078, 1173.9551 ]], dtype=float32)),\n",
|
441 |
-
" ('1544',\n",
|
442 |
-
" array([[ 539.38477, 1135.8809 ],\n",
|
443 |
-
" [ 615.5332 , 1135.8809 ],\n",
|
444 |
-
" [ 615.5332 , 1177.1279 ],\n",
|
445 |
-
" [ 539.38477, 1177.1279 ]], dtype=float32)),\n",
|
446 |
-
" ('oou',\n",
|
447 |
-
" array([[ 621.8789, 1135.8809],\n",
|
448 |
-
" [ 694.8545, 1135.8809],\n",
|
449 |
-
" [ 694.8545, 1177.1279],\n",
|
450 |
-
" [ 621.8789, 1177.1279]], dtype=float32)),\n",
|
451 |
-
" ('choeog',\n",
|
452 |
-
" array([[ 266.51953, 1183.4736 ],\n",
|
453 |
-
" [ 399.7793 , 1183.4736 ],\n",
|
454 |
-
" [ 399.7793 , 1231.0664 ],\n",
|
455 |
-
" [ 266.51953, 1231.0664 ]], dtype=float32)),\n",
|
456 |
-
" ('chocvronz',\n",
|
457 |
-
" array([[ 12.691406, 1186.6465 ],\n",
|
458 |
-
" [ 209.4082 , 1186.6465 ],\n",
|
459 |
-
" [ 209.4082 , 1231.0664 ],\n",
|
460 |
-
" [ 12.691406, 1231.0664 ]], dtype=float32)),\n",
|
461 |
-
" ('in1',\n",
|
462 |
-
" array([[ 206.23535, 1186.6465 ],\n",
|
463 |
-
" [ 269.69238, 1186.6465 ],\n",
|
464 |
-
" [ 269.69238, 1227.8936 ],\n",
|
465 |
-
" [ 206.23535, 1227.8936 ]], dtype=float32)),\n",
|
466 |
-
" ('1s',\n",
|
467 |
-
" array([[ 142.77832, 1237.4121 ],\n",
|
468 |
-
" [ 206.23535, 1237.4121 ],\n",
|
469 |
-
" [ 206.23535, 1281.832 ],\n",
|
470 |
-
" [ 142.77832, 1281.832 ]], dtype=float32)),\n",
|
471 |
-
" ('0',\n",
|
472 |
-
" array([[ 203.0625 , 1237.4121 ],\n",
|
473 |
-
" [ 250.65527, 1237.4121 ],\n",
|
474 |
-
" [ 250.65527, 1281.832 ],\n",
|
475 |
-
" [ 203.0625 , 1281.832 ]], dtype=float32)),\n",
|
476 |
-
" ('x',\n",
|
477 |
-
" array([[ 263.34668, 1237.4121 ],\n",
|
478 |
-
" [ 291.90234, 1237.4121 ],\n",
|
479 |
-
" [ 291.90234, 1275.4863 ],\n",
|
480 |
-
" [ 263.34668, 1275.4863 ]], dtype=float32)),\n",
|
481 |
-
" ('l',\n",
|
482 |
-
" array([[ 371.22363, 1237.4121 ],\n",
|
483 |
-
" [ 396.60645, 1237.4121 ],\n",
|
484 |
-
" [ 396.60645, 1275.4863 ],\n",
|
485 |
-
" [ 371.22363, 1275.4863 ]], dtype=float32)),\n",
|
486 |
-
" ('50',\n",
|
487 |
-
" array([[ 561.5947, 1237.4121],\n",
|
488 |
-
" [ 615.5332, 1237.4121],\n",
|
489 |
-
" [ 615.5332, 1278.6592],\n",
|
490 |
-
" [ 561.5947, 1278.6592]], dtype=float32)),\n",
|
491 |
-
" ('doq',\n",
|
492 |
-
" array([[ 621.8789, 1237.4121],\n",
|
493 |
-
" [ 694.8545, 1237.4121],\n",
|
494 |
-
" [ 694.8545, 1278.6592],\n",
|
495 |
-
" [ 621.8789, 1278.6592]], dtype=float32)),\n",
|
496 |
-
" ('total',\n",
|
497 |
-
" array([[ 15.864258, 1338.9434 ],\n",
|
498 |
-
" [ 120.56836 , 1338.9434 ],\n",
|
499 |
-
" [ 120.56836 , 1386.5361 ],\n",
|
500 |
-
" [ 15.864258, 1386.5361 ]], dtype=float32)),\n",
|
501 |
-
" ('10',\n",
|
502 |
-
" array([[ 145.95117, 1338.9434 ],\n",
|
503 |
-
" [ 225.27246, 1338.9434 ],\n",
|
504 |
-
" [ 225.27246, 1383.3633 ],\n",
|
505 |
-
" [ 145.95117, 1383.3633 ]], dtype=float32)),\n",
|
506 |
-
" ('3599',\n",
|
507 |
-
" array([[ 558.4219 , 1338.9434 ],\n",
|
508 |
-
" [ 637.74316, 1338.9434 ],\n",
|
509 |
-
" [ 637.74316, 1383.3633 ],\n",
|
510 |
-
" [ 558.4219 , 1383.3633 ]], dtype=float32)),\n",
|
511 |
-
" ('oq',\n",
|
512 |
-
" array([[ 640.916 , 1342.1162],\n",
|
513 |
-
" [ 694.8545, 1342.1162],\n",
|
514 |
-
" [ 694.8545, 1383.3633],\n",
|
515 |
-
" [ 640.916 , 1383.3633]], dtype=float32)),\n",
|
516 |
-
" ('cash',\n",
|
517 |
-
" array([[ 53.938477, 1389.709 ],\n",
|
518 |
-
" [ 149.12402 , 1389.709 ],\n",
|
519 |
-
" [ 149.12402 , 1434.1289 ],\n",
|
520 |
-
" [ 53.938477, 1434.1289 ]], dtype=float32)),\n",
|
521 |
-
" ('dool',\n",
|
522 |
-
" array([[ 558.4219, 1389.709 ],\n",
|
523 |
-
" [ 647.2617, 1389.709 ],\n",
|
524 |
-
" [ 647.2617, 1434.1289],\n",
|
525 |
-
" [ 558.4219, 1434.1289]], dtype=float32)),\n",
|
526 |
-
" ('o0',\n",
|
527 |
-
" array([[ 640.916 , 1389.709 ],\n",
|
528 |
-
" [ 691.68164, 1389.709 ],\n",
|
529 |
-
" [ 691.68164, 1434.1289 ],\n",
|
530 |
-
" [ 640.916 , 1434.1289 ]], dtype=float32)),\n",
|
531 |
-
" ('change',\n",
|
532 |
-
" array([[ 53.938477, 1440.4746 ],\n",
|
533 |
-
" [ 187.19824 , 1440.4746 ],\n",
|
534 |
-
" [ 187.19824 , 1484.8945 ],\n",
|
535 |
-
" [ 53.938477, 1484.8945 ]], dtype=float32)),\n",
|
536 |
-
" ('841',\n",
|
537 |
-
" array([[ 558.4219, 1440.4746],\n",
|
538 |
-
" [ 628.2246, 1440.4746],\n",
|
539 |
-
" [ 628.2246, 1484.8945],\n",
|
540 |
-
" [ 558.4219, 1484.8945]], dtype=float32)),\n",
|
541 |
-
" ('sdo',\n",
|
542 |
-
" array([[ 625.05176, 1440.4746 ],\n",
|
543 |
-
" [ 694.8545 , 1440.4746 ],\n",
|
544 |
-
" [ 694.8545 , 1484.8945 ],\n",
|
545 |
-
" [ 625.05176, 1484.8945 ]], dtype=float32)),\n",
|
546 |
-
" ('vatable',\n",
|
547 |
-
" array([[ 53.938477, 1545.1787 ],\n",
|
548 |
-
" [ 209.4082 , 1545.1787 ],\n",
|
549 |
-
" [ 209.4082 , 1589.5986 ],\n",
|
550 |
-
" [ 53.938477, 1589.5986 ]], dtype=float32)),\n",
|
551 |
-
" ('szos',\n",
|
552 |
-
" array([[ 558.4219 , 1545.1787 ],\n",
|
553 |
-
" [ 644.08887, 1545.1787 ],\n",
|
554 |
-
" [ 644.08887, 1589.5986 ],\n",
|
555 |
-
" [ 558.4219 , 1589.5986 ]], dtype=float32)),\n",
|
556 |
-
" ('54',\n",
|
557 |
-
" array([[ 640.916 , 1545.1787 ],\n",
|
558 |
-
" [ 691.68164, 1545.1787 ],\n",
|
559 |
-
" [ 691.68164, 1589.5986 ],\n",
|
560 |
-
" [ 640.916 , 1589.5986 ]], dtype=float32)),\n",
|
561 |
-
" ('vat',\n",
|
562 |
-
" array([[ 53.938477, 1595.9443 ],\n",
|
563 |
-
" [ 145.95117 , 1595.9443 ],\n",
|
564 |
-
" [ 145.95117 , 1646.71 ],\n",
|
565 |
-
" [ 53.938477, 1646.71 ]], dtype=float32)),\n",
|
566 |
-
" ('8b',\n",
|
567 |
-
" array([[ 580.63184, 1595.9443 ],\n",
|
568 |
-
" [ 644.08887, 1595.9443 ],\n",
|
569 |
-
" [ 644.08887, 1640.3643 ],\n",
|
570 |
-
" [ 580.63184, 1640.3643 ]], dtype=float32)),\n",
|
571 |
-
" ('tax',\n",
|
572 |
-
" array([[ 139.60547, 1599.1172 ],\n",
|
573 |
-
" [ 209.4082 , 1599.1172 ],\n",
|
574 |
-
" [ 209.4082 , 1640.3643 ],\n",
|
575 |
-
" [ 139.60547, 1640.3643 ]], dtype=float32)),\n",
|
576 |
-
" ('4g',\n",
|
577 |
-
" array([[ 644.08887, 1599.1172 ],\n",
|
578 |
-
" [ 691.68164, 1599.1172 ],\n",
|
579 |
-
" [ 691.68164, 1640.3643 ],\n",
|
580 |
-
" [ 644.08887, 1640.3643 ]], dtype=float32)),\n",
|
581 |
-
" ('zerd',\n",
|
582 |
-
" array([[ 53.938477, 1646.71 ],\n",
|
583 |
-
" [ 149.12402 , 1646.71 ],\n",
|
584 |
-
" [ 149.12402 , 1694.3027 ],\n",
|
585 |
-
" [ 53.938477, 1694.3027 ]], dtype=float32)),\n",
|
586 |
-
" ('ra',\n",
|
587 |
-
" array([[ 158.64258, 1649.8828 ],\n",
|
588 |
-
" [ 209.4082 , 1649.8828 ],\n",
|
589 |
-
" [ 209.4082 , 1694.3027 ],\n",
|
590 |
-
" [ 158.64258, 1694.3027 ]], dtype=float32)),\n",
|
591 |
-
" ('ted',\n",
|
592 |
-
" array([[ 203.0625 , 1649.8828 ],\n",
|
593 |
-
" [ 272.86523, 1649.8828 ],\n",
|
594 |
-
" [ 272.86523, 1691.1299 ],\n",
|
595 |
-
" [ 203.0625 , 1691.1299 ]], dtype=float32)),\n",
|
596 |
-
" ('0',\n",
|
597 |
-
" array([[ 599.66895, 1649.8828 ],\n",
|
598 |
-
" [ 628.2246 , 1649.8828 ],\n",
|
599 |
-
" [ 628.2246 , 1687.957 ],\n",
|
600 |
-
" [ 599.66895, 1687.957 ]], dtype=float32)),\n",
|
601 |
-
" ('00',\n",
|
602 |
-
" array([[ 640.916 , 1649.8828],\n",
|
603 |
-
" [ 694.8545, 1649.8828],\n",
|
604 |
-
" [ 694.8545, 1691.1299],\n",
|
605 |
-
" [ 640.916 , 1691.1299]], dtype=float32)),\n",
|
606 |
-
" ('vat',\n",
|
607 |
-
" array([[ 53.938477, 1700.6484 ],\n",
|
608 |
-
" [ 123.74121 , 1700.6484 ],\n",
|
609 |
-
" [ 123.74121 , 1745.0684 ],\n",
|
610 |
-
" [ 53.938477, 1745.0684 ]], dtype=float32)),\n",
|
611 |
-
" ('mexept',\n",
|
612 |
-
" array([[ 117.39551, 1700.6484 ],\n",
|
613 |
-
" [ 257.00098, 1700.6484 ],\n",
|
614 |
-
" [ 257.00098, 1748.2412 ],\n",
|
615 |
-
" [ 117.39551, 1748.2412 ]], dtype=float32)),\n",
|
616 |
-
" ('ted',\n",
|
617 |
-
" array([[ 247.48242, 1700.6484 ],\n",
|
618 |
-
" [ 314.1123 , 1700.6484 ],\n",
|
619 |
-
" [ 314.1123 , 1745.0684 ],\n",
|
620 |
-
" [ 247.48242, 1745.0684 ]], dtype=float32)),\n",
|
621 |
-
" ('0',\n",
|
622 |
-
" array([[ 602.8418, 1703.8213],\n",
|
623 |
-
" [ 628.2246, 1703.8213],\n",
|
624 |
-
" [ 628.2246, 1738.7227],\n",
|
625 |
-
" [ 602.8418, 1738.7227]], dtype=float32)),\n",
|
626 |
-
" ('od',\n",
|
627 |
-
" array([[ 640.916 , 1703.8213 ],\n",
|
628 |
-
" [ 691.68164, 1703.8213 ],\n",
|
629 |
-
" [ 691.68164, 1741.8955 ],\n",
|
630 |
-
" [ 640.916 , 1741.8955 ]], dtype=float32)),\n",
|
631 |
-
" ('7616664',\n",
|
632 |
-
" array([[ 329.97656, 1799.0068 ],\n",
|
633 |
-
" [ 482.27344, 1799.0068 ],\n",
|
634 |
-
" [ 482.27344, 1846.5996 ],\n",
|
635 |
-
" [ 329.97656, 1846.5996 ]], dtype=float32)),\n",
|
636 |
-
" ('sol',\n",
|
637 |
-
" array([[ 12.691406, 1802.1797 ],\n",
|
638 |
-
" [ 79.32129 , 1802.1797 ],\n",
|
639 |
-
" [ 79.32129 , 1846.5996 ],\n",
|
640 |
-
" [ 12.691406, 1846.5996 ]], dtype=float32)),\n",
|
641 |
-
" ('d',\n",
|
642 |
-
" array([[ 79.32129, 1805.3525 ],\n",
|
643 |
-
" [ 101.53125, 1805.3525 ],\n",
|
644 |
-
" [ 101.53125, 1843.4268 ],\n",
|
645 |
-
" [ 79.32129, 1843.4268 ]], dtype=float32)),\n",
|
646 |
-
" ('tos',\n",
|
647 |
-
" array([[ 120.56836, 1802.1797 ],\n",
|
648 |
-
" [ 184.02539, 1802.1797 ],\n",
|
649 |
-
" [ 184.02539, 1846.5996 ],\n",
|
650 |
-
" [ 120.56836, 1846.5996 ]], dtype=float32)),\n",
|
651 |
-
" ('hobos',\n",
|
652 |
-
" array([[ 203.0625, 1802.1797],\n",
|
653 |
-
" [ 333.1494, 1802.1797],\n",
|
654 |
-
" [ 333.1494, 1846.5996],\n",
|
655 |
-
" [ 203.0625, 1846.5996]], dtype=float32)),\n",
|
656 |
-
" ('name',\n",
|
657 |
-
" array([[ 12.691406, 1852.9453 ],\n",
|
658 |
-
" [ 104.7041 , 1852.9453 ],\n",
|
659 |
-
" [ 104.7041 , 1897.3652 ],\n",
|
660 |
-
" [ 12.691406, 1897.3652 ]], dtype=float32)),\n",
|
661 |
-
" ('eeten',\n",
|
662 |
-
" array([[ 126.91406, 1887.8467 ],\n",
|
663 |
-
" [ 199.88965, 1887.8467 ],\n",
|
664 |
-
" [ 199.88965, 1897.3652 ],\n",
|
665 |
-
" [ 126.91406, 1897.3652 ]], dtype=float32)),\n",
|
666 |
-
" ('addr',\n",
|
667 |
-
" array([[ 12.691406, 1906.8838 ],\n",
|
668 |
-
" [ 104.7041 , 1906.8838 ],\n",
|
669 |
-
" [ 104.7041 , 1948.1309 ],\n",
|
670 |
-
" [ 12.691406, 1948.1309 ]], dtype=float32)),\n",
|
671 |
-
" ('ess',\n",
|
672 |
-
" array([[ 98.3584 , 1910.0566 ],\n",
|
673 |
-
" [ 168.16113, 1910.0566 ],\n",
|
674 |
-
" [ 168.16113, 1951.3037 ],\n",
|
675 |
-
" [ 98.3584 , 1951.3037 ]], dtype=float32)),\n",
|
676 |
-
" ('tins',\n",
|
677 |
-
" array([[ 12.691406, 1954.4766 ],\n",
|
678 |
-
" [ 98.3584 , 1954.4766 ],\n",
|
679 |
-
" [ 98.3584 , 1998.8965 ],\n",
|
680 |
-
" [ 12.691406, 1998.8965 ]], dtype=float32)),\n",
|
681 |
-
" ('fpti',\n",
|
682 |
-
" array([[ 13.045723, 2057.3926 ],\n",
|
683 |
-
" [ 81.36672 , 2062.2727 ],\n",
|
684 |
-
" [ 78.322716, 2104.889 ],\n",
|
685 |
-
" [ 10.001719, 2100.0088 ]], dtype=float32)),\n",
|
686 |
-
" ('ippl',\n",
|
687 |
-
" array([[ 101.53125, 2059.1807 ],\n",
|
688 |
-
" [ 171.33398, 2059.1807 ],\n",
|
689 |
-
" [ 171.33398, 2106.7734 ],\n",
|
690 |
-
" [ 101.53125, 2106.7734 ]], dtype=float32)),\n",
|
691 |
-
" ('seven',\n",
|
692 |
-
" array([[ 241.13672, 2059.1807 ],\n",
|
693 |
-
" [ 355.35938, 2059.1807 ],\n",
|
694 |
-
" [ 355.35938, 2103.6006 ],\n",
|
695 |
-
" [ 241.13672, 2103.6006 ]], dtype=float32)),\n",
|
696 |
-
" ('corpor',\n",
|
697 |
-
" array([[ 371.50757, 2057.8103 ],\n",
|
698 |
-
" [ 499.50806, 2065.8103 ],\n",
|
699 |
-
" [ 496.71796, 2110.4524 ],\n",
|
700 |
-
" [ 368.7175 , 2102.4526 ]], dtype=float32)),\n",
|
701 |
-
" ('s',\n",
|
702 |
-
" array([[ 164.98828, 2065.5264 ],\n",
|
703 |
-
" [ 180.85254, 2065.5264 ],\n",
|
704 |
-
" [ 180.85254, 2100.4277 ],\n",
|
705 |
-
" [ 164.98828, 2100.4277 ]], dtype=float32)),\n",
|
706 |
-
" ('ne',\n",
|
707 |
-
" array([[ 177.67969, 2062.3535 ],\n",
|
708 |
-
" [ 228.44531, 2062.3535 ],\n",
|
709 |
-
" [ 228.44531, 2103.6006 ],\n",
|
710 |
-
" [ 177.67969, 2103.6006 ]], dtype=float32)),\n",
|
711 |
-
" ('at',\n",
|
712 |
-
" array([[ 494.96484, 2062.3535 ],\n",
|
713 |
-
" [ 542.5576 , 2062.3535 ],\n",
|
714 |
-
" [ 542.5576 , 2103.6006 ],\n",
|
715 |
-
" [ 494.96484, 2103.6006 ]], dtype=float32)),\n",
|
716 |
-
" ('on',\n",
|
717 |
-
" array([[ 558.4219, 2065.5264],\n",
|
718 |
-
" [ 609.1875, 2065.5264],\n",
|
719 |
-
" [ 609.1875, 2103.6006],\n",
|
720 |
-
" [ 558.4219, 2103.6006]], dtype=float32)),\n",
|
721 |
-
" ('jth',\n",
|
722 |
-
" array([[ 12.691406, 2109.9463 ],\n",
|
723 |
-
" [ 82.49414 , 2109.9463 ],\n",
|
724 |
-
" [ 82.49414 , 2154.3662 ],\n",
|
725 |
-
" [ 12.691406, 2154.3662 ]], dtype=float32)),\n",
|
726 |
-
" ('the',\n",
|
727 |
-
" array([[ 225.27246, 2109.9463 ],\n",
|
728 |
-
" [ 291.90234, 2109.9463 ],\n",
|
729 |
-
" [ 291.90234, 2154.3662 ],\n",
|
730 |
-
" [ 225.27246, 2154.3662 ]], dtype=float32)),\n",
|
731 |
-
" ('co',\n",
|
732 |
-
" array([[ 304.59375, 2109.9463 ],\n",
|
733 |
-
" [ 355.35938, 2109.9463 ],\n",
|
734 |
-
" [ 355.35938, 2154.3662 ],\n",
|
735 |
-
" [ 304.59375, 2154.3662 ]], dtype=float32)),\n",
|
736 |
-
" ('tower',\n",
|
737 |
-
" array([[ 498.1377 , 2109.9463 ],\n",
|
738 |
-
" [ 606.01465, 2109.9463 ],\n",
|
739 |
-
" [ 606.01465, 2154.3662 ],\n",
|
740 |
-
" [ 498.1377 , 2154.3662 ]], dtype=float32)),\n",
|
741 |
-
" ('f',\n",
|
742 |
-
" array([[ 95.18555, 2113.1191 ],\n",
|
743 |
-
" [ 120.56836, 2113.1191 ],\n",
|
744 |
-
" [ 120.56836, 2151.1934 ],\n",
|
745 |
-
" [ 95.18555, 2151.1934 ]], dtype=float32)),\n",
|
746 |
-
" ('t',\n",
|
747 |
-
" array([[ 352.18652, 2116.292 ],\n",
|
748 |
-
" [ 368.05078, 2116.292 ],\n",
|
749 |
-
" [ 368.05078, 2151.1934 ],\n",
|
750 |
-
" [ 352.18652, 2151.1934 ]], dtype=float32)),\n",
|
751 |
-
" ('iqor',\n",
|
752 |
-
" array([[ 120.56836, 2116.292 ],\n",
|
753 |
-
" [ 206.23535, 2116.292 ],\n",
|
754 |
-
" [ 206.23535, 2154.3662 ],\n",
|
755 |
-
" [ 120.56836, 2154.3662 ]], dtype=float32)),\n",
|
756 |
-
" ('umb',\n",
|
757 |
-
" array([[ 368.05078, 2116.292 ],\n",
|
758 |
-
" [ 441.02637, 2116.292 ],\n",
|
759 |
-
" [ 441.02637, 2154.3662 ],\n",
|
760 |
-
" [ 368.05078, 2154.3662 ]], dtype=float32)),\n",
|
761 |
-
" ('id',\n",
|
762 |
-
" array([[ 434.68066, 2116.292 ],\n",
|
763 |
-
" [ 479.1006 , 2116.292 ],\n",
|
764 |
-
" [ 479.1006 , 2154.3662 ],\n",
|
765 |
-
" [ 434.68066, 2154.3662 ]], dtype=float32)),\n",
|
766 |
-
" ('avenues',\n",
|
767 |
-
" array([[ 203.0625 , 2160.712 ],\n",
|
768 |
-
" [ 349.01367, 2160.712 ],\n",
|
769 |
-
" [ 349.01367, 2208.3047 ],\n",
|
770 |
-
" [ 203.0625 , 2208.3047 ]], dtype=float32)),\n",
|
771 |
-
" ('ort',\n",
|
772 |
-
" array([[ 31.728516, 2163.8848 ],\n",
|
773 |
-
" [ 101.53125 , 2163.8848 ],\n",
|
774 |
-
" [ 101.53125 , 2205.1318 ],\n",
|
775 |
-
" [ 31.728516, 2205.1318 ]], dtype=float32)),\n",
|
776 |
-
" ('i',\n",
|
777 |
-
" array([[ 101.53125, 2167.0576 ],\n",
|
778 |
-
" [ 114.22266, 2167.0576 ],\n",
|
779 |
-
" [ 114.22266, 2198.7861 ],\n",
|
780 |
-
" [ 101.53125, 2198.7861 ]], dtype=float32)),\n",
|
781 |
-
" ('manda',\n",
|
782 |
-
" array([[ 368.05078, 2163.8848 ],\n",
|
783 |
-
" [ 479.1006 , 2163.8848 ],\n",
|
784 |
-
" [ 479.1006 , 2205.1318 ],\n",
|
785 |
-
" [ 368.05078, 2205.1318 ]], dtype=float32)),\n",
|
786 |
-
" ('gas',\n",
|
787 |
-
" array([[ 117.39551, 2167.0576 ],\n",
|
788 |
-
" [ 187.19824, 2167.0576 ],\n",
|
789 |
-
" [ 187.19824, 2211.4775 ],\n",
|
790 |
-
" [ 117.39551, 2211.4775 ]], dtype=float32)),\n",
|
791 |
-
" ('l',\n",
|
792 |
-
" array([[ 479.1006 , 2170.2305 ],\n",
|
793 |
-
" [ 488.61914, 2170.2305 ],\n",
|
794 |
-
" [ 488.61914, 2195.6133 ],\n",
|
795 |
-
" [ 479.1006 , 2195.6133 ]], dtype=float32)),\n",
|
796 |
-
" ('lyonig',\n",
|
797 |
-
" array([[ 494.96484, 2170.2305 ],\n",
|
798 |
-
" [ 606.01465, 2170.2305 ],\n",
|
799 |
-
" [ 606.01465, 2211.4775 ],\n",
|
800 |
-
" [ 494.96484, 2211.4775 ]], dtype=float32)),\n",
|
801 |
-
" ('ci',\n",
|
802 |
-
" array([[ 31.728516, 2214.6504 ],\n",
|
803 |
-
" [ 79.32129 , 2214.6504 ],\n",
|
804 |
-
" [ 79.32129 , 2259.0703 ],\n",
|
805 |
-
" [ 31.728516, 2259.0703 ]], dtype=float32)),\n",
|
806 |
-
" ('ty',\n",
|
807 |
-
" array([[ 76.14844, 2217.8232 ],\n",
|
808 |
-
" [ 123.74121, 2217.8232 ],\n",
|
809 |
-
" [ 123.74121, 2259.0703 ],\n",
|
810 |
-
" [ 76.14844, 2259.0703 ]], dtype=float32)),\n",
|
811 |
-
" ('sgrooo',\n",
|
812 |
-
" array([[ 304.59375, 2262.2432 ],\n",
|
813 |
-
" [ 441.02637, 2262.2432 ],\n",
|
814 |
-
" [ 441.02637, 2309.836 ],\n",
|
815 |
-
" [ 304.59375, 2309.836 ]], dtype=float32)),\n",
|
816 |
-
" ('tins',\n",
|
817 |
-
" array([[ 15.864258, 2265.416 ],\n",
|
818 |
-
" [ 98.3584 , 2265.416 ],\n",
|
819 |
-
" [ 98.3584 , 2309.836 ],\n",
|
820 |
-
" [ 15.864258, 2309.836 ]], dtype=float32)),\n",
|
821 |
-
" ('doo',\n",
|
822 |
-
" array([[ 117.39551, 2265.416 ],\n",
|
823 |
-
" [ 203.0625 , 2265.416 ],\n",
|
824 |
-
" [ 203.0625 , 2309.836 ],\n",
|
825 |
-
" [ 117.39551, 2309.836 ]], dtype=float32)),\n",
|
826 |
-
" ('sioul',\n",
|
827 |
-
" array([[ 199.88965, 2265.416 ],\n",
|
828 |
-
" [ 310.93945, 2265.416 ],\n",
|
829 |
-
" [ 310.93945, 2309.836 ],\n",
|
830 |
-
" [ 199.88965, 2309.836 ]], dtype=float32)),\n",
|
831 |
-
" ('bir',\n",
|
832 |
-
" array([[ 12.691406, 2316.1816 ],\n",
|
833 |
-
" [ 82.49414 , 2316.1816 ],\n",
|
834 |
-
" [ 82.49414 , 2360.6016 ],\n",
|
835 |
-
" [ 12.691406, 2360.6016 ]], dtype=float32)),\n",
|
836 |
-
" ('accr',\n",
|
837 |
-
" array([[ 95.18555, 2319.3545 ],\n",
|
838 |
-
" [ 187.19824, 2319.3545 ],\n",
|
839 |
-
" [ 187.19824, 2363.7744 ],\n",
|
840 |
-
" [ 95.18555, 2363.7744 ]], dtype=float32)),\n",
|
841 |
-
" ('h',\n",
|
842 |
-
" array([[ 203.0625 , 2322.5273 ],\n",
|
843 |
-
" [ 225.27246, 2322.5273 ],\n",
|
844 |
-
" [ 225.27246, 2357.4287 ],\n",
|
845 |
-
" [ 203.0625 , 2357.4287 ]], dtype=float32)),\n",
|
846 |
-
" ('smooojso1',\n",
|
847 |
-
" array([[ 72.975586, 2366.9473 ],\n",
|
848 |
-
" [ 263.34668 , 2366.9473 ],\n",
|
849 |
-
" [ 263.34668 , 2411.3672 ],\n",
|
850 |
-
" [ 72.975586, 2411.3672 ]], dtype=float32)),\n",
|
851 |
-
" ('sjuousa',\n",
|
852 |
-
" array([[ 263.34668, 2366.9473 ],\n",
|
853 |
-
" [ 479.1006 , 2366.9473 ],\n",
|
854 |
-
" [ 479.1006 , 2411.3672 ],\n",
|
855 |
-
" [ 263.34668, 2411.3672 ]], dtype=float32)),\n",
|
856 |
-
" ('96oz',\n",
|
857 |
-
" array([[ 494.96484, 2366.9473 ],\n",
|
858 |
-
" [ 586.97754, 2366.9473 ],\n",
|
859 |
-
" [ 586.97754, 2411.3672 ],\n",
|
860 |
-
" [ 494.96484, 2411.3672 ]], dtype=float32)),\n",
|
861 |
-
" ('11',\n",
|
862 |
-
" array([[ 34.901367, 2370.12 ],\n",
|
863 |
-
" [ 79.32129 , 2370.12 ],\n",
|
864 |
-
" [ 79.32129 , 2411.3672 ],\n",
|
865 |
-
" [ 34.901367, 2411.3672 ]], dtype=float32)),\n",
|
866 |
-
" ('accrdater',\n",
|
867 |
-
" array([[ 12.691406, 2417.713 ],\n",
|
868 |
-
" [ 203.0625 , 2417.713 ],\n",
|
869 |
-
" [ 203.0625 , 2465.3057 ],\n",
|
870 |
-
" [ 12.691406, 2465.3057 ]], dtype=float32)),\n",
|
871 |
-
" ('d8i01',\n",
|
872 |
-
" array([[ 222.09961, 2417.713 ],\n",
|
873 |
-
" [ 329.97656, 2417.713 ],\n",
|
874 |
-
" [ 329.97656, 2462.1328 ],\n",
|
875 |
-
" [ 222.09961, 2462.1328 ]], dtype=float32)),\n",
|
876 |
-
" ('220',\n",
|
877 |
-
" array([[ 329.97656, 2417.713 ],\n",
|
878 |
-
" [ 441.02637, 2417.713 ],\n",
|
879 |
-
" [ 441.02637, 2462.1328 ],\n",
|
880 |
-
" [ 329.97656, 2462.1328 ]], dtype=float32)),\n",
|
881 |
-
" ('17',\n",
|
882 |
-
" array([[ 31.728516, 2471.6514 ],\n",
|
883 |
-
" [ 85.66699 , 2471.6514 ],\n",
|
884 |
-
" [ 85.66699 , 2512.8984 ],\n",
|
885 |
-
" [ 31.728516, 2512.8984 ]], dtype=float32)),\n",
|
886 |
-
" ('151',\n",
|
887 |
-
" array([[ 76.14844, 2471.6514 ],\n",
|
888 |
-
" [ 139.60547, 2471.6514 ],\n",
|
889 |
-
" [ 139.60547, 2516.0713 ],\n",
|
890 |
-
" [ 76.14844, 2516.0713 ]], dtype=float32)),\n",
|
891 |
-
" ('izoz5',\n",
|
892 |
-
" array([[ 139.60547, 2471.6514 ],\n",
|
893 |
-
" [ 250.65527, 2471.6514 ],\n",
|
894 |
-
" [ 250.65527, 2516.0713 ],\n",
|
895 |
-
" [ 139.60547, 2516.0713 ]], dtype=float32)),\n",
|
896 |
-
" ('fermi',\n",
|
897 |
-
" array([[ 12.691406, 2519.2441 ],\n",
|
898 |
-
" [ 120.56836 , 2519.2441 ],\n",
|
899 |
-
" [ 120.56836 , 2566.837 ],\n",
|
900 |
-
" [ 12.691406, 2566.837 ]], dtype=float32)),\n",
|
901 |
-
" ('t',\n",
|
902 |
-
" array([[ 117.39551, 2525.5898 ],\n",
|
903 |
-
" [ 142.77832, 2525.5898 ],\n",
|
904 |
-
" [ 142.77832, 2563.664 ],\n",
|
905 |
-
" [ 117.39551, 2563.664 ]], dtype=float32)),\n",
|
906 |
-
" ('hs',\n",
|
907 |
-
" array([[ 158.64258, 2525.5898 ],\n",
|
908 |
-
" [ 199.88965, 2525.5898 ],\n",
|
909 |
-
" [ 199.88965, 2563.664 ],\n",
|
910 |
-
" [ 158.64258, 2563.664 ]], dtype=float32)),\n",
|
911 |
-
" ('fpzoirtias',\n",
|
912 |
-
" array([[ 31.728516, 2570.0098 ],\n",
|
913 |
-
" [ 479.1006 , 2570.0098 ],\n",
|
914 |
-
" [ 479.1006 , 2617.6025 ],\n",
|
915 |
-
" [ 31.728516, 2617.6025 ]], dtype=float32)),\n",
|
916 |
-
" ('dooniz',\n",
|
917 |
-
" array([[ 469.58203, 2573.1826 ],\n",
|
918 |
-
" [ 586.97754, 2573.1826 ],\n",
|
919 |
-
" [ 586.97754, 2617.6025 ],\n",
|
920 |
-
" [ 469.58203, 2617.6025 ]], dtype=float32)),\n",
|
921 |
-
" ('get',\n",
|
922 |
-
" array([[ 31.728516, 2674.7139 ],\n",
|
923 |
-
" [ 101.53125 , 2674.7139 ],\n",
|
924 |
-
" [ 101.53125 , 2719.1338 ],\n",
|
925 |
-
" [ 31.728516, 2719.1338 ]], dtype=float32)),\n",
|
926 |
-
" ('for',\n",
|
927 |
-
" array([[ 602.8418, 2674.7139],\n",
|
928 |
-
" [ 669.4717, 2674.7139],\n",
|
929 |
-
" [ 669.4717, 2719.1338],\n",
|
930 |
-
" [ 602.8418, 2719.1338]], dtype=float32)),\n",
|
931 |
-
" ('chance',\n",
|
932 |
-
" array([[ 158.87543, 2676.548 ],\n",
|
933 |
-
" [ 292.87747, 2680.6086 ],\n",
|
934 |
-
" [ 291.59088, 2723.0664 ],\n",
|
935 |
-
" [ 157.58882, 2719.0059 ]], dtype=float32)),\n",
|
936 |
-
" ('to',\n",
|
937 |
-
" array([[ 304.59375, 2677.8867 ],\n",
|
938 |
-
" [ 355.35938, 2677.8867 ],\n",
|
939 |
-
" [ 355.35938, 2719.1338 ],\n",
|
940 |
-
" [ 304.59375, 2719.1338 ]], dtype=float32)),\n",
|
941 |
-
" ('win',\n",
|
942 |
-
" array([[ 368.05078, 2677.8867 ],\n",
|
943 |
-
" [ 441.02637, 2677.8867 ],\n",
|
944 |
-
" [ 441.02637, 2722.3066 ],\n",
|
945 |
-
" [ 368.05078, 2722.3066 ]], dtype=float32)),\n",
|
946 |
-
" ('trip',\n",
|
947 |
-
" array([[ 494.96484, 2677.8867 ],\n",
|
948 |
-
" [ 586.97754, 2677.8867 ],\n",
|
949 |
-
" [ 586.97754, 2722.3066 ],\n",
|
950 |
-
" [ 494.96484, 2722.3066 ]], dtype=float32)),\n",
|
951 |
-
" ('t',\n",
|
952 |
-
" array([[ 114.22266, 2681.0596 ],\n",
|
953 |
-
" [ 139.60547, 2681.0596 ],\n",
|
954 |
-
" [ 139.60547, 2715.961 ],\n",
|
955 |
-
" [ 114.22266, 2715.961 ]], dtype=float32)),\n",
|
956 |
-
" ('a',\n",
|
957 |
-
" array([[ 453.71777, 2687.4053 ],\n",
|
958 |
-
" [ 475.92773, 2687.4053 ],\n",
|
959 |
-
" [ 475.92773, 2719.1338 ],\n",
|
960 |
-
" [ 453.71777, 2719.1338 ]], dtype=float32)),\n",
|
961 |
-
" ('f',\n",
|
962 |
-
" array([[ 57.11133, 2731.8252 ],\n",
|
963 |
-
" [ 79.32129, 2731.8252 ],\n",
|
964 |
-
" [ 79.32129, 2769.8994 ],\n",
|
965 |
-
" [ 57.11133, 2769.8994 ]], dtype=float32)),\n",
|
966 |
-
" ('to',\n",
|
967 |
-
" array([[ 95.18555, 2728.6523 ],\n",
|
968 |
-
" [ 142.77832, 2728.6523 ],\n",
|
969 |
-
" [ 142.77832, 2773.0723 ],\n",
|
970 |
-
" [ 95.18555, 2773.0723 ]], dtype=float32)),\n",
|
971 |
-
" ('kored',\n",
|
972 |
-
" array([[ 158.64258, 2728.6523 ],\n",
|
973 |
-
" [ 269.69238, 2728.6523 ],\n",
|
974 |
-
" [ 269.69238, 2773.0723 ],\n",
|
975 |
-
" [ 158.64258, 2773.0723 ]], dtype=float32)),\n",
|
976 |
-
" ('pis0',\n",
|
977 |
-
" array([[ 558.4219, 2728.6523],\n",
|
978 |
-
" [ 650.4346, 2728.6523],\n",
|
979 |
-
" [ 650.4346, 2773.0723],\n",
|
980 |
-
" [ 558.4219, 2773.0723]], dtype=float32)),\n",
|
981 |
-
" ('when',\n",
|
982 |
-
" array([[ 285.55664, 2731.8252 ],\n",
|
983 |
-
" [ 377.56934, 2731.8252 ],\n",
|
984 |
-
" [ 377.56934, 2773.0723 ],\n",
|
985 |
-
" [ 285.55664, 2773.0723 ]], dtype=float32)),\n",
|
986 |
-
" ('buy',\n",
|
987 |
-
" array([[ 472.75488, 2731.8252 ],\n",
|
988 |
-
" [ 542.5576 , 2731.8252 ],\n",
|
989 |
-
" [ 542.5576 , 2773.0723 ],\n",
|
990 |
-
" [ 472.75488, 2773.0723 ]], dtype=float32)),\n",
|
991 |
-
" ('you',\n",
|
992 |
-
" array([[ 390.26074, 2734.998 ],\n",
|
993 |
-
" [ 460.06348, 2734.998 ],\n",
|
994 |
-
" [ 460.06348, 2776.245 ],\n",
|
995 |
-
" [ 390.26074, 2776.245 ]], dtype=float32)),\n",
|
996 |
-
" ('of',\n",
|
997 |
-
" array([[ 158.64258, 2779.418 ],\n",
|
998 |
-
" [ 206.23535, 2779.418 ],\n",
|
999 |
-
" [ 206.23535, 2823.838 ],\n",
|
1000 |
-
" [ 158.64258, 2823.838 ]], dtype=float32)),\n",
|
1001 |
-
" ('jel',\n",
|
1002 |
-
" array([[ 225.27246, 2779.418 ],\n",
|
1003 |
-
" [ 307.7666 , 2779.418 ],\n",
|
1004 |
-
" [ 307.7666 , 2823.838 ],\n",
|
1005 |
-
" [ 225.27246, 2823.838 ]], dtype=float32)),\n",
|
1006 |
-
" ('worth',\n",
|
1007 |
-
" array([[ 31.728516, 2782.5908 ],\n",
|
1008 |
-
" [ 145.95117 , 2782.5908 ],\n",
|
1009 |
-
" [ 145.95117 , 2830.1836 ],\n",
|
1010 |
-
" [ 31.728516, 2830.1836 ]], dtype=float32)),\n",
|
1011 |
-
" ('tens',\n",
|
1012 |
-
" array([[ 434.68066, 2782.5908 ],\n",
|
1013 |
-
" [ 533.03906, 2782.5908 ],\n",
|
1014 |
-
" [ 533.03906, 2820.665 ],\n",
|
1015 |
-
" [ 434.68066, 2820.665 ]], dtype=float32)),\n",
|
1016 |
-
" ('ean',\n",
|
1017 |
-
" array([[ 558.4219, 2782.5908],\n",
|
1018 |
-
" [ 650.4346, 2782.5908],\n",
|
1019 |
-
" [ 650.4346, 2823.838 ],\n",
|
1020 |
-
" [ 558.4219, 2823.838 ]], dtype=float32)),\n",
|
1021 |
-
" ('even',\n",
|
1022 |
-
" array([[ 304.59375, 2785.7637 ],\n",
|
1023 |
-
" [ 396.60645, 2785.7637 ],\n",
|
1024 |
-
" [ 396.60645, 2823.838 ],\n",
|
1025 |
-
" [ 304.59375, 2823.838 ]], dtype=float32)),\n",
|
1026 |
-
" ('s',\n",
|
1027 |
-
" array([[ 31.728516, 2830.1836 ],\n",
|
1028 |
-
" [ 57.11133 , 2830.1836 ],\n",
|
1029 |
-
" [ 57.11133 , 2871.4307 ],\n",
|
1030 |
-
" [ 31.728516, 2871.4307 ]], dtype=float32)),\n",
|
1031 |
-
" ('era',\n",
|
1032 |
-
" array([[ 72.975586, 2830.1836 ],\n",
|
1033 |
-
" [ 142.77832 , 2830.1836 ],\n",
|
1034 |
-
" [ 142.77832 , 2877.7764 ],\n",
|
1035 |
-
" [ 72.975586, 2877.7764 ]], dtype=float32)),\n",
|
1036 |
-
" ('ffle',\n",
|
1037 |
-
" array([[ 139.60547, 2830.1836 ],\n",
|
1038 |
-
" [ 228.44531, 2830.1836 ],\n",
|
1039 |
-
" [ 228.44531, 2877.7764 ],\n",
|
1040 |
-
" [ 139.60547, 2877.7764 ]], dtype=float32)),\n",
|
1041 |
-
" ('entr',\n",
|
1042 |
-
" array([[ 241.13672, 2833.3564 ],\n",
|
1043 |
-
" [ 336.32227, 2833.3564 ],\n",
|
1044 |
-
" [ 336.32227, 2874.6035 ],\n",
|
1045 |
-
" [ 241.13672, 2874.6035 ]], dtype=float32)),\n",
|
1046 |
-
" ('ies',\n",
|
1047 |
-
" array([[ 329.97656, 2836.5293 ],\n",
|
1048 |
-
" [ 393.4336 , 2836.5293 ],\n",
|
1049 |
-
" [ 393.4336 , 2871.4307 ],\n",
|
1050 |
-
" [ 329.97656, 2871.4307 ]], dtype=float32)),\n",
|
1051 |
-
" ('aphen',\n",
|
1052 |
-
" array([[ 412.4707 , 2836.5293 ],\n",
|
1053 |
-
" [ 501.31055, 2836.5293 ],\n",
|
1054 |
-
" [ 501.31055, 2877.7764 ],\n",
|
1055 |
-
" [ 412.4707 , 2877.7764 ]], dtype=float32)),\n",
|
1056 |
-
" ('duy',\n",
|
1057 |
-
" array([[ 596.4961, 2836.5293],\n",
|
1058 |
-
" [ 669.4717, 2836.5293],\n",
|
1059 |
-
" [ 669.4717, 2877.7764],\n",
|
1060 |
-
" [ 596.4961, 2877.7764]], dtype=float32)),\n",
|
1061 |
-
" ('you',\n",
|
1062 |
-
" array([[ 517.1748 , 2839.7021 ],\n",
|
1063 |
-
" [ 586.97754, 2839.7021 ],\n",
|
1064 |
-
" [ 586.97754, 2877.7764 ],\n",
|
1065 |
-
" [ 517.1748 , 2877.7764 ]], dtype=float32)),\n",
|
1066 |
-
" ('dis',\n",
|
1067 |
-
" array([[ 31.728516, 2884.122 ],\n",
|
1068 |
-
" [ 79.32129 , 2884.122 ],\n",
|
1069 |
-
" [ 79.32129 , 2925.3691 ],\n",
|
1070 |
-
" [ 31.728516, 2925.3691 ]], dtype=float32)),\n",
|
1071 |
-
" ('scdunted',\n",
|
1072 |
-
" array([[ 76.14844, 2884.122 ],\n",
|
1073 |
-
" [ 250.65527, 2884.122 ],\n",
|
1074 |
-
" [ 250.65527, 2928.542 ],\n",
|
1075 |
-
" [ 76.14844, 2928.542 ]], dtype=float32)),\n",
|
1076 |
-
" ('booster',\n",
|
1077 |
-
" array([[ 263.34668, 2884.122 ],\n",
|
1078 |
-
" [ 415.64355, 2884.122 ],\n",
|
1079 |
-
" [ 415.64355, 2928.542 ],\n",
|
1080 |
-
" [ 263.34668, 2928.542 ]], dtype=float32)),\n",
|
1081 |
-
" ('tenss',\n",
|
1082 |
-
" array([[ 453.71777, 2884.122 ],\n",
|
1083 |
-
" [ 548.9033 , 2884.122 ],\n",
|
1084 |
-
" [ 548.9033 , 2928.542 ],\n",
|
1085 |
-
" [ 453.71777, 2928.542 ]], dtype=float32)),\n",
|
1086 |
-
" ('fper',\n",
|
1087 |
-
" array([[ 577.459 , 2884.122 ],\n",
|
1088 |
-
" [ 647.2617, 2884.122 ],\n",
|
1089 |
-
" [ 647.2617, 2928.542 ],\n",
|
1090 |
-
" [ 577.459 , 2928.542 ]], dtype=float32)),\n",
|
1091 |
-
" ('dii',\n",
|
1092 |
-
" array([[ 31.728516, 2934.8877 ],\n",
|
1093 |
-
" [ 101.53125 , 2934.8877 ],\n",
|
1094 |
-
" [ 101.53125 , 2979.3076 ],\n",
|
1095 |
-
" [ 31.728516, 2979.3076 ]], dtype=float32)),\n",
|
1096 |
-
" ('fair',\n",
|
1097 |
-
" array([[ 117.39551, 2934.8877 ],\n",
|
1098 |
-
" [ 209.4082 , 2934.8877 ],\n",
|
1099 |
-
" [ 209.4082 , 2979.3076 ],\n",
|
1100 |
-
" [ 117.39551, 2979.3076 ]], dtype=float32)),\n",
|
1101 |
-
" ('trade',\n",
|
1102 |
-
" array([[ 222.09961, 2934.8877 ],\n",
|
1103 |
-
" [ 333.1494 , 2934.8877 ],\n",
|
1104 |
-
" [ 333.1494 , 2979.3076 ],\n",
|
1105 |
-
" [ 222.09961, 2979.3076 ]], dtype=float32)),\n",
|
1106 |
-
" ('permt',\n",
|
1107 |
-
" array([[ 346.57706, 2933.5906 ],\n",
|
1108 |
-
" [ 458.4858 , 2939.4805 ],\n",
|
1109 |
-
" [ 456.2683 , 2981.6128 ],\n",
|
1110 |
-
" [ 344.35956, 2975.7231 ]], dtype=float32)),\n",
|
1111 |
-
" ('nunbers',\n",
|
1112 |
-
" array([[ 494.96484, 2934.8877 ],\n",
|
1113 |
-
" [ 644.08887, 2934.8877 ],\n",
|
1114 |
-
" [ 644.08887, 2979.3076 ],\n",
|
1115 |
-
" [ 494.96484, 2979.3076 ]], dtype=float32)),\n",
|
1116 |
-
" ('t',\n",
|
1117 |
-
" array([[ 453.71777, 2941.2334 ],\n",
|
1118 |
-
" [ 475.92773, 2941.2334 ],\n",
|
1119 |
-
" [ 475.92773, 2976.1348 ],\n",
|
1120 |
-
" [ 453.71777, 2976.1348 ]], dtype=float32)),\n",
|
1121 |
-
" ('18015',\n",
|
1122 |
-
" array([[ 117.39551, 2985.6533 ],\n",
|
1123 |
-
" [ 247.48242, 2985.6533 ],\n",
|
1124 |
-
" [ 247.48242, 3030.0732 ],\n",
|
1125 |
-
" [ 117.39551, 3030.0732 ]], dtype=float32)),\n",
|
1126 |
-
" ('series',\n",
|
1127 |
-
" array([[ 263.34668, 2985.6533 ],\n",
|
1128 |
-
" [ 396.60645, 2985.6533 ],\n",
|
1129 |
-
" [ 396.60645, 3030.0732 ],\n",
|
1130 |
-
" [ 263.34668, 3030.0732 ]], dtype=float32)),\n",
|
1131 |
-
" ('of',\n",
|
1132 |
-
" array([[ 409.29785, 2985.6533 ],\n",
|
1133 |
-
" [ 456.89062, 2985.6533 ],\n",
|
1134 |
-
" [ 456.89062, 3030.0732 ],\n",
|
1135 |
-
" [ 409.29785, 3030.0732 ]], dtype=float32)),\n",
|
1136 |
-
" ('edz5',\n",
|
1137 |
-
" array([[ 472.75488, 2985.6533 ],\n",
|
1138 |
-
" [ 571.1133 , 2985.6533 ],\n",
|
1139 |
-
" [ 571.1133 , 3026.9004 ],\n",
|
1140 |
-
" [ 472.75488, 3026.9004 ]], dtype=float32)),\n",
|
1141 |
-
" ('facebooks',\n",
|
1142 |
-
" array([[ 53.938477, 3036.419 ],\n",
|
1143 |
-
" [ 234.79102 , 3036.419 ],\n",
|
1144 |
-
" [ 234.79102 , 3080.8389 ],\n",
|
1145 |
-
" [ 53.938477, 3080.8389 ]], dtype=float32)),\n",
|
1146 |
-
" ('71',\n",
|
1147 |
-
" array([[ 329.97656, 3036.419 ],\n",
|
1148 |
-
" [ 371.22363, 3036.419 ],\n",
|
1149 |
-
" [ 371.22363, 3077.666 ],\n",
|
1150 |
-
" [ 329.97656, 3077.666 ]], dtype=float32)),\n",
|
1151 |
-
" ('iphi',\n",
|
1152 |
-
" array([[ 371.22363, 3036.419 ],\n",
|
1153 |
-
" [ 456.89062, 3036.419 ],\n",
|
1154 |
-
" [ 456.89062, 3080.8389 ],\n",
|
1155 |
-
" [ 371.22363, 3080.8389 ]], dtype=float32)),\n",
|
1156 |
-
" ('comf',\n",
|
1157 |
-
" array([[ 241.13672, 3039.5918 ],\n",
|
1158 |
-
" [ 333.1494 , 3039.5918 ],\n",
|
1159 |
-
" [ 333.1494 , 3080.8389 ],\n",
|
1160 |
-
" [ 241.13672, 3080.8389 ]], dtype=float32)),\n",
|
1161 |
-
" ('l',\n",
|
1162 |
-
" array([[ 456.89062, 3042.7646 ],\n",
|
1163 |
-
" [ 469.58203, 3042.7646 ],\n",
|
1164 |
-
" [ 469.58203, 3071.3203 ],\n",
|
1165 |
-
" [ 456.89062, 3071.3203 ]], dtype=float32)),\n",
|
1166 |
-
" ('pp',\n",
|
1167 |
-
" array([[ 491.792 , 3039.5918],\n",
|
1168 |
-
" [ 542.5576, 3039.5918],\n",
|
1169 |
-
" [ 542.5576, 3080.8389],\n",
|
1170 |
-
" [ 491.792 , 3080.8389]], dtype=float32)),\n",
|
1171 |
-
" ('fes',\n",
|
1172 |
-
" array([[ 552.0762 , 3039.5918 ],\n",
|
1173 |
-
" [ 631.39746, 3039.5918 ],\n",
|
1174 |
-
" [ 631.39746, 3077.666 ],\n",
|
1175 |
-
" [ 552.0762 , 3077.666 ]], dtype=float32)),\n",
|
1176 |
-
" ('this',\n",
|
1177 |
-
" array([[ 53.938477, 3137.9502 ],\n",
|
1178 |
-
" [ 142.77832 , 3137.9502 ],\n",
|
1179 |
-
" [ 142.77832 , 3185.543 ],\n",
|
1180 |
-
" [ 53.938477, 3185.543 ]], dtype=float32)),\n",
|
1181 |
-
" ('is',\n",
|
1182 |
-
" array([[ 158.64258, 3137.9502 ],\n",
|
1183 |
-
" [ 206.23535, 3137.9502 ],\n",
|
1184 |
-
" [ 206.23535, 3185.543 ],\n",
|
1185 |
-
" [ 158.64258, 3185.543 ]], dtype=float32)),\n",
|
1186 |
-
" ('official',\n",
|
1187 |
-
" array([[ 282.3838 , 3137.9502 ],\n",
|
1188 |
-
" [ 456.89062, 3137.9502 ],\n",
|
1189 |
-
" [ 456.89062, 3185.543 ],\n",
|
1190 |
-
" [ 282.3838 , 3185.543 ]], dtype=float32)),\n",
|
1191 |
-
" ('receift',\n",
|
1192 |
-
" array([[ 472.75488, 3137.9502 ],\n",
|
1193 |
-
" [ 628.2246 , 3137.9502 ],\n",
|
1194 |
-
" [ 628.2246 , 3185.543 ],\n",
|
1195 |
-
" [ 472.75488, 3185.543 ]], dtype=float32)),\n",
|
1196 |
-
" ('in',\n",
|
1197 |
-
" array([[ 222.09961, 3141.123 ],\n",
|
1198 |
-
" [ 269.69238, 3141.123 ],\n",
|
1199 |
-
" [ 269.69238, 3182.37 ],\n",
|
1200 |
-
" [ 222.09961, 3182.37 ]], dtype=float32))]]"
|
1201 |
-
]
|
1202 |
-
},
|
1203 |
-
"execution_count": 4,
|
1204 |
-
"metadata": {},
|
1205 |
-
"output_type": "execute_result"
|
1206 |
-
}
|
1207 |
-
],
|
1208 |
-
"source": [
|
1209 |
-
"pipeline.recognize([r\"temp\\20230508_122035_preprocessed.png\"])"
|
1210 |
-
]
|
1211 |
-
},
|
1212 |
-
{
|
1213 |
-
"cell_type": "code",
|
1214 |
-
"execution_count": 5,
|
1215 |
-
"metadata": {},
|
1216 |
-
"outputs": [
|
1217 |
-
{
|
1218 |
-
"name": "stdout",
|
1219 |
-
"output_type": "stream",
|
1220 |
-
"text": [
|
1221 |
-
"Requirement already satisfied: requests in c:\\users\\ayoo\\anaconda3\\envs\\mlenv\\lib\\site-packages (2.31.0)\n",
|
1222 |
-
"Requirement already satisfied: charset-normalizer<4,>=2 in c:\\users\\ayoo\\anaconda3\\envs\\mlenv\\lib\\site-packages (from requests) (3.2.0)\n",
|
1223 |
-
"Requirement already satisfied: idna<4,>=2.5 in c:\\users\\ayoo\\anaconda3\\envs\\mlenv\\lib\\site-packages (from requests) (3.4)\n",
|
1224 |
-
"Requirement already satisfied: urllib3<3,>=1.21.1 in c:\\users\\ayoo\\anaconda3\\envs\\mlenv\\lib\\site-packages (from requests) (1.26.16)\n",
|
1225 |
-
"Requirement already satisfied: certifi>=2017.4.17 in c:\\users\\ayoo\\anaconda3\\envs\\mlenv\\lib\\site-packages (from requests) (2023.7.22)\n"
|
1226 |
-
]
|
1227 |
-
}
|
1228 |
-
],
|
1229 |
-
"source": [
|
1230 |
-
"!pip install requests"
|
1231 |
-
]
|
1232 |
-
},
|
1233 |
-
{
|
1234 |
-
"cell_type": "code",
|
1235 |
-
"execution_count": 16,
|
1236 |
-
"metadata": {},
|
1237 |
-
"outputs": [
|
1238 |
-
{
|
1239 |
-
"name": "stdout",
|
1240 |
-
"output_type": "stream",
|
1241 |
-
"text": [
|
1242 |
-
"{'ParsedResults': [{'TextOverlay': {'Lines': [{'LineText': '7-ELEVEN.', 'Words': [{'WordText': '7', 'Left': 205.0, 'Top': 38.0, 'Height': 84.0, 'Width': 398.0}, {'WordText': '-', 'Left': 205.0, 'Top': 38.0, 'Height': 84.0, 'Width': 398.0}, {'WordText': 'ELEVEN', 'Left': 205.0, 'Top': 38.0, 'Height': 84.0, 'Width': 398.0}, {'WordText': '.', 'Left': 205.0, 'Top': 38.0, 'Height': 84.0, 'Width': 398.0}], 'MaxHeight': 84.0, 'MinTop': 38.0}, {'LineText': 'NHJ Convenience Store', 'Words': [{'WordText': 'NHJ', 'Left': 117.0, 'Top': 215.0, 'Height': 36.0, 'Width': 76.0}, {'WordText': 'Convenience', 'Left': 198.0, 'Top': 215.0, 'Height': 36.0, 'Width': 247.0}, {'WordText': 'Store', 'Left': 450.0, 'Top': 215.0, 'Height': 36.0, 'Width': 114.0}], 'MaxHeight': 36.0, 'MinTop': 215.0}, {'LineText': 'Owned & Operated by: Nancy A.', 'Words': [{'WordText': 'Owned', 'Left': 33.0, 'Top': 260.0, 'Height': 52.0, 'Width': 117.0}, {'WordText': '&', 'Left': 156.0, 'Top': 261.0, 'Height': 52.0, 'Width': 32.0}, {'WordText': 'Operated', 'Left': 195.0, 'Top': 261.0, 'Height': 52.0, 'Width': 182.0}, {'WordText': 'by', 'Left': 384.0, 'Top': 261.0, 'Height': 52.0, 'Width': 71.0}, {'WordText': ':', 'Left': 384.0, 'Top': 261.0, 'Height': 52.0, 'Width': 71.0}, {'WordText': 'Nancy', 'Left': 462.0, 'Top': 261.0, 'Height': 52.0, 'Width': 130.0}, {'WordText': 'A', 'Left': 598.0, 'Top': 260.0, 'Height': 52.0, 'Width': 47.0}, {'WordText': '.', 'Left': 598.0, 'Top': 260.0, 'Height': 52.0, 'Width': 47.0}], 'MaxHeight': 52.0, 'MinTop': 260.0}, {'LineText': 'Climacosa', 'Words': [{'WordText': 'Climacosa', 'Left': 244.0, 'Top': 315.0, 'Height': 38.0, 'Width': 193.0}], 'MaxHeight': 38.0, 'MinTop': 315.0}, {'LineText': 'VATREGTIN #933-598-685-002', 'Words': [{'WordText': 'VATREGTIN', 'Left': 75.0, 'Top': 361.0, 'Height': 43.0, 'Width': 204.0}, {'WordText': '#', 'Left': 285.0, 'Top': 361.0, 'Height': 43.0, 'Width': 347.0}, {'WordText': '933', 'Left': 285.0, 'Top': 361.0, 'Height': 43.0, 'Width': 347.0}, 
{'WordText': '-', 'Left': 285.0, 'Top': 361.0, 'Height': 43.0, 'Width': 347.0}, {'WordText': '598', 'Left': 285.0, 'Top': 361.0, 'Height': 43.0, 'Width': 347.0}, {'WordText': '-', 'Left': 285.0, 'Top': 361.0, 'Height': 43.0, 'Width': 347.0}, {'WordText': '685', 'Left': 285.0, 'Top': 361.0, 'Height': 43.0, 'Width': 347.0}, {'WordText': '-', 'Left': 285.0, 'Top': 361.0, 'Height': 43.0, 'Width': 347.0}, {'WordText': '002', 'Left': 285.0, 'Top': 361.0, 'Height': 43.0, 'Width': 347.0}], 'MaxHeight': 43.0, 'MinTop': 361.0}, {'LineText': 'Poblacion, Leon, Iloilo,', 'Words': [{'WordText': 'Poblacion', 'Left': 94.0, 'Top': 417.0, 'Height': 49.0, 'Width': 220.0}, {'WordText': ',', 'Left': 94.0, 'Top': 417.0, 'Height': 49.0, 'Width': 220.0}, {'WordText': 'Leon', 'Left': 321.0, 'Top': 417.0, 'Height': 49.0, 'Width': 122.0}, {'WordText': ',', 'Left': 321.0, 'Top': 417.0, 'Height': 49.0, 'Width': 122.0}, {'WordText': 'Iloilo', 'Left': 449.0, 'Top': 417.0, 'Height': 49.0, 'Width': 154.0}, {'WordText': ',', 'Left': 449.0, 'Top': 417.0, 'Height': 49.0, 'Width': 154.0}], 'MaxHeight': 49.0, 'MinTop': 417.0}, {'LineText': 'Philippines', 'Words': [{'WordText': 'Philippines', 'Left': 225.0, 'Top': 468.0, 'Height': 44.0, 'Width': 238.0}], 'MaxHeight': 44.0, 'MinTop': 468.0}, {'LineText': 'lel #: NULL', 'Words': [{'WordText': 'lel', 'Left': 221.0, 'Top': 524.0, 'Height': 40.0, 'Width': 78.0}, {'WordText': '#:', 'Left': 304.0, 'Top': 524.0, 'Height': 39.0, 'Width': 59.0}, {'WordText': 'NULL', 'Left': 368.0, 'Top': 523.0, 'Height': 40.0, 'Width': 89.0}], 'MaxHeight': 41.0, 'MinTop': 523.0}, {'LineText': '05/01/2023 (Mon) 23:00:57', 'Words': [{'WordText': '05', 'Left': 98.0, 'Top': 622.0, 'Height': 42.0, 'Width': 216.0}, {'WordText': '/', 'Left': 98.0, 'Top': 622.0, 'Height': 42.0, 'Width': 215.0}, {'WordText': '01', 'Left': 98.0, 'Top': 622.0, 'Height': 42.0, 'Width': 216.0}, {'WordText': '/', 'Left': 98.0, 'Top': 622.0, 'Height': 42.0, 'Width': 215.0}, {'WordText': '2023', 'Left': 98.0, 
'Top': 622.0, 'Height': 42.0, 'Width': 216.0}, {'WordText': '(', 'Left': 319.0, 'Top': 622.0, 'Height': 42.0, 'Width': 105.0}, {'WordText': 'Mon', 'Left': 319.0, 'Top': 622.0, 'Height': 42.0, 'Width': 105.0}, {'WordText': ')', 'Left': 319.0, 'Top': 622.0, 'Height': 42.0, 'Width': 105.0}, {'WordText': '23', 'Left': 429.0, 'Top': 622.0, 'Height': 42.0, 'Width': 181.0}, {'WordText': ':', 'Left': 429.0, 'Top': 622.0, 'Height': 42.0, 'Width': 181.0}, {'WordText': '00', 'Left': 429.0, 'Top': 622.0, 'Height': 42.0, 'Width': 181.0}, {'WordText': ':', 'Left': 429.0, 'Top': 622.0, 'Height': 42.0, 'Width': 181.0}, {'WordText': '57', 'Left': 429.0, 'Top': 622.0, 'Height': 42.0, 'Width': 181.0}], 'MaxHeight': 42.0, 'MinTop': 622.0}, {'LineText': 'RCPT #2481347', 'Words': [{'WordText': 'RCPT', 'Left': 13.0, 'Top': 723.0, 'Height': 42.0, 'Width': 94.0}, {'WordText': '#', 'Left': 113.0, 'Top': 723.0, 'Height': 42.0, 'Width': 184.0}, {'WordText': '2481347', 'Left': 113.0, 'Top': 723.0, 'Height': 42.0, 'Width': 184.0}], 'MaxHeight': 42.0, 'MinTop': 723.0}, {'LineText': 'ROPT CNTHO', 'Words': [{'WordText': 'ROPT', 'Left': 472.0, 'Top': 723.0, 'Height': 49.0, 'Width': 96.0}, {'WordText': 'CNTHO', 'Left': 574.0, 'Top': 722.0, 'Height': 49.0, 'Width': 120.0}], 'MaxHeight': 50.0, 'MinTop': 722.0}, {'LineText': 'STORE#3058', 'Words': [{'WordText': 'STORE', 'Left': 13.0, 'Top': 771.0, 'Height': 47.0, 'Width': 219.0}, {'WordText': '#', 'Left': 13.0, 'Top': 771.0, 'Height': 47.0, 'Width': 219.0}, {'WordText': '3058', 'Left': 13.0, 'Top': 771.0, 'Height': 47.0, 'Width': 219.0}], 'MaxHeight': 47.0, 'MinTop': 771.0}, {'LineText': 'SN# :XTI43170', 'Words': [{'WordText': 'SN', 'Left': 433.0, 'Top': 771.0, 'Height': 49.0, 'Width': 66.0}, {'WordText': '#', 'Left': 433.0, 'Top': 771.0, 'Height': 49.0, 'Width': 66.0}, {'WordText': ':', 'Left': 505.0, 'Top': 771.0, 'Height': 50.0, 'Width': 189.0}, {'WordText': 'XTI43170', 'Left': 505.0, 'Top': 771.0, 'Height': 50.0, 'Width': 189.0}], 'MaxHeight': 
50.0, 'MinTop': 771.0}, {'LineText': 'MIN #: 18112011091411051', 'Words': [{'WordText': 'MIN', 'Left': 13.0, 'Top': 830.0, 'Height': 39.0, 'Width': 73.0}, {'WordText': '#:', 'Left': 91.0, 'Top': 830.0, 'Height': 39.0, 'Width': 58.0}, {'WordText': '18112011091411051', 'Left': 154.0, 'Top': 830.0, 'Height': 39.0, 'Width': 360.0}], 'MaxHeight': 39.0, 'MinTop': 830.0}, {'LineText': 'STAFF: Angelica Duante', 'Words': [{'WordText': 'STAFF', 'Left': 13.0, 'Top': 879.0, 'Height': 43.0, 'Width': 124.0}, {'WordText': ':', 'Left': 13.0, 'Top': 879.0, 'Height': 43.0, 'Width': 124.0}, {'WordText': 'Angelica', 'Left': 142.0, 'Top': 879.0, 'Height': 43.0, 'Width': 177.0}, {'WordText': 'Duante', 'Left': 325.0, 'Top': 879.0, 'Height': 43.0, 'Width': 138.0}], 'MaxHeight': 43.0, 'MinTop': 879.0}, {'LineText': '7FKoreanßun', 'Words': [{'WordText': '7FKoreanßun', 'Left': 16.0, 'Top': 979.0, 'Height': 45.0, 'Width': 235.0}], 'MaxHeight': 45.0, 'MinTop': 979.0}, {'LineText': 'NissinYaSaBeet77g', 'Words': [{'WordText': 'NissinYaSaBeet77g', 'Left': 13.0, 'Top': 1032.0, 'Height': 42.0, 'Width': 365.0}], 'MaxHeight': 42.0, 'MinTop': 1032.0}, {'LineText': 'BBHOTDOGCREMYCHEES', 'Words': [{'WordText': 'BBHOTDOGCREMYCHEES', 'Left': 13.0, 'Top': 1084.0, 'Height': 39.0, 'Width': 384.0}], 'MaxHeight': 39.0, 'MinTop': 1084.0}, {'LineText': '39.00 Х 6', 'Words': [{'WordText': '39.00', 'Left': 140.0, 'Top': 1136.0, 'Height': 43.0, 'Width': 116.0}, {'WordText': 'Х', 'Left': 261.0, 'Top': 1136.0, 'Height': 43.0, 'Width': 100.0}, {'WordText': '6', 'Left': 366.0, 'Top': 1135.0, 'Height': 42.0, 'Width': 29.0}], 'MaxHeight': 44.0, 'MinTop': 1135.0}, {'LineText': 'chocvron? 
in1Ch020g', 'Words': [{'WordText': 'chocvron', 'Left': 13.0, 'Top': 1185.0, 'Height': 43.0, 'Width': 193.0}, {'WordText': '?', 'Left': 13.0, 'Top': 1185.0, 'Height': 43.0, 'Width': 193.0}, {'WordText': 'in1Ch020g', 'Left': 212.0, 'Top': 1185.0, 'Height': 43.0, 'Width': 186.0}], 'MaxHeight': 43.0, 'MinTop': 1185.0}, {'LineText': '15.00 X', 'Words': [{'WordText': '15.00', 'Left': 140.0, 'Top': 1240.0, 'Height': 43.0, 'Width': 116.0}, {'WordText': 'X', 'Left': 261.0, 'Top': 1240.0, 'Height': 42.0, 'Width': 42.0}], 'MaxHeight': 43.0, 'MinTop': 1240.0}, {'LineText': '2', 'Words': [{'WordText': '2', 'Left': 355.0, 'Top': 1240.0, 'Height': 39.0, 'Width': 39.0}], 'MaxHeight': 39.0, 'MinTop': 1240.0}, {'LineText': '55.004', 'Words': [{'WordText': '55.004', 'Left': 557.0, 'Top': 979.0, 'Height': 47.0, 'Width': 137.0}], 'MaxHeight': 47.0, 'MinTop': 979.0}, {'LineText': '40.000', 'Words': [{'WordText': '40.000', 'Left': 560.0, 'Top': 1031.0, 'Height': 48.0, 'Width': 134.0}], 'MaxHeight': 48.0, 'MinTop': 1031.0}, {'LineText': '234.000', 'Words': [{'WordText': '234.000', 'Left': 534.0, 'Top': 1135.0, 'Height': 47.0, 'Width': 160.0}], 'MaxHeight': 47.0, 'MinTop': 1135.0}, {'LineText': '30.000', 'Words': [{'WordText': '30.000', 'Left': 557.0, 'Top': 1237.0, 'Height': 46.0, 'Width': 137.0}], 'MaxHeight': 46.0, 'MinTop': 1237.0}, {'LineText': 'Total (10)', 'Words': [{'WordText': 'Total', 'Left': 13.0, 'Top': 1340.0, 'Height': 44.0, 'Width': 121.0}, {'WordText': '(', 'Left': 139.0, 'Top': 1342.0, 'Height': 44.0, 'Width': 86.0}, {'WordText': '10', 'Left': 139.0, 'Top': 1342.0, 'Height': 44.0, 'Width': 86.0}, {'WordText': ')', 'Left': 139.0, 'Top': 1342.0, 'Height': 44.0, 'Width': 86.0}], 'MaxHeight': 46.0, 'MinTop': 1340.0}, {'LineText': 'CASH', 'Words': [{'WordText': 'CASH', 'Left': 55.0, 'Top': 1390.0, 'Height': 43.0, 'Width': 91.0}], 'MaxHeight': 43.0, 'MinTop': 1390.0}, {'LineText': 'CHANGE', 'Words': [{'WordText': 'CHANGE', 'Left': 52.0, 'Top': 1442.0, 'Height': 43.0, 'Width': 
137.0}], 'MaxHeight': 43.0, 'MinTop': 1442.0}, {'LineText': '359.00', 'Words': [{'WordText': '359.00', 'Left': 557.0, 'Top': 1341.0, 'Height': 47.0, 'Width': 137.0}], 'MaxHeight': 47.0, 'MinTop': 1341.0}, {'LineText': '1000.00', 'Words': [{'WordText': '1000.00', 'Left': 537.0, 'Top': 1389.0, 'Height': 48.0, 'Width': 154.0}], 'MaxHeight': 48.0, 'MinTop': 1389.0}, {'LineText': '641.00', 'Words': [{'WordText': '641.00', 'Left': 557.0, 'Top': 1442.0, 'Height': 46.0, 'Width': 134.0}], 'MaxHeight': 46.0, 'MinTop': 1442.0}, {'LineText': 'VATable', 'Words': [{'WordText': 'VATable', 'Left': 52.0, 'Top': 1546.0, 'Height': 40.0, 'Width': 157.0}], 'MaxHeight': 40.0, 'MinTop': 1546.0}, {'LineText': 'VAT_Tax', 'Words': [{'WordText': 'VAT_Tax', 'Left': 52.0, 'Top': 1598.0, 'Height': 50.0, 'Width': 157.0}], 'MaxHeight': 50.0, 'MinTop': 1598.0}, {'LineText': 'Zero_Rated', 'Words': [{'WordText': 'Zero_Rated', 'Left': 52.0, 'Top': 1649.0, 'Height': 48.0, 'Width': 219.0}], 'MaxHeight': 48.0, 'MinTop': 1649.0}, {'LineText': 'VAT_Exempted', 'Words': [{'WordText': 'VAT_Exempted', 'Left': 52.0, 'Top': 1699.0, 'Height': 50.0, 'Width': 264.0}], 'MaxHeight': 50.0, 'MinTop': 1699.0}, {'LineText': '320.54', 'Words': [{'WordText': '320.54', 'Left': 557.0, 'Top': 1546.0, 'Height': 46.0, 'Width': 134.0}], 'MaxHeight': 46.0, 'MinTop': 1546.0}, {'LineText': '38.46', 'Words': [{'WordText': '38.46', 'Left': 577.0, 'Top': 1598.0, 'Height': 43.0, 'Width': 114.0}], 'MaxHeight': 43.0, 'MinTop': 1598.0}, {'LineText': '0.00', 'Words': [{'WordText': '0.00', 'Left': 600.0, 'Top': 1651.0, 'Height': 42.0, 'Width': 91.0}], 'MaxHeight': 42.0, 'MinTop': 1651.0}, {'LineText': '0.00', 'Words': [{'WordText': '0.00', 'Left': 599.0, 'Top': 1702.0, 'Height': 43.0, 'Width': 95.0}], 'MaxHeight': 43.0, 'MinTop': 1702.0}, {'LineText': 'Sold To: 9906087698684', 'Words': [{'WordText': 'Sold', 'Left': 13.0, 'Top': 1803.0, 'Height': 42.0, 'Width': 94.0}, {'WordText': 'To', 'Left': 113.0, 'Top': 1803.0, 'Height': 42.0, 'Width': 
79.0}, {'WordText': ':', 'Left': 113.0, 'Top': 1803.0, 'Height': 42.0, 'Width': 79.0}, {'WordText': '9906087698684', 'Left': 197.0, 'Top': 1803.0, 'Height': 42.0, 'Width': 285.0}], 'MaxHeight': 42.0, 'MinTop': 1803.0}, {'LineText': 'Name:', 'Words': [{'WordText': 'Name', 'Left': 10.0, 'Top': 1856.0, 'Height': 39.0, 'Width': 111.0}, {'WordText': ':', 'Left': 10.0, 'Top': 1856.0, 'Height': 39.0, 'Width': 111.0}], 'MaxHeight': 39.0, 'MinTop': 1856.0}, {'LineText': 'Address:', 'Words': [{'WordText': 'Address', 'Left': 13.0, 'Top': 1907.0, 'Height': 40.0, 'Width': 170.0}, {'WordText': ':', 'Left': 13.0, 'Top': 1907.0, 'Height': 40.0, 'Width': 170.0}], 'MaxHeight': 40.0, 'MinTop': 1907.0}, {'LineText': 'TIN:', 'Words': [{'WordText': 'TIN', 'Left': 13.0, 'Top': 1957.0, 'Height': 39.0, 'Width': 85.0}, {'WordText': ':', 'Left': 13.0, 'Top': 1957.0, 'Height': 39.0, 'Width': 85.0}], 'MaxHeight': 39.0, 'MinTop': 1957.0}, {'LineText': 'Philippine Seven Corporation', 'Words': [{'WordText': 'Philippine', 'Left': 10.0, 'Top': 2060.0, 'Height': 43.0, 'Width': 226.0}, {'WordText': 'Seven', 'Left': 241.0, 'Top': 2060.0, 'Height': 43.0, 'Width': 118.0}, {'WordText': 'Corporation', 'Left': 365.0, 'Top': 2060.0, 'Height': 43.0, 'Width': 241.0}], 'MaxHeight': 43.0, 'MinTop': 2060.0}, {'LineText': '7th Floor The Columbia Tower', 'Words': [{'WordText': '7th', 'Left': 13.0, 'Top': 2116.0, 'Height': 36.0, 'Width': 72.0}, {'WordText': 'Floor', 'Left': 90.0, 'Top': 2116.0, 'Height': 36.0, 'Width': 126.0}, {'WordText': 'The', 'Left': 220.0, 'Top': 2116.0, 'Height': 36.0, 'Width': 76.0}, {'WordText': 'Columbia', 'Left': 301.0, 'Top': 2116.0, 'Height': 36.0, 'Width': 189.0}, {'WordText': 'Tower', 'Left': 495.0, 'Top': 2116.0, 'Height': 36.0, 'Width': 108.0}], 'MaxHeight': 36.0, 'MinTop': 2116.0}, {'LineText': 'Ortigas Avenue, Mandaluyong', 'Words': [{'WordText': 'Ortigas', 'Left': 33.0, 'Top': 2161.0, 'Height': 49.0, 'Width': 154.0}, {'WordText': 'Avenue', 'Left': 192.0, 'Top': 2161.0, 'Height': 
49.0, 'Width': 159.0}, {'WordText': ',', 'Left': 192.0, 'Top': 2161.0, 'Height': 49.0, 'Width': 159.0}, {'WordText': 'Mandaluyong', 'Left': 358.0, 'Top': 2161.0, 'Height': 49.0, 'Width': 248.0}], 'MaxHeight': 49.0, 'MinTop': 2161.0}, {'LineText': 'City', 'Words': [{'WordText': 'City', 'Left': 29.0, 'Top': 2214.0, 'Height': 42.0, 'Width': 94.0}], 'MaxHeight': 42.0, 'MinTop': 2214.0}, {'LineText': 'TIN: 000-390-189-000', 'Words': [{'WordText': 'TIN', 'Left': 13.0, 'Top': 2266.0, 'Height': 46.0, 'Width': 90.0}, {'WordText': ':', 'Left': 13.0, 'Top': 2266.0, 'Height': 46.0, 'Width': 90.0}, {'WordText': '000', 'Left': 109.0, 'Top': 2266.0, 'Height': 46.0, 'Width': 331.0}, {'WordText': '-', 'Left': 109.0, 'Top': 2266.0, 'Height': 46.0, 'Width': 331.0}, {'WordText': '390', 'Left': 109.0, 'Top': 2266.0, 'Height': 46.0, 'Width': 331.0}, {'WordText': '-', 'Left': 109.0, 'Top': 2266.0, 'Height': 46.0, 'Width': 331.0}, {'WordText': '189', 'Left': 109.0, 'Top': 2266.0, 'Height': 46.0, 'Width': 331.0}, {'WordText': '-', 'Left': 109.0, 'Top': 2266.0, 'Height': 46.0, 'Width': 331.0}, {'WordText': '000', 'Left': 109.0, 'Top': 2266.0, 'Height': 46.0, 'Width': 331.0}], 'MaxHeight': 46.0, 'MinTop': 2266.0}, {'LineText': 'BIR ACCI #', 'Words': [{'WordText': 'BIR', 'Left': 10.0, 'Top': 2318.0, 'Height': 39.0, 'Width': 78.0}, {'WordText': 'ACCI', 'Left': 93.0, 'Top': 2318.0, 'Height': 39.0, 'Width': 98.0}, {'WordText': '#', 'Left': 195.0, 'Top': 2318.0, 'Height': 39.0, 'Width': 30.0}], 'MaxHeight': 40.0, 'MinTop': 2318.0}, {'LineText': '116-000390189-000346 19602', 'Words': [{'WordText': '116', 'Left': 33.0, 'Top': 2366.0, 'Height': 43.0, 'Width': 430.0}, {'WordText': '-', 'Left': 33.0, 'Top': 2366.0, 'Height': 43.0, 'Width': 430.0}, {'WordText': '000390189', 'Left': 33.0, 'Top': 2366.0, 'Height': 43.0, 'Width': 430.0}, {'WordText': '-', 'Left': 33.0, 'Top': 2366.0, 'Height': 43.0, 'Width': 430.0}, {'WordText': '000346', 'Left': 33.0, 'Top': 2366.0, 'Height': 43.0, 'Width': 430.0}, 
{'WordText': '19602', 'Left': 468.0, 'Top': 2366.0, 'Height': 43.0, 'Width': 118.0}], 'MaxHeight': 43.0, 'MinTop': 2366.0}, {'LineText': 'AcciDate: 08/01/2020', 'Words': [{'WordText': 'AcciDate', 'Left': 13.0, 'Top': 2419.0, 'Height': 42.0, 'Width': 194.0}, {'WordText': ':', 'Left': 13.0, 'Top': 2419.0, 'Height': 42.0, 'Width': 194.0}, {'WordText': '08', 'Left': 212.0, 'Top': 2419.0, 'Height': 42.0, 'Width': 266.0}, {'WordText': '/', 'Left': 213.0, 'Top': 2419.0, 'Height': 42.0, 'Width': 266.0}, {'WordText': '01', 'Left': 212.0, 'Top': 2419.0, 'Height': 42.0, 'Width': 266.0}, {'WordText': '/', 'Left': 213.0, 'Top': 2419.0, 'Height': 42.0, 'Width': 266.0}, {'WordText': '2020', 'Left': 212.0, 'Top': 2419.0, 'Height': 42.0, 'Width': 266.0}], 'MaxHeight': 42.0, 'MinTop': 2419.0}, {'LineText': '07/31/2025', 'Words': [{'WordText': '07', 'Left': 29.0, 'Top': 2470.0, 'Height': 44.0, 'Width': 219.0}, {'WordText': '/', 'Left': 29.0, 'Top': 2470.0, 'Height': 44.0, 'Width': 219.0}, {'WordText': '31', 'Left': 29.0, 'Top': 2470.0, 'Height': 44.0, 'Width': 219.0}, {'WordText': '/', 'Left': 29.0, 'Top': 2470.0, 'Height': 44.0, 'Width': 219.0}, {'WordText': '2025', 'Left': 29.0, 'Top': 2470.0, 'Height': 44.0, 'Width': 219.0}], 'MaxHeight': 44.0, 'MinTop': 2470.0}, {'LineText': 'Permit #:', 'Words': [{'WordText': 'Permit', 'Left': 10.0, 'Top': 2526.0, 'Height': 40.0, 'Width': 142.0}, {'WordText': '#:', 'Left': 156.0, 'Top': 2527.0, 'Height': 39.0, 'Width': 46.0}], 'MaxHeight': 40.0, 'MinTop': 2526.0}, {'LineText': 'FP112018-074-0194656-00002', 'Words': [{'WordText': 'FP112018', 'Left': 33.0, 'Top': 2572.0, 'Height': 39.0, 'Width': 554.0}, {'WordText': '-', 'Left': 33.0, 'Top': 2572.0, 'Height': 39.0, 'Width': 554.0}, {'WordText': '074', 'Left': 33.0, 'Top': 2572.0, 'Height': 39.0, 'Width': 554.0}, {'WordText': '-', 'Left': 33.0, 'Top': 2572.0, 'Height': 39.0, 'Width': 554.0}, {'WordText': '0194656', 'Left': 33.0, 'Top': 2572.0, 'Height': 39.0, 'Width': 554.0}, {'WordText': '-', 
'Left': 33.0, 'Top': 2572.0, 'Height': 39.0, 'Width': 554.0}, {'WordText': '00002', 'Left': 33.0, 'Top': 2572.0, 'Height': 39.0, 'Width': 554.0}], 'MaxHeight': 39.0, 'MinTop': 2572.0}, {'LineText': 'Get a chance to win a trip for', 'Words': [{'WordText': 'Get', 'Left': 29.0, 'Top': 2679.0, 'Height': 39.0, 'Width': 78.0}, {'WordText': 'a', 'Left': 112.0, 'Top': 2679.0, 'Height': 39.0, 'Width': 34.0}, {'WordText': 'chance', 'Left': 151.0, 'Top': 2679.0, 'Height': 39.0, 'Width': 146.0}, {'WordText': 'to', 'Left': 302.0, 'Top': 2679.0, 'Height': 39.0, 'Width': 54.0}, {'WordText': 'win', 'Left': 361.0, 'Top': 2679.0, 'Height': 39.0, 'Width': 83.0}, {'WordText': 'a', 'Left': 448.0, 'Top': 2679.0, 'Height': 39.0, 'Width': 34.0}, {'WordText': 'trip', 'Left': 487.0, 'Top': 2679.0, 'Height': 39.0, 'Width': 107.0}, {'WordText': 'for', 'Left': 599.0, 'Top': 2679.0, 'Height': 39.0, 'Width': 69.0}], 'MaxHeight': 39.0, 'MinTop': 2679.0}, {'LineText': '2 to Korea when you buy PISO', 'Words': [{'WordText': '2', 'Left': 52.0, 'Top': 2731.0, 'Height': 39.0, 'Width': 34.0}, {'WordText': 'to', 'Left': 91.0, 'Top': 2731.0, 'Height': 39.0, 'Width': 58.0}, {'WordText': 'Korea', 'Left': 154.0, 'Top': 2731.0, 'Height': 39.0, 'Width': 122.0}, {'WordText': 'when', 'Left': 281.0, 'Top': 2731.0, 'Height': 39.0, 'Width': 102.0}, {'WordText': 'you', 'Left': 388.0, 'Top': 2731.0, 'Height': 39.0, 'Width': 78.0}, {'WordText': 'buy', 'Left': 471.0, 'Top': 2731.0, 'Height': 39.0, 'Width': 78.0}, {'WordText': 'PISO', 'Left': 554.0, 'Top': 2731.0, 'Height': 39.0, 'Width': 91.0}], 'MaxHeight': 39.0, 'MinTop': 2731.0}, {'LineText': 'worth of 7-Eleven items. 
Earn', 'Words': [{'WordText': 'worth', 'Left': 29.0, 'Top': 2783.0, 'Height': 49.0, 'Width': 116.0}, {'WordText': 'of', 'Left': 152.0, 'Top': 2783.0, 'Height': 49.0, 'Width': 61.0}, {'WordText': '7', 'Left': 219.0, 'Top': 2783.0, 'Height': 49.0, 'Width': 184.0}, {'WordText': '-', 'Left': 219.0, 'Top': 2783.0, 'Height': 49.0, 'Width': 184.0}, {'WordText': 'Eleven', 'Left': 219.0, 'Top': 2783.0, 'Height': 49.0, 'Width': 184.0}, {'WordText': 'items', 'Left': 409.0, 'Top': 2783.0, 'Height': 49.0, 'Width': 135.0}, {'WordText': '.', 'Left': 409.0, 'Top': 2783.0, 'Height': 49.0, 'Width': 135.0}, {'WordText': 'Earn', 'Left': 550.0, 'Top': 2783.0, 'Height': 49.0, 'Width': 99.0}], 'MaxHeight': 49.0, 'MinTop': 2783.0}, {'LineText': '3 eRaffle entries when you buy', 'Words': [{'WordText': '3', 'Left': 29.0, 'Top': 2832.0, 'Height': 39.0, 'Width': 34.0}, {'WordText': 'eRaffle', 'Left': 68.0, 'Top': 2832.0, 'Height': 39.0, 'Width': 166.0}, {'WordText': 'entries', 'Left': 239.0, 'Top': 2832.0, 'Height': 39.0, 'Width': 161.0}, {'WordText': 'when', 'Left': 404.0, 'Top': 2832.0, 'Height': 39.0, 'Width': 102.0}, {'WordText': 'you', 'Left': 512.0, 'Top': 2832.0, 'Height': 39.0, 'Width': 78.0}, {'WordText': 'buy', 'Left': 594.0, 'Top': 2832.0, 'Height': 39.0, 'Width': 70.0}], 'MaxHeight': 39.0, 'MinTop': 2832.0}, {'LineText': 'discounted booster Items. 
Per', 'Words': [{'WordText': 'discounted', 'Left': 33.0, 'Top': 2888.0, 'Height': 42.0, 'Width': 221.0}, {'WordText': 'booster', 'Left': 259.0, 'Top': 2888.0, 'Height': 42.0, 'Width': 168.0}, {'WordText': 'Items', 'Left': 432.0, 'Top': 2888.0, 'Height': 42.0, 'Width': 137.0}, {'WordText': '.', 'Left': 432.0, 'Top': 2888.0, 'Height': 42.0, 'Width': 137.0}, {'WordText': 'Per', 'Left': 574.0, 'Top': 2888.0, 'Height': 42.0, 'Width': 68.0}], 'MaxHeight': 42.0, 'MinTop': 2888.0}, {'LineText': 'DTI FAIR TRADE Permit Number:', 'Words': [{'WordText': 'DTI', 'Left': 29.0, 'Top': 2933.0, 'Height': 42.0, 'Width': 78.0}, {'WordText': 'FAIR', 'Left': 113.0, 'Top': 2933.0, 'Height': 42.0, 'Width': 100.0}, {'WordText': 'TRADE', 'Left': 218.0, 'Top': 2933.0, 'Height': 42.0, 'Width': 121.0}, {'WordText': 'Permit', 'Left': 344.0, 'Top': 2933.0, 'Height': 42.0, 'Width': 142.0}, {'WordText': 'Number', 'Left': 491.0, 'Top': 2933.0, 'Height': 42.0, 'Width': 151.0}, {'WordText': ':', 'Left': 491.0, 'Top': 2933.0, 'Height': 42.0, 'Width': 151.0}], 'MaxHeight': 42.0, 'MinTop': 2933.0}, {'LineText': '163019 Series of 2023..', 'Words': [{'WordText': '163019', 'Left': 117.0, 'Top': 2988.0, 'Height': 40.0, 'Width': 135.0}, {'WordText': 'Series', 'Left': 257.0, 'Top': 2988.0, 'Height': 40.0, 'Width': 145.0}, {'WordText': 'of', 'Left': 407.0, 'Top': 2988.0, 'Height': 40.0, 'Width': 55.0}, {'WordText': '2023', 'Left': 467.0, 'Top': 2988.0, 'Height': 40.0, 'Width': 165.0}, {'WordText': '..', 'Left': 467.0, 'Top': 2988.0, 'Height': 40.0, 'Width': 165.0}], 'MaxHeight': 40.0, 'MinTop': 2988.0}, {'LineText': 'facebook.com/711philippines.', 'Words': [{'WordText': 'facebook.com', 'Left': 52.0, 'Top': 3037.0, 'Height': 42.0, 'Width': 590.0}, {'WordText': '/', 'Left': 52.0, 'Top': 3037.0, 'Height': 42.0, 'Width': 590.0}, {'WordText': '711philippines', 'Left': 52.0, 'Top': 3037.0, 'Height': 42.0, 'Width': 590.0}, {'WordText': '.', 'Left': 52.0, 'Top': 3037.0, 'Height': 42.0, 'Width': 590.0}], 'MaxHeight': 
42.0, 'MinTop': 3037.0}, {'LineText': '- THIS IS AN OFFICIAL RECEIPT -', 'Words': [{'WordText': '-', 'Left': 0.0, 'Top': 3138.0, 'Height': 46.0, 'Width': 46.0}, {'WordText': 'THIS', 'Left': 52.0, 'Top': 3138.0, 'Height': 46.0, 'Width': 98.0}, {'WordText': 'IS', 'Left': 155.0, 'Top': 3138.0, 'Height': 46.0, 'Width': 57.0}, {'WordText': 'AN', 'Left': 219.0, 'Top': 3138.0, 'Height': 46.0, 'Width': 52.0}, {'WordText': 'OFFICIAL', 'Left': 276.0, 'Top': 3138.0, 'Height': 46.0, 'Width': 184.0}, {'WordText': 'RECEIPT', 'Left': 466.0, 'Top': 3138.0, 'Height': 46.0, 'Width': 167.0}, {'WordText': '-', 'Left': 638.0, 'Top': 3138.0, 'Height': 46.0, 'Width': 30.0}], 'MaxHeight': 46.0, 'MinTop': 3138.0}], 'HasOverlay': True}, 'TextOrientation': '0', 'FileParseExitCode': 1, 'ParsedText': '7-ELEVEN.\\nNHJ Convenience Store\\nOwned & Operated by: Nancy A.\\nClimacosa\\nVATREGTIN #933-598-685-002\\nPoblacion, Leon, Iloilo,\\nPhilippines\\nlel #: NULL\\n05/01/2023 (Mon) 23:00:57\\nRCPT #2481347\\nROPT CNTHO\\nSTORE#3058\\nSN# :XTI43170\\nMIN #: 18112011091411051\\nSTAFF: Angelica Duante\\n7FKoreanßun\\nNissinYaSaBeet77g\\nBBHOTDOGCREMYCHEES\\n39.00 Х 6\\nchocvron? in1Ch020g\\n15.00 X\\n2\\n55.004\\n40.000\\n234.000\\n30.000\\nTotal (10)\\nCASH\\nCHANGE\\n359.00\\n1000.00\\n641.00\\nVATable\\nVAT_Tax\\nZero_Rated\\nVAT_Exempted\\n320.54\\n38.46\\n0.00\\n0.00\\nSold To: 9906087698684\\nName:\\nAddress:\\nTIN:\\nPhilippine Seven Corporation\\n7th Floor The Columbia Tower\\nOrtigas Avenue, Mandaluyong\\nCity\\nTIN: 000-390-189-000\\nBIR ACCI #\\n116-000390189-000346 19602\\nAcciDate: 08/01/2020\\n07/31/2025\\nPermit #:\\nFP112018-074-0194656-00002\\nGet a chance to win a trip for\\n2 to Korea when you buy PISO\\nworth of 7-Eleven items. Earn\\n3 eRaffle entries when you buy\\ndiscounted booster Items. 
Per\\nDTI FAIR TRADE Permit Number:\\n163019 Series of 2023..\\nfacebook.com/711philippines.\\n- THIS IS AN OFFICIAL RECEIPT -', 'ErrorMessage': '', 'ErrorDetails': ''}], 'OCRExitCode': 1, 'IsErroredOnProcessing': False, 'ProcessingTimeInMilliseconds': '2593', 'SearchablePDFURL': 'Searchable PDF not generated as it was not requested.'}\n"
|
1243 |
-
]
|
1244 |
-
}
|
1245 |
-
],
|
1246 |
-
"source": [
|
1247 |
-
"# Import requests library\n",
|
1248 |
-
"import requests\n",
|
1249 |
-
"\n",
|
1250 |
-
"# Define the OCR API endpoint\n",
|
1251 |
-
"url = \"https://api.ocr.space/parse/image\"\n",
|
1252 |
-
"\n",
|
1253 |
-
"# Define the API key and the language\n",
|
1254 |
-
"api_key = \"K88232854988957\"\n",
|
1255 |
-
"language = \"eng\"\n",
|
1256 |
-
"\n",
|
1257 |
-
"# Define the image file path\n",
|
1258 |
-
"image_file = r\"C:\\Users\\Ayoo\\Desktop\\webapp\\predictions\\imgs\\20230508_122035.jpg\"\n",
|
1259 |
-
"\n",
|
1260 |
-
"# Open the image file as binary\n",
|
1261 |
-
"with open(image_file, \"rb\") as f:\n",
|
1262 |
-
" # Define the payload for the API request\n",
|
1263 |
-
" payload = {\n",
|
1264 |
-
" \"apikey\": api_key,\n",
|
1265 |
-
" \"language\": language,\n",
|
1266 |
-
" \"isOverlayRequired\": True, # Optional, set to True if you want the coordinates of the words\n",
|
1267 |
-
" \"OCREngine\": 2 # OCR Engine 2 for Layoutlmv3\n",
|
1268 |
-
" }\n",
|
1269 |
-
" # Define the file parameter for the API request\n",
|
1270 |
-
" file = {\n",
|
1271 |
-
" \"file\": f\n",
|
1272 |
-
" }\n",
|
1273 |
-
" # Send the POST request to the OCR API\n",
|
1274 |
-
" response = requests.post(url, data=payload, files=file)\n",
|
1275 |
-
"\n",
|
1276 |
-
"# Check the status code of the response\n",
|
1277 |
-
"if response.status_code == 200:\n",
|
1278 |
-
" # Parse the JSON response\n",
|
1279 |
-
" result = response.json()\n",
|
1280 |
-
" # Print the parsed text\n",
|
1281 |
-
" print(result)\n",
|
1282 |
-
"else:\n",
|
1283 |
-
" # Print the error message\n",
|
1284 |
-
" print(\"Error: \" + response.text)\n"
|
1285 |
-
]
|
1286 |
-
},
|
1287 |
-
{
|
1288 |
-
"cell_type": "code",
|
1289 |
-
"execution_count": 13,
|
1290 |
-
"metadata": {},
|
1291 |
-
"outputs": [
|
1292 |
-
{
|
1293 |
-
"ename": "TypeError",
|
1294 |
-
"evalue": "Object of type Response is not JSON serializable",
|
1295 |
-
"output_type": "error",
|
1296 |
-
"traceback": [
|
1297 |
-
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
1298 |
-
"\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)",
|
1299 |
-
"Cell \u001b[1;32mIn[13], line 4\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mjson\u001b[39;00m\n\u001b[0;32m 3\u001b[0m \u001b[38;5;66;03m# Assuming 'response' is the JSON response from the OCR API\u001b[39;00m\n\u001b[1;32m----> 4\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[43mjson\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdumps\u001b[49m\u001b[43m(\u001b[49m\u001b[43mresponse\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mindent\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m4\u001b[39;49m\u001b[43m)\u001b[49m)\n",
|
1300 |
-
"File \u001b[1;32mc:\\Users\\Ayoo\\anaconda3\\envs\\mlenv\\Lib\\json\\__init__.py:238\u001b[0m, in \u001b[0;36mdumps\u001b[1;34m(obj, skipkeys, ensure_ascii, check_circular, allow_nan, cls, indent, separators, default, sort_keys, **kw)\u001b[0m\n\u001b[0;32m 232\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mcls\u001b[39m \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m 233\u001b[0m \u001b[38;5;28mcls\u001b[39m \u001b[38;5;241m=\u001b[39m JSONEncoder\n\u001b[0;32m 234\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[43m(\u001b[49m\n\u001b[0;32m 235\u001b[0m \u001b[43m \u001b[49m\u001b[43mskipkeys\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mskipkeys\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mensure_ascii\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mensure_ascii\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 236\u001b[0m \u001b[43m \u001b[49m\u001b[43mcheck_circular\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcheck_circular\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mallow_nan\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mallow_nan\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mindent\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mindent\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 237\u001b[0m \u001b[43m \u001b[49m\u001b[43mseparators\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mseparators\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdefault\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdefault\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msort_keys\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msort_keys\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m--> 238\u001b[0m \u001b[43m 
\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkw\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mencode\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobj\u001b[49m\u001b[43m)\u001b[49m\n",
|
1301 |
-
"File \u001b[1;32mc:\\Users\\Ayoo\\anaconda3\\envs\\mlenv\\Lib\\json\\encoder.py:202\u001b[0m, in \u001b[0;36mJSONEncoder.encode\u001b[1;34m(self, o)\u001b[0m\n\u001b[0;32m 200\u001b[0m chunks \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39miterencode(o, _one_shot\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[0;32m 201\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(chunks, (\u001b[38;5;28mlist\u001b[39m, \u001b[38;5;28mtuple\u001b[39m)):\n\u001b[1;32m--> 202\u001b[0m chunks \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlist\u001b[39m(chunks)\n\u001b[0;32m 203\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;241m.\u001b[39mjoin(chunks)\n",
|
1302 |
-
"File \u001b[1;32mc:\\Users\\Ayoo\\anaconda3\\envs\\mlenv\\Lib\\json\\encoder.py:439\u001b[0m, in \u001b[0;36m_make_iterencode.<locals>._iterencode\u001b[1;34m(o, _current_indent_level)\u001b[0m\n\u001b[0;32m 437\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCircular reference detected\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 438\u001b[0m markers[markerid] \u001b[38;5;241m=\u001b[39m o\n\u001b[1;32m--> 439\u001b[0m o \u001b[38;5;241m=\u001b[39m \u001b[43m_default\u001b[49m\u001b[43m(\u001b[49m\u001b[43mo\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 440\u001b[0m \u001b[38;5;28;01myield from\u001b[39;00m _iterencode(o, _current_indent_level)\n\u001b[0;32m 441\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m markers \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n",
|
1303 |
-
"File \u001b[1;32mc:\\Users\\Ayoo\\anaconda3\\envs\\mlenv\\Lib\\json\\encoder.py:180\u001b[0m, in \u001b[0;36mJSONEncoder.default\u001b[1;34m(self, o)\u001b[0m\n\u001b[0;32m 161\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdefault\u001b[39m(\u001b[38;5;28mself\u001b[39m, o):\n\u001b[0;32m 162\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Implement this method in a subclass such that it returns\u001b[39;00m\n\u001b[0;32m 163\u001b[0m \u001b[38;5;124;03m a serializable object for ``o``, or calls the base implementation\u001b[39;00m\n\u001b[0;32m 164\u001b[0m \u001b[38;5;124;03m (to raise a ``TypeError``).\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 178\u001b[0m \n\u001b[0;32m 179\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m--> 180\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mObject of type \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mo\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m \u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m 181\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mis not JSON serializable\u001b[39m\u001b[38;5;124m'\u001b[39m)\n",
|
1304 |
-
"\u001b[1;31mTypeError\u001b[0m: Object of type Response is not JSON serializable"
|
1305 |
-
]
|
1306 |
-
}
|
1307 |
-
],
|
1308 |
-
"source": [
|
1309 |
-
"import json\n",
|
1310 |
-
"\n",
|
1311 |
-
"# Assuming 'response' is the JSON response from the OCR API\n",
|
1312 |
-
"print(json.dumps(response, indent=4))\n"
|
1313 |
-
]
|
1314 |
-
}
|
1315 |
-
],
|
1316 |
-
"metadata": {
|
1317 |
-
"kernelspec": {
|
1318 |
-
"display_name": "mlenv",
|
1319 |
-
"language": "python",
|
1320 |
-
"name": "python3"
|
1321 |
-
},
|
1322 |
-
"language_info": {
|
1323 |
-
"codemirror_mode": {
|
1324 |
-
"name": "ipython",
|
1325 |
-
"version": 3
|
1326 |
-
},
|
1327 |
-
"file_extension": ".py",
|
1328 |
-
"mimetype": "text/x-python",
|
1329 |
-
"name": "python",
|
1330 |
-
"nbconvert_exporter": "python",
|
1331 |
-
"pygments_lexer": "ipython3",
|
1332 |
-
"version": "3.11.5"
|
1333 |
-
}
|
1334 |
-
},
|
1335 |
-
"nbformat": 4,
|
1336 |
-
"nbformat_minor": 2
|
1337 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
inferenced/csv_files/Output_0.csv
DELETED
@@ -1,4 +0,0 @@
|
|
1 |
-
RECEIPTNUMBER,MERCHANTNAME,MERCHANTADDRESS,TRANSACTIONDATE,TRANSACTIONTIME,ITEMS,PRICE,TOTAL,VATTAX
|
2 |
-
# 2480507,7 - ELEVEN �,Poblacion . Leon . Iloilo . Philippines,04 / 30 / 2023 ( Surt ),17 : 13 : 00,C26rnTeaLemon500ml,39.000,88.00,9.43
|
3 |
-
# 2480507,7 - ELEVEN �,Poblacion . Leon . Iloilo . Philippines,04 / 30 / 2023 ( Surt ),17 : 13 : 00,COBRENRGYORNK350ML,28.000,88.00,9.43
|
4 |
-
# 2480507,7 - ELEVEN �,Poblacion . Leon . Iloilo . Philippines,04 / 30 / 2023 ( Surt ),17 : 13 : 00,OTSHIBMPFRSPLNS50G,21.000,88.00,9.43
|
|
|
|
|
|
|
|
|
|
inferenced/csv_files/Output_1.csv
DELETED
@@ -1,2 +0,0 @@
|
|
1 |
-
RECEIPTNUMBER,MERCHANTNAME,MERCHANTADDRESS,TRANSACTIONDATE,TRANSACTIONTIME,ITEMS,PRICE,TOTAL,VATTAX
|
2 |
-
01053710,Iloilo Grace Pharmacy,C & J Building Jalandoni Extension Bolilao,08 / 12 / 2023,10 : 07,PharmtonEsentialCao,23.75,23 - 75,
|
|
|
|
|
|
inferenced/csv_files/Output_2.csv
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
RECEIPTNUMBER,MERCHANTNAME,MERCHANTADDRESS,TRANSACTIONDATE,TRANSACTIONTIME,ITEMS,PRICE,TOTAL,VATTAX
|
2 |
-
# 1457229,7 - ELEVEN �,Poblacion . Leon . Iloilo .,05 / 01 / 2023 ( Mon ),16 : 54 : 23,NESTEALEMICET500ML,35.000,76.00,8.14
|
3 |
-
# 1457229,7 - ELEVEN �,Poblacion . Leon . Iloilo .,05 / 01 / 2023 ( Mon ),16 : 54 : 23,ArlaGStrwbryT200ml,41.000,76.00,8.14
|
|
|
|
|
|
|
|
inferenced/csv_files/Output_3.csv
DELETED
@@ -1,2 +0,0 @@
|
|
1 |
-
RECEIPTNUMBER,MERCHANTNAME,MERCHANTADDRESS,TRANSACTIONDATE,TRANSACTIONTIME,ITEMS,PRICE,TOTAL,VATTAX
|
2 |
-
000036410,WVSU Multi Purpose Cooperative,Luna Street Lapaz Iloilo City,10 - 25 - 2023,01 : 29 : 49 PM,COKE,13.00,13.00,1.39
|
|
|
|
|
|
inferenced/csv_files/Output_4.csv
DELETED
@@ -1,2 +0,0 @@
|
|
1 |
-
RECEIPTNUMBER,MERCHANTNAME,MERCHANTADDRESS,TRANSACTIONDATE,TRANSACTIONTIME,ITEMS,PRICE,TOTAL,VATTAX
|
2 |
-
01053735,Iloilo Grace Pharmacy,C & J Building Jalandoni Extension Bolilao,09 / 12 / 2023,11 : 07,EQDryTravelM18,3.31.00,331 - 00,35.46
|
|
|
|
|
|
inferenced/output.csv
DELETED
@@ -1,9 +0,0 @@
|
|
1 |
-
RECEIPTNUMBER,MERCHANTNAME,MERCHANTADDRESS,TRANSACTIONDATE,TRANSACTIONTIME,ITEMS,PRICE,TOTAL,VATTAX
|
2 |
-
# 2480507,7 - ELEVEN �,Poblacion . Leon . Iloilo . Philippines,04 / 30 / 2023 ( Surt ),17 : 13 : 00,C26rnTeaLemon500ml,39.000,88.00,9.43
|
3 |
-
# 2480507,7 - ELEVEN �,Poblacion . Leon . Iloilo . Philippines,04 / 30 / 2023 ( Surt ),17 : 13 : 00,COBRENRGYORNK350ML,28.000,88.00,9.43
|
4 |
-
# 2480507,7 - ELEVEN �,Poblacion . Leon . Iloilo . Philippines,04 / 30 / 2023 ( Surt ),17 : 13 : 00,OTSHIBMPFRSPLNS50G,21.000,88.00,9.43
|
5 |
-
01053710,Iloilo Grace Pharmacy,C & J Building Jalandoni Extension Bolilao,08 / 12 / 2023,10 : 07,PharmtonEsentialCao,23.75,23 - 75,
|
6 |
-
# 1457229,7 - ELEVEN �,Poblacion . Leon . Iloilo .,05 / 01 / 2023 ( Mon ),16 : 54 : 23,NESTEALEMICET500ML,35.000,76.00,8.14
|
7 |
-
# 1457229,7 - ELEVEN �,Poblacion . Leon . Iloilo .,05 / 01 / 2023 ( Mon ),16 : 54 : 23,ArlaGStrwbryT200ml,41.000,76.00,8.14
|
8 |
-
000036410,WVSU Multi Purpose Cooperative,Luna Street Lapaz Iloilo City,10 - 25 - 2023,01 : 29 : 49 PM,COKE,13.00,13.00,1.39
|
9 |
-
01053735,Iloilo Grace Pharmacy,C & J Building Jalandoni Extension Bolilao,09 / 12 / 2023,11 : 07,EQDryTravelM18,3.31.00,331 - 00,35.46
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
inferenced/sample1_711_inference.jpg
DELETED
Binary file (295 kB)
|
|
inferenced/sample1_grace_inference.jpg
DELETED
Binary file (186 kB)
|
|
inferenced/sample_711_inference.jpg
DELETED
Binary file (298 kB)
|
|
inferenced/sample_coop_inference.jpg
DELETED
Binary file (276 kB)
|
|
inferenced/sample_grace_inference.jpg
DELETED
Binary file (205 kB)
|
|
log/error_output.log
CHANGED
@@ -308,3 +308,21 @@ Traceback (most recent call last):
|
|
308 |
TypeError: The view function for 'create_csv' did not return a valid response. The function either returned None or ended without a return statement.
|
309 |
2024-02-22 10:18:01,539 INFO werkzeug 127.0.0.1 - - [22/Feb/2024 10:18:01] "[35m[1mGET /create_csv HTTP/1.1[0m" 500 -
|
310 |
2024-02-22 10:18:02,099 INFO werkzeug 127.0.0.1 - - [22/Feb/2024 10:18:02] "[33mGET /get_data HTTP/1.1[0m" 404 -
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
308 |
TypeError: The view function for 'create_csv' did not return a valid response. The function either returned None or ended without a return statement.
|
309 |
2024-02-22 10:18:01,539 INFO werkzeug 127.0.0.1 - - [22/Feb/2024 10:18:01] "[35m[1mGET /create_csv HTTP/1.1[0m" 500 -
|
310 |
2024-02-22 10:18:02,099 INFO werkzeug 127.0.0.1 - - [22/Feb/2024 10:18:02] "[33mGET /get_data HTTP/1.1[0m" 404 -
|
311 |
+
2024-02-22 17:02:51,698 ERROR app 'NoneType' object is not iterable
|
312 |
+
2024-02-22 17:02:51,706 INFO werkzeug 127.0.0.1 - - [22/Feb/2024 17:02:51] "[32mGET /run_inference HTTP/1.1[0m" 302 -
|
313 |
+
2024-02-22 17:02:51,754 ERROR app Exception on /create_csv [GET]
|
314 |
+
Traceback (most recent call last):
|
315 |
+
File "C:\Users\Ayoo\anaconda3\envs\mlenv\Lib\site-packages\flask\app.py", line 2190, in wsgi_app
|
316 |
+
response = self.full_dispatch_request()
|
317 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
318 |
+
File "C:\Users\Ayoo\anaconda3\envs\mlenv\Lib\site-packages\flask\app.py", line 1487, in full_dispatch_request
|
319 |
+
return self.finalize_request(rv)
|
320 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^
|
321 |
+
File "C:\Users\Ayoo\anaconda3\envs\mlenv\Lib\site-packages\flask\app.py", line 1506, in finalize_request
|
322 |
+
response = self.make_response(rv)
|
323 |
+
^^^^^^^^^^^^^^^^^^^^^^
|
324 |
+
File "C:\Users\Ayoo\anaconda3\envs\mlenv\Lib\site-packages\flask\app.py", line 1801, in make_response
|
325 |
+
raise TypeError(
|
326 |
+
TypeError: The view function for 'create_csv' did not return a valid response. The function either returned None or ended without a return statement.
|
327 |
+
2024-02-22 17:02:51,766 INFO werkzeug 127.0.0.1 - - [22/Feb/2024 17:02:51] "[35m[1mGET /create_csv HTTP/1.1[0m" 500 -
|
328 |
+
2024-02-22 17:02:52,348 INFO werkzeug 127.0.0.1 - - [22/Feb/2024 17:02:52] "[33mGET /get_data HTTP/1.1[0m" 404 -
|
static/inference/Layoutlmv3_inference/__init__.py
DELETED
File without changes
|
static/inference/Layoutlmv3_inference/__pycache__/__init__.cpython-310.pyc
DELETED
Binary file (176 Bytes)
|
|
static/inference/Layoutlmv3_inference/__pycache__/__init__.cpython-311.pyc
DELETED
Binary file (195 Bytes)
|
|
static/inference/Layoutlmv3_inference/__pycache__/__init__.cpython-312.pyc
DELETED
Binary file (180 Bytes)
|
|
static/inference/Layoutlmv3_inference/__pycache__/annotate_image.cpython-310.pyc
DELETED
Binary file (2.04 kB)
|
|
static/inference/Layoutlmv3_inference/__pycache__/annotate_image.cpython-311.pyc
DELETED
Binary file (3.87 kB)
|
|
static/inference/Layoutlmv3_inference/__pycache__/inference_handler.cpython-310.pyc
DELETED
Binary file (6.83 kB)
|
|
static/inference/Layoutlmv3_inference/__pycache__/inference_handler.cpython-311.pyc
DELETED
Binary file (13.5 kB)
|
|
static/inference/Layoutlmv3_inference/__pycache__/ocr.cpython-310.pyc
DELETED
Binary file (3.51 kB)
|
|
static/inference/Layoutlmv3_inference/__pycache__/ocr.cpython-311.pyc
DELETED
Binary file (9.92 kB)
|
|
static/inference/Layoutlmv3_inference/__pycache__/ocr.cpython-312.pyc
DELETED
Binary file (5.24 kB)
|
|
static/inference/Layoutlmv3_inference/__pycache__/utils.cpython-310.pyc
DELETED
Binary file (2.41 kB)
|
|
static/inference/Layoutlmv3_inference/__pycache__/utils.cpython-311.pyc
DELETED
Binary file (3.84 kB)
|
|
static/inference/Layoutlmv3_inference/annotate_image.py
DELETED
@@ -1,56 +0,0 @@
|
|
1 |
-
import os
|
2 |
-
from PIL import Image, ImageDraw, ImageFont
|
3 |
-
from .utils import image_label_2_color
|
4 |
-
|
5 |
-
|
6 |
-
def get_flattened_output(docs):
|
7 |
-
print("Running Flattened Output")
|
8 |
-
flattened_output = []
|
9 |
-
annotation_key = 'output'
|
10 |
-
for doc in docs:
|
11 |
-
flattened_output_item = {annotation_key: []}
|
12 |
-
doc_annotation = doc[annotation_key]
|
13 |
-
for i, span in enumerate(doc_annotation):
|
14 |
-
if len(span['words']) > 1:
|
15 |
-
for span_chunk in span['words']:
|
16 |
-
flattened_output_item[annotation_key].append(
|
17 |
-
{
|
18 |
-
'label': span['label'],
|
19 |
-
'text': span_chunk['text'],
|
20 |
-
'words': [span_chunk]
|
21 |
-
}
|
22 |
-
)
|
23 |
-
|
24 |
-
else:
|
25 |
-
flattened_output_item[annotation_key].append(span)
|
26 |
-
flattened_output.append(flattened_output_item)
|
27 |
-
return flattened_output
|
28 |
-
|
29 |
-
|
30 |
-
def annotate_image(image_path, annotation_object):
|
31 |
-
print("Annotating Images")
|
32 |
-
img = None
|
33 |
-
image = Image.open(image_path).convert('RGBA')
|
34 |
-
tmp = image.copy()
|
35 |
-
label2color = image_label_2_color(annotation_object)
|
36 |
-
overlay = Image.new('RGBA', tmp.size, (0, 0, 0)+(0,))
|
37 |
-
draw = ImageDraw.Draw(overlay)
|
38 |
-
font = ImageFont.load_default()
|
39 |
-
|
40 |
-
predictions = [span['label'] for span in annotation_object['output']]
|
41 |
-
boxes = [span['words'][0]['box'] for span in annotation_object['output']]
|
42 |
-
for prediction, box in zip(predictions, boxes):
|
43 |
-
draw.rectangle(box, outline=label2color[prediction],
|
44 |
-
width=3, fill=label2color[prediction]+(int(255*0.33),))
|
45 |
-
draw.text((box[0] + 10, box[1] - 10), text=prediction,
|
46 |
-
fill=label2color[prediction], font=font)
|
47 |
-
|
48 |
-
img = Image.alpha_composite(tmp, overlay)
|
49 |
-
img = img.convert("RGB")
|
50 |
-
|
51 |
-
image_name = os.path.basename(image_path)
|
52 |
-
image_name = image_name[:image_name.find('.')]
|
53 |
-
output_folder = 'inferenced/'
|
54 |
-
os.makedirs(output_folder, exist_ok=True)
|
55 |
-
|
56 |
-
img.save(os.path.join(output_folder, f'{image_name}_inference.jpg'))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
static/inference/Layoutlmv3_inference/inference_handler.py
DELETED
@@ -1,199 +0,0 @@
|
|
1 |
-
from .utils import load_model,load_processor,normalize_box,compare_boxes,adjacent
|
2 |
-
from .annotate_image import get_flattened_output,annotate_image
|
3 |
-
from PIL import Image,ImageDraw, ImageFont
|
4 |
-
import logging
|
5 |
-
import torch
|
6 |
-
import json
|
7 |
-
import os
|
8 |
-
|
9 |
-
|
10 |
-
logger = logging.getLogger(__name__)
|
11 |
-
|
12 |
-
class ModelHandler(object):
|
13 |
-
def __init__(self):
|
14 |
-
self.model = None
|
15 |
-
self.model_dir = None
|
16 |
-
self.device = 'cpu'
|
17 |
-
self.error = None
|
18 |
-
self.initialized = False
|
19 |
-
self._raw_input_data = None
|
20 |
-
self._processed_data = None
|
21 |
-
self._images_size = None
|
22 |
-
|
23 |
-
def initialize(self, context):
|
24 |
-
try:
|
25 |
-
logger.info("Loading transformer model")
|
26 |
-
self._context = context
|
27 |
-
properties = self._context
|
28 |
-
self.model_dir = properties.get("model_dir")
|
29 |
-
self.model = self.load(self.model_dir)
|
30 |
-
self.initialized = True
|
31 |
-
except Exception as e:
|
32 |
-
logger.error(f"Error initializing model: {str(e)}")
|
33 |
-
self.error = str(e)
|
34 |
-
|
35 |
-
def preprocess(self, batch):
|
36 |
-
try:
|
37 |
-
inference_dict = batch
|
38 |
-
self._raw_input_data = inference_dict
|
39 |
-
processor = load_processor()
|
40 |
-
images = [Image.open(path).convert("RGB")
|
41 |
-
for path in inference_dict['image_path']]
|
42 |
-
self._images_size = [img.size for img in images]
|
43 |
-
words = inference_dict['words']
|
44 |
-
boxes = [[normalize_box(box, images[i].size[0], images[i].size[1])
|
45 |
-
for box in doc] for i, doc in enumerate(inference_dict['bboxes'])]
|
46 |
-
encoded_inputs = processor(
|
47 |
-
images, words, boxes=boxes, return_tensors="pt", padding="max_length", truncation=True)
|
48 |
-
self._processed_data = encoded_inputs
|
49 |
-
return encoded_inputs
|
50 |
-
except Exception as e:
|
51 |
-
logger.error(f"Error in preprocessing: {str(e)}")
|
52 |
-
self.error = str(e)
|
53 |
-
return None
|
54 |
-
|
55 |
-
def load(self, model_dir):
|
56 |
-
try:
|
57 |
-
model = load_model(model_dir)
|
58 |
-
return model
|
59 |
-
except Exception as e:
|
60 |
-
logger.error(f"Error loading LayoutLMv3 model: {str(e)}")
|
61 |
-
self.error = str(e)
|
62 |
-
return None
|
63 |
-
|
64 |
-
def inference(self, model_input):
|
65 |
-
try:
|
66 |
-
with torch.no_grad():
|
67 |
-
inference_outputs = self.model(**model_input)
|
68 |
-
predictions = inference_outputs.logits.argmax(-1).tolist()
|
69 |
-
results = []
|
70 |
-
for i in range(len(predictions)):
|
71 |
-
tmp = dict()
|
72 |
-
tmp[f'output_{i}'] = predictions[i]
|
73 |
-
results.append(tmp)
|
74 |
-
return [results]
|
75 |
-
except Exception as e:
|
76 |
-
logger.error(f"Error in inference: {str(e)}")
|
77 |
-
self.error = str(e)
|
78 |
-
return None
|
79 |
-
|
80 |
-
def postprocess(self, inference_output):
|
81 |
-
try:
|
82 |
-
docs = []
|
83 |
-
k = 0
|
84 |
-
for page, doc_words in enumerate(self._raw_input_data['words']):
|
85 |
-
doc_list = []
|
86 |
-
width, height = self._images_size[page]
|
87 |
-
for i, doc_word in enumerate(doc_words, start=0):
|
88 |
-
word_tagging = None
|
89 |
-
word_labels = []
|
90 |
-
word = dict()
|
91 |
-
word['id'] = k
|
92 |
-
k += 1
|
93 |
-
word['text'] = doc_word
|
94 |
-
word['pageNum'] = page + 1
|
95 |
-
word['box'] = self._raw_input_data['bboxes'][page][i]
|
96 |
-
_normalized_box = normalize_box(
|
97 |
-
self._raw_input_data['bboxes'][page][i], width, height)
|
98 |
-
for j, box in enumerate(self._processed_data['bbox'].tolist()[page]):
|
99 |
-
if compare_boxes(box, _normalized_box):
|
100 |
-
if self.model.config.id2label[inference_output[0][page][f'output_{page}'][j]] != 'O':
|
101 |
-
word_labels.append(
|
102 |
-
self.model.config.id2label[inference_output[0][page][f'output_{page}'][j]][2:])
|
103 |
-
else:
|
104 |
-
word_labels.append('other')
|
105 |
-
if word_labels != []:
|
106 |
-
word_tagging = word_labels[0] if word_labels[0] != 'other' else word_labels[-1]
|
107 |
-
else:
|
108 |
-
word_tagging = 'other'
|
109 |
-
word['label'] = word_tagging
|
110 |
-
word['pageSize'] = {'width': width, 'height': height}
|
111 |
-
if word['label'] != 'other':
|
112 |
-
doc_list.append(word)
|
113 |
-
spans = []
|
114 |
-
def adjacents(entity): return [
|
115 |
-
adj for adj in doc_list if adjacent(entity, adj)]
|
116 |
-
output_test_tmp = doc_list[:]
|
117 |
-
for entity in doc_list:
|
118 |
-
if adjacents(entity) == []:
|
119 |
-
spans.append([entity])
|
120 |
-
output_test_tmp.remove(entity)
|
121 |
-
|
122 |
-
while output_test_tmp != []:
|
123 |
-
span = [output_test_tmp[0]]
|
124 |
-
output_test_tmp = output_test_tmp[1:]
|
125 |
-
while output_test_tmp != [] and adjacent(span[-1], output_test_tmp[0]):
|
126 |
-
span.append(output_test_tmp[0])
|
127 |
-
output_test_tmp.remove(output_test_tmp[0])
|
128 |
-
spans.append(span)
|
129 |
-
|
130 |
-
output_spans = []
|
131 |
-
for span in spans:
|
132 |
-
if len(span) == 1:
|
133 |
-
output_span = {"text": span[0]['text'],
|
134 |
-
"label": span[0]['label'],
|
135 |
-
"words": [{
|
136 |
-
'id': span[0]['id'],
|
137 |
-
'box': span[0]['box'],
|
138 |
-
'text': span[0]['text']
|
139 |
-
}],
|
140 |
-
}
|
141 |
-
else:
|
142 |
-
output_span = {"text": ' '.join([entity['text'] for entity in span]),
|
143 |
-
"label": span[0]['label'],
|
144 |
-
"words": [{
|
145 |
-
'id': entity['id'],
|
146 |
-
'box': entity['box'],
|
147 |
-
'text': entity['text']
|
148 |
-
} for entity in span]
|
149 |
-
|
150 |
-
}
|
151 |
-
output_spans.append(output_span)
|
152 |
-
docs.append({f'output': output_spans})
|
153 |
-
return [json.dumps(docs, ensure_ascii=False)]
|
154 |
-
|
155 |
-
except Exception as e:
|
156 |
-
logger.error(f"Error in postprocessing: {str(e)}")
|
157 |
-
self.error = str(e)
|
158 |
-
return None
|
159 |
-
|
160 |
-
|
161 |
-
def handle(self, data, context):
|
162 |
-
try:
|
163 |
-
if not self.initialized:
|
164 |
-
self.initialize(context)
|
165 |
-
|
166 |
-
if data is None:
|
167 |
-
return None
|
168 |
-
|
169 |
-
model_input = self.preprocess(data)
|
170 |
-
if model_input is None:
|
171 |
-
return None
|
172 |
-
|
173 |
-
model_out = self.inference(model_input)
|
174 |
-
if model_out is None:
|
175 |
-
return None
|
176 |
-
|
177 |
-
inference_out = self.postprocess(model_out)[0]
|
178 |
-
with open('temp/LayoutlMV3InferenceOutput.json', 'w') as inf_out:
|
179 |
-
inf_out.write(inference_out)
|
180 |
-
inference_out_list = json.loads(inference_out)
|
181 |
-
flattened_output_list = get_flattened_output(inference_out_list)
|
182 |
-
print('Ready for Annotation')
|
183 |
-
for i, flattened_output in enumerate(flattened_output_list):
|
184 |
-
annotate_image(data['image_path'][i], flattened_output)
|
185 |
-
except Exception as e:
|
186 |
-
logger.error(f"Error handling request: {str(e)}")
|
187 |
-
self.error = str(e)
|
188 |
-
|
189 |
-
# Single handler instance shared by every call to the module-level handle().
_service = ModelHandler()


def handle(data, context):
    """Module-level entry point that forwards a request to ``_service``.

    Initializes the shared handler with ``context`` on first use, returns
    ``None`` when ``data`` is ``None`` and otherwise returns whatever
    ``_service.handle(data, context)`` returns.
    """
    if not _service.initialized:
        _service.initialize(context)

    return None if data is None else _service.handle(data, context)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
static/inference/Layoutlmv3_inference/ocr.py
DELETED
@@ -1,187 +0,0 @@
|
|
1 |
-
import os
|
2 |
-
import pandas as pd
|
3 |
-
import cv2
|
4 |
-
import numpy as np
|
5 |
-
import json
|
6 |
-
import requests
|
7 |
-
import traceback
|
8 |
-
|
9 |
-
from PIL import Image
|
10 |
-
|
11 |
-
def preprocess_image(image_path, max_file_size_mb=1, target_file_size_mb=0.5):
    """Load an image, binarize it with ``enhance_txt`` and shrink large files.

    Args:
        image_path: Path of the image file on disk.
        max_file_size_mb: Files larger than this (in MiB) are downscaled.
        target_file_size_mb: Size used to derive the downscale factor; both
            dimensions are multiplied by ``sqrt(target / actual)``.

    Returns:
        The enhanced (and possibly resized) image array, or ``None`` when the
        file is missing, cannot be decoded, or any processing step fails.
    """
    try:
        file_size_mb = os.path.getsize(image_path) / (1024 * 1024)  # bytes -> MiB
        needs_resize = file_size_mb > max_file_size_mb
        if needs_resize:
            print(f"File size ({file_size_mb} MB) exceeds the maximum allowed size ({max_file_size_mb} MB). Resizing the image.")

        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread reports an unreadable/unsupported file by returning
            # None instead of raising; fail with a message that says so.
            print(f"An error occurred: could not read image '{image_path}'")
            return None

        enhanced = enhance_txt(image)
        if not needs_resize:
            return enhanced

        # Scale each side by sqrt(ratio) so the pixel count shrinks by ratio.
        scale = np.sqrt(target_file_size_mb / file_size_mb)
        new_width = max(1, int(image.shape[1] * scale))
        new_height = max(1, int(image.shape[0] * scale))
        return cv2.resize(enhanced, (new_width, new_height))

    except Exception as e:
        print(f"An error occurred: {str(e)}")
        return None
|
43 |
-
|
44 |
-
|
45 |
-
def enhance_txt(img, intensity_increase=20, bilateral_filter_diameter=9, bilateral_filter_sigma_color=75, bilateral_filter_sigma_space=75):
    """Binarize an image with a global threshold derived from its brightness.

    The threshold is 88% of the mean value of the central region of ``img``
    (a 5% margin is trimmed from every side). Pixels above the threshold
    become 255, the rest 0.

    Args:
        img: Image array indexed as ``img[row, col, ...]``.
        intensity_increase, bilateral_filter_diameter,
        bilateral_filter_sigma_color, bilateral_filter_sigma_space:
            Accepted for backward compatibility only. The brightening,
            bilateral-filter and Canny steps that consumed them produced
            values that were never used for the returned image, so that
            dead work has been removed and these arguments have no effect
            on the result (as before).

    Returns:
        The thresholded image returned by ``cv2.threshold``.
    """
    height, width = img.shape[0], img.shape[1]
    top, bottom = int(height * 0.05), int(height * 0.95)
    left, right = int(width * 0.05), int(width * 0.95)
    roi = img[top:bottom, left:right]  # central region, 5% margin per side
    threshold = np.mean(roi) * 0.88  # fraction of the average brightness

    # NOTE(review): with a 1x1 kernel this blur presumably leaves the pixels
    # unchanged; kept so the thresholded input is the same as before.
    blurred = cv2.GaussianBlur(img, (1, 1), 0)

    _, binary = cv2.threshold(blurred, threshold, 255, cv2.THRESH_BINARY)
    return binary
|
67 |
-
|
68 |
-
|
69 |
-
def run_tesseract_on_preprocessed_image(preprocessed_image, image_path):
    """Upload a preprocessed image to the OCR.space API and save the JSON reply.

    Despite its name the function does not run Tesseract locally: it writes
    ``temp/<name>_preprocessed.jpg``, posts that file to
    ``https://api.ocr.space/parse/image`` and stores the parsed response in
    ``temp/<name>_ocr.json``.

    Args:
        preprocessed_image: Image array accepted by ``cv2.imwrite``.
        image_path: Original image path; only its base name (up to the first
            ``.``) is used to name the temporary files.

    Returns:
        Path of the saved JSON file, or ``None`` when the request does not
        return HTTP 200 or any step raises.
    """
    try:
        base_name = os.path.basename(image_path)
        # partition() keeps the whole name when it has no '.', whereas slicing
        # with find() == -1 silently dropped the last character.
        image_name = base_name.partition('.')[0]

        temp_folder = "temp"
        os.makedirs(temp_folder, exist_ok=True)

        url = "https://api.ocr.space/parse/image"

        # SECURITY: prefer a key supplied through the environment; the literal
        # fallback is kept only so existing deployments keep working and
        # should be rotated and removed.
        api_key = os.environ.get("OCR_SPACE_API_KEY", "K88232854988957")
        payload = {
            "apikey": api_key,
            "language": "eng",
            "isOverlayRequired": True,
            "OCREngine": 2,
        }

        preprocessed_path = os.path.join(temp_folder, f"{image_name}_preprocessed.jpg")
        cv2.imwrite(preprocessed_path, preprocessed_image)

        with open(preprocessed_path, "rb") as image_file:
            # Without a timeout a stalled connection would block forever; a
            # timeout error is reported by the except clause below.
            response = requests.post(url, data=payload, files={"file": image_file}, timeout=60)

        if response.status_code != 200:
            print("Error: " + response.text)
            return None

        ocr_json_path = os.path.join(temp_folder, f"{image_name}_ocr.json")
        with open(ocr_json_path, 'w') as json_file:
            json.dump(response.json(), json_file)
        print("---JSON file saved")
        return ocr_json_path

    except Exception as e:
        print(f"An error occurred during OCR request: {str(e)}")
        return None
|
123 |
-
|
124 |
-
def clean_tesseract_output(json_output_path):
    """Extract words and their boxes from a saved OCR JSON response.

    Reads ``ParsedResults[0].TextOverlay.Lines[*].Words[*]`` and converts each
    word's ``Left``/``Top``/``Width``/``Height`` into a
    ``[left, top, right, bottom]`` box.

    Args:
        json_output_path: Path of the JSON file to read.

    Returns:
        A list of ``{'word_text': ..., 'word_box': [l, t, r, b]}`` dicts, or
        ``None`` when the file is missing, is not valid JSON, or lacks the
        expected structure (including fields that are JSON ``null``).
    """
    try:
        with open(json_output_path, 'r', encoding='utf-8') as json_file:
            data = json.load(json_file)

        lines = data['ParsedResults'][0]['TextOverlay']['Lines']

        words = []
        for line in lines:
            for word_info in line['Words']:
                left, top = word_info['Left'], word_info['Top']
                words.append({
                    'word_text': word_info['WordText'],
                    'word_box': [
                        left,
                        top,
                        left + word_info['Width'],
                        top + word_info['Height'],
                    ],
                })

        return words
    # TypeError covers null/non-container fields, which would otherwise escape
    # the "return None on malformed output" contract.
    except (KeyError, IndexError, TypeError, FileNotFoundError, json.JSONDecodeError) as e:
        print(f"Error cleaning Tesseract output: {str(e)}")
        return None
|
150 |
-
|
151 |
-
def prepare_batch_for_inference(image_paths):
    """Preprocess and OCR each image and assemble the inference batch.

    Every image goes through ``preprocess_image``,
    ``run_tesseract_on_preprocessed_image`` and ``clean_tesseract_output``.
    An image is included in the batch only if all three steps succeed, so the
    three lists in the result always have the same length and order.
    (Previously a failed image stayed in ``image_path`` while its words and
    boxes were missing, and a ``None`` cleaned output raised a TypeError.)

    Args:
        image_paths: Sequence of image file paths.

    Returns:
        ``{"image_path": [...], "bboxes": [...], "words": [...]}`` where entry
        ``i`` of each list belongs to the same image.
    """
    print(f"Number of images to process: {len(image_paths)}")
    print("1. Preparing for Inference")
    print("2. Starting Preprocessing")

    processed_paths, word_lists, boxes_lists = [], [], []
    for image_path in image_paths:
        print(f"Processing the image: {image_path}")
        print("3. Preprocessing the Receipt")
        preprocessed_image = preprocess_image(image_path)
        if preprocessed_image is None:
            continue

        print("4. Preprocessing done. Running OCR")
        json_output_path = run_tesseract_on_preprocessed_image(preprocessed_image, image_path)
        print("5. OCR Complete")
        if not json_output_path:
            continue

        clean_output = clean_tesseract_output(json_output_path)
        if clean_output is None:
            # Malformed OCR response: skip the image instead of crashing.
            continue

        processed_paths.append(image_path)
        word_lists.append([word['word_text'] for word in clean_output])
        boxes_lists.append([word['word_box'] for word in clean_output])

    print("6. Preprocessing and OCR Done")
    print("7. Cleaned OCR output")
    print("8. Word List Created")
    print("9. Box List Created")
    inference_batch = {
        "image_path": processed_paths,
        "bboxes": boxes_lists,
        "words": word_lists,
    }

    print("10. Prepared for Inference Batch")
    return inference_batch
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
static/inference/Layoutlmv3_inference/utils.py
DELETED
@@ -1,50 +0,0 @@
|
|
1 |
-
import numpy as np
|
2 |
-
from transformers import AutoModelForTokenClassification, AutoProcessor
|
3 |
-
|
4 |
-
def normalize_box(bbox, width, height):
    """Rescale ``bbox`` to a 0-1000 grid and truncate each value to ``int``.

    Elements 0 and 2 are scaled by ``1000 / width``; elements 1 and 3 by
    ``1000 / height``.
    """
    extents = (width, height, width, height)
    return [int(bbox[i] * (1000 / extents[i])) for i in range(4)]
|
11 |
-
|
12 |
-
def compare_boxes(b1, b2):
    """Return whether the two boxes hold the same values in the same order.

    Both iterables are copied into NumPy arrays and compared with
    ``np.array_equal``.
    """
    first, second = (np.array(list(box)) for box in (b1, b2))
    return np.array_equal(first, second)
|
17 |
-
|
18 |
-
def unnormalize_box(bbox, width, height):
    """Map a box from the 0-1000 grid back to ``width`` x ``height`` units.

    Elements 0 and 2 are multiplied by ``width``; elements 1 and 3 by
    ``height``. Values are not rounded.
    """
    extents = (width, height, width, height)
    return [extents[i] * (bbox[i] / 1000) for i in range(4)]
|
25 |
-
|
26 |
-
def adjacent(w1, w2):
    """Return True when both words share a label and their ids differ by 1."""
    return bool(w1['label'] == w2['label'] and abs(w1['id'] - w2['id']) == 1)
|
30 |
-
|
31 |
-
def random_color():
    """Return three random integers drawn by ``np.random.randint(0, 255)``."""
    return np.random.randint(low=0, high=255, size=3)
|
33 |
-
|
34 |
-
def image_label_2_color(annotation):
    """Assign a random color tuple to every distinct label in an annotation.

    Args:
        annotation: Mapping with an ``'output'`` list whose items each have a
            ``'label'`` entry.

    Returns:
        Dict mapping each label (formatted as a string) to a 3-tuple.

    Raises:
        ValueError: If ``annotation`` has no ``'output'`` key.
    """
    if 'output' not in annotation.keys():
        raise ValueError('please use "output" as annotation key')

    image_labels = {span['label'] for span in annotation['output']}
    label2color = {}
    for label in image_labels:
        # Each channel comes from its own random_color() draw.
        first = random_color()[0]
        second = random_color()[1]
        third = random_color()[2]
        label2color[f'{label}'] = (first, second, third)
    return label2color
|
42 |
-
|
43 |
-
def load_model(model_path):
    """Load a token-classification model from ``model_path``."""
    return AutoModelForTokenClassification.from_pretrained(model_path)
|
46 |
-
|
47 |
-
def load_processor():
    """Load the ``microsoft/layoutlmv3-base`` processor with OCR disabled."""
    return AutoProcessor.from_pretrained(
        "microsoft/layoutlmv3-base",
        apply_ocr=False,
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
static/inference/preprocess.py
DELETED
@@ -1,206 +0,0 @@
|
|
1 |
-
import pandas as pd
|
2 |
-
import numpy as np
|
3 |
-
import os
|
4 |
-
import argparse
|
5 |
-
from datasets.features import ClassLabel
|
6 |
-
from transformers import AutoProcessor
|
7 |
-
from sklearn.model_selection import train_test_split
|
8 |
-
from datasets import Features, Sequence, ClassLabel, Value, Array2D, Array3D, Dataset
|
9 |
-
from datasets import Image as Img
|
10 |
-
from PIL import Image
|
11 |
-
from tqdm import tqdm_notebook # Import tqdm_notebook for displaying progress bars
|
12 |
-
|
13 |
-
|
14 |
-
import warnings
|
15 |
-
warnings.filterwarnings('ignore')
|
16 |
-
|
17 |
-
|
18 |
-
def read_text_file(file_path):
    """Return every line of ``file_path`` as a list, line endings included."""
    with open(file_path, 'r') as handle:
        lines = handle.readlines()
    return lines
|
21 |
-
|
22 |
-
|
23 |
-
def prepare_examples(examples):
    """Encode a batch of examples with the module-level ``processor``.

    The image, token, box and label columns are looked up through the
    module-level ``*_column_name`` variables. Sequences are truncated and
    padded to the maximum length.
    """
    return processor(
        examples[image_column_name],
        examples[text_column_name],
        boxes=examples[boxes_column_name],
        word_labels=examples[label_column_name],
        truncation=True,
        padding="max_length",
    )
|
33 |
-
|
34 |
-
|
35 |
-
def get_zip_dir_name():
    """Return the dataset directory name (currently always 'dataset_files').

    Side effect: changes the working directory to
    ``/kaggle/input/ocr-combinedrec`` and then, on every exit path, to its
    parent via ``os.chdir('./../')``. The original working directory is not
    restored.
    """
    try:
        os.chdir('/kaggle/input/ocr-combinedrec')

        entries = sorted(os.listdir())
        # The first entry is still split as before, so an empty directory
        # keeps raising IndexError here.
        os.path.splitext(entries[0])

        # NOTE(review): the result is hard-coded (marked "# Test" originally).
        # The prefix check that used to follow this return was unreachable
        # and is omitted; confirm whether it should be restored.
        return 'dataset_files'
    finally:
        os.chdir('./../')
|
60 |
-
|
61 |
-
def filter_out_unannotated(example):
    """Return True when at least one tag differs from the 'O' label id.

    Uses the module-level ``label2id`` mapping. An example with no tags
    yields False.
    """
    return any(tag != label2id['O'] for tag in example['ner_tags'])
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
if __name__ == '__main__':
    # Builds train/eval datasets for LayoutLMv3 from the annotation text files
    # and saves them under --output_path.

    parser = argparse.ArgumentParser()
    parser.add_argument('--valid_size')
    parser.add_argument('--output_path')
    args = parser.parse_args()
    TEST_SIZE = float(args.valid_size)
    # get_zip_dir_name() below changes the working directory, so resolve the
    # output path now; otherwise a relative path would be created in one
    # directory and written to in another.
    OUTPUT_PATH = os.path.abspath(args.output_path)

    os.makedirs(OUTPUT_PATH, exist_ok=True)
    files = {}
    zip_dir_name = get_zip_dir_name()

    if zip_dir_name:
        files['train_box'] = read_text_file('/kaggle/input/ocr-combinedrec/dataset_files_box.txt')

        files['train_image'] = read_text_file(os.path.join(
            os.curdir, 'ocr-combinedrec', f'{zip_dir_name}_image.txt'))
        files['train'] = read_text_file(os.path.join(
            os.curdir, 'ocr-combinedrec', f'{zip_dir_name}.txt'))
    else:
        for f in os.listdir():
            if f.endswith('.txt') and f.find('box') != -1:
                files['train_box'] = read_text_file(os.path.join(os.curdir, f))
            elif f.endswith('.txt') and f.find('image') != -1:
                files['train_image'] = read_text_file(
                    os.path.join(os.curdir, f))
            elif f.endswith('.txt') and f.find('labels') == -1:
                files['train'] = read_text_file(os.path.join(os.curdir, f))

    # The three files must be line-aligned. Raise instead of assert so the
    # check also runs under ``python -O``.
    line_counts = {name: len(files[name]) for name in ('train', 'train_box', 'train_image')}
    if len(set(line_counts.values())) != 1:
        raise ValueError(f"annotation files are not line-aligned: {line_counts}")

    # Group line indices by image name (last tab-separated field of each row).
    images = {}
    for i, row in enumerate(files['train_image']):
        if row != '\n':
            image_name = row.split('\t')[-1]
            images.setdefault(image_name.replace('\n', ''), []).append(i)

    words, bboxes, ner_tags, image_path = [], [], [], []
    for image, rows in images.items():
        start, stop = rows[0], rows[-1] + 1
        words.append([row.split('\t')[0].replace('\n', '')
                      for row in files['train'][start:stop]])
        ner_tags.append([row.split('\t')[1].replace('\n', '')
                         for row in files['train'][start:stop]])
        bboxes.append([box.split('\t')[1].replace('\n', '')
                       for box in files['train_box'][start:stop]])
        if zip_dir_name:
            image_path.append(f"/kaggle/input/ocr-combinedrec/{zip_dir_name}/{image}")
        else:
            image_path.append(f"/kaggle/input/ocr-combinedrec/{image}")

    # Sort the label set: set iteration order is not stable between runs, so
    # an unsorted list would give the same tag different ids on each run.
    labels = sorted({tag for doc_tag in ner_tags for tag in doc_tag})
    id2label = dict(enumerate(labels))
    label2id = {label: idx for idx, label in enumerate(labels)}

    dataset_dict = {
        'id': range(len(words)),
        'tokens': words,
        'bboxes': [[list(map(int, bbox.split())) for bbox in doc] for doc in bboxes],
        'ner_tags': [[label2id[tag] for tag in ner_tag] for ner_tag in ner_tags],
        'image': [Image.open(path).convert("RGB") for path in image_path]
    }

    # raw features
    features = Features({
        'id': Value(dtype='string', id=None),
        'tokens': Sequence(feature=Value(dtype='string', id=None), length=-1, id=None),
        'bboxes': Sequence(feature=Sequence(feature=Value(dtype='int64', id=None), length=-1, id=None), length=-1, id=None),
        'ner_tags': Sequence(feature=ClassLabel(num_classes=len(labels), names=labels, names_file=None, id=None), length=-1, id=None),
        'image': Img(decode=True, id=None)
    })

    full_data_set = Dataset.from_dict(dataset_dict, features=features)
    dataset = full_data_set.train_test_split(test_size=TEST_SIZE)
    # Only the training split drops documents whose tags are all 'O'.
    dataset["train"] = dataset["train"].filter(filter_out_unannotated)
    processor = AutoProcessor.from_pretrained(
        "microsoft/layoutlmv3-base", apply_ocr=False)

    # Module-level names read by prepare_examples().
    column_names = dataset["train"].column_names
    image_column_name = "image"
    text_column_name = "tokens"
    boxes_column_name = "bboxes"
    label_column_name = "ner_tags"

    # we need to define custom features for `set_format` (used later on) to work properly
    features = Features({
        'pixel_values': Array3D(dtype="float32", shape=(3, 224, 224)),
        'input_ids': Sequence(feature=Value(dtype='int64')),
        'attention_mask': Sequence(Value(dtype='int64')),
        'bbox': Array2D(dtype="int64", shape=(512, 4)),
        'labels': Sequence(ClassLabel(names=labels)),
    })

    train_dataset = dataset["train"].map(
        prepare_examples,
        batched=True,
        remove_columns=column_names,
        features=features,
    )
    eval_dataset = dataset["test"].map(
        prepare_examples,
        batched=True,
        remove_columns=column_names,
        features=features,
    )
    train_dataset.set_format("torch")
    train_dataset.save_to_disk(os.path.join(OUTPUT_PATH, 'train_split'))
    eval_dataset.save_to_disk(os.path.join(OUTPUT_PATH, 'eval_split'))
    dataset.save_to_disk(os.path.join(OUTPUT_PATH, 'raw_data'))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
static/inference/run_inference.py
DELETED
@@ -1,27 +0,0 @@
|
|
1 |
-
import argparse
|
2 |
-
from asyncio.log import logger
|
3 |
-
from Layoutlmv3_inference.ocr import prepare_batch_for_inference
|
4 |
-
from Layoutlmv3_inference.inference_handler import handle
|
5 |
-
import logging
|
6 |
-
import os
|
7 |
-
|
8 |
-
if __name__ == "__main__":
    # Runs OCR + LayoutLMv3 inference on one image from --images_path using
    # the model in --model_path. Any failure is written to
    # log/error_output.log instead of propagating.
    try:
        parser = argparse.ArgumentParser()
        parser.add_argument("--model_path", type=str)
        parser.add_argument("--images_path", type=str)
        args, _ = parser.parse_known_args()
        images_dir = args.images_path
        # os.listdir returns entries in arbitrary order; sort so the image
        # that gets processed is deterministic.
        image_files = sorted(os.listdir(images_dir))
        if not image_files:
            raise FileNotFoundError(f"no images found in '{images_dir}'")
        # Only the first image is processed per run.
        images_path = [images_dir + '/' + image_files[0]]
        inference_batch = prepare_batch_for_inference(images_path)
        context = {"model_dir": args.model_path}
        handle(inference_batch, context)
    except Exception as err:
        os.makedirs('log', exist_ok=True)
        logging.basicConfig(filename='log/error_output.log', level=logging.ERROR,
                            format='%(asctime)s %(levelname)s %(name)s %(message)s')
        logger = logging.getLogger(__name__)
        # exc_info adds the traceback so the log shows where the error came from.
        logger.error(err, exc_info=True)
|
26 |
-
|
27 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|