Spaces:

acverma
/

documentAI

Runtime error

App Files Files

documentAI / app.py

acverma

Update app.py

c8fc3e7 over 2 years ago

raw

history blame

5.92 kB

	# -*- coding: utf-8 -*-
	"""DocAI_DeploymentGradio.ipynb

	Automatically generated by Colaboratory.

	Original file is located at
	https://colab.research.google.com/drive/1USSEj7nHh2n2hUhTJTC0Iwhj6mSR7-mD
	"""

	import os
	# Runtime dependency bootstrap: each command is handed to the shell in the
	# order listed. Exit codes are not inspected, so a failed install only
	# surfaces later as an import error.
	# NOTE(review): torch is installed twice (CPU wheels first, then the
	# 1.8.0+cu101 pin) -- presumably the later pin wins; confirm this is intended.
	_PIP_COMMANDS = (
	    'pip3 install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cpu',
	    'pip install pyyaml==5.1',
	    'pip install -q git+https://github.com/huggingface/transformers.git',
	    'pip install -q datasets seqeval',
	    'pip install torch==1.8.0+cu101 torchvision==0.9.0+cu101 -f https://download.pytorch.org/whl/torch_stable.html',
	    'pip install -q detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu101/torch1.8/index.html',
	    'pip install -q pytesseract',
	)
	for _pip_command in _PIP_COMMANDS:
	    os.system(_pip_command)

	#!pip install gradio

	#pip install -q git+https://github.com/huggingface/transformers.git

	#pip install h5py

	#pip install -q datasets seqeval

	import gradio as gr

	import numpy as np
	import tensorflow as tf

	import torch
	import json

	from datasets.features import ClassLabel
	from transformers import AutoProcessor

	from datasets import Features, Sequence, ClassLabel, Value, Array2D, Array3D
	from datasets import load_dataset # this dataset uses the new Image feature :)

	from transformers import LayoutLMv3Processor,LayoutLMv3ForTokenClassification, AutoProcessor ,AutoModelForTokenClassification

	#import cv2
	from PIL import Image, ImageDraw, ImageFont

	# Hub checkpoints: the base processor (configured to run OCR itself) and the
	# FUNSD fine-tuned token-classification head.
	_PROCESSOR_CHECKPOINT = "microsoft/layoutlmv3-base"
	_MODEL_CHECKPOINT = "nielsr/layoutlmv3-finetuned-funsd"

	processor = LayoutLMv3Processor.from_pretrained(_PROCESSOR_CHECKPOINT, apply_ocr=True)
	model = LayoutLMv3ForTokenClassification.from_pretrained(_MODEL_CHECKPOINT)

	# Load only the test split. With ``split="test"`` the call returns a single
	# ``Dataset`` rather than a ``DatasetDict``, so rows are addressed by integer
	# index (``dataset[0]``) and ``.features`` is read directly from the object;
	# indexing it with ``dataset["test"]`` would be a column lookup and fail.
	dataset = load_dataset("nielsr/funsd", split="test")

	# The first three test documents are written to disk as demo images.
	example = dataset[0]
	example1 = dataset[1]
	example2 = dataset[2]

	for _row, _filename in zip(
	    (example, example1, example2),
	    ("example1.png", "example2.png", "example3.png"),
	):
	    # Accept either schema: a decoded ``image`` column, or an ``image_path``
	    # column pointing at the file on disk.
	    # NOTE(review): "nielsr/funsd" presumably exposes ``image_path`` -- confirm.
	    if "image" in _row:
	        _picture = _row["image"]
	    else:
	        _picture = Image.open(_row["image_path"]).convert("RGB")
	    _picture.save(_filename)

	# IOB tag names in class-id order, taken from the ``ner_tags`` ClassLabel.
	labels = dataset.features['ner_tags'].feature.names

	features = dataset.features


	# Map each class id (its position in ``labels``) to the tag name.
	# If the labels were not a `Sequence[ClassLabel]`, the unique labels would
	# have to be collected by walking the dataset instead.
	id2label = dict(enumerate(labels))

	# Outline/text color used for each entity type when drawing predictions.
	label2color = dict(
	    question="blue",
	    answer="green",
	    header="orange",
	    other="violet",
	)

	#label2color = {'question':'blue', 'answer':'green', 'header':'orange', 'other':'violet'}

	def unnormalize_box(bbox, width, height):
	    """Scale a box given on a 0-1000 grid to pixel coordinates.

	    The first four entries of ``bbox`` are read in order; entries 0 and 2 are
	    scaled by ``width`` and entries 1 and 3 by ``height``. Returns a new
	    four-element list.
	    """
	    extents = (width, height, width, height)
	    return [extent * (bbox[axis] / 1000) for axis, extent in enumerate(extents)]

	def iob_to_label(label):
	    """Return the entity name of an IOB tag, or ``'other'`` for the outside tag.

	    A two-character prefix such as ``B-`` or ``I-`` is removed only when it is
	    actually present (second character is ``-``). Blindly dropping the first
	    two characters would corrupt labels that carry no prefix, e.g. turning
	    ``"other"`` into ``"her"``.

	    Args:
	        label: tag string such as ``"B-HEADER"``, ``"I-ANSWER"`` or ``"O"``.

	    Returns:
	        The tag without its prefix; ``'other'`` when the tag is ``"O"``, empty,
	        or consists of a prefix only.
	    """
	    if label[1:2] == "-":
	        entity = label[2:]
	    elif label == "O":
	        entity = ""
	    else:
	        entity = label
	    return entity or 'other'

	def process_image(image):
	    """Draw the model's predicted entity label and box for each token on ``image``.

	    The image is encoded by the module-level ``processor`` (created with
	    ``apply_ocr=True``), classified by the module-level ``model``, and every
	    token that is not a sub-word continuation gets a colored rectangle plus
	    its label text.

	    Args:
	        image: a PIL image; it is drawn on in place.

	    Returns:
	        The same image object, annotated.
	    """
	    width, height = image.size

	    # Encode; the offset mapping is only needed to spot sub-word tokens and is
	    # removed before the encoding is passed to the model.
	    encoding = processor(image, truncation=True, return_offsets_mapping=True, return_tensors="pt")
	    offset_mapping = encoding.pop('offset_mapping')

	    # Inference only: skip autograd bookkeeping.
	    with torch.no_grad():
	        outputs = model(**encoding)

	    predictions = outputs.logits.argmax(-1).squeeze().tolist()
	    token_boxes = encoding.bbox.squeeze().tolist()

	    # A token whose character offset does not start at 0 continues a word.
	    is_subword = np.array(offset_mapping.squeeze().tolist())[:, 0] != 0

	    draw = ImageDraw.Draw(image)
	    font = ImageFont.load_default()
	    fallback_color = label2color["other"]

	    for class_id, box, continues_word in zip(predictions, token_boxes, is_subword):
	        if continues_word:
	            continue
	        # Color keys are lower-case, so normalise the label's case; unknown
	        # labels fall back to the "other" color instead of raising KeyError.
	        predicted_label = iob_to_label(id2label[class_id]).lower()
	        color = label2color.get(predicted_label, fallback_color)
	        pixel_box = unnormalize_box(box, width, height)
	        draw.rectangle(pixel_box, outline=color)
	        draw.text((pixel_box[0] + 10, pixel_box[1] - 10), text=predicted_label, fill=color, font=font)

	    return image



	# Static text and assets handed to the Gradio interface.
	title = "DocumentAI - Extraction using LayoutLMv3 model"
	# NOTE(review): this text describes an invoice-field model, while the loaded
	# checkpoint is "nielsr/layoutlmv3-finetuned-funsd" and the drawing code only
	# knows question/answer/header/other -- confirm and reword if needed.
	description = "Extraction of Form or Invoice Extraction - We use Microsoft's LayoutLMv3 trained on Invoice Dataset to predict the Biller Name, Biller Address, Biller post_code, Due_date, GST, Invoice_date, Invoice_number, Subtotal and Total. To use it, simply upload an image or use the example image below. Results will show up in a few seconds."

	article="<b>References</b><br>[1] Y. Xu et al., “LayoutLMv3: Pre-training for Document AI with Unified Text and Image Masking.” 2022. <a href='https://arxiv.org/abs/2204.08387'>Paper Link</a><br>[2] <a href='https://github.com/NielsRogge/Transformers-Tutorials/tree/master/LayoutLMv3'>LayoutLMv3 training and inference</a>"

	# One entry per example image written at start-up (previously the first
	# image was listed three times).
	examples = [['example1.png'], ['example2.png'], ['example3.png']]

	css = """.output_image, .input_image {height: 600px !important}"""

	# Assemble the demo: one PIL image in, one annotated PIL image out.
	# NOTE(review): ``gr.inputs`` / ``gr.outputs`` and ``enable_queue`` look like
	# the legacy Gradio API -- confirm they exist in the installed version.
	_image_input = gr.inputs.Image(type="pil")
	_image_output = gr.outputs.Image(type="pil", label="annotated image")

	iface = gr.Interface(
	    fn=process_image,
	    inputs=_image_input,
	    outputs=_image_output,
	    title=title,
	    description=description,
	    article=article,
	    examples=examples,
	    css=css,
	    analytics_enabled=True,
	    enable_queue=True,
	)

	# Start the app with debug output enabled and without inline display.
	iface.launch(inline=False, debug=True)