This model was fine-tuned on questions from the DocVQA dataset.

Code for Training and Prediction (v1): https://www.kaggle.com/tusharcode/training-layoutlm-docvqa

How to use:

import torch
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForDocumentQuestionAnswering

# End-to-end usage example: load the fine-tuned checkpoint, encode one FUNSD
# document together with a question, and print the predicted answer span.

# Load the fine-tuned checkpoint and its tokenizer from the Hugging Face Hub.
model_checkpoint = "TusharGoel/LayoutLM-Finetuned-DocVQA"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, add_prefix_space=True)
model_predict = AutoModelForDocumentQuestionAnswering.from_pretrained(model_checkpoint)

# Put the model in evaluation mode (turns off training-only behavior such as dropout).
model_predict.eval()

# Use the first FUNSD training document as the example input.
dataset = load_dataset("nielsr/funsd", split="train")
example = dataset[0]

question = "What's Licensee Number?"

words = example["words"]
boxes = example["bboxes"]

# Encode the question (sequence 0) and the document words (sequence 1) as
# pre-split word lists so each token can be mapped back to its source word.
encoding = tokenizer(
    question.split(),
    words,
    is_split_into_words=True,
    return_token_type_ids=True,
    return_tensors="pt",
)

# Per-token lookups, computed once and reused below:
#   sequence_ids: None for special tokens, 0 for question, 1 for document.
#   word_ids: index of the source word inside its own sequence (None for specials).
sequence_ids = encoding.sequence_ids(0)
word_ids = encoding.word_ids(0)

# Build one bounding box per token: document tokens inherit the box of their
# word, separator tokens get [1000, 1000, 1000, 1000], and everything else
# (question tokens, other special tokens) gets [0, 0, 0, 0].
bbox = []
for token_id, sequence_id, word_id in zip(encoding.input_ids[0], sequence_ids, word_ids):
    if sequence_id == 1:
        bbox.append(boxes[word_id])
    elif token_id == tokenizer.sep_token_id:
        bbox.append([1000] * 4)
    else:
        bbox.append([0] * 4)
encoding["bbox"] = torch.tensor([bbox])

# Inference only: skip autograd bookkeeping. No labels are passed, so the
# model output carries no loss and only the span logits are used.
with torch.no_grad():
    outputs = model_predict(**encoding)

# Restrict the span search to document tokens. Without this mask the arg-max
# can land on a special token (word id None -> TypeError in the slice below)
# or on a question token (whose word id indexes the question, not `words`).
context_mask = torch.tensor([sequence_id == 1 for sequence_id in sequence_ids])
start_scores = outputs.start_logits.masked_fill(~context_mask, float("-inf"))
end_scores = outputs.end_logits.masked_fill(~context_mask, float("-inf"))

# Map the best start/end tokens back to word indices and print the answer.
# If the predicted end precedes the start, the slice is empty and an empty
# line is printed.
start = word_ids[start_scores.argmax(-1).item()]
end = word_ids[end_scores.argmax(-1).item()]
print(" ".join(words[start : end + 1]))
Downloads last month
49
Inference Examples
This model does not have enough activity to be deployed to Inference API (serverless) yet. Increase its social visibility and check back later, or deploy to Inference Endpoints (dedicated) instead.

Spaces using TusharGoel/LayoutLM-Finetuned-DocVQA 2