svjack commited on
Commit
948457d
1 Parent(s): 112bf03

Create README.md

Browse files
Files changed (1) hide show
  1. README.md +71 -0
README.md ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - zh
4
+ library_name: transformers
5
+ tags:
6
+ - donut
7
+ - donut-python
8
+ ---
9
+
10
 + ### Installation
11
+ ```bash
12
+ pip install torch
13
+ pip install transformers==4.11.3
14
+ pip install opencv-python==4.6.0.66
15
+ pip install donut-python
16
+ ```
17
+
18
+ ### Usage
19
+ ```python
20
+ import sys
21
+ import os
22
+ import pandas as pd
23
+ import numpy as np
24
+ import shutil
25
+
26
+ from tqdm import tqdm
27
+ import re
28
+
29
+ from donut import DonutModel
30
+ import torch
31
+ from PIL import Image
32
+
33
+ zh_model_path = "question_generator_by_zh_on_pic"
34
+
35
+ task_prompt = "<s_docvqa><s_question>{user_input}</s_question><s_answer>"
36
+ zh_pretrained_model = DonutModel.from_pretrained(zh_model_path)
37
+
38
+ if torch.cuda.is_available():
39
+ zh_pretrained_model.half()
40
+ device = torch.device("cuda")
41
+ zh_pretrained_model.to(device)
42
+
43
+ zh_pretrained_model.eval()
44
+ print("have load !")
45
+
46
+ def demo_process_vqa(input_img, question):
47
+ #input_img = Image.fromarray(input_img)
48
+ global zh_pretrained_model, task_prompt
49
+ user_prompt = task_prompt.replace("{user_input}", question)
50
+ output = zh_pretrained_model.inference(input_img, prompt=user_prompt)["predictions"][0]
51
+ req = {
52
+ "question": output["answer"],
53
+ "answer": output["question"]
54
+ }
55
+ return req
56
+
57
+
58
+ img_path = "zh_img.png"
59
+ demo_process_vqa(Image.open(img_path), "零钱通", )
60
+
61
+ '''
62
+ {
63
+ "question": "支付方式是什么?",
64
+ "answer": "零钱通"
65
+ }
66
+ '''
67
+
68
+ ```
69
+
70
+ ### Sample Image
71
+ <img src="https://raw.githubusercontent.com/svjack/docvqa-gen/main/imgs/zh_img.png" width = "500px" height = "500px"/>