File size: 2,622 Bytes
15f636b
 
 
 
 
 
 
 
 
 
 
d80ba6a
15f636b
 
d80ba6a
15f636b
 
 
 
 
 
 
 
d80ba6a
15f636b
 
 
d80ba6a
15f636b
d80ba6a
 
 
 
 
 
 
 
 
 
 
 
 
15f636b
d80ba6a
 
 
 
 
 
 
15f636b
 
 
d80ba6a
15f636b
 
 
d80ba6a
15f636b
 
 
 
d80ba6a
15f636b
 
d80ba6a
15f636b
 
 
 
 
d80ba6a
 
15f636b
 
 
 
d80ba6a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# -*- encoding: utf-8 -*-
# @Author: SWHL
# @Contact: liekkaskono@163.com
import math
import random
from pathlib import Path

import numpy as np
from PIL import Image, ImageDraw, ImageFont


def _glyph_height(font, char):
    """Return the vertical step used when stacking *char* in a text column.

    ``FreeTypeFont.getsize`` was removed in Pillow 10. The bottom edge
    reported by ``getbbox`` is the height ``getsize`` used to return, so it
    is preferred; ``getsize`` is only a fallback for Pillow releases that
    predate ``getbbox``.
    """
    if hasattr(font, "getbbox"):
        return font.getbbox(char)[3]
    return font.getsize(char)[1]


def draw_ocr_box_txt(image, boxes, txts, font_path, scores=None, text_score=0.5):
    """Render OCR detections side by side with their recognised text.

    The left half of the result is ``image`` blended 50/50 with a copy on
    which every kept box is filled with a colour. The right half is a white
    canvas showing, for each kept box, its outline, its index and its text.

    Args:
        image: PIL image to annotate; it is copied, not modified.
        boxes: Iterable of quadrilaterals, each a sequence of four ``(x, y)``
            points. The edge from point 0 to point 1 is treated as the width
            and the edge from point 0 to point 3 as the height.
        txts: Texts paired with ``boxes`` by position.
        font_path: Path of the TrueType font used for the text.
        scores: Optional per-box scores, indexed by box position. Boxes whose
            score is below ``text_score`` are skipped.
        text_score: Minimum score a box needs in order to be drawn.

    Returns:
        A numpy array of the combined RGB image, of shape ``(h, 2 * w, 3)``.
    """
    h, w = image.height, image.width
    img_left = image.copy()
    img_right = Image.new("RGB", (w, h), (255, 255, 255))

    # A private generator keeps the colours reproducible without reseeding
    # the process-wide ``random`` state on every call.
    rng = random.Random(0)

    # Fonts are cached per size so the font file is not re-read for each box.
    fonts = {}

    def load_font(size):
        if size not in fonts:
            fonts[size] = ImageFont.truetype(font_path, size, encoding="utf-8")
        return fonts[size]

    draw_left = ImageDraw.Draw(img_left)
    draw_right = ImageDraw.Draw(img_right)
    for idx, (box, txt) in enumerate(zip(boxes, txts)):
        if scores is not None and float(scores[idx]) < text_score:
            continue

        color = (rng.randint(0, 255), rng.randint(0, 255), rng.randint(0, 255))

        box = [tuple(v) for v in box]
        draw_left.polygon(box, fill=color)
        draw_right.text([box[3][0], box[3][1]], str(idx), fill=color)
        draw_right.polygon(box, outline=color)

        box_height = math.hypot(box[0][0] - box[3][0], box[0][1] - box[3][1])
        box_width = math.hypot(box[0][0] - box[1][0], box[0][1] - box[1][1])

        if box_height > 2 * box_width:
            # Tall, narrow box: draw the text as a column, one character
            # per row, advancing by each character's height.
            font = load_font(max(int(box_width * 0.9), 10))
            cur_y = box[0][1]
            for c in txt:
                draw_right.text((box[0][0] + 3, cur_y), c, fill=(0, 0, 0), font=font)
                cur_y += _glyph_height(font, c)
        else:
            font = load_font(max(int(box_height * 0.8), 10))
            draw_right.text([box[0][0], box[0][1]], txt, fill=(0, 0, 0), font=font)

    img_left = Image.blend(image, img_left, 0.5)
    img_show = Image.new("RGB", (w * 2, h), (255, 255, 255))
    img_show.paste(img_left, (0, 0, w, h))
    img_show.paste(img_right, (w, 0, w * 2, h))
    return np.array(img_show)


def visualize(image, boxes, txts, scores, font_path="./fonts/FZYTK.TTF"):
    """Draw the OCR result and return it with the channel axis reversed.

    The drawing is delegated to ``draw_ocr_box_txt`` with a fixed score
    threshold of 0.5. The ``./inference_results/`` directory is created if
    that path does not exist yet; nothing is written into it here.

    Args:
        image: Image passed through to ``draw_ocr_box_txt``.
        boxes: Detected boxes passed through to ``draw_ocr_box_txt``.
        txts: Recognised texts passed through to ``draw_ocr_box_txt``.
        scores: Per-box scores passed through to ``draw_ocr_box_txt``.
        font_path: Path of the font used to draw the text.

    Returns:
        The array produced by ``draw_ocr_box_txt`` with its last axis
        reversed.
    """
    rendered = draw_ocr_box_txt(
        image, boxes, txts, font_path, scores=scores, text_score=0.5
    )

    output_dir = Path("./inference_results/")
    if not output_dir.exists():
        output_dir.mkdir(parents=True, exist_ok=True)

    # Reverse the channel order on the last axis.
    return rendered[:, :, ::-1]