MikkoLipsanen committed on
Commit
36e7edb
1 Parent(s): f8a998a

Create reading_order.py

Browse files
Files changed (1) hide show
  1. reading_order.py +106 -0
reading_order.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+
3
class OrderPolygons:
    """Compute a reading order for text lines given as bounding boxes.

    Every line is a sequence of four coordinates indexed as
    ``0 = x_max`` (right), ``1 = x_min`` (left), ``2 = y_max`` (down) and
    ``3 = y_min`` (up).
    """

    def __init__(self, text_direction='lr'):
        """Store the horizontal reading direction.

        Args:
            text_direction: ``'rl'`` selects right-to-left ordering; any
                other value is treated as left-to-right.
        """
        self.text_direction = text_direction

    def _y_overlaps(self, u, v):
        """Return whether lines ``u`` and ``v`` overlap vertically."""
        # u_y_min < v_y_max and u_y_max > v_y_min
        return u[3] < v[2] and u[2] > v[3]

    def _x_overlaps(self, u, v):
        """Return whether lines ``u`` and ``v`` overlap horizontally."""
        # u_x_min < v_x_max and u_x_max > v_x_min
        return u[1] < v[0] and u[0] > v[1]

    def _above(self, u, v):
        """Return whether the top edge of ``u`` lies above the top edge of ``v``."""
        # u_y_min < v_y_min
        return u[3] < v[3]

    def _left_of(self, u, v):
        """Return whether ``u`` ends strictly to the left of where ``v`` starts."""
        # u_x_max < v_x_min
        return u[0] < v[1]

    def _separates(self, w, u, v):
        """Return 1 if line ``w`` overlaps with both ``u`` and ``v``, else 0.

        ``w`` never separates a pair it is equal to. It also does not
        separate when it lies entirely above or entirely below both lines.
        """
        if w == u or w == v:
            return 0
        # w_y_max < min(u_y_min, v_y_min): w ends above both lines
        if w[2] < min(u[3], v[3]):
            return 0
        # w_y_min > max(u_y_max, v_y_max): w starts below both lines
        if w[3] > max(u[2], v[2]):
            return 0
        # w_x_min < u_x_max and w_x_max > v_x_min
        if w[1] < u[0] and w[0] > v[1]:
            return 1
        return 0

    # Slightly modified version of the Kraken implementation at
    # https://github.com/mittagessen/kraken/blob/master/kraken/lib/segmentation.py
    def reading_order(self, lines):
        """Compute the partial reading order of ``lines``.

        Args:
            lines: Iterable of lines, each holding the four coordinates
                ``x_max, x_min, y_max, y_min`` (in that index order).

        Returns:
            A square 2D array of unsigned bytes where ``order[i, j]`` is 1
            if line ``i`` comes before line ``j`` in reading order.
        """
        # Freeze every line as a tuple so the equality test in _separates
        # yields a plain bool even when lines arrive as NumPy arrays.
        boxes = [tuple(line) for line in lines]
        count = len(boxes)

        # Array where the order of precedence between the lines is defined
        order = np.zeros((count, count), 'B')

        # Defines reading direction: default is from left to right
        if self.text_direction == 'rl':
            def horizontal_order(u, v):
                return not self._left_of(u, v)
        else:
            horizontal_order = self._left_of

        for i, u in enumerate(boxes):
            for j, v in enumerate(boxes):
                if self._x_overlaps(u, v):
                    if self._above(u, v):
                        # line u is placed before line v in reading order
                        order[i, j] = 1
                elif not any(self._separates(w, u, v) for w in boxes):
                    # Nothing lies between the two lines: the horizontal
                    # direction alone decides.
                    if horizontal_order(u, v):
                        order[i, j] = 1
                elif self._y_overlaps(u, v) and horizontal_order(u, v):
                    # A separator exists, but the two lines overlap
                    # vertically, so horizontal order still applies.
                    order[i, j] = 1

        return order

    # Adapted from the Kraken implementation at
    # https://github.com/mittagessen/kraken/blob/master/kraken/lib/segmentation.py
    def topsort(self, order):
        """Topologically sort indices under a binary precedence matrix.

        Args:
            order: Square binary 2D array; ``order[i, j]`` non-zero means
                ``i`` comes before ``j``.

        Returns:
            A list of Python ints: every index once, each one placed after
            all of its predecessors that had not already been entered.
            Cycles are not reported; an index that has been entered is
            never entered again.
        """
        order = np.asarray(order)
        n = len(order)
        visited = [False] * n
        result = []

        def _predecessors(k):
            # Indices i with order[i, k] set, in ascending order.
            rows, = np.nonzero(np.ravel(order[:, k]))
            return iter(rows.tolist())

        # Depth-first post-order walk using an explicit stack instead of
        # recursion, so long precedence chains cannot exceed the
        # interpreter's recursion limit.
        for root in range(n):
            if visited[root]:
                continue
            visited[root] = True
            stack = [(root, _predecessors(root))]
            while stack:
                node, pending = stack[-1]
                for pred in pending:
                    if not visited[pred]:
                        visited[pred] = True
                        stack.append((pred, _predecessors(pred)))
                        break
                else:
                    # All predecessors handled: emit the node itself.
                    stack.pop()
                    result.append(node)
        return result

    def order(self, lines):
        """Return the indices of ``lines`` arranged in reading order."""
        precedence = self.reading_order(lines)
        return self.topsort(precedence)