Spaces:
Sleeping
Sleeping
miesnerjacob
committed on
Commit
•
2919f24
1
Parent(s):
a00f9ba
added code comments in keyword extraction file
Browse files- keyword_extraction.py +13 -6
keyword_extraction.py
CHANGED
@@ -90,7 +90,10 @@ class KeywordExtractor:
|
|
90 |
|
91 |
len_indices = 0
|
92 |
while True:
|
|
|
93 |
merged = self.merge_overlapping_indices(keyword_indices)
|
|
|
|
|
94 |
if len_indices == len(merged):
|
95 |
out_indices = sorted(merged, key=itemgetter(0))
|
96 |
return out_indices
|
@@ -108,18 +111,22 @@ class KeywordExtractor:
|
|
108 |
annotation (list): list of tuples for generating html
|
109 |
"""
|
110 |
|
|
|
111 |
arr = list(text)
|
|
|
|
|
112 |
for idx in sorted(keyword_indices, reverse=True):
|
113 |
arr.insert(idx[0], "<kw>")
|
114 |
-
arr.insert(idx[1]+1, "
|
|
|
|
|
115 |
joined_annotation = ''.join(arr)
|
|
|
|
|
116 |
split = joined_annotation.split('<kw>')
|
117 |
-
annotation = [(x.replace('XXXxxxXXXxxxXXX ', ''), "KEY", "#26aaef") if "XXXxxxXXXxxxXXX" in x else x for x in split]
|
118 |
|
119 |
-
|
120 |
-
for
|
121 |
-
if type(i) is tuple:
|
122 |
-
kws_check.append(i[0])
|
123 |
|
124 |
return annotation
|
125 |
|
|
|
90 |
|
91 |
len_indices = 0
|
92 |
while True:
|
93 |
+
# Merge overlapping indices
|
94 |
merged = self.merge_overlapping_indices(keyword_indices)
|
95 |
+
# Check to see if merging reduced number of annotation indices
|
96 |
+
# If merging did not reduce the list, return the final indices
|
97 |
if len_indices == len(merged):
|
98 |
out_indices = sorted(merged, key=itemgetter(0))
|
99 |
return out_indices
|
|
|
111 |
annotation (list): list of tuples for generating html
|
112 |
"""
|
113 |
|
114 |
+
# Split text into a list of characters
|
115 |
arr = list(text)
|
116 |
+
|
117 |
+
# Loop through indices in list and insert delimiters
|
118 |
for idx in sorted(keyword_indices, reverse=True):
|
119 |
arr.insert(idx[0], "<kw>")
|
120 |
+
arr.insert(idx[1]+1, "<!kw> <kw>")
|
121 |
+
|
122 |
+
# join array
|
123 |
joined_annotation = ''.join(arr)
|
124 |
+
|
125 |
+
# Split joined string on delimiter
|
126 |
split = joined_annotation.split('<kw>')
|
|
|
127 |
|
128 |
+
# Create annotation for keywords in text
|
129 |
+
annotation = [(x.replace('<!kw> ', ''), "KEY", "#26aaef") if "<!kw>" in x else x for x in split]
|
|
|
|
|
130 |
|
131 |
return annotation
|
132 |
|