Update spaCy pipeline
Browse files
- config.cfg +5 -1
- count_extraction_component.py +13 -1
- en_setec_mk_tv-any-py3-none-any.whl +2 -2
- meta.json +4 -2
config.cfg
CHANGED
@@ -10,7 +10,7 @@ seed = 0
|
|
10 |
|
11 |
[nlp]
|
12 |
lang = "en"
|
13 |
-
pipeline = ["tok2vec","ner"]
|
14 |
batch_size = 1000
|
15 |
disabled = []
|
16 |
before_creation = null
|
@@ -21,6 +21,10 @@ vectors = {"@vectors":"spacy.Vectors.v1"}
|
|
21 |
|
22 |
[components]
|
23 |
|
|
|
|
|
|
|
|
|
24 |
[components.ner]
|
25 |
factory = "ner"
|
26 |
incorrect_spans_key = null
|
|
|
10 |
|
11 |
[nlp]
|
12 |
lang = "en"
|
13 |
+
pipeline = ["tok2vec","ner","count_extraction_component"]
|
14 |
batch_size = 1000
|
15 |
disabled = []
|
16 |
before_creation = null
|
|
|
21 |
|
22 |
[components]
|
23 |
|
24 |
+
[components.count_extraction_component]
|
25 |
+
factory = "count_extraction_component"
|
26 |
+
label = "CONNECTION"
|
27 |
+
|
28 |
[components.ner]
|
29 |
factory = "ner"
|
30 |
incorrect_spans_key = null
|
count_extraction_component.py
CHANGED
@@ -5,7 +5,7 @@ from spacy.language import Language
|
|
5 |
import re
|
6 |
|
7 |
# https://spacy.io/usage/processing-pipelines#custom-components
|
8 |
-
@Language.factory("
|
9 |
class CountExtractorComponent(object):
|
10 |
def __init__(self, nlp, name, label="CONNECTION"):
|
11 |
self.label = label
|
@@ -50,3 +50,15 @@ class CountExtractorComponent(object):
|
|
50 |
ent._.text = text
|
51 |
ent._.count = 1
|
52 |
return doc
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
import re
|
6 |
|
7 |
# https://spacy.io/usage/processing-pipelines#custom-components
|
8 |
+
@Language.factory("count_extraction_component")
|
9 |
class CountExtractorComponent(object):
|
10 |
def __init__(self, nlp, name, label="CONNECTION"):
|
11 |
self.label = label
|
|
|
50 |
ent._.text = text
|
51 |
ent._.count = 1
|
52 |
return doc
|
53 |
+
|
54 |
+
def connections(self, doc):
|
55 |
+
connections = {}
|
56 |
+
for ent in doc.ents:
|
57 |
+
if ent._.count is None:
|
58 |
+
continue
|
59 |
+
|
60 |
+
if ent._.text not in connections:
|
61 |
+
connections[ent._.text] = ent._.count
|
62 |
+
continue
|
63 |
+
connections[ent._.text] += ent._.count
|
64 |
+
return connections
|
en_setec_mk_tv-any-py3-none-any.whl
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:486bf4e15ccf77ad023ba0362bf1142e833b37e7506d78a7141975d4480ebdd3
|
3 |
+
size 5706265
|
meta.json
CHANGED
@@ -35,11 +35,13 @@
|
|
35 |
},
|
36 |
"pipeline":[
|
37 |
"tok2vec",
|
38 |
-
"ner"
|
|
|
39 |
],
|
40 |
"components":[
|
41 |
"tok2vec",
|
42 |
-
"ner"
|
|
|
43 |
],
|
44 |
"disabled":[
|
45 |
|
|
|
35 |
},
|
36 |
"pipeline":[
|
37 |
"tok2vec",
|
38 |
+
"ner",
|
39 |
+
"count_extraction_component"
|
40 |
],
|
41 |
"components":[
|
42 |
"tok2vec",
|
43 |
+
"ner",
|
44 |
+
"count_extraction_component"
|
45 |
],
|
46 |
"disabled":[
|
47 |
|