oh201516 committed on
Commit
b250531
1 Parent(s): 78bb401

Update spaCy pipeline

Browse files
config.cfg CHANGED
@@ -10,7 +10,7 @@ seed = 0
10
 
11
  [nlp]
12
  lang = "en"
13
- pipeline = ["tok2vec","ner"]
14
  batch_size = 1000
15
  disabled = []
16
  before_creation = null
@@ -21,6 +21,10 @@ vectors = {"@vectors":"spacy.Vectors.v1"}
21
 
22
  [components]
23
 
 
 
 
 
24
  [components.ner]
25
  factory = "ner"
26
  incorrect_spans_key = null
 
10
 
11
  [nlp]
12
  lang = "en"
13
+ pipeline = ["tok2vec","ner","count_extraction_component"]
14
  batch_size = 1000
15
  disabled = []
16
  before_creation = null
 
21
 
22
  [components]
23
 
24
+ [components.count_extraction_component]
25
+ factory = "count_extraction_component"
26
+ label = "CONNECTION"
27
+
28
  [components.ner]
29
  factory = "ner"
30
  incorrect_spans_key = null
count_extraction_component.py CHANGED
@@ -5,7 +5,7 @@ from spacy.language import Language
5
  import re
6
 
7
  # https://spacy.io/usage/processing-pipelines#custom-components
8
- @Language.factory("count_extraction")
9
  class CountExtractorComponent(object):
10
  def __init__(self, nlp, name, label="CONNECTION"):
11
  self.label = label
@@ -50,3 +50,15 @@ class CountExtractorComponent(object):
50
  ent._.text = text
51
  ent._.count = 1
52
  return doc
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  import re
6
 
7
  # https://spacy.io/usage/processing-pipelines#custom-components
8
+ @Language.factory("count_extraction_component")
9
  class CountExtractorComponent(object):
10
  def __init__(self, nlp, name, label="CONNECTION"):
11
  self.label = label
 
50
  ent._.text = text
51
  ent._.count = 1
52
  return doc
53
+
54
+ def connections(self, doc):
55
+ connections = {}
56
+ for ent in doc.ents:
57
+ if ent._.count is None:
58
+ continue
59
+
60
+ if ent._.text not in connections:
61
+ connections[ent._.text] = ent._.count
62
+ continue
63
+ connections[ent._.text] += ent._.count
64
+ return connections
en_setec_mk_tv-any-py3-none-any.whl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:edda8abe91cb2274ca61da460b6ccc5f3f80d155d1dd7bae59914e179a475699
3
- size 5706128
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:486bf4e15ccf77ad023ba0362bf1142e833b37e7506d78a7141975d4480ebdd3
3
+ size 5706265
meta.json CHANGED
@@ -35,11 +35,13 @@
35
  },
36
  "pipeline":[
37
  "tok2vec",
38
- "ner"
 
39
  ],
40
  "components":[
41
  "tok2vec",
42
- "ner"
 
43
  ],
44
  "disabled":[
45
 
 
35
  },
36
  "pipeline":[
37
  "tok2vec",
38
+ "ner",
39
+ "count_extraction_component"
40
  ],
41
  "components":[
42
  "tok2vec",
43
+ "ner",
44
+ "count_extraction_component"
45
  ],
46
  "disabled":[
47