adrianeboyd
committed on
Commit
•
4c18175
1
Parent(s):
5336f60
Update spaCy pipeline
Browse files
- LICENSES_SOURCES +2 -2
- README.md +3 -3
- accuracy.json +1 -1
- ca_core_news_md-any-py3-none-any.whl +2 -2
- config.cfg +1 -0
- meta.json +6 -6
- morphologizer/cfg +1 -0
LICENSES_SOURCES
CHANGED
@@ -11,10 +11,10 @@ http://www.gnu.org/licenses/gpl.html```
|
|
11 |
|
12 |
|
13 |
|
14 |
-
# UD Catalan AnCora v2.8 + NER v3.2.
|
15 |
|
16 |
* Author: Carlos Rodríguez-Penagos and Carme Armentano-Oller
|
17 |
-
* URL: https://github.com/TeMU-BSC/spacy/releases/tag/3.2.
|
18 |
* License: CC BY 4.0
|
19 |
|
20 |
```
|
|
|
11 |
|
12 |
|
13 |
|
14 |
+
# UD Catalan AnCora v2.8 + NER v3.2.9
|
15 |
|
16 |
* Author: Carlos Rodríguez-Penagos and Carme Armentano-Oller
|
17 |
+
* URL: https://github.com/TeMU-BSC/spacy/releases/tag/3.2.9
|
18 |
* License: CC BY 4.0
|
19 |
|
20 |
```
|
README.md
CHANGED
@@ -78,12 +78,12 @@ Catalan pipeline optimized for CPU. Components: tok2vec, morphologizer, parser,
|
|
78 |
| Feature | Description |
|
79 |
| --- | --- |
|
80 |
| **Name** | `ca_core_news_md` |
|
81 |
-
| **Version** | `3.
|
82 |
-
| **spaCy** | `>=3.
|
83 |
| **Default Pipeline** | `tok2vec`, `morphologizer`, `parser`, `attribute_ruler`, `lemmatizer`, `ner` |
|
84 |
| **Components** | `tok2vec`, `morphologizer`, `parser`, `senter`, `attribute_ruler`, `lemmatizer`, `ner` |
|
85 |
| **Vectors** | 500000 keys, 20000 unique vectors (300 dimensions) |
|
86 |
-
| **Sources** | [UD Catalan AnCora v2.8](https://github.com/UniversalDependencies/UD_Catalan-AnCora) (Martínez Alonso, Héctor; Pascual, Elena; Zeman, Daniel)<br />[UD Catalan AnCora v2.8 + NER v3.2.
|
87 |
| **License** | `GNU GPL 3.0` |
|
88 |
| **Author** | [Explosion](https://explosion.ai) |
|
89 |
|
|
|
78 |
| Feature | Description |
|
79 |
| --- | --- |
|
80 |
| **Name** | `ca_core_news_md` |
|
81 |
+
| **Version** | `3.6.0` |
|
82 |
+
| **spaCy** | `>=3.6.0,<3.7.0` |
|
83 |
| **Default Pipeline** | `tok2vec`, `morphologizer`, `parser`, `attribute_ruler`, `lemmatizer`, `ner` |
|
84 |
| **Components** | `tok2vec`, `morphologizer`, `parser`, `senter`, `attribute_ruler`, `lemmatizer`, `ner` |
|
85 |
| **Vectors** | 500000 keys, 20000 unique vectors (300 dimensions) |
|
86 |
+
| **Sources** | [UD Catalan AnCora v2.8](https://github.com/UniversalDependencies/UD_Catalan-AnCora) (Martínez Alonso, Héctor; Pascual, Elena; Zeman, Daniel)<br />[UD Catalan AnCora v2.8 + NER v3.2.9](https://github.com/TeMU-BSC/spacy/releases/tag/3.2.9) (Carlos Rodríguez-Penagos and Carme Armentano-Oller)<br />[Catalan Lemmatizer](https://github.com/explosion/spacy-lookups-data) (Text Mining Unit, Barcelona Supercomputing Center)<br />[Catalan Word Embeddings in FastText (Version 1.0)](http://doi.org/10.5281/zenodo.4522041) (Gutiérrez-Fandiño, Asier, Armengol-Estapé, Jordi, Gonzalez-Agirre, Aitor, Carrino, Casimiro Pio, de Gibert, Ona, & Villegas, Marta) |
|
87 |
| **License** | `GNU GPL 3.0` |
|
88 |
| **Author** | [Explosion](https://explosion.ai) |
|
89 |
|
accuracy.json
CHANGED
@@ -299,5 +299,5 @@
|
|
299 |
"f": 0.9000718907
|
300 |
}
|
301 |
},
|
302 |
-
"speed":
|
303 |
}
|
|
|
299 |
"f": 0.9000718907
|
300 |
}
|
301 |
},
|
302 |
+
"speed": 6341.5992717393
|
303 |
}
|
ca_core_news_md-any-py3-none-any.whl
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0b72551683c2ca9b03fcaf3c2f7313b3b4762f78164bedfb04ac890bbad77f57
|
3 |
+
size 49212706
|
config.cfg
CHANGED
@@ -35,6 +35,7 @@ scorer = {"@scorers":"spacy.lemmatizer_scorer.v1"}
|
|
35 |
[components.morphologizer]
|
36 |
factory = "morphologizer"
|
37 |
extend = false
|
|
|
38 |
overwrite = true
|
39 |
scorer = {"@scorers":"spacy.morphologizer_scorer.v1"}
|
40 |
|
|
|
35 |
[components.morphologizer]
|
36 |
factory = "morphologizer"
|
37 |
extend = false
|
38 |
+
label_smoothing = 0.0
|
39 |
overwrite = true
|
40 |
scorer = {"@scorers":"spacy.morphologizer_scorer.v1"}
|
41 |
|
meta.json
CHANGED
@@ -1,14 +1,14 @@
|
|
1 |
{
|
2 |
"lang":"ca",
|
3 |
"name":"core_news_md",
|
4 |
-
"version":"3.
|
5 |
"description":"Catalan pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.",
|
6 |
"author":"Explosion",
|
7 |
"email":"contact@explosion.ai",
|
8 |
"url":"https://explosion.ai",
|
9 |
"license":"GNU GPL 3.0",
|
10 |
-
"spacy_version":">=3.
|
11 |
-
"spacy_git_version":"
|
12 |
"vectors":{
|
13 |
"width":300,
|
14 |
"vectors":20000,
|
@@ -670,7 +670,7 @@
|
|
670 |
"f":0.9000718907
|
671 |
}
|
672 |
},
|
673 |
-
"speed":
|
674 |
},
|
675 |
"sources":[
|
676 |
{
|
@@ -680,8 +680,8 @@
|
|
680 |
"author":"Mart\u00ednez Alonso, H\u00e9ctor; Pascual, Elena; Zeman, Daniel"
|
681 |
},
|
682 |
{
|
683 |
-
"name":"UD Catalan AnCora v2.8 + NER v3.2.
|
684 |
-
"url":"https://github.com/TeMU-BSC/spacy/releases/tag/3.2.
|
685 |
"license":"CC BY 4.0",
|
686 |
"author":"Carlos Rodr\u00edguez-Penagos and Carme Armentano-Oller"
|
687 |
},
|
|
|
1 |
{
|
2 |
"lang":"ca",
|
3 |
"name":"core_news_md",
|
4 |
+
"version":"3.6.0",
|
5 |
"description":"Catalan pipeline optimized for CPU. Components: tok2vec, morphologizer, parser, senter, ner, attribute_ruler, lemmatizer.",
|
6 |
"author":"Explosion",
|
7 |
"email":"contact@explosion.ai",
|
8 |
"url":"https://explosion.ai",
|
9 |
"license":"GNU GPL 3.0",
|
10 |
+
"spacy_version":">=3.6.0,<3.7.0",
|
11 |
+
"spacy_git_version":"cb4fdc83e",
|
12 |
"vectors":{
|
13 |
"width":300,
|
14 |
"vectors":20000,
|
|
|
670 |
"f":0.9000718907
|
671 |
}
|
672 |
},
|
673 |
+
"speed":6341.5992717393
|
674 |
},
|
675 |
"sources":[
|
676 |
{
|
|
|
680 |
"author":"Mart\u00ednez Alonso, H\u00e9ctor; Pascual, Elena; Zeman, Daniel"
|
681 |
},
|
682 |
{
|
683 |
+
"name":"UD Catalan AnCora v2.8 + NER v3.2.9",
|
684 |
+
"url":"https://github.com/TeMU-BSC/spacy/releases/tag/3.2.9",
|
685 |
"license":"CC BY 4.0",
|
686 |
"author":"Carlos Rodr\u00edguez-Penagos and Carme Armentano-Oller"
|
687 |
},
|
morphologizer/cfg
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
{
|
2 |
"extend":false,
|
|
|
3 |
"labels_morph":{
|
4 |
"Definite=Def|Gender=Masc|Number=Sing|POS=DET|PronType=Art":"Definite=Def|Gender=Masc|Number=Sing|PronType=Art",
|
5 |
"POS=PROPN":"",
|
|
|
1 |
{
|
2 |
"extend":false,
|
3 |
+
"label_smoothing":0.0,
|
4 |
"labels_morph":{
|
5 |
"Definite=Def|Gender=Masc|Number=Sing|POS=DET|PronType=Art":"Definite=Def|Gender=Masc|Number=Sing|PronType=Art",
|
6 |
"POS=PROPN":"",
|