percevalw commited on
Commit
c79c37b
1 Parent(s): bdbcef0

Upload folder using huggingface_hub

Browse files
PKG-INFO ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Metadata-Version: 2.1
2
+ Name: dummy-ner
3
+ Version: 0.1.0
4
+ Summary:
5
+ Author: Perceval Wajsburt
6
+ Author-email: perceval.wajsburt@aphp.fr
7
+ Requires-Python: >=3.7.1,<4.0
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: Programming Language :: Python :: 3.8
10
+ Classifier: Programming Language :: Python :: 3.9
11
+ Classifier: Programming Language :: Python :: 3.10
12
+ Classifier: Programming Language :: Python :: 3.11
13
+ Classifier: Programming Language :: Python :: 3.12
14
+ Requires-Dist: edsnlp[ml] (>=0.11.2)
15
+ Requires-Dist: sentencepiece (>=0.1.96,<0.2.0)
16
+ Description-Content-Type: text/markdown
17
+
18
+ # Dummy EDS-NLP NER model
19
+
20
+ This model was trained on the DEFT 2020 Track 3 dataset, but its main purpose is to
21
+ test the integration of EDS-NLP with the Hugging Face Hub.
README.md CHANGED
@@ -1,3 +1,4 @@
1
- ---
2
- license: bsd-3-clause
3
- ---
 
 
1
+ # Dummy EDS-NLP NER model
2
+
3
+ This model was trained on the DEFT 2020 Track 3 dataset, but its main purpose is to
4
+ test the integration of EDS-NLP with the Hugging Face Hub.
dummy_ner/__init__.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# -----------------------------------------
# This section was autogenerated by edsnlp
# -----------------------------------------

import edsnlp
from pathlib import Path
from typing import Any, Dict, Optional

# Autogenerated stub: the packaged version is tracked by pyproject.toml,
# so the module-level version is deliberately left unset.
__version__ = None


def load(
    overrides: Optional[Dict[str, Any]] = None,
) -> edsnlp.Pipeline:
    """Load the bundled EDS-NLP pipeline from the packaged artifacts.

    Parameters
    ----------
    overrides : Optional[Dict[str, Any]]
        Configuration values that override those stored alongside the
        serialized pipeline. ``None`` loads the artifacts as-is.

    Returns
    -------
    edsnlp.Pipeline
        The pipeline deserialized from the ``artifacts`` directory that
        ships inside this package.
    """
    artifacts_dir = Path(__file__).parent / "artifacts"
    return edsnlp.load(artifacts_dir, overrides=overrides)
dummy_ner/artifacts/config.cfg ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [nlp]
2
+ lang = "eds"
3
+ pipeline = ["ner"]
4
+
5
+ [nlp.tokenizer]
6
+ @tokenizers = "eds.tokenizer"
7
+
8
+ [components]
9
+
10
+ [components.ner]
11
+ @factory = "eds.ner_crf"
12
+ target_span_getter = "gold_spans"
13
+ labels = ["anatomie", "date", "dose", "duree", "examen", "frequence", "mode", "moment", "pathologie", "sosy", "substance", "traitement", "valeur"]
14
+ infer_span_setter = true
15
+ mode = "joint"
16
+ window = 40
17
+ stride = 20
18
+
19
+ [components.ner.embedding]
20
+ @factory = "eds.text_cnn"
21
+ kernel_sizes = [3]
22
+
23
+ [components.ner.embedding.embedding]
24
+ @factory = "eds.transformer"
25
+ model = "./ner/embedding/embedding"
26
+ window = 128
27
+ stride = 96
28
+
29
+ [components.ner.span_setter]
30
+ ents = true
31
+ * = true
32
+ gold_spans = ["anatomie", "date", "dose", "duree", "examen", "frequence", "mode", "moment", "pathologie", "sosy", "substance", "traitement", "valeur"]
33
+
dummy_ner/artifacts/meta.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+
3
+ }
dummy_ner/artifacts/ner/embedding/embedding/config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "./ner/embedding/embedding",
3
+ "architectures": [
4
+ "BertModel"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 128,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 512,
13
+ "layer_norm_eps": 1e-12,
14
+ "max_position_embeddings": 512,
15
+ "model_type": "bert",
16
+ "num_attention_heads": 2,
17
+ "num_hidden_layers": 2,
18
+ "pad_token_id": 0,
19
+ "position_embedding_type": "absolute",
20
+ "torch_dtype": "float32",
21
+ "transformers_version": "4.40.1",
22
+ "type_vocab_size": 2,
23
+ "use_cache": true,
24
+ "vocab_size": 30522
25
+ }
dummy_ner/artifacts/ner/embedding/embedding/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ead96338fa7a78a05b3f0cb78763d825f9144b6ae6ff346092261e8d3c3ef046
3
+ size 17547912
dummy_ner/artifacts/ner/embedding/embedding/parameters.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9ad71db7a9edf51b3a873b80cab6a1af6f1ba026021af76bbee919c451ca5a4
3
+ size 8992
dummy_ner/artifacts/ner/embedding/embedding/special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "mask_token": {
10
+ "content": "[MASK]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "sep_token": {
24
+ "content": "[SEP]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "[UNK]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
+ }
dummy_ner/artifacts/ner/embedding/embedding/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
dummy_ner/artifacts/ner/embedding/embedding/tokenizer_config.json ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": true,
48
+ "mask_token": "[MASK]",
49
+ "model_max_length": 1000000000000000019884624838656,
50
+ "never_split": null,
51
+ "pad_token": "[PAD]",
52
+ "sep_token": "[SEP]",
53
+ "strip_accents": null,
54
+ "tokenize_chinese_chars": true,
55
+ "tokenizer_class": "BertTokenizer",
56
+ "unk_token": "[UNK]"
57
+ }
dummy_ner/artifacts/ner/embedding/embedding/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
dummy_ner/artifacts/ner/embedding/parameters.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4653e3d2bda5ef27335411413e121eefdd4ceb7aed323f3964e9ad43cbab4d8d
3
+ size 263528
dummy_ner/artifacts/ner/parameters.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e5a517b89e44c372a89938808359ff8113651c41e383d10d88a6fd01d80aea7
3
+ size 34363
dummy_ner/artifacts/tokenizer ADDED
@@ -0,0 +1 @@
 
 
1
+ ��prefix_search��suffix_search��infix_finditer��token_match��url_match��exceptions��faster_heuristics�
pyproject.toml ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [build-system]
2
+ requires = [ "poetry-core>=1.0.0", "pypandoc<1.8.0",]
3
+ build-backend = "poetry.core.masonry.api"
4
+
5
+ [tool.edsnlp]
6
+ model_name = "dummy-ner"
7
+
8
+ [tool.poetry]
9
+ name = "dummy-ner"
10
+ version = "0.1.0"
11
+ description = ""
12
+ authors = [ "Perceval Wajsburt <perceval.wajsburt@aphp.fr>",]
13
+ readme = "README.md"
14
+ include = [ "dummy_ner/artifacts/**",]
15
+ [[tool.poetry.packages]]
16
+ include = "dummy_ner"
17
+
18
+ [tool.interrogate]
19
+ ignore-init-method = true
20
+ ignore-init-module = true
21
+ ignore-magic = false
22
+ ignore-semiprivate = false
23
+ ignore-private = false
24
+ ignore-property-decorators = false
25
+ ignore-module = true
26
+ ignore-nested-functions = false
27
+ ignore-nested-classes = true
28
+ ignore-setters = false
29
+ fail-under = 10
30
+ exclude = [ "docs", "build", "tests",]
31
+ verbose = 0
32
+ quiet = false
33
+ whitelist-regex = []
34
+ color = true
35
+ omit-covered-files = false
36
+
37
+ [tool.mypy]
38
+ plugins = "pydantic.mypy"
39
+
40
+ [tool.ruff]
41
+ fix = true
42
+ extend-exclude = [ ".git", "__pycache__", "__init__.py", ".mypy_cache", ".pytest_cache", ".venv", "build",]
43
+ line-length = 88
44
+ select = [ "E", "F", "W", "I001",]
45
+
46
+ [tool.poetry.dependencies]
47
+ python = ">=3.7.1,<4.0"
48
+ sentencepiece = "^0.1.96"
49
+
50
+ [tool.pytest.ini_options]
51
+ testpaths = [ "tests",]
52
+
53
+ [tool.ruff.flake8-tidy-imports]
54
+ ban-relative-imports = "parents"
55
+
56
+ [tool.ruff.extend-per-file-ignores]
57
+ "__init__.py" = [ "F401",]
58
+
59
+ [tool.ruff.isort]
60
+ known-first-party = [ "edsnlp",]
61
+ known-third-party = [ "build",]
62
+ order-by-type = true
63
+
64
+ [tool.coverage.report]
65
+ include = [ "dummy_ner/*", "scripts/*",]
66
+ omit = [ "tests/*",]
67
+ exclude_lines = [ "def __repr__", "if __name__ == .__main__.:", "@overload", "pragma: no cover", "raise .*Error", "raise .*Exception", "warn\\(", "if __name__ == .__main__.:", "if TYPE_CHECKING:", "class .*\\bProtocol\\):", "@(abc\\.)?abstractmethod", "Span.set_extension.*", "Doc.set_extension.*", "Token.set_extension.*",]
68
+
69
+ [tool.poetry.dependencies.edsnlp]
70
+ version = ">=0.11.2"
71
+ extras = [ "ml",]
72
+
73
+ [tool.poetry.group.docs]
74
+ optional = true
75
+
76
+ [tool.poetry.group.docs.dependencies]