fleonce committed on
Commit
5c95ecf
1 Parent(s): c97bcc5

Upload ITERForRelationExtraction

Browse files
Files changed (4) hide show
  1. README.md +104 -0
  2. config.json +66 -0
  3. generation_config.json +5 -0
  4. model.safetensors +3 -0
README.md ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ base_model:
4
+ - microsoft/deberta-v3-large
5
+ library_name: transformers
6
+ tags:
7
+ - relation extraction
8
+ - nlp
9
+ model-index:
10
+ - name: iter-ade-deberta-large
11
+ results:
12
+ - task:
13
+ type: relation-extraction
14
+ dataset:
15
+ name: ade
16
+ type: ade
17
+ metrics:
18
+ - name: F1
19
+ type: f1
20
+ value: 87.536
21
+ ---
22
+
23
+
24
+ # ITER: Iterative Transformer-based Entity Recognition and Relation Extraction
25
+
26
+ This model checkpoint is part of the collection of models published alongside our paper ITER,
27
+ [accepted at EMNLP 2024](https://aclanthology.org/2024.findings-emnlp.655/).<br>
28
+ To ease reproducibility and enable open research, our source code has been published on [GitHub](https://github.com/fleonce/iter).
29
+
30
+ This model achieved an F1 score of `87.536` on dataset `ade` on split `1`
31
+
32
+ ### Using ITER in your code
33
+
34
+ First, install ITER in your preferred environment:
35
+
36
+ ```text
37
+ pip install git+https://github.com/fleonce/iter
38
+ ```
39
+
40
+ To use our model, refer to the following code:
41
+ ```python
42
+ from iter import ITERForRelationExtraction
43
+
44
+ model = ITERForRelationExtraction.from_pretrained("fleonce/iter-ade-deberta-large")
45
+ tokenizer = model.tokenizer
46
+
47
+ encodings = tokenizer(
48
+ "An art exhibit at the Hakawati Theatre in Arab east Jerusalem was a series of portraits of Palestinians killed in the rebellion .",
49
+ return_tensors="pt"
50
+ )
51
+
52
+ generation_output = model.generate(
53
+ encodings["input_ids"],
54
+ attention_mask=encodings["attention_mask"],
55
+ )
56
+
57
+ # entities
58
+ print(generation_output.entities)
59
+
60
+ # relations between entities
61
+ print(generation_output.links)
62
+ ```
63
+
64
+ ### Checkpoints
65
+
66
+ We publish checkpoints for the models performing best on the following datasets:
67
+
68
+ - **ACE05**:
69
+ 1. [fleonce/iter-ace05-deberta-large](https://huggingface.co/fleonce/iter-ace05-deberta-large)
70
+ - **CoNLL04**:
71
+ 1. [fleonce/iter-conll04-deberta-large](https://huggingface.co/fleonce/iter-conll04-deberta-large)
72
+ - **ADE**:
73
+ 1. [fleonce/iter-ade-deberta-large](https://huggingface.co/fleonce/iter-ade-deberta-large)
74
+ - **SciERC**:
75
+ 1. [fleonce/iter-scierc-deberta-large](https://huggingface.co/fleonce/iter-scierc-deberta-large)
76
+ 2. [fleonce/iter-scierc-scideberta-full](https://huggingface.co/fleonce/iter-scierc-scideberta-full)
77
+ - **CoNLL03**:
78
+ 1. [fleonce/iter-conll03-deberta-large](https://huggingface.co/fleonce/iter-conll03-deberta-large)
79
+ - **GENIA**:
80
+ 1. [fleonce/iter-genia-deberta-large](https://huggingface.co/fleonce/iter-genia-deberta-large)
81
+
82
+
83
+ ### Reproducibility
84
+
85
+ For each dataset, we selected the best-performing checkpoint out of the 5 training runs we performed.
86
+ This model was trained with the following hyperparameters:
87
+
88
+ - Seed: `1`
89
+ - Config: `ade/small_lr_d_ff_4096_split1`
90
+ - PyTorch `2.3.0` with CUDA `11.8` and precision `torch.float32`
91
+ - GPU: `1 NVIDIA H100 SXM 80 GB GPU`
92
+
93
+ Varying the GPU and CUDA version, as well as the training precision, resulted in slightly different end results in our tests;
94
+ keep this in mind when attempting to reproduce our results.
95
+
96
+ To train this model, refer to the following command:
97
+ ```shell
98
+ python3 train.py --dataset ade/small_lr_d_ff_4096_split1 --transformer microsoft/deberta-v3-large --seed 1
99
+ ```
100
+
101
+ ```text
102
+ @inproceedings{iter-emnlp-2024,
+   title = "ITER: Iterative Transformer-based Entity Recognition and Relation Extraction",
+   booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2024",
+   year = "2024",
+   url = "https://aclanthology.org/2024.findings-emnlp.655/",
+ }
103
+ ```
104
+
config.json ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "models/fleonce/iter-ade-deberta-large",
3
+ "activation_fn": "relu",
4
+ "architectures": [
5
+ "ITERForRelationExtraction"
6
+ ],
7
+ "d_ff": 4096,
8
+ "d_model": 1024,
9
+ "dataset": "ade_split1",
10
+ "dropout": 0.3,
11
+ "entity_types": [
12
+ "Adverse-Effect",
13
+ "Drug"
14
+ ],
15
+ "features": 544,
16
+ "link_types": [
17
+ "Adverse-Effect"
18
+ ],
19
+ "max_length": 512,
20
+ "max_nest_depth": 2,
21
+ "model_type": "iter",
22
+ "num_links": 1,
23
+ "num_types": 3,
24
+ "threshold": 0.5,
25
+ "torch_dtype": "float32",
26
+ "transformer_config": {
27
+ "_name_or_path": "microsoft/deberta-v3-large",
28
+ "architectures": null,
29
+ "attention_probs_dropout_prob": 0.1,
30
+ "decoder_start_token_id": null,
31
+ "eos_token_id": null,
32
+ "hidden_act": "gelu",
33
+ "hidden_dropout_prob": 0.1,
34
+ "hidden_size": 1024,
35
+ "initializer_range": 0.02,
36
+ "intermediate_size": 4096,
37
+ "is_encoder_decoder": false,
38
+ "layer_norm_eps": 1e-07,
39
+ "max_length": 512,
40
+ "max_position_embeddings": 512,
41
+ "max_relative_positions": -1,
42
+ "model_type": "deberta-v2",
43
+ "norm_rel_ebd": "layer_norm",
44
+ "num_attention_heads": 16,
45
+ "num_hidden_layers": 24,
46
+ "pooler_dropout": 0,
47
+ "pooler_hidden_act": "gelu",
48
+ "pooler_hidden_size": 1024,
49
+ "pos_att_type": [
50
+ "p2c",
51
+ "c2p"
52
+ ],
53
+ "position_biased_input": false,
54
+ "position_buckets": 256,
55
+ "relative_attention": true,
56
+ "share_att_key": true,
57
+ "task_specific_params": null,
58
+ "type_vocab_size": 0,
59
+ "vocab_size": 128100
60
+ },
61
+ "transformers_version": "4.37.0",
62
+ "use_bias": false,
63
+ "use_gate": true,
64
+ "use_mlp": true,
65
+ "use_scale": true
66
+ }
generation_config.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "max_length": 512,
4
+ "transformers_version": "4.37.0"
5
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2b7b5186f80fc4cc7ae9f3216feb220a6195ca3fe8c0108e2c84bb6b973d2b6
3
+ size 2038235024