File size: 2,938 Bytes
689805c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 |
[paths]
train = "data/relations_0401_train.spacy"
dev = "data/relations_0401_dev.spacy"
raw = null
init_tok2vec = null
vectors = null
[system]
seed = 342
gpu_allocator = "pytorch"
[nlp]
lang = "en"
pipeline = ["transformer","relation_extractor"]
disabled = []
before_creation = null
after_creation = null
after_pipeline_creation = null
tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}
batch_size = 1000
[components]
[components.relation_extractor]
factory = "relation_extractor"
threshold = 0.5
[components.relation_extractor.model]
@architectures = "rel_model.v1"
[components.relation_extractor.model.classification_layer]
@architectures = "rel_classification_layer.v1"
nI = null
nO = null
[components.relation_extractor.model.create_instance_tensor]
@architectures = "rel_instance_tensor.v1"
pooling = {"@layers":"reduce_mean.v1"}
[components.relation_extractor.model.create_instance_tensor.get_instances]
@misc = "rel_instance_generator.v1"
max_length = 50
[components.relation_extractor.model.create_instance_tensor.tok2vec]
@architectures = "spacy-transformers.TransformerListener.v1"
grad_factor = 1.0
pooling = {"@layers":"reduce_mean.v1"}
upstream = "*"
[components.transformer]
factory = "transformer"
max_batch_items = 4096
set_extra_annotations = {"@annotation_setters":"spacy-transformers.null_annotation_setter.v1"}
[components.transformer.model]
@architectures = "spacy-transformers.TransformerModel.v1"
name = "timhbach/Team-Gryffindor-bert-base-finetuned-NER-creditcardcontract"
[components.transformer.model.get_spans]
@span_getters = "spacy-transformers.strided_spans.v1"
window = 64
stride = 48
[components.transformer.model.tokenizer_config]
use_fast = true
[corpora]
[corpora.dev]
@readers = "Gold_ents_Corpus.v1"
file = ${paths.dev}
[corpora.train]
@readers = "Gold_ents_Corpus.v1"
file = ${paths.train}
[training]
seed = ${system.seed}
gpu_allocator = ${system.gpu_allocator}
dropout = 0.1
accumulate_gradient = 1
patience = 1600000
max_epochs = 0
max_steps = 1000
eval_frequency = 100
frozen_components = []
dev_corpus = "corpora.dev"
train_corpus = "corpora.train"
before_to_disk = null
annotating_components = []
[training.batcher]
@batchers = "spacy.batch_by_padded.v1"
discard_oversize = true
size = 2000
buffer = 256
get_length = null
[training.logger]
@loggers = "spacy.ConsoleLogger.v1"
progress_bar = false
[training.optimizer]
@optimizers = "Adam.v1"
beta1 = 0.9
beta2 = 0.999
L2_is_weight_decay = true
L2 = 0.01
grad_clip = 1.0
use_averages = false
eps = 0.00000001
[training.optimizer.learn_rate]
@schedules = "warmup_linear.v1"
warmup_steps = 250
total_steps = 20000
initial_rate = 0.00005
[training.score_weights]
rel_micro_p = 0.0
rel_micro_r = 0.0
rel_micro_f = 1.0
[pretraining]
[initialize]
vectors = ${paths.vectors}
init_tok2vec = ${paths.init_tok2vec}
vocab_data = null
lookups = null
before_init = null
after_init = null
[initialize.components]
[initialize.tokenizer] |