local env = import "../env.jsonnet";

# env.str/env.json (from ../env.jsonnet) allow environment-variable overrides
# of the defaults given here.
local dataset_path = env.str("DATA_PATH", "data/ace/events");
local ontology_path = "data/ace/ontology.tsv";
local debug = false;

# embedding
local label_dim = 64;
local pretrained_model = env.str("ENCODER", "roberta-large");

# module
local dropout = 0.2;
local bio_dim = 512;
local bio_layers = 2;
local span_typing_dims = [256, 256];
# Label smoothing for event triggers and arguments; both default to SMOOTHING.
local event_smoothing_factor = env.json("SMOOTHING", "0.0");
local arg_smoothing_factor = env.json("SMOOTHING", "0.0");
local layer_fix = 0;

# training
local typing_loss_factor = 8.0;
local grad_acc = env.json("GRAD_ACC", "1");
local max_training_tokens = 512;
local max_inference_tokens = 1024;
local lr = env.json("LR", "1e-3");
local cuda_devices = env.json("CUDA_DEVICES", "[0]");

{
  dataset_reader: {
    type: "concrete",
    debug: debug,
    pretrained_model: pretrained_model,
    ignore_label: false,
    [if debug then "max_instances"]: 128,  # cap instances in debug mode
    event_smoothing_factor: event_smoothing_factor,
    arg_smoothing_factor: arg_smoothing_factor,
  },
  train_data_path: dataset_path + "/train.tar.gz",
  validation_data_path: dataset_path + "/dev.tar.gz",
  test_data_path: dataset_path + "/test.tar.gz",
  datasets_for_vocab_creation: ["train"],

  # Token-count-based batching: tighter budget for training, looser for
  # validation/inference.
  data_loader: {
    batch_sampler: {
      type: "max_tokens_sampler",
      max_tokens: max_training_tokens,
      sorting_keys: ["tokens"],
    },
  },
  validation_data_loader: {
    batch_sampler: {
      type: "max_tokens_sampler",
      max_tokens: max_inference_tokens,
      sorting_keys: ["tokens"],
    },
  },

  model: {
    type: "span",
    word_embedding: {
      token_embedders: {
        pieces: {
          type: "pretrained_transformer",
          model_name: pretrained_model,
        },
      },
    },
    span_extractor: {
      type: "combo",
      sub_extractors: [
        { type: "self_attentive" },
        { type: "bidirectional_endpoint" },
      ],
    },
    # Span finding as BIO tagging over a BiLSTM encoder.
    span_finder: {
      type: "bio",
      bio_encoder: {
        type: "lstm",
        hidden_size: bio_dim,
        num_layers: bio_layers,
        bidirectional: true,
        dropout: dropout,
      },
      no_label: false,
    },
    span_typing: {
      type: "mlp",
      hidden_dims: span_typing_dims,
    },
    metrics: [{ type: "srl" }],
    ontology_path: ontology_path,
    typing_loss_factor: typing_loss_factor,
    label_dim: label_dim,
    max_decoding_spans: 128,
    max_recursion_depth: 2,
    debug: debug,
  },

  trainer: {
    num_epochs: 128,
    patience: null,  # no early stopping
    [if std.length(cuda_devices) == 1 then "cuda_device"]: cuda_devices[0],
    validation_metric: "+arg-c_f",  # "+" = higher is better
    num_gradient_accumulation_steps: grad_acc,
    optimizer: {
      type: "transformer",
      base: {
        type: "adam",
        lr: lr,
      },
      embeddings_lr: 0.0,
      encoder_lr: 1e-5,
      pooler_lr: 1e-5,
      layer_fix: layer_fix,
    },
  },

  cuda_devices:: cuda_devices,  # hidden field; dropped from the final JSON
  [if std.length(cuda_devices) > 1 then "distributed"]: {
    cuda_devices: cuda_devices,
  },
  [if std.length(cuda_devices) == 1 then "evaluate_on_test"]: true,
}
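
# Usage sketch (assumptions flagged): the dataset_reader/data_loader/trainer
# layout matches AllenNLP's config schema, so this file is presumably consumed
# by `allennlp train`; the exact entry point and the implementation of the
# env.str/env.json helpers live in the surrounding repo (../env.jsonnet), not
# here. Assuming those helpers read environment variables and fall back to the
# given defaults, a run could be launched like:
#
#   DATA_PATH=data/ace/events ENCODER=roberta-large LR=1e-3 \
#   CUDA_DEVICES='[0,1]' allennlp train config/ace.jsonnet -s runs/ace
#
# (config/ace.jsonnet and runs/ace are hypothetical paths.) With two or more
# devices in CUDA_DEVICES, the "distributed" field above is emitted for
# multi-GPU training; with exactly one device, "cuda_device" and
# "evaluate_on_test" are set instead.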