askainet committed
Commit
05460db
1 Parent(s): f18de35

initial commit

Files changed (7)
  1. README.md +59 -0
  2. config.json +58 -0
  3. merges.txt +0 -0
  4. pytorch_model.bin +3 -0
  5. tokenizer.json +0 -0
  6. tokenizer_config.json +1 -0
  7. vocab.json +0 -0
README.md CHANGED
@@ -1,3 +1,62 @@
  ---
+ language: en
+ datasets:
+ - vblagoje/lfqa
+ - vblagoje/lfqa_support_docs
  license: mit
  ---
+
+ ## Introduction
+ See the [blog post](https://towardsdatascience.com/long-form-qa-beyond-eli5-an-updated-dataset-and-approach-319cb841aabb) for more details.
+
+ ## Usage
+
+ ```python
+ import torch
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+
+ model_name = "vblagoje/bart_lfqa"
+ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
+ model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
+ model = model.to(device)
+
+ # it all starts with a question/query
+ query = "Why does water heated to room temperature feel colder than the air around it?"
+
+ # given the question above, suppose these documents were found in some document store
+ documents = ["when the skin is completely wet. The body continuously loses water by...",
+              "at greater pressures. There is an ambiguity, however, as to the meaning of the terms 'heating' and 'cooling'...",
+              "are not in a relation of thermal equilibrium, heat will flow from the hotter to the colder, by whatever pathway...",
+              "air condition and moving along a line of constant enthalpy toward a state of higher humidity. A simple example ...",
+              "Thermal contact conductance In physics, thermal contact conductance is the study of heat conduction between solid ..."]
+
+ # concatenate the question and the support documents into a single BART input
+ conditioned_doc = "<P> " + " <P> ".join(documents)
+ query_and_docs = "question: {} context: {}".format(query, conditioned_doc)
+
+ model_input = tokenizer(query_and_docs, truncation=True, padding=True, return_tensors="pt")
+
+ generated_answers_encoded = model.generate(input_ids=model_input["input_ids"].to(device),
+                                            attention_mask=model_input["attention_mask"].to(device),
+                                            min_length=64,
+                                            max_length=256,
+                                            do_sample=False,
+                                            early_stopping=True,
+                                            num_beams=8,
+                                            temperature=1.0,
+                                            top_k=None,
+                                            top_p=None,
+                                            eos_token_id=tokenizer.eos_token_id,
+                                            no_repeat_ngram_size=3,
+                                            num_return_sequences=1)
+ answers = tokenizer.batch_decode(generated_answers_encoded, skip_special_tokens=True, clean_up_tokenization_spaces=True)
+
+ # below is the abstractive answer generated by the model
+ # ["When you heat water to room temperature, it loses heat to the air around it. When you cool it down, it gains heat back from the air, which is why it feels colder than the air surrounding it. It's the same reason why you feel cold when you turn on a fan. The air around you is losing heat, and the water is gaining heat."]
+ ```
+
+ ## Author
+ - Vladimir Blagojevic: `dovlex [at] gmail.com` [Twitter](https://twitter.com/vladblagoje) | [LinkedIn](https://www.linkedin.com/in/blagojevicvladimir/)
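The usage example above assumes that relevant support documents have already been retrieved from some document store. As an illustrative aside, one way to select such passages is with a bi-encoder retriever; the sketch below uses `sentence-transformers`, and the encoder name (`all-MiniLM-L6-v2`) and candidate passages are assumptions for illustration, not part of this repository or the original write-up.

```python
# A minimal retrieval sketch (assumption: sentence-transformers is installed;
# the encoder and passages below are illustrative, not part of this repo).
from sentence_transformers import SentenceTransformer, util

retriever = SentenceTransformer("all-MiniLM-L6-v2")

query = "Why does water heated to room temperature feel colder than the air around it?"
candidate_passages = [
    "when the skin is completely wet. The body continuously loses water by...",
    "Thermal contact conductance In physics, thermal contact conductance is the study of heat conduction between solid ...",
]

# Embed the query and the candidate passages, then rank passages by cosine similarity.
query_emb = retriever.encode(query, convert_to_tensor=True)
passage_emb = retriever.encode(candidate_passages, convert_to_tensor=True)
scores = util.cos_sim(query_emb, passage_emb)[0]

# Keep the top-k passages; these would become the `documents` list in the README example.
top_k = min(5, len(candidate_passages))
documents = [candidate_passages[i] for i in scores.topk(top_k).indices.tolist()]
```

Any retriever that returns a ranked list of passages would serve the same purpose; the model only ever sees the concatenated `question: ... context: ...` string.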
config.json ADDED
@@ -0,0 +1,58 @@
+ {
+   "_num_labels": 3,
+   "activation_dropout": 0.0,
+   "activation_function": "gelu",
+   "add_bias_logits": false,
+   "add_final_layer_norm": false,
+   "architectures": [
+     "BartForConditionalGeneration"
+   ],
+   "attention_dropout": 0.0,
+   "bos_token_id": 0,
+   "classif_dropout": 0.0,
+   "d_model": 1024,
+   "decoder_attention_heads": 16,
+   "decoder_ffn_dim": 4096,
+   "decoder_layerdrop": 0.0,
+   "decoder_layers": 12,
+   "decoder_start_token_id": 2,
+   "dropout": 0.1,
+   "encoder_attention_heads": 16,
+   "encoder_ffn_dim": 4096,
+   "encoder_layerdrop": 0.0,
+   "encoder_layers": 12,
+   "eos_token_id": 2,
+   "id2label": {
+     "0": "LABEL_0",
+     "1": "LABEL_1",
+     "2": "LABEL_2"
+   },
+   "init_std": 0.02,
+   "is_encoder_decoder": true,
+   "label2id": {
+     "LABEL_0": 0,
+     "LABEL_1": 1,
+     "LABEL_2": 2
+   },
+   "max_position_embeddings": 1024,
+   "model_type": "bart",
+   "normalize_before": false,
+   "normalize_embedding": true,
+   "num_hidden_layers": 12,
+   "output_past": false,
+   "pad_token_id": 1,
+   "prefix": " ",
+   "scale_embedding": false,
+   "static_position_embeddings": false,
+   "task_specific_params": {
+     "summarization": {
+       "early_stopping": true,
+       "length_penalty": 2.0,
+       "max_length": 142,
+       "min_length": 56,
+       "no_repeat_ngram_size": 3,
+       "num_beams": 4
+     }
+   },
+   "vocab_size": 50265
+ }
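The configuration above is the standard BART-large geometry with a 1024-token positional limit. These values can also be read back programmatically; a small sketch, showing nothing beyond what config.json already records:

```python
# Inspect the published configuration without loading the full model weights.
from transformers import AutoConfig

config = AutoConfig.from_pretrained("vblagoje/bart_lfqa")

# BART-large geometry: 12 encoder and 12 decoder layers with d_model = 1024.
print(config.encoder_layers, config.decoder_layers, config.d_model)

# Inputs longer than this are truncated by the tokenizer in the README example.
print(config.max_position_embeddings)  # 1024

# Default generation settings inherited from the summarization task definition.
print(config.task_specific_params["summarization"])
```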
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:75eaab4cbd1dac20d21abb3ed2be6464a761983b79aad307ac38c39e7b22296b
+ size 1625557313
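The file above is a Git LFS pointer rather than the weights themselves; the `oid` line records the sha256 of the actual binary. If you want to check a local download against it, here is a minimal sketch using `huggingface_hub` and `hashlib` (the verification step is an illustration, not something the repository requires):

```python
# Verify a downloaded pytorch_model.bin against the sha256 recorded in the LFS pointer above.
import hashlib

from huggingface_hub import hf_hub_download

EXPECTED_SHA256 = "75eaab4cbd1dac20d21abb3ed2be6464a761983b79aad307ac38c39e7b22296b"

path = hf_hub_download(repo_id="vblagoje/bart_lfqa", filename="pytorch_model.bin")

digest = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        digest.update(chunk)

assert digest.hexdigest() == EXPECTED_SHA256, "checksum does not match the LFS pointer"
```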
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1 @@
+ {"model_max_length": 1024}
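`model_max_length` of 1024 matches `max_position_embeddings` in config.json, so the `truncation=True` call in the README clips the concatenated question-plus-context string to 1024 tokens. A quick illustration (the oversized input below is made up for demonstration):

```python
# Show how the 1024-token limit applies to the concatenated question + context input.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("vblagoje/bart_lfqa")
print(tokenizer.model_max_length)  # 1024

# A deliberately oversized context is truncated down to model_max_length tokens.
long_input = "question: why? context: " + "<P> filler passage " * 2000
encoded = tokenizer(long_input, truncation=True, return_tensors="pt")
print(encoded["input_ids"].shape[1])  # <= 1024
```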
vocab.json ADDED
The diff for this file is too large to render. See raw diff