bistdude committed on
Commit
940e06e
1 Parent(s): e2a2afe

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -0
app.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Fine-tune GPT-2 on SST-2 as a prompt-completion sentiment task.

Each training example is rendered as text of the form
``<|startoftext|>Tweet: {sentence}\\nSentiment: {label}<|endoftext|>`` and the
model is trained with the ordinary causal language-modelling loss.  At
inference time the same prompt, without the label, is fed to ``generate`` so
that the model completes the label.
"""
from transformers import (
    DataCollatorForLanguageModeling,
    GPT2LMHeadModel,
    GPT2Tokenizer,
    Trainer,
    TrainingArguments,
)
from datasets import load_dataset

# Upper bound on tokens per training example.  Longer examples are truncated,
# which would cut off the label at the end, so keep this comfortably above the
# typical sentence length of the dataset.
MAX_LENGTH = 128
# Number of tokens the model may append when completing the label.
MAX_NEW_TOKENS = 5


def build_prompt(text):
    """Return the prompt that precedes the label; shared by training and inference."""
    return f'<|startoftext|>Tweet: {text}\nSentiment:'


def tokenize_row(row):
    """Render one dataset row as ``prompt + label + EOS`` and tokenize it."""
    full_text = f"{build_prompt(row['sentence'])} {row['label']}<|endoftext|>"
    return tokenizer(full_text, truncation=True, max_length=MAX_LENGTH)


dataset = load_dataset("sst2")

# Show a small sample instead of dumping the entire training split.
for row in dataset['train'].select(range(3)):
    print(row)

# The tokenizer and model must exist before any row is tokenized.
tokenizer = GPT2Tokenizer.from_pretrained(
    'gpt2',
    bos_token='<|startoftext|>',
    eos_token='<|endoftext|>',
    pad_token='<|pad|>',
)
model = GPT2LMHeadModel.from_pretrained('gpt2')
# The extra special tokens enlarge the vocabulary; without resizing, their ids
# would index past the end of the embedding matrix.
model.resize_token_embeddings(len(tokenizer))

# Drop the raw columns so that only tokenizer outputs reach the collator.
train_dataset = dataset['train'].map(
    tokenize_row,
    remove_columns=dataset['train'].column_names,
)

# mlm=False selects causal-LM batching: labels are a copy of input_ids with
# padding positions excluded from the loss.
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

train_args = TrainingArguments(
    output_dir='results',
    num_train_epochs=1,
    warmup_steps=100,
    weight_decay=0.01,
)

trainer = Trainer(
    model=model,
    args=train_args,
    train_dataset=train_dataset,
    data_collator=data_collator,
)
trainer.train()

model.eval()

# Example input; replace with the text to classify.
text = "a charming and often affecting journey"
prompt = build_prompt(text)
# Training may have moved the model to an accelerator; keep inputs with it.
encoded_prompt = tokenizer(prompt, return_tensors="pt").to(model.device)
output = model.generate(
    **encoded_prompt,
    max_new_tokens=MAX_NEW_TOKENS,
    pad_token_id=tokenizer.pad_token_id,
)
# generate() returns a batch of sequences; decode the single sequence in it.
predicted_text = tokenizer.decode(output[0], skip_special_tokens=True)
print(predicted_text)