john committed on
Commit
730ad9d
·
1 Parent(s): b2f107f

learn how to train

Browse files
Files changed (2) hide show
  1. app.py +45 -8
  2. requirements.txt +3 -1
app.py CHANGED
@@ -1,15 +1,52 @@
1
  import gradio as gr
2
  from transformers import pipeline
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
- pipeline = pipeline(task="image-classification", model="julien-c/hotdog-not-hotdog")
5
 
6
- def predict(image):
7
- predictions = pipeline(image)
8
- return {p["label"]: p["score"] for p in predictions}
9
 
10
  gr.Interface(
11
- predict,
12
- inputs=gr.inputs.Image(label="Upload hot dog candidate", type="filepath"),
13
- outputs=gr.outputs.Label(num_top_classes=2),
14
- title="Hot Dog? Or Not?",
15
  ).launch()
 
1
  import gradio as gr
2
  from transformers import pipeline
3
+ from transformers import AutoTokenizer
4
+ from datasets import load_dataset
5
+ from transformers import DataCollatorWithPadding
6
+
7
+ raw_datasets = load_dataset("glue", "sst2")
8
+ raw_datasets
9
+ checkpoint = "bert-base-uncased"
10
+ tokenizer = AutoTokenizer.from_pretrained(checkpoint)
11
+ def tokenize_function(example):
12
+ return tokenizer(example["sentence"], truncation=True)
13
+
14
+ tokenized_datasets = raw_datasets.map(tokenize_function, batched=True,remove_columns=['idx','sentence'])
15
+ tokenized_datasets
16
+
17
+
18
+ data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
19
+
20
+ from transformers import TrainingArguments
21
+ from transformers import AutoModelForSequenceClassification
22
+ from datasets import load_metric
23
+ from transformers import Trainer
24
+ import numpy as np
25
+
26
+ training_args = TrainingArguments("test-trainer", evaluation_strategy="epoch")# parameters required for training; the defaults are used
27
+ model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)
28
+
29
+ def compute_metrics(eval_preds):
30
+ metric = load_metric("glue", "sst2")
31
+ logits, labels = eval_preds
32
+ predictions = np.argmax(logits, axis=-1)
33
+ return metric.compute(predictions=predictions, references=labels)
34
+
35
+ trainer = Trainer(
36
+ model,
37
+ training_args,
38
+ train_dataset=tokenized_datasets["train"],
39
+ eval_dataset=tokenized_datasets["validation"],
40
+ data_collator=data_collator,
41
+ tokenizer=tokenizer,
42
+ compute_metrics=compute_metrics,
43
+ )
44
 
 
45
 
 
 
 
46
 
47
  gr.Interface(
48
+ fn=trainer.train,
49
+ NONE,
50
+ NONE,
51
+ title="test",
52
  ).launch()
requirements.txt CHANGED
@@ -1,2 +1,4 @@
1
  transformers
2
- torch
 
 
 
1
  transformers
2
+ streamlit
3
+ torch
4
+ datasets