JerryAnto committed on
Commit
f754afe
·
1 Parent(s): 152de0d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -7
app.py CHANGED
@@ -1,9 +1,13 @@
1
- #!/usr/bin/env python
2
- # coding: utf-8
3
-
4
- # In[ ]:
 
 
5
 
 
6
 
 
7
 
8
  from PIL import Image
9
  from transformers import VisionEncoderDecoderModel, ViTFeatureExtractor, PreTrainedTokenizerFast
@@ -15,6 +19,21 @@ vit_feature_extractor = ViTFeatureExtractor.from_pretrained("google/vit-base-pat
15
 
16
  tokenizer = PreTrainedTokenizerFast.from_pretrained("distilgpt2")
17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
  def vit2distilgpt2(img):
20
  pixel_values = vit_feature_extractor(images=img, return_tensors="pt").pixel_values
@@ -23,7 +42,7 @@ def vit2distilgpt2(img):
23
 
24
  return(generated_sentences[0].split('.')[0])
25
 
26
-
27
 
28
  import gradio as gr
29
 
@@ -54,5 +73,4 @@ gr.Interface(
54
  article=article,
55
  examples=examples,
56
  theme="huggingface",
57
- ).launch(debug=True, enable_queue=True)
58
-
 
1
+ # -*- coding: utf-8 -*-
2
+ """Image Captioning with ViT+GPT2
3
+ Automatically generated by Colaboratory.
4
+ Original file is located at
5
+ https://colab.research.google.com/drive/1P3O0gO5AUqSmM8rE9dxy2tXJ-9jkhxHz
6
+ """
7
 
8
+ #! pip install transformers -q
9
 
10
+ #! pip install gradio -q
11
 
12
  from PIL import Image
13
  from transformers import VisionEncoderDecoderModel, ViTFeatureExtractor, PreTrainedTokenizerFast
 
19
 
20
  tokenizer = PreTrainedTokenizerFast.from_pretrained("distilgpt2")
21
 
22
+ # url = 'https://d2gp644kobdlm6.cloudfront.net/wp-content/uploads/2016/06/bigstock-Shocked-and-surprised-boy-on-t-113798588-300x212.jpg'
23
+
24
+ # with Image.open(requests.get(url, stream=True).raw) as img:
25
+ # pixel_values = vit_feature_extractor(images=img, return_tensors="pt").pixel_values
26
+
27
+ #encoder_outputs = model.generate(pixel_values.to('cpu'),num_beams=5)
28
+
29
+ #generated_sentences = tokenizer.batch_decode(encoder_outputs, skip_special_tokens=True)
30
+
31
+ #generated_sentences
32
+
33
+ #naive text processing
34
+ #generated_sentences[0].split('.')[0]
35
+
36
+ # inference function
37
 
38
  def vit2distilgpt2(img):
39
  pixel_values = vit_feature_extractor(images=img, return_tensors="pt").pixel_values
 
42
 
43
  return(generated_sentences[0].split('.')[0])
44
 
45
+ #!wget https://media.glamour.com/photos/5f171c4fd35176eaedb36823/master/w_2560%2Cc_limit/bike.jpg
46
 
47
  import gradio as gr
48
 
 
73
  article=article,
74
  examples=examples,
75
  theme="huggingface",
76
+ ).launch(debug=True, enable_queue=True)