cris2312 committed
Commit ea6b4c0
Parent: 227b955

Update app.py

Files changed (1)
app.py +18 -14
app.py CHANGED
@@ -1,18 +1,22 @@
- import torch
- from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
- from diffusers.utils import export_to_video
-
- # load pipeline
- pipe = DiffusionPipeline.from_pretrained("damo-vilab/text-to-video-ms-1.7b", torch_dtype=torch.float16, variant="fp16")
- pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
-
- # optimize for GPU memory
- pipe.enable_model_cpu_offload()
- pipe.enable_vae_slicing()
-
- # generate
- prompt = "Spiderman is surfing. Darth Vader is also surfing and following Spiderman"
- video_frames = pipe(prompt, num_inference_steps=25, num_frames=200).frames
-
- # convert to video
- video_path = export_to_video(video_frames)
+ import requests
+ from PIL import Image
+ from transformers import BlipProcessor, BlipForConditionalGeneration
+
+ processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
+ model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large")
+
+ img_url = 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/demo.jpg'
+ raw_image = Image.open(requests.get(img_url, stream=True).raw).convert('RGB')
+
+ # conditional image captioning
+ text = "a photography of"
+ inputs = processor(raw_image, text, return_tensors="pt")
+
+ out = model.generate(**inputs)
+ print(processor.decode(out[0], skip_special_tokens=True))
+
+ # unconditional image captioning
+ inputs = processor(raw_image, return_tensors="pt")
+
+ out = model.generate(**inputs)
+ print(processor.decode(out[0], skip_special_tokens=True))
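
For reference, a minimal self-contained sketch of the new captioning flow with the BLIP model placed on a GPU when one is available. The device selection and the max_new_tokens cap are assumptions added for illustration; they are not part of this commit.

import requests
import torch
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration

# Assumption: use CUDA if available, otherwise fall back to CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large").to(device)

img_url = 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/demo.jpg'
raw_image = Image.open(requests.get(img_url, stream=True).raw).convert('RGB')

# unconditional captioning; inputs must live on the same device as the model
inputs = processor(raw_image, return_tensors="pt").to(device)
out = model.generate(**inputs, max_new_tokens=30)  # assumed generation cap
print(processor.decode(out[0], skip_special_tokens=True))

On CPU the .to(device) calls are effectively no-ops, so this behaves the same as the committed code.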