Nice work :)
#1 by Bailey24 - opened
I love your work! Could you please tell me how to speed up prediction time?
# predict
with torch.no_grad():
    outputs = model(**inputs)
Specifically, how can I run prediction on the GPU to speed it up?
You can move both the model and the inputs onto the GPU, like so:
import torch

device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)
inputs = {k: v.to(device) for k, v in inputs.items()}

# predict
with torch.no_grad():
    outputs = model(**inputs)
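For completeness, here is a fuller sketch of the same idea as a runnable script. It assumes the transformers CLIPSegProcessor / CLIPSegForImageSegmentation classes and the CIDAS/clipseg-rd64-refined checkpoint mentioned later in this thread; the image path is just a placeholder.

import torch
from PIL import Image
from transformers import CLIPSegProcessor, CLIPSegForImageSegmentation

device = "cuda" if torch.cuda.is_available() else "cpu"

processor = CLIPSegProcessor.from_pretrained("CIDAS/clipseg-rd64-refined")
model = CLIPSegForImageSegmentation.from_pretrained("CIDAS/clipseg-rd64-refined").to(device)
model.eval()

image = Image.open("example.jpg")  # placeholder image path
prompts = ["orange", "violet", "green", "black"]

# The processor tokenizes the prompts and preprocesses the image,
# returning PyTorch tensors when return_tensors="pt".
inputs = processor(text=prompts, images=[image] * len(prompts), padding="max_length", return_tensors="pt")

# Move every input tensor onto the same device as the model.
inputs = {k: v.to(device) for k, v in inputs.items()}

# predict
with torch.no_grad():
    outputs = model(**inputs)

print(outputs.logits.shape)  # one segmentation logit map per prompt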
Thanks a lot. I preprocessed the prompts and the image, but I got the following error.
ValueError Traceback (most recent call last)
<ipython-input-59-6c4240d4c8a3> in <module>
11 # prompts = np.array(prompts)
12 # prompts = torch.from_numpy(prompts)
---> 13 inputs = processor(text=prompts, images=[image] * len(prompts), padding="max_length", return_tensors="pt")
/usr/local/lib/python3.8/dist-packages/transformers/models/clipseg/processing_clipseg.py in __call__(self, text, images, return_tensors, **kwargs)
81
82 if text is not None:
---> 83 encoding = self.tokenizer(text, return_tensors=return_tensors, **kwargs)
84
85 if images is not None:
/usr/local/lib/python3.8/dist-packages/transformers/tokenization_utils_base.py in __call__(self, text, text_pair, text_target, text_pair_target, add_special_tokens, padding, truncation, max_length, stride, is_split_into_words, pad_to_multiple_of, return_tensors, return_token_type_ids, return_attention_mask, return_overflowing_tokens, return_special_tokens_mask, return_offsets_mapping, return_length, verbose, **kwargs)
2518 if not self._in_target_context_manager:
2519 self._switch_to_input_mode()
-> 2520 encodings = self._call_one(text=text, text_pair=text_pair, **all_kwargs)
2521 if text_target is not None:
2522 self._switch_to_target_mode()
/usr/local/lib/python3.8/dist-packages/transformers/tokenization_utils_base.py in _call_one(self, text, text_pair, add_special_tokens, padding, truncation, max_length, stride, is_split_into_words, pad_to_multiple_of, return_tensors, return_token_type_ids, return_attention_mask, return_overflowing_tokens, return_special_tokens_mask, return_offsets_mapping, return_length, verbose, **kwargs)
2576
2577 if not _is_valid_text_input(text):
-> 2578 raise ValueError(
2579 "text input must of type `str` (single example), `List[str]` (batch or single pretokenized example) "
2580 "or `List[List[str]]` (batch of pretokenized examples)."
ValueError: text input must of type `str` (single example), `List[str]` (batch or single pretokenized example) or `List[List[str]]` (batch of pretokenized examples).
Here is my code.
from transformers import CLIPTokenizer
tokenizer = CLIPTokenizer.from_pretrained("CIDAS/clipseg-rd64-refined")
prompts = ["orange", "violet", "green", "black"]
prompts = tokenizer(prompts, padding=True, return_tensors="pt")
from torchvision import transforms
to_tensor = transforms.ToTensor()
image = to_tensor(image)
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)
prompts.to(device)
image.to(device)
inputs = processor(text=prompts, images=[image] * len(prompts), padding="max_length", return_tensors="pt")
Running this produces the error shown above. Could you please help me?
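As far as I can tell from the traceback, the processor passes the text argument on to its tokenizer, which only accepts a str, a List[str], or a List[List[str]]. In the code above, prompts has already been reassigned to the output of CLIPTokenizer, which is a BatchEncoding of tensors, so the check fails. A tiny illustration of the difference:

from transformers import CLIPTokenizer

tokenizer = CLIPTokenizer.from_pretrained("CIDAS/clipseg-rd64-refined")

prompts = ["orange", "violet", "green", "black"]
print(type(prompts))   # list of str: a valid text input for the processor

prompts = tokenizer(prompts, padding=True, return_tensors="pt")
print(type(prompts))   # BatchEncoding: no longer valid as the processor's text argument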
Hi, I'm new to NLP, so I don't know how to pass the prompts into the processor. I think the processor requires a list of strings, but since I want to use the GPU, I thought the prompts had to be converted into tensors first. I'm confused about this. Could you please help me?
I see. Should it be done as follows?
inputs = processor(text=prompts, images=[image] * len(prompts), padding="max_length", return_tensors="pt")
inputs.to(device)
with torch.no_grad():
    outputs = model(**inputs)
Because that should speed it up.
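For reference, a minimal sketch of that flow, under the assumption that the prompts stay plain strings and image is still the original PIL image (the processor handles tokenization and image preprocessing itself, so the separate CLIPTokenizer and ToTensor steps are dropped), with model, processor, and device set up as earlier in the thread:

prompts = ["orange", "violet", "green", "black"]  # plain strings, not tokenizer output

# The processor tokenizes the prompts and converts the PIL image to tensors.
inputs = processor(text=prompts, images=[image] * len(prompts), padding="max_length", return_tensors="pt")

# inputs is a BatchEncoding, so a single .to(device) call moves every tensor in it.
inputs = inputs.to(device)

# predict on the GPU
with torch.no_grad():
    outputs = model(**inputs)

The forward pass then runs entirely on the GPU, which is where the speed-up comes from.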