# Source: https://github.com/openai/CLIP/blob/main/notebooks/Prompt_Engineering_for_ImageNet.ipynb
# Full set of prompt templates for ImageNet-style class names.
# Every entry contains exactly one positional `{}` placeholder, so a prompt is
# built with e.g. `template.format('dog')`.
# NOTE(review): ungrammatical entries such as 'a origami {}.' and
# 'a embroidered {}.' are kept byte-for-byte; presumably they mirror the
# upstream list -- confirm before "correcting" them, since any change to the
# text changes the prompts produced at runtime.
IMAGENET_TEMPLATES = [
    'a bad photo of a {}.',
    'a photo of many {}.',
    'a sculpture of a {}.',
    'a photo of the hard to see {}.',
    'a low resolution photo of the {}.',
    'a rendering of a {}.',
    'graffiti of a {}.',
    'a bad photo of the {}.',
    'a cropped photo of the {}.',
    'a tattoo of a {}.',
    'the embroidered {}.',
    'a photo of a hard to see {}.',
    'a bright photo of a {}.',
    'a photo of a clean {}.',
    'a photo of a dirty {}.',
    'a dark photo of the {}.',
    'a drawing of a {}.',
    'a photo of my {}.',
    'the plastic {}.',
    'a photo of the cool {}.',
    'a close-up photo of a {}.',
    'a black and white photo of the {}.',
    'a painting of the {}.',
    'a painting of a {}.',
    'a pixelated photo of the {}.',
    'a sculpture of the {}.',
    'a bright photo of the {}.',
    'a cropped photo of a {}.',
    'a plastic {}.',
    'a photo of the dirty {}.',
    'a jpeg corrupted photo of a {}.',
    'a blurry photo of the {}.',
    'a photo of the {}.',
    'a good photo of the {}.',
    'a rendering of the {}.',
    'a {} in a video game.',
    'a photo of one {}.',
    'a doodle of a {}.',
    'a close-up photo of the {}.',
    'a photo of a {}.',
    'the origami {}.',
    'the {} in a video game.',
    'a sketch of a {}.',
    'a doodle of the {}.',
    'a origami {}.',
    'a low resolution photo of a {}.',
    'the toy {}.',
    'a rendition of the {}.',
    'a photo of the clean {}.',
    'a photo of a large {}.',
    'a rendition of a {}.',
    'a photo of a nice {}.',
    'a photo of a weird {}.',
    'a blurry photo of a {}.',
    'a cartoon {}.',
    'art of a {}.',
    'a sketch of the {}.',
    'a embroidered {}.',
    'a pixelated photo of a {}.',
    'itap of the {}.',
    'a jpeg corrupted photo of the {}.',
    'a good photo of a {}.',
    'a plushie {}.',
    'a photo of the nice {}.',
    'a photo of the small {}.',
    'a photo of the weird {}.',
    'the cartoon {}.',
    'art of the {}.',
    'a drawing of the {}.',
    'a photo of the large {}.',
    'a black and white photo of a {}.',
    'the plushie {}.',
    'a dark photo of a {}.',
    'itap of a {}.',
    'graffiti of the {}.',
    'a toy {}.',
    'itap of my {}.',
    'a photo of a cool {}.',
    'a photo of a small {}.',
    'a tattoo of the {}.',
    # Disabled extra template, kept for reference:
    # 'A photo of a {} in the scene.',
]
# v1: 59.0875
# NOTE(review): the number is presumably the evaluation score recorded for this
# variant; the metric and dataset are not stated in this file.
# Hand-picked subset of prompt templates; each entry has exactly one positional
# `{}` placeholder. The final entry intentionally differs in form (capital 'A',
# no trailing period) and is kept byte-for-byte.
IMAGENET_TEMPLATES_SELECT = [
    'itap of a {}.',
    'a bad photo of the {}.',
    'a origami {}.',
    'a photo of the large {}.',
    'a {} in a video game.',
    'art of the {}.',
    'a photo of the small {}.',
    'A photo of a {} in the scene',
]
# v2: 58.2584 | |
# IMAGENET_TEMPLATES_SELECT = [ | |
# 'itap of a {}', | |
# 'a bad photo of the {}', | |
# 'a origami {}', | |
# 'a photo of the large {}', | |
# 'art of the {}', | |
# 'a photo of the small {}', | |
# 'A photo of a {} in the scene', | |
# ] | |
# v3: 59.1006 | |
# IMAGENET_TEMPLATES_SELECT = [ | |
# 'itap of a {}.', | |
# 'a bad photo of the {}.', | |
# 'a origami {}.', | |
# 'a photo of the large {}.', | |
# 'art of the {}.', | |
# 'a photo of the small {}.', | |
# 'a cropped photo of a {}.', | |
# 'A photo of a {} in the scene', | |
# 'itap of a {} in the scene', | |
# 'a bad photo of the {} in the scene', | |
# 'a origami {} in the scene', | |
# 'a photo of the large {} in the scene', | |
# 'art of the {} in the scene', | |
# 'a photo of the small {} in the scene', | |
# 'a cropped photo of a {} in the scene', | |
# ] | |
# v4: 59.8659 | |
# IMAGENET_TEMPLATES_SELECT = [ | |
# 'a bad photo of the {}.', | |
# 'a photo of the large {}.', | |
# 'art of the {}.', | |
# 'a photo of the small {}.', | |
# 'a cropped photo of a {}.', | |
# 'A photo of a {} in the scene', | |
# 'a bad photo of the {} in the scene', | |
# 'a photo of the large {} in the scene', | |
# 'art of the {} in the scene', | |
# 'a photo of the small {} in the scene', | |
# 'a cropped photo of a {} in the scene', | |
# 'a photo of a masked {} in the scene', | |
# ] | |
# v5: 59.9346 | |
# IMAGENET_TEMPLATES_SELECT = [ | |
# 'a bad photo of the {}.', | |
# 'a photo of the large {}.', | |
# 'art of the {}.', | |
# 'a photo of the small {}.', | |
# 'a cropped photo of a {}.', | |
# 'This is a photo of a {}', | |
# 'This is a photo of a small {}', | |
# 'This is a photo of a medium {}', | |
# 'This is a photo of a large {}', | |
# 'A photo of a {} in the scene', | |
# 'a bad photo of the {} in the scene', | |
# 'a photo of the large {} in the scene', | |
# 'art of the {} in the scene', | |
# 'a photo of the small {} in the scene', | |
# 'a cropped photo of a {} in the scene', | |
# 'a photo of a masked {} in the scene', | |
# 'There is a {} in the scene', | |
# 'There is the {} in the scene', | |
# 'This is a {} in the scene', | |
# 'This is the {} in the scene', | |
# 'This is one {} in the scene', | |
# ] | |
# v6: 60.6611 | |
# IMAGENET_TEMPLATES_SELECT = [ | |
# 'a bad photo of the {}.', | |
# 'a photo of the large {}.', | |
# 'art of the {}.', | |
# 'a photo of the small {}.', | |
# 'a cropped photo of a {}.', | |
# 'This is a photo of a {}', | |
# 'This is a photo of a small {}', | |
# 'This is a photo of a medium {}', | |
# 'This is a photo of a large {}', | |
# 'A photo of a {} in the scene', | |
# 'a bad photo of the {} in the scene', | |
# 'a photo of the large {} in the scene', | |
# 'art of the {} in the scene', | |
# 'a photo of the small {} in the scene', | |
# 'a cropped photo of a {} in the scene', | |
# 'a photo of a masked {} in the scene', | |
# 'There is a {} in the scene', | |
# 'There is the {} in the scene', | |
# 'This is a {} in the scene', | |
# 'This is the {} in the scene', | |
# 'This is one {} in the scene', | |
# | |
# 'There is a masked {} in the scene', | |
# 'There is the masked {} in the scene', | |
# 'This is a masked {} in the scene', | |
# 'This is the masked {} in the scene', | |
# 'This is one masked {} in the scene', | |
# ] | |
# v7: 60.4529 | |
# IMAGENET_TEMPLATES_SELECT = [ | |
# 'a bad photo of the {}.', | |
# 'a photo of the large {}.', | |
# 'art of the {}.', | |
# 'a photo of the small {}.', | |
# 'a cropped photo of a {}.', | |
# 'This is a photo of a {}', | |
# 'This is a photo of a small {}', | |
# 'This is a photo of a medium {}', | |
# 'This is a photo of a large {}', | |
# 'A photo of a {} in the scene', | |
# 'a bad photo of the {} in the scene', | |
# 'a photo of the large {} in the scene', | |
# 'art of the {} in the scene', | |
# 'a photo of the small {} in the scene', | |
# 'a cropped photo of a {} in the scene', | |
# 'a photo of a masked {} in the scene', | |
# 'There is a {} in the scene', | |
# 'There is the {} in the scene', | |
# 'This is a {} in the scene', | |
# 'This is the {} in the scene', | |
# 'This is one {} in the scene', | |
# | |
# 'There is a cropped {} in the scene', | |
# 'There is the cropped {} in the scene', | |
# 'This is a cropped {} in the scene', | |
# 'This is the cropped {} in the scene', | |
# 'This is one cropped {} in the scene', | |
# | |
# 'a cropped photo of the {}', | |
# 'a cropped photo of a {}', | |
# 'a cropped photo of one {}', | |
# | |
# 'There is a masked {} in the scene', | |
# 'There is the masked {} in the scene', | |
# 'This is a masked {} in the scene', | |
# 'This is the masked {} in the scene', | |
# 'This is one masked {} in the scene', | |
# ] | |
# v8: 60.7057 | |
# IMAGENET_TEMPLATES_SELECT = [ | |
# 'a bad photo of the {}.', | |
# 'a photo of the large {}.', | |
# 'a photo of the small {}.', | |
# 'a cropped photo of a {}.', | |
# 'This is a photo of a {}', | |
# 'This is a photo of a small {}', | |
# 'This is a photo of a medium {}', | |
# 'This is a photo of a large {}', | |
# | |
# 'This is a masked photo of a {}', | |
# 'This is a masked photo of a small {}', | |
# 'This is a masked photo of a medium {}', | |
# 'This is a masked photo of a large {}', | |
# | |
# 'A photo of a {} in the scene', | |
# 'a bad photo of the {} in the scene', | |
# 'a photo of the large {} in the scene', | |
# 'a photo of the small {} in the scene', | |
# 'a cropped photo of a {} in the scene', | |
# 'a photo of a masked {} in the scene', | |
# 'There is a {} in the scene', | |
# 'There is the {} in the scene', | |
# 'This is a {} in the scene', | |
# 'This is the {} in the scene', | |
# 'This is one {} in the scene', | |
# | |
# 'There is a masked {} in the scene', | |
# 'There is the masked {} in the scene', | |
# 'This is a masked {} in the scene', | |
# 'This is the masked {} in the scene', | |
# 'This is one masked {} in the scene', | |
# ] | |
# v9: 60.8775 | |
# IMAGENET_TEMPLATES_SELECT = [ | |
# 'a bad photo of the {}.', | |
# 'a photo of the large {}.', | |
# 'a photo of the small {}.', | |
# 'a cropped photo of a {}.', | |
# 'This is a photo of a {}', | |
# 'This is a photo of a small {}', | |
# 'This is a photo of a medium {}', | |
# 'This is a photo of a large {}', | |
# | |
# 'This is a masked photo of a {}', | |
# 'This is a masked photo of a small {}', | |
# 'This is a masked photo of a medium {}', | |
# 'This is a masked photo of a large {}', | |
# | |
# 'This is a cropped photo of a {}', | |
# 'This is a cropped photo of a small {}', | |
# 'This is a cropped photo of a medium {}', | |
# 'This is a cropped photo of a large {}', | |
# | |
# 'A photo of a {} in the scene', | |
# 'a bad photo of the {} in the scene', | |
# 'a photo of the large {} in the scene', | |
# 'a photo of the small {} in the scene', | |
# 'a cropped photo of a {} in the scene', | |
# 'a photo of a masked {} in the scene', | |
# 'There is a {} in the scene', | |
# 'There is the {} in the scene', | |
# 'This is a {} in the scene', | |
# 'This is the {} in the scene', | |
# 'This is one {} in the scene', | |
# | |
# 'There is a masked {} in the scene', | |
# 'There is the masked {} in the scene', | |
# 'This is a masked {} in the scene', | |
# 'This is the masked {} in the scene', | |
# 'This is one masked {} in the scene', | |
# ] | |
# v9
# Same entries, in the same order, as the commented-out "v9" variant of
# IMAGENET_TEMPLATES_SELECT earlier in this file.
# Each entry has exactly one positional `{}` placeholder. Only the first four
# entries end with a period; the rest are kept without one, byte-for-byte.
IMAGENET_TEMPLATES_SELECT_CLIP = [
    # Plain photo prompts.
    'a bad photo of the {}.',
    'a photo of the large {}.',
    'a photo of the small {}.',
    'a cropped photo of a {}.',
    # "This is a photo of ..." with size qualifiers.
    'This is a photo of a {}',
    'This is a photo of a small {}',
    'This is a photo of a medium {}',
    'This is a photo of a large {}',
    # "This is a masked photo of ...".
    'This is a masked photo of a {}',
    'This is a masked photo of a small {}',
    'This is a masked photo of a medium {}',
    'This is a masked photo of a large {}',
    # "This is a cropped photo of ...".
    'This is a cropped photo of a {}',
    'This is a cropped photo of a small {}',
    'This is a cropped photo of a medium {}',
    'This is a cropped photo of a large {}',
    # "... in the scene" prompts.
    'A photo of a {} in the scene',
    'a bad photo of the {} in the scene',
    'a photo of the large {} in the scene',
    'a photo of the small {} in the scene',
    'a cropped photo of a {} in the scene',
    'a photo of a masked {} in the scene',
    'There is a {} in the scene',
    'There is the {} in the scene',
    'This is a {} in the scene',
    'This is the {} in the scene',
    'This is one {} in the scene',
    # Masked "... in the scene" prompts.
    'There is a masked {} in the scene',
    'There is the masked {} in the scene',
    'This is a masked {} in the scene',
    'This is the masked {} in the scene',
    'This is one masked {} in the scene',
]
# v10, for comparison | |
# IMAGENET_TEMPLATES_SELECT_CLIP = [ | |
# 'a photo of a {}.', | |
# | |
# 'This is a photo of a {}', | |
# 'This is a photo of a small {}', | |
# 'This is a photo of a medium {}', | |
# 'This is a photo of a large {}', | |
# | |
# 'This is a photo of a {}', | |
# 'This is a photo of a small {}', | |
# 'This is a photo of a medium {}', | |
# 'This is a photo of a large {}', | |
# | |
# 'a photo of a {} in the scene', | |
# 'a photo of a {} in the scene', | |
# | |
# 'There is a {} in the scene', | |
# 'There is the {} in the scene', | |
# 'This is a {} in the scene', | |
# 'This is the {} in the scene', | |
# 'This is one {} in the scene', | |
# ] | |
# Prompt templates that use NAMED placeholders, unlike the positional `{}`
# templates elsewhere in this file:
#   {category} -- present in every entry (the class name)
#   {article}  -- present in some entries (the indefinite article to use)
# Build a prompt with e.g. `t.format(article='an', category='apple')`;
# str.format ignores the unused `article` keyword for entries without it.
# NOTE(review): the last entry hard-codes 'a' instead of using {article};
# kept byte-for-byte -- presumably it mirrors the upstream list, confirm
# before changing because it alters the generated prompts.
ViLD_templates = [
    # Scene prompts.
    'There is {article} {category} in the scene.',
    'There is the {category} in the scene.',
    'a photo of {article} {category} in the scene.',
    'a photo of the {category} in the scene.',
    'a photo of one {category} in the scene.',
    # Basic photo prompts.
    'itap of {article} {category}.',
    'itap of my {category}.',
    'itap of the {category}.',
    'a photo of {article} {category}.',
    'a photo of my {category}.',
    'a photo of the {category}.',
    'a photo of one {category}.',
    'a photo of many {category}.',
    # Photo / object quality qualifiers.
    'a good photo of {article} {category}.',
    'a good photo of the {category}.',
    'a bad photo of {article} {category}.',
    'a bad photo of the {category}.',
    'a photo of a nice {category}.',
    'a photo of the nice {category}.',
    'a photo of a cool {category}.',
    'a photo of the cool {category}.',
    'a photo of a weird {category}.',
    'a photo of the weird {category}.',
    # Size and cleanliness qualifiers.
    'a photo of a small {category}.',
    'a photo of the small {category}.',
    'a photo of a large {category}.',
    'a photo of the large {category}.',
    'a photo of a clean {category}.',
    'a photo of the clean {category}.',
    'a photo of a dirty {category}.',
    'a photo of the dirty {category}.',
    # Lighting, visibility and image-degradation qualifiers.
    'a bright photo of {article} {category}.',
    'a bright photo of the {category}.',
    'a dark photo of {article} {category}.',
    'a dark photo of the {category}.',
    'a photo of a hard to see {category}.',
    'a photo of the hard to see {category}.',
    'a low resolution photo of {article} {category}.',
    'a low resolution photo of the {category}.',
    'a cropped photo of {article} {category}.',
    'a cropped photo of the {category}.',
    'a close-up photo of {article} {category}.',
    'a close-up photo of the {category}.',
    'a jpeg corrupted photo of {article} {category}.',
    'a jpeg corrupted photo of the {category}.',
    'a blurry photo of {article} {category}.',
    'a blurry photo of the {category}.',
    'a pixelated photo of {article} {category}.',
    'a pixelated photo of the {category}.',
    'a black and white photo of the {category}.',
    'a black and white photo of {article} {category}.',
    # Material / medium variants.
    'a plastic {category}.',
    'the plastic {category}.',
    'a toy {category}.',
    'the toy {category}.',
    'a plushie {category}.',
    'the plushie {category}.',
    'a cartoon {category}.',
    'the cartoon {category}.',
    'an embroidered {category}.',
    'the embroidered {category}.',
    'a painting of the {category}.',
    'a painting of a {category}.',
]