File size: 2,123 Bytes
404d2af 8b973ee 404d2af 8b973ee 404d2af 8b973ee 404d2af 8b973ee 404d2af 8b973ee 404d2af 8b973ee 404d2af 8b973ee 404d2af 8b973ee 404d2af 8b973ee 404d2af 8b973ee 404d2af 8b973ee 404d2af 8b973ee 404d2af 8b973ee 404d2af 8b973ee 404d2af |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 |
# Copyright 2019-present NAVER Corp.
# CC BY-NC-SA 3.0
# Available only for non-commercial use
import os, pdb
from tqdm import trange
from .dataset import Dataset
class RandomWebImages(Dataset):
    """1 million distractors from Oxford and Paris Revisited
    see http://ptak.felk.cvut.cz/revisitop/revisitop1m/
    """

    def __init__(self, start=0, end=1024, root="data/revisitop1m"):
        """Collect image file names for chunks [start, end).

        Each chunk i covers 4 of the 4096 hex-named folders of the
        revisitop1m layout. A per-chunk cache file
        "image_list_%d.txt" under *root* is read when present and
        (re)built by scanning the folders otherwise.

        Parameters:
            start, end: chunk index range (default: all 1024 chunks).
            root: dataset root containing the image folders and caches.
        """
        Dataset.__init__(self)
        self.root = root
        bar = None
        self.imgs = []
        for i in range(start, end):
            img_list_path = os.path.join(self.root, "image_list_%d.txt" % i)
            try:
                # read cached list; use a context manager so the file is
                # closed deterministically (the original relied on
                # refcount GC to close it)
                with open(img_list_path) as cache_file:
                    cached_imgs = [e.strip() for e in cache_file]
                assert cached_imgs, f"Cache '{img_list_path}' is empty!"
                self.imgs += cached_imgs
            except IOError:
                # cache miss: scan this chunk's folders and build it
                if bar is None:
                    bar = trange(start, 4 * end, desc="Caching")
                    bar.update(4 * i)
                # create it
                imgs = []
                for d in range(
                    i * 4, (i + 1) * 4
                ):  # 4096 folders in total, on average 256 each
                    key = hex(d)[2:].zfill(3)
                    folder = os.path.join(self.root, key)
                    if not os.path.isdir(folder):
                        continue
                    imgs += [f for f in os.listdir(folder) if verify_img(folder, f)]
                    bar.update(1)
                assert imgs, f"No images found in {folder}/"
                # write the cache with a context manager so it is flushed
                # and closed even if a later chunk raises
                with open(img_list_path, "w") as cache_file:
                    cache_file.write("\n".join(imgs))
                self.imgs += imgs
        if bar:
            bar.update(bar.total - bar.n)
        self.nimg = len(self.imgs)

    def get_key(self, i):
        """Return the relative path of image *i*: "<3-hex-folder>/<name>".

        File names start with their 3-hex-digit folder key, so the
        folder component is recovered from the first 3 characters.
        """
        key = self.imgs[i]
        return os.path.join(key[:3], key)
def verify_img(folder, f):
    """Return True if *f* names a decodable .jpg image inside *folder*.

    Any failure (missing/corrupt file, or PIL unavailable) yields False
    rather than raising, since this is used as a best-effort filter.
    """
    # cheap extension check first: no path building or I/O needed
    if not f.endswith(".jpg"):
        return False
    path = os.path.join(folder, f)
    try:
        from PIL import Image

        # context manager closes the underlying file handle; PIL opens
        # files lazily and the original leaked one fd per image
        with Image.open(path) as img:
            img.convert("RGB")  # try to decode it
        return True
    except Exception:
        # narrow from bare `except:` so KeyboardInterrupt/SystemExit
        # still propagate during a long scan
        return False
|