File size: 3,362 Bytes
c4c7cee
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import os
import numpy as np
import reverse_geocoder


def get_loc(x):
    """Reverse-geocode the first entry of ``x`` into hierarchical location keys.

    ``x[0].tolist()`` is handed to ``reverse_geocoder.search`` and the first
    result is used.  (Presumably ``x`` is an array of (lat, lon) rows --
    confirm against the callers.)

    Returns a 4-tuple ``(country, region, sub_region, city)`` where each
    entry is a comma-joined key made of the coarser levels plus its own
    name, or ``None`` when that level is unavailable:

    * an empty country code makes every level ``None``;
    * an empty region makes region, sub-region and city ``None``;
    * sub-region and city are decided independently of each other, so a
      city key may contain an empty sub-region component.
    """
    hit = reverse_geocoder.search(x[0].tolist())[0]
    cc = hit.get("cc", "")
    admin1 = hit.get("admin1", "")
    admin2 = hit.get("admin2", "")
    name = hit.get("name", "")

    country_key = cc if cc != "" else None
    if country_key is None:
        return None, None, None, None

    region_key = cc + "," + admin1 if admin1 != "" else None
    if region_key is None:
        return country_key, None, None, None

    sub_region_key = None
    if admin2 != "":
        sub_region_key = cc + "," + admin1 + "," + admin2

    city_key = None
    if name != "":
        city_key = cc + "," + admin1 + "," + admin2 + "," + name

    return country_key, region_key, sub_region_key, city_key


def get_match_values(pred, gt, N, pos):
    """Update per-level sample and hit counters for one prediction.

    Both ``gt`` and ``pred`` are resolved with ``get_loc``.  For every level
    the ground truth provides, ``N[level]`` is incremented, and
    ``pos[level]`` is incremented as well when the predicted key equals the
    ground-truth key.  Levels are ``"country"``, ``"region"``,
    ``"sub-region"`` and ``"city"``; nothing below a missing country or a
    missing region is counted.  Both dicts are modified in place and nothing
    is returned.
    """
    gt_country, gt_region, gt_sub_region, gt_city = get_loc(gt)
    pr_country, pr_region, pr_sub_region, pr_city = get_loc(pred)

    def tally(level, truth, guess):
        # One more sample at this level; one more hit if the keys agree.
        N[level] += 1
        if truth == guess:
            pos[level] += 1

    if gt_country is None:
        return
    tally("country", gt_country, pr_country)

    if gt_region is None:
        return
    tally("region", gt_region, pr_region)

    # Sub-region and city are evaluated independently of one another.
    finer_levels = (
        ("sub-region", gt_sub_region, pr_sub_region),
        ("city", gt_city, pr_city),
    )
    for level, truth, guess in finer_levels:
        if truth is not None:
            tally(level, truth, guess)


def compute_print_accuracy(N, pos):
    """Average the accumulated metrics and print a two-line summary.

    Every ``pos[k]`` is divided by the matching ``N[k]`` **in place**, so
    after the call ``pos`` holds means rather than sums.

    Args:
        N: mapping from metric name to the number of samples counted.
        pos: mapping from metric name to the accumulated value; must hold
            the keys ``"country"``, ``"region"``, ``"sub-region"``,
            ``"city"``, ``"haversine"`` and ``"geoguessr"``.
    """
    for k, count in N.items():
        # A metric with no counted samples has no defined mean; report 0.0
        # instead of raising ZeroDivisionError.
        pos[k] = pos[k] / count if count else 0.0

    # pretty-print accuracy in percentage with 2 floating points
    print(
        f'Accuracy: {pos["country"]*100.0:.2f} (country), {pos["region"]*100.0:.2f} (region), {pos["sub-region"]*100.0:.2f} (sub-region), {pos["city"]*100.0:.2f} (city)'
    )
    print(
        f'Haversine: {pos["haversine"]:.2f} (haversine), {pos["geoguessr"]:.2f} (geoguessr)'
    )


def get_filenames(idx):
    """Build an in-memory autofaiss index from one feature directory.

    Every file in ``<args.features_parent>/features-<idx>/`` is loaded with
    ``np.load`` and the arrays are concatenated along axis 0 to form the
    embeddings passed to ``autofaiss.build_index``.

    Args:
        idx: suffix of the ``features-<idx>`` directory to read.

    Returns:
        ``(index, files)``: the first element of ``build_index``'s result,
        and the bare file names in the same order the embeddings were
        loaded.
    """
    from autofaiss import build_index

    # NOTE(review): `args` and `tqdm` are not defined in the visible part of
    # this module -- confirm they are provided at module level before use.
    path = os.path.join(args.features_parent, f"features-{idx}/")
    # List the directory once so names and full paths are guaranteed to line
    # up, even if the directory changes while the index is being built.
    files = os.listdir(path)
    full_files = [os.path.join(path, f) for f in files]
    index = build_index(
        embeddings=np.concatenate([np.load(f) for f in tqdm(full_files)], axis=0),
        nb_cores=12,
        save_on_disk=False,
    )[0]
    return index, files


def normalize(x):
    """Wrap (lat, lon) pairs in degrees into lat within ±90 and lon within ±180.

    Latitudes that run past a pole are reflected back and the matching
    longitude is shifted by 180 degrees, so the result names the same point
    on the sphere.

    Args:
        x: array of shape (N, 2) with latitude in column 0 and longitude in
            column 1.  The input is not modified.

    Returns:
        A new (N, 2) array of normalised (lat, lon) pairs.
    """
    x = np.asarray(x)
    lat = (x[:, 0] + 90) % 360 - 90  # now in [-90, 270)
    # Rows above 90 have crossed the north pole: mirror the latitude and move
    # to the opposite meridian.  np.where keeps this element-wise, so any
    # number of rows works and the caller's array is never written to.
    over_pole = lat > 90
    lat = np.where(over_pole, 180 - lat, lat)
    lon = np.where(over_pole, x[:, 1] + 180, x[:, 1])
    lon = (lon + 180) % 360 - 180
    return np.stack([lat, lon], axis=1)


def haversine(pred, gt, N, p):
    """Accumulate the haversine distance and GeoGuessr-style score of one sample.

    Both inputs are passed through ``normalize`` and then converted with
    ``np.radians``, so they are expected in degrees as (lat, lon) rows.
    Only the first row's distance (``c[0]``) is accumulated.

    Args:
        pred: predicted coordinates, array of (lat, lon) rows in degrees.
        gt: ground-truth coordinates, same layout as ``pred``.
        N: counter dict; ``N["haversine"]`` and ``N["geoguessr"]`` are each
            incremented by 1.
        p: accumulator dict; the distance is added to ``p["haversine"]`` and
            the score to ``p["geoguessr"]``.

    Nothing is returned; ``N`` and ``p`` are updated in place.
    """
    pred = np.radians(normalize(pred))
    gt = np.radians(normalize(gt))

    # calculate the difference in latitude and longitude between the predicted and ground truth points
    lat_diff = pred[:, 0] - gt[:, 0]
    lon_diff = pred[:, 1] - gt[:, 1]

    # calculate the haversine formula components
    lhs = np.sin(lat_diff / 2) ** 2
    rhs = np.cos(pred[:, 0]) * np.cos(gt[:, 0]) * np.sin(lon_diff / 2) ** 2
    # Rounding can push the sum a hair outside [0, 1] (e.g. near-antipodal
    # points), which would make sqrt(1 - a) NaN; clamp it first.
    a = np.clip(lhs + rhs, 0.0, 1.0)

    # calculate the final distance using the haversine formula
    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))

    # 6371 is presumably the Earth's mean radius in km, giving a distance in km.
    haversine_distance = 6371 * c[0]
    # Score decays exponentially with distance, topping out at 5000.
    geoguessr_sum = 5000 * np.exp(-haversine_distance / 1492.7)

    N["geoguessr"] += 1
    p["geoguessr"] += geoguessr_sum

    N["haversine"] += 1
    p["haversine"] += haversine_distance