File size: 1,317 Bytes
46e0dd0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity


def find_self_closest_match(sim_matrix, word_list):
    '''
    For every word, return the most similar *other* word in the same list.

    sim_matrix should be (n, n); entry [i, j] is the similarity between
    word i and word j (larger means closer).
    word_list should be (n,)

    Returns a list of n items where item i is the entry of word_list with
    the highest similarity to word i, excluding word i itself. Ties go to
    the lowest index. With a single word there is no other candidate, so
    that word is returned for itself. An empty matrix yields [].

    The caller's sim_matrix is left untouched.
    '''
    # Work on a float copy: the self-match mask below must not leak into the
    # caller's array, and -inf needs a floating dtype.
    sim = np.array(sim_matrix, dtype=float)
    n = sim.shape[0]
    if n == 0:
        return []
    # Mask self-similarity with -inf rather than 0: similarities may be
    # negative (e.g. cosine), and a 0 on the diagonal would then win the
    # argmax and report every word as its own closest match.
    diag = np.arange(n)
    sim[diag, diag] = -np.inf
    best = np.argmax(sim, axis=-1)
    return [word_list[ind] for ind in best]


def find_ref_closest_match(sim_matrix, word_list):
    '''
    For every query, return the reference word with the highest similarity.

    sim_matrix should be (n_ref, n_query); an ndarray or any nested
    sequence that np.asarray accepts.
    word_list should be (n_ref,)

    Returns a list of n_query items where item j is the entry of word_list
    whose row holds the largest value in column j. Ties go to the lowest
    reference index.
    '''
    sim = np.asarray(sim_matrix)
    # Reduce over the reference axis: one winning row index per query column.
    best = np.argmax(sim, axis=0)
    return [word_list[ind] for ind in best]

def sort_ref_closest_match(sim_matrix, word_list):
    '''
    Rank the reference words for every query by descending similarity.

    sim_matrix should be (n_ref, n_query); an ndarray or any nested
    sequence that np.asarray accepts.
    word_list should be (n_ref,)

    Returns a list of n_ref lists, each of length n_query. The outer index
    is the rank (0 = most similar) and the inner index is the query, i.e.
    result[k][j] is the k-th closest reference word for query j.
    '''
    sim = np.asarray(sim_matrix)
    # argsort is ascending along the reference axis; reversing the rows
    # turns it into descending order (best match first).
    ranked = np.argsort(sim, axis=0)[::-1]
    return [[word_list[ind] for ind in row] for row in ranked]