Spaces:

MarkdenOuden
/

Ancient_Greek_Word2Vec

Runtime error

App Files Files Community

Ancient_Greek_Word2Vec / word2vec.py

Mark7549

Add models to repo

5f3cfd3 7 months ago

raw

history blame

2.11 kB

	from gensim.models import Word2Vec
	from collections import defaultdict

	def load_word2vec_model(model_path):
	    '''
	    Load a word2vec model from a file.

	    model_path: location of the saved model; it is passed unchanged to
	    ``Word2Vec.load``.

	    Returns whatever ``Word2Vec.load`` returns for that path.
	    '''
	    return Word2Vec.load(model_path)


	def get_word_vector(model, word):
	    '''
	    Return the word vector of a word.

	    model: object exposing a ``wv`` attribute that can be indexed by word.
	    word: the key to look up in ``model.wv``.

	    Any error raised by the ``model.wv[word]`` lookup (for example for a
	    word that is not present) propagates to the caller.
	    '''
	    return model.wv[word]


	def iterate_over_words(model):
	    '''
	    Iterate over all words in the vocabulary and print their vectors.

	    One line is printed per entry of ``model.wv.key_to_index``, in the form
	    ``<index> Word: <word>, Vector: <vector>``.
	    '''
	    # The index printed is the one stored in key_to_index, so no separate
	    # manual counter is needed.
	    for word, index in model.wv.key_to_index.items():
	        vector = get_word_vector(model, word)
	        print(f'{index} Word: {word}, Vector: {vector}')


	def model_dictionary(model):
	    '''
	    Return the dictionary of the word2vec model.

	    Key is the word and value is the vector of the word, for every word in
	    ``model.wv.key_to_index``.

	    The result is a ``defaultdict(list)``: looking up a word that is not in
	    the vocabulary returns (and stores) an empty list instead of raising
	    ``KeyError``.
	    '''
	    # Named ``vectors`` rather than ``dict`` so the builtin is not shadowed.
	    vectors = defaultdict(list)
	    for word in model.wv.key_to_index:
	        vectors[word] = get_word_vector(model, word)

	    return vectors


	def dot_product(vector_a, vector_b):
	    '''
	    Return the dot product of two vectors.

	    The vectors are paired element by element with ``zip``, so if their
	    lengths differ the extra elements of the longer one are ignored.
	    Two empty vectors give 0.
	    '''
	    return sum(a * b for a, b in zip(vector_a, vector_b))


	def magnitude(vector):
	    '''
	    Return the magnitude of a vector.

	    Computed as the square root of the sum of the squared components.
	    An empty vector has magnitude 0.
	    '''
	    return sum(x ** 2 for x in vector) ** 0.5


	def cosine_similarity(vector_a, vector_b):
	    '''
	    Return the cosine similarity of two vectors.

	    This is the dot product of the vectors divided by the product of their
	    magnitudes. If either vector has zero magnitude (including an empty
	    vector) the similarity is undefined and 0.0 is returned instead.
	    '''
	    dot_prod = dot_product(vector_a, vector_b)
	    mag_a = magnitude(vector_a)
	    mag_b = magnitude(vector_b)

	    # Avoid division by zero
	    if mag_a == 0 or mag_b == 0:
	        return 0.0

	    return dot_prod / (mag_a * mag_b)


	def main():
	    '''
	    Load the archaic CBOW model and print the cosine similarity between the
	    vectors of two example words.
	    '''
	    model = load_word2vec_model('../models/archaic_cbow.model')
	    archaic_cbow_dict = model_dictionary(model)

	    score = cosine_similarity(archaic_cbow_dict['Πελοπόννησος'], archaic_cbow_dict['σπάργανον'])
	    print(score)

	    # vector = get_word_vector(model, 'ἀνήρ')
	    # print(vector)

	    # Iterate over all words and print their vectors
	    # iterate_over_words(model)


	# Run the demo only when this file is executed as a script, not on import.
	if __name__ == "__main__":
	    main()