Spaces:

aletrn
/

ai-pronunciation-trainer

Running

ai-pronunciation-trainer / WordMetrics.py

alessandro trinca tornidor

doc: add/update docstring and typing hints

0700cb3 9 months ago

2.43 kB

	import numpy as np


	# ref from https://gitlab.com/-/snippets/1948157
	# For some variants, look here https://en.wikibooks.org/wiki/Algorithm_Implementation/Strings/Levenshtein_distance#Python

	# Pure/numpy python
	def edit_distance_python2(a: str, b: str) -> np.ndarray \| int:
	"""A pure python levenshtein distance implementation"""
	# This version is commutative, so as an optimization we force \|a\|>=\|b\|
	if len(a) < len(b):
	return edit_distance_python(b, a)
	if len(b) == 0: # Can deal with empty sequences faster
	return len(a)
	# Only two rows are really needed: the one currently filled in, and the previous
	distances = []
	distances.append([i for i in range(len(b) + 1)])
	distances.append([0 for _ in range(len(b) + 1)])
	# We can prefill the first row:
	costs = [0 for _ in range(3)]
	for i, a_token in enumerate(a, start=1):
	distances[1][0] += 1 # Deals with the first column.
	for j, b_token in enumerate(b, start=1):
	costs[0] = distances[1][j - 1] + 1
	costs[1] = distances[0][j] + 1
	costs[2] = distances[0][j - 1] + (0 if a_token == b_token else 1)
	distances[1][j] = min(costs)
	# Move to the next row:
	distances[0][:] = distances[1][:]
	return distances[1][len(b)]


	#https://stackabuse.com/levenshtein-distance-and-text-similarity-in-python/
	def edit_distance_python(seq1: str, seq2: str) -> np.ndarray:
	"""A levenshtein distance implementation.

	Args:
	seq1 (str): First sequence.
	seq2 (str): Second sequence.

	Returns:
	np.ndarray: The levenshtein distance between the two sequences.
	"""
	size_x = len(seq1) + 1
	size_y = len(seq2) + 1
	matrix = np.zeros((size_x, size_y))
	for x in range(size_x):
	matrix[x, 0] = x
	for y in range(size_y):
	matrix[0, y] = y

	for x in range(1, size_x):
	for y in range(1, size_y):
	if seq1[x - 1] == seq2[y - 1]:
	matrix[x, y] = min(
	matrix[x - 1, y] + 1,
	matrix[x - 1, y - 1],
	matrix[x, y - 1] + 1
	)
	else:
	matrix[x, y] = min(
	matrix[x - 1, y] + 1,
	matrix[x - 1, y - 1] + 1,
	matrix[x, y - 1] + 1
	)
	#print (matrix)
	return matrix[size_x - 1, size_y - 1]