"""Match a volunteer's normalised body ratios against reference body shapes.

Each reference shape is one row of the CSV named by ``BODY_SHAPE_MEASURES``;
the comparison metric is cosine similarity over the ``RATIOS_TO_USE`` columns.
"""
import re

import pandas as pd
from numpy import dot
from numpy.linalg import norm

from body_shape_lookup import body_shape_lookup

# CSV holding one row of ratios per reference body shape. The path is
# relative, so it is resolved against the current working directory.
BODY_SHAPE_MEASURES = "body_shape_measures_normalised_updated.csv"

# Columns used as the feature vector when comparing a volunteer with a
# reference body shape.
RATIOS_TO_USE = [
    'shoulder_to_hip_distance',
    'hip_to_ankle_distance',
    'thigh_to_torso_ratio_normalised',
    'upper_to_lower_torso_normalised_ratio',
    'shoulder_to_hip_ratio',
    'thigh_to_body_ratio',
    'upper_torso_to_body_ratio',
]


def extract_digits(input_string):
    """Return the first run of decimal digits in *input_string* as an int.

    For identifiers such as ``'1A'`` or ``'12B'`` this yields ``1`` and ``12``.

    Args:
        input_string: Text to search.

    Returns:
        The integer value of the first digit run, or ``-1`` when the string
        contains no digit.
    """
    match = re.search(r'\d+', input_string)
    if match is None:
        return -1  # not found
    return int(match.group())


def is_match(row):
    """Tell whether a record's true class is among its top three ranked shapes.

    The true class is the number embedded in ``row['Volunteer_ID']`` (see
    ``extract_digits``).

    Args:
        row: Mapping-like record providing ``'Volunteer_ID'`` and the
            ``'Rank_1_Body_Shape'`` .. ``'Rank_3_Body_Shape'`` entries.

    Returns:
        True when the true class equals any of the three ranked body shapes.
    """
    ground_truth = extract_digits(row['Volunteer_ID'])
    # Generator keeps the original short-circuit order: later ranks are only
    # looked up when the earlier ones did not match.
    return any(
        ground_truth == row[column]
        for column in (
            'Rank_1_Body_Shape',
            'Rank_2_Body_Shape',
            'Rank_3_Body_Shape',
        )
    )


def select_body_shape(normalised_body_shape_measures):
    """Look up the reference body shape most similar to a volunteer.

    Reads the reference shapes from ``BODY_SHAPE_MEASURES``, scores every
    reference row against the volunteer's ``RATIOS_TO_USE`` values with cosine
    similarity, prints each score and the three best matches, and passes the
    best match's 1-based row number to ``body_shape_lookup``.

    Args:
        normalised_body_shape_measures: DataFrame with an ``'id'`` column and
            the ``RATIOS_TO_USE`` columns.
            NOTE(review): presumably holds a single volunteer row -- confirm
            against the caller.

    Returns:
        Whatever ``body_shape_lookup`` returns for the best-matching shape of
        the first row, or ``None`` when the frame has no rows.
    """
    # Keep only the columns that form the comparison vector.
    reference_ratios = pd.read_csv(BODY_SHAPE_MEASURES)[RATIOS_TO_USE]

    for _, volunteer_row in normalised_body_shape_measures.iterrows():
        volunteer_id = volunteer_row['id']
        print(f"\nProcessing volunteer {volunteer_id}")

        volunteer_ratios = volunteer_row[RATIOS_TO_USE]
        # Loop-invariant: the volunteer's norm is the same for every shape.
        volunteer_norm = norm(volunteer_ratios)

        # Best-first list of (similarity, body shape number). Placeholders
        # score below any cosine similarity, so real scores displace them.
        top_scores = [(-1000, 'n/a')] * 3

        for body_index, body_shape_row in reference_ratios.iterrows():
            # read_csv without index_col yields a 0-based RangeIndex, so
            # body_index + 1 is the 1-based row number of the shape.
            body_shape_number = body_index + 1

            # Cosine similarity between the two ratio vectors.
            similarity = dot(volunteer_ratios, body_shape_row) / (
                volunteer_norm * norm(body_shape_row)
            )

            # Insert at the first slot it beats, then trim back to three.
            # Strict '>' keeps the earlier shape ahead on ties and never
            # admits a NaN similarity.
            for slot, (score, _) in enumerate(top_scores):
                if similarity > score:
                    top_scores.insert(slot, (similarity, body_shape_number))
                    top_scores = top_scores[:3]
                    break

            print(f"Volunteer {volunteer_id} (body shape {body_shape_number}) Similarity:\t{similarity:.3f}")

        # Print the top 3 best body shapes and scores for the current volunteer
        print(f"Volunteer {volunteer_id} top 3 body shapes and scores are:")
        for rank, (score, body_shape) in enumerate(top_scores, start=1):
            print(f"Rank {rank}: Body Shape {body_shape} with score {score:.3f}")

        # NOTE(review): the result is returned for the first row processed;
        # confirm this is intended if multi-row frames are ever passed in.
        return body_shape_lookup(top_scores[0][1])

    # No volunteer rows were supplied.
    return None