Spaces:
Runtime error
Runtime error
File size: 3,028 Bytes
c9ec478 c205b8f c9ec478 2c8cacd c9ec478 c205b8f c9ec478 c205b8f c9ec478 c205b8f c9ec478 c205b8f c9ec478 c205b8f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 |
import re
import pandas as pd
from numpy import dot
from numpy.linalg import norm
from body_shape_lookup import body_shape_lookup
# CSV file that is loaded as the reference body shape measures.
BODY_SHAPE_MEASURES = "body_shape_measures.csv"

# Names of the ratio columns selected from both the reference table and the
# volunteer measurements when they are compared.
RATIOS_TO_USE = [
    "shoulder_to_hip_distance",
    "hip_to_ankle_distance",
    "thigh_to_torso_ratio_normalised",
    "upper_to_lower_torso_normalised_ratio",
    "shoulder_to_hip_ratio",
    "thigh_to_body_ratio",
    "upper_torso_to_body_ratio",
]
def extract_digits(input_string):
    """Return the first run of digits found in ``input_string`` as an int.

    Identifiers such as '1A' or '12B' give 1 and 12 respectively.  When the
    string contains no digit at all, -1 is returned as a "not found" marker.
    """
    found = re.search(r'\d+', input_string)
    return -1 if found is None else int(found.group())
def is_match(row):
    """Tell whether any of the three ranked body shapes equals the ground truth.

    The ground-truth class is the number embedded in ``row['Volunteer_ID']``
    (obtained through ``extract_digits``); it is compared in order against
    ``row['Rank_1_Body_Shape']``, ``row['Rank_2_Body_Shape']`` and
    ``row['Rank_3_Body_Shape']``.
    """
    expected_class = extract_digits(row['Volunteer_ID'])
    # Stop at the first rank that matches, so later columns are only read
    # when the earlier ones did not match.
    for rank_column in ('Rank_1_Body_Shape', 'Rank_2_Body_Shape'):
        outcome = expected_class == row[rank_column]
        if outcome:
            return outcome
    return expected_class == row['Rank_3_Body_Shape']
def _cosine_similarity(first, second):
    """Return the cosine similarity between two equal-length ratio vectors."""
    return dot(first, second) / (norm(first) * norm(second))


def _update_top_scores(top_scores, similarity, body_shape_number):
    """Return the top-3 list with the new score inserted where it ranks.

    The comparison is a strict ``>``, so on ties the body shape seen first
    keeps the higher rank, and a NaN similarity never enters the ranking.
    The list is returned unchanged when the score does not make the top 3.
    """
    for position, (score, _) in enumerate(top_scores):
        if similarity > score:
            updated = list(top_scores)
            updated.insert(position, (similarity, body_shape_number))
            return updated[:3]
    return top_scores


def select_body_shape(normalised_body_shape_measures):
    """Pick the reference body shape most similar to the given measurements.

    Every row of ``normalised_body_shape_measures`` is compared, using cosine
    similarity over the ``RATIOS_TO_USE`` columns, with every row of the
    reference table loaded from ``BODY_SHAPE_MEASURES``.  Body shapes are
    numbered from the reference table's row index plus one.  Progress is
    printed to stdout and also collected into a text report.

    Args:
        normalised_body_shape_measures: DataFrame with an ``'id'`` column and
            the ``RATIOS_TO_USE`` columns, one row per volunteer.

    Returns:
        A tuple ``(body_shape_lookup(best_shape), calculation_information)``
        where ``best_shape`` is the rank-1 body shape number of the last
        volunteer processed and ``calculation_information`` is the report for
        all volunteers.  If no body shape scores above the placeholder
        (e.g. the reference table is empty), ``'n/a'`` is what gets passed
        to ``body_shape_lookup``.

    Raises:
        ValueError: if ``normalised_body_shape_measures`` has no rows, since
            there is then nothing to rank.
    """
    volunteers_df = normalised_body_shape_measures
    # Fail early with a clear message instead of an UnboundLocalError on
    # ``top_scores`` after a loop that never ran.
    if volunteers_df.empty:
        raise ValueError("normalised_body_shape_measures contains no rows")

    # load the body shape measures and keep only the ratio columns
    body_shape_df = pd.read_csv(BODY_SHAPE_MEASURES)
    body_shape_ratios = body_shape_df[RATIOS_TO_USE]

    # Report lines are gathered in a list and joined once at the end.
    report_lines = []

    for _, volunteer_row in volunteers_df.iterrows():
        print(f"\nProcessing volunteer {volunteer_row['id']}")
        volunteer_ratios = volunteer_row[RATIOS_TO_USE]

        # Placeholders guarantee three entries even with fewer body shapes.
        top_scores = [(-1000, 'n/a')] * 3

        for body_index, body_shape_row in body_shape_ratios.iterrows():
            similarity = _cosine_similarity(volunteer_ratios, body_shape_row)
            top_scores = _update_top_scores(top_scores, similarity, body_index + 1)

            line = f"(body shape {body_index + 1}) Similarity:\t{similarity:.3f}"
            print(line)
            report_lines.append(line)

        # Print the top 3 best body shapes and scores for the current volunteer
        print("Body shapes and scores are:")
        for rank, (score, body_shape) in enumerate(top_scores, start=1):
            line = f"Rank {rank}: Body Shape {body_shape} with score {score:.3f}"
            print(line)
            report_lines.append(line)

    calculation_information = "".join(f"{line}\n" for line in report_lines)

    # NOTE(review): the reviewed source had lost its indentation; this assumes
    # the return follows the volunteer loop (so the last volunteer's best
    # match is returned) -- confirm against the original file.
    body_shape_index = top_scores[0][1]
    return (body_shape_lookup(body_shape_index), calculation_information)