Spaces:
Sleeping
Sleeping
WebashalarForML
committed on
Commit
•
8dba865
1
Parent(s):
cf24fe3
Update utility/utils.py
Browse files- utility/utils.py +7 -5
utility/utils.py
CHANGED
@@ -413,22 +413,21 @@ def remove_duplicates_case_insensitive(data_dict):
|
|
413 |
|
414 |
# Update the dictionary with unique values
|
415 |
data_dict[key] = unique_list
|
|
|
416 |
|
417 |
# Process the model output for parsed result
|
418 |
def process_resume_data(LLMdata,cont_data,extracted_text):
|
419 |
|
420 |
-
# Apply the function to the data
|
421 |
-
LLMdata=remove_duplicates_case_insensitive(LLMdata)
|
422 |
# Removing duplicate emails
|
423 |
unique_emails = []
|
424 |
for email in cont_data['emails']:
|
425 |
-
if email
|
426 |
unique_emails.append(email)
|
427 |
|
428 |
-
# Removing duplicate links
|
429 |
unique_links = []
|
430 |
for link in cont_data['links_RE']:
|
431 |
-
if link
|
432 |
unique_links.append(link)
|
433 |
|
434 |
# Removing duplicate phone numbers
|
@@ -448,6 +447,9 @@ def process_resume_data(LLMdata,cont_data,extracted_text):
|
|
448 |
LLMdata['Email'] += unique_emails
|
449 |
LLMdata['Link'] += unique_links
|
450 |
LLMdata['Contact'] += unique_numbers
|
|
|
|
|
|
|
451 |
|
452 |
# Initialize the processed data dictionary
|
453 |
processed_data = {
|
|
|
413 |
|
414 |
# Update the dictionary with unique values
|
415 |
data_dict[key] = unique_list
|
416 |
+
return data_dict
|
417 |
|
418 |
# Process the model output for parsed result
|
419 |
def process_resume_data(LLMdata,cont_data,extracted_text):
|
420 |
|
|
|
|
|
421 |
# Removing duplicate emails
|
422 |
unique_emails = []
|
423 |
for email in cont_data['emails']:
|
424 |
+
if not any(email.lower() == existing_email.lower() for existing_email in LLMdata['Email']):
|
425 |
unique_emails.append(email)
|
426 |
|
427 |
+
# Removing duplicate links (case insensitive)
|
428 |
unique_links = []
|
429 |
for link in cont_data['links_RE']:
|
430 |
+
if not any(link.lower() == existing_link.lower() for existing_link in LLMdata['Link']):
|
431 |
unique_links.append(link)
|
432 |
|
433 |
# Removing duplicate phone numbers
|
|
|
447 |
LLMdata['Email'] += unique_emails
|
448 |
LLMdata['Link'] += unique_links
|
449 |
LLMdata['Contact'] += unique_numbers
|
450 |
+
|
451 |
+
# Apply the function to the data
|
452 |
+
LLMdata=remove_duplicates_case_insensitive(LLMdata)
|
453 |
|
454 |
# Initialize the processed data dictionary
|
455 |
processed_data = {
|