Spaces:
Sleeping
Sleeping
WebashalarForML
committed on
Commit
•
3c5c7f9
1
Parent(s):
3bc2e7c
Update utility/utils.py
Browse files- utility/utils.py +34 -6
utility/utils.py
CHANGED
@@ -402,7 +402,31 @@ def process_extracted_text(extracted_text):
|
|
402 |
|
403 |
# Process the model output for parsed result
|
404 |
def process_resume_data(LLMdata,cont_data,extracted_text):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
405 |
|
|
|
|
|
|
|
|
|
|
|
406 |
# Initialize the processed data dictionary
|
407 |
processed_data = {
|
408 |
"name": [],
|
@@ -416,14 +440,18 @@ def process_resume_data(LLMdata,cont_data,extracted_text):
|
|
416 |
}
|
417 |
#LLM
|
418 |
processed_data['name'].extend(LLMdata.get('Name', []))
|
419 |
-
processed_data['contact_number'].extend(LLMdata.get('Contact', []))
|
420 |
processed_data['Designation'].extend(LLMdata.get('Designation', []))
|
421 |
-
processed_data['email'].extend(LLMdata.get("Email", []))
|
422 |
processed_data['Location'].extend(LLMdata.get('Address', []))
|
423 |
-
processed_data['Link'].extend(LLMdata.get('Link', []))
|
424 |
processed_data['Company'].extend(LLMdata.get('Company', []))
|
425 |
#Contact
|
426 |
-
processed_data['email'].extend(cont_data.get("emails", []))
|
427 |
-
processed_data['contact_number'].extend(cont_data.get("phone_numbers", []))
|
428 |
-
processed_data['Link'].extend(cont_data.get("links_RE", []))
|
|
|
|
|
|
|
|
|
429 |
return processed_data
|
|
|
402 |
|
403 |
# Process the model output for parsed result
|
404 |
def process_resume_data(LLMdata,cont_data,extracted_text):
|
405 |
+
|
406 |
+
# Removing duplicate emails
|
407 |
+
unique_emails = []
|
408 |
+
for email in cont_data['emails']:
|
409 |
+
if email not in LLMdata['Email']:
|
410 |
+
unique_emails.append(email)
|
411 |
+
|
412 |
+
# Removing duplicate links
|
413 |
+
unique_links = []
|
414 |
+
for link in cont_data['links_RE']:
|
415 |
+
if link not in LLMdata['Link']:
|
416 |
+
unique_links.append(link)
|
417 |
+
|
418 |
+
# Removing duplicate phone numbers
|
419 |
+
normalized_contact = [num[-10:] for num in LLMdata['Contact']]
|
420 |
+
unique_numbers = []
|
421 |
+
for num in cont_data['phone_numbers']:
|
422 |
+
if num[-10:] not in normalized_contact:
|
423 |
+
unique_numbers.append(num)
|
424 |
|
425 |
+
# Add unique emails, links, and phone numbers to the original LLMdata
|
426 |
+
LLMdata['Email'] += unique_emails
|
427 |
+
LLMdata['Link'] += unique_links
|
428 |
+
LLMdata['Contact'] += unique_numbers
|
429 |
+
|
430 |
# Initialize the processed data dictionary
|
431 |
processed_data = {
|
432 |
"name": [],
|
|
|
440 |
}
|
441 |
#LLM
|
442 |
processed_data['name'].extend(LLMdata.get('Name', []))
|
443 |
+
#processed_data['contact_number'].extend(LLMdata.get('Contact', []))
|
444 |
processed_data['Designation'].extend(LLMdata.get('Designation', []))
|
445 |
+
#processed_data['email'].extend(LLMdata.get("Email", []))
|
446 |
processed_data['Location'].extend(LLMdata.get('Address', []))
|
447 |
+
#processed_data['Link'].extend(LLMdata.get('Link', []))
|
448 |
processed_data['Company'].extend(LLMdata.get('Company', []))
|
449 |
#Contact
|
450 |
+
#processed_data['email'].extend(cont_data.get("emails", []))
|
451 |
+
#processed_data['contact_number'].extend(cont_data.get("phone_numbers", []))
|
452 |
+
#processed_data['Link'].extend(cont_data.get("links_RE", []))
|
453 |
+
#New_merge_data
|
454 |
+
processed_data['email'].extend(LLMdata['Email'])
|
455 |
+
processed_data['contact_number'].extend(LLMdata['Contact'])
|
456 |
+
processed_data['Link'].extend(LLMdata['Link'])
|
457 |
return processed_data
|