WebashalarForML committed on
Commit
3c5c7f9
1 Parent(s): 3bc2e7c

Update utility/utils.py

Browse files
Files changed (1) hide show
  1. utility/utils.py +34 -6
utility/utils.py CHANGED
@@ -402,7 +402,31 @@ def process_extracted_text(extracted_text):
402
 
403
  # Process the model output for parsed result
404
  def process_resume_data(LLMdata,cont_data,extracted_text):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
405
 
 
 
 
 
 
406
  # Initialize the processed data dictionary
407
  processed_data = {
408
  "name": [],
@@ -416,14 +440,18 @@ def process_resume_data(LLMdata,cont_data,extracted_text):
416
  }
417
  #LLM
418
  processed_data['name'].extend(LLMdata.get('Name', []))
419
- processed_data['contact_number'].extend(LLMdata.get('Contact', []))
420
  processed_data['Designation'].extend(LLMdata.get('Designation', []))
421
- processed_data['email'].extend(LLMdata.get("Email", []))
422
  processed_data['Location'].extend(LLMdata.get('Address', []))
423
- processed_data['Link'].extend(LLMdata.get('Link', []))
424
  processed_data['Company'].extend(LLMdata.get('Company', []))
425
  #Contact
426
- processed_data['email'].extend(cont_data.get("emails", []))
427
- processed_data['contact_number'].extend(cont_data.get("phone_numbers", []))
428
- processed_data['Link'].extend(cont_data.get("links_RE", []))
 
 
 
 
429
  return processed_data
 
402
 
403
  # Process the model output for parsed result
404
  def process_resume_data(LLMdata,cont_data,extracted_text):
405
+
406
+ # Removing duplicate emails
407
+ unique_emails = []
408
+ for email in cont_data['emails']:
409
+ if email not in LLMdata['Email']:
410
+ unique_emails.append(email)
411
+
412
+ # Removing duplicate links
413
+ unique_links = []
414
+ for link in cont_data['links_RE']:
415
+ if link not in LLMdata['Link']:
416
+ unique_links.append(link)
417
+
418
+ # Removing duplicate phone numbers
419
+ normalized_contact = [num[-10:] for num in LLMdata['Contact']]
420
+ unique_numbers = []
421
+ for num in cont_data['phone_numbers']:
422
+ if num[-10:] not in normalized_contact:
423
+ unique_numbers.append(num)
424
 
425
+ # Add unique emails, links, and phone numbers to the original LLMdata
426
+ LLMdata['Email'] += unique_emails
427
+ LLMdata['Link'] += unique_links
428
+ LLMdata['Contact'] += unique_numbers
429
+
430
  # Initialize the processed data dictionary
431
  processed_data = {
432
  "name": [],
 
440
  }
441
  #LLM
442
  processed_data['name'].extend(LLMdata.get('Name', []))
443
+ #processed_data['contact_number'].extend(LLMdata.get('Contact', []))
444
  processed_data['Designation'].extend(LLMdata.get('Designation', []))
445
+ #processed_data['email'].extend(LLMdata.get("Email", []))
446
  processed_data['Location'].extend(LLMdata.get('Address', []))
447
+ #processed_data['Link'].extend(LLMdata.get('Link', []))
448
  processed_data['Company'].extend(LLMdata.get('Company', []))
449
  #Contact
450
+ #processed_data['email'].extend(cont_data.get("emails", []))
451
+ #processed_data['contact_number'].extend(cont_data.get("phone_numbers", []))
452
+ #processed_data['Link'].extend(cont_data.get("links_RE", []))
453
+ #New_merge_data
454
+ processed_data['email'].extend(LLMdata['Email'])
455
+ processed_data['contact_number'].extend(LLMdata['Contact'])
456
+ processed_data['Link'].extend(LLMdata['Link'])
457
  return processed_data