ShahzainHaider committed on
Commit
c11535f
·
1 Parent(s): e8fc64a

Upload folder using huggingface_hub

Browse files
app/__pycache__/app.cpython-38.pyc CHANGED
Binary files a/app/__pycache__/app.cpython-38.pyc and b/app/__pycache__/app.cpython-38.pyc differ
 
app/app.py CHANGED
@@ -23,7 +23,7 @@ async def ocr(Id_card: UploadFile = File(...)):
23
  with open(file_path, "wb+") as file_object:
24
  file_object.write(Id_card.file.read())
25
 
26
- card_type = "passport"
27
  dictionary = custom_ocr(file_path, card_type)
28
 
29
  if '' in list(dictionary.values()):
 
23
  with open(file_path, "wb+") as file_object:
24
  file_object.write(Id_card.file.read())
25
 
26
+ card_type = "id_card"
27
  dictionary = custom_ocr(file_path, card_type)
28
 
29
  if '' in list(dictionary.values()):
app/extract_country/__pycache__/country_dictionary.cpython-38.pyc CHANGED
Binary files a/app/extract_country/__pycache__/country_dictionary.cpython-38.pyc and b/app/extract_country/__pycache__/country_dictionary.cpython-38.pyc differ
 
app/extract_country/country_dictionary.py CHANGED
@@ -26,7 +26,7 @@ country_data = {
26
  'GUATEMALA' : ['GUATEMALA'],
27
  'Bolivia' : ['Bolivariano', 'Bolivia'],
28
  'El Salvador' : ['Salvador'],
29
- 'Dominican Republic' : ['REPUBLICADOMINICANA']
30
  }
31
 
32
  def find_country(ocr_list):
 
26
  'GUATEMALA' : ['GUATEMALA'],
27
  'Bolivia' : ['Bolivariano', 'Bolivia'],
28
  'El Salvador' : ['Salvador'],
29
+ 'Dominican Republic' : ['REPUBLICA DOMINICANA']
30
  }
31
 
32
  def find_country(ocr_list):
app/images/idcards/input.jpg CHANGED
deploy.py CHANGED
@@ -1,4 +1,5 @@
1
  import gradio as gr
 
2
  from app.extract_country.country_dictionary import find_country
3
  from app.extract_dates.date_engine import date_extractor
4
  from app.extract_gender.gender_extractor import gender_extract
@@ -7,6 +8,8 @@ from app.ocr_engine.ocr import OCR
7
  from paddleocr import PaddleOCR
8
  from app.layoutLM_api.api import custom_ocr
9
  import cv2
 
 
10
 
11
  def ocr(image_input, webcam_input, card_type_input):
12
 
@@ -16,17 +19,39 @@ def ocr(image_input, webcam_input, card_type_input):
16
  print("Webcam Image : ", type(webcam_input))
17
 
18
  if image_input is not None and image_input.any():
19
- print("In input image")
20
  cv2.imwrite(file_path, image_input)
 
21
  else:
22
  print("In webcam")
23
  cv2.imwrite(file_path, cv2.flip(webcam_input, 1))
24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  # print(type(input_img))
26
  ocr = PaddleOCR(use_angle_cls=True, lang='en') # need to run only once to download and load model into memory
27
 
28
-
29
-
30
  # with open(file_path, "wb+") as file_object:
31
  # file_object.write(input_img.file.read())
32
 
 
1
  import gradio as gr
2
+ import numpy as np
3
  from app.extract_country.country_dictionary import find_country
4
  from app.extract_dates.date_engine import date_extractor
5
  from app.extract_gender.gender_extractor import gender_extract
 
8
  from paddleocr import PaddleOCR
9
  from app.layoutLM_api.api import custom_ocr
10
  import cv2
11
+ from PIL import Image, ImageFilter
12
+
13
 
14
  def ocr(image_input, webcam_input, card_type_input):
15
 
 
19
  print("Webcam Image : ", type(webcam_input))
20
 
21
  if image_input is not None and image_input.any():
22
+ print("In input image")
23
  cv2.imwrite(file_path, image_input)
24
+
25
  else:
26
  print("In webcam")
27
  cv2.imwrite(file_path, cv2.flip(webcam_input, 1))
28
 
29
+ image = Image.open(file_path)
30
+ upscaled_image = image.resize((image.width*2, image.height*2), resample=Image.Resampling.BILINEAR)
31
+ sharpened_image = upscaled_image.filter(ImageFilter.SHARPEN)
32
+ sharpened_image.save(file_path)
33
+
34
+ sharpened_image = cv2.imread(file_path)
35
+ # Apply smoothing
36
+ smoothed_image = cv2.GaussianBlur(sharpened_image, (5, 5), 0)
37
+ # Set the desired output level range
38
+ output_min = 55
39
+ output_max = 255
40
+
41
+ # Calculate the minimum and maximum pixel values in the image
42
+ min_value = np.min(image)
43
+ max_value = np.max(image)
44
+
45
+ # Normalize the image to the desired output level range
46
+ normalized_image = cv2.normalize(smoothed_image, None, output_min, output_max, cv2.NORM_MINMAX)
47
+
48
+
49
+ # Save the upscaled, smoothed, and sharpened image
50
+ cv2.imwrite(file_path, normalized_image)
51
+
52
  # print(type(input_img))
53
  ocr = PaddleOCR(use_angle_cls=True, lang='en') # need to run only once to download and load model into memory
54
 
 
 
55
  # with open(file_path, "wb+") as file_object:
56
  # file_object.write(input_img.file.read())
57
 
test.py CHANGED
@@ -1,19 +1,12 @@
1
- import re
2
 
3
- def identify_document_id(data_list):
4
- keywords = ["Document ID", "Document Number", "Passport Number", "ID Number"] # Add other possible keywords
5
 
6
- for item in data_list:
7
- for keyword in keywords:
8
- if keyword in item:
9
- # Extract document ID based on format and length
10
- document_id = re.findall(r'\b[A-Za-z0-9]+\b', item)
11
- # Additional checks for format and length can be added here
12
- return document_id[0] if document_id else None
13
 
14
- return None
 
15
 
16
- # Test the function with the given data list
17
- data_list = ["Govermment of the People's Republic of Bangladesh", 'NationalIDCard', '12May 1975', 'HETH', 'Caaaat', 'Name', 'ROMANARAHMAN', 'Date of tn 12 May 1975', '8673674936']
18
- document_id = identify_document_id(data_list)
19
- print(document_id)
 
1
+ from datetime import datetime
2
 
3
+ dob = "56-01-02"
 
4
 
5
+ # Parse the string date into a datetime object
6
+ date_object = datetime.strptime(dob, "%Y-%m-%d")
 
 
 
 
 
7
 
8
+ # Format the date object using strftime
9
+ formatted_date = date_object.strftime("%B %d, %Y")
10
 
11
+ # Print the formatted date
12
+ print(formatted_date)