OCR / app /functions.py
ShahzainHaider's picture
Upload folder using huggingface_hub
8ce5e48
raw
history blame
2.17 kB
from PIL import Image, ImageFilter
import cv2
import pandas as pd
def get_index(string, substring):
    """Locate the first occurrence of ``substring`` inside ``string``.

    Args:
        string: The text to search in.
        substring: The text to search for.

    Returns:
        A ``(start_index, end_index)`` tuple such that
        ``string[start_index:end_index] == substring``. When ``substring``
        does not occur in ``string`` the result is ``(-1, -1)``: the start
        value keeps the ``str.find`` "not found" convention, and the pair
        describes an empty span instead of a bogus range derived from
        ``-1 + len(substring)``.
    """
    start_index = string.find(substring)
    if start_index == -1:
        # Not found: do not build an end index from the -1 sentinel.
        return -1, -1
    return start_index, start_index + len(substring)
def get_latest_value_from_csv(search_key, csv_path="/content/log.csv"):
    """Fetch the most recent annotated fields logged for an OCR text.

    Reads the CSV log, keeps the rows whose "OCR Extracted Text" column
    equals ``search_key``, and takes the row with the greatest "timestamp"
    value. The non-null values of the annotation columns of that row are
    collected into a dictionary.

    Args:
        search_key: Value compared for equality against the
            "OCR Extracted Text" column.
        csv_path: Location of the CSV log. Defaults to the path that was
            previously hard-coded, so existing callers are unaffected.

    Returns:
        A ``(success, output_dict)`` tuple. ``success`` is ``False`` only
        when an exception occurred (for example the file or a column is
        missing). It is ``True`` both when a matching row was found and
        when no row matched; in the latter case ``output_dict`` is empty.
    """
    # Defined before the try so the except branch can always return it.
    output_dict = {}
    columns_to_print = ["NAME", "DOB", "GENDER", "COUNTRY", "DOCUMENT NUMBER"]
    try:
        df = pd.read_csv(csv_path)
        filtered_df = df[df["OCR Extracted Text"] == search_key]
        # Newest entry first, so the latest annotation is at position 0.
        sorted_df = filtered_df.sort_values("timestamp", ascending=False)
        if sorted_df.empty:
            print("Annotated Data Not Found")
        else:
            latest_row = sorted_df.iloc[0]
            # Only keep fields the user actually filled in.
            for column in columns_to_print:
                value = latest_row[column]
                if pd.notnull(value):
                    output_dict[column] = value
            print("Training data from user", output_dict)
        return True, output_dict
    except Exception as e:
        # Best-effort lookup: report the problem and signal failure to the
        # caller instead of propagating the exception.
        print("Exception in e ", str(e))
        return False, output_dict
def image_enhacement(file_path):
    """Enhance the image stored at ``file_path`` and overwrite it in place.

    Steps, in order:
      1. Upscale the image to twice its width and height (bilinear).
      2. Apply Pillow's SHARPEN filter and save back to ``file_path``.
      3. Reload the saved file with OpenCV and apply a 5x5 Gaussian blur.
      4. Min-max normalize the result to the range 55..255 and write it
         back to ``file_path``.

    Args:
        file_path: Path of the image file to read and overwrite.

    Returns:
        None. The result is written to ``file_path``.
    """
    # Close the source file before overwriting the same path: the context
    # manager releases the handle, and resize() has already produced a new
    # image holding its own pixel data.
    with Image.open(file_path) as image:
        upscaled_image = image.resize(
            (image.width * 2, image.height * 2),
            resample=Image.Resampling.BILINEAR,
        )
    sharpened_image = upscaled_image.filter(ImageFilter.SHARPEN)
    sharpened_image.save(file_path)

    # Continue in OpenCV from the file that was just written.
    reloaded_image = cv2.imread(file_path)
    smoothed_image = cv2.GaussianBlur(reloaded_image, (5, 5), 0)  # Apply smoothing

    # Desired output level range for the normalization below.
    output_min = 55
    output_max = 255
    normalized_image = cv2.normalize(
        smoothed_image, None, output_min, output_max, cv2.NORM_MINMAX
    )
    cv2.imwrite(file_path, normalized_image)