Spaces:
Sleeping
Sleeping
WebashalarForML
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -1,8 +1,17 @@
|
|
1 |
# libraries
|
2 |
from flask import Flask, render_template, request, redirect, url_for, flash, session, send_from_directory
|
3 |
import os
|
4 |
-
|
|
|
5 |
from backup.backup import NER_Model
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
# Flask App
app = Flask(__name__)

# Session secret: prefer the SECRET_KEY environment variable so the real key
# never lives in the repository. The historical placeholder is kept as the
# fallback so existing deployments keep working, but its use is reported
# because a publicly known key offers no protection.
app.secret_key = os.environ.get('SECRET_KEY', 'your_secret_key')
if app.secret_key == 'your_secret_key':
    app.logger.warning("SECRET_KEY is not set; using the insecure placeholder secret key")
|
@@ -19,17 +28,20 @@ if not os.path.exists(app.config['UPLOAD_FOLDER']):
|
|
19 |
@app.route('/')
def index():
    """Render ``index.html`` with the file names stored in the session.

    The ``uploaded_files`` session entry is passed to the template under the
    same name; an empty list is used when the entry is absent.
    """
    return render_template(
        'index.html',
        uploaded_files=session.get('uploaded_files', []),
    )
|
23 |
|
24 |
@app.route('/upload', methods=['POST'])
|
25 |
def upload_file():
|
26 |
if 'files' not in request.files:
|
27 |
flash('No file part')
|
|
|
28 |
return redirect(request.url)
|
29 |
|
30 |
files = request.files.getlist('files') # Get multiple files
|
31 |
if not files or all(file.filename == '' for file in files):
|
32 |
flash('No selected files')
|
|
|
33 |
return redirect(request.url)
|
34 |
|
35 |
uploaded_files = []
|
@@ -38,9 +50,11 @@ def upload_file():
|
|
38 |
filename = file.filename
|
39 |
file.save(os.path.join(app.config['UPLOAD_FOLDER'], filename))
|
40 |
uploaded_files.append(filename)
|
|
|
41 |
|
42 |
session['uploaded_files'] = uploaded_files
|
43 |
flash('Files successfully uploaded')
|
|
|
44 |
return redirect(url_for('index'))
|
45 |
|
46 |
@app.route('/remove_file')
|
@@ -48,8 +62,10 @@ def remove_file():
|
|
48 |
uploaded_files = session.get('uploaded_files', [])
|
49 |
for filename in uploaded_files:
|
50 |
os.remove(os.path.join(app.config['UPLOAD_FOLDER'], filename))
|
|
|
51 |
session.pop('uploaded_files', None)
|
52 |
flash('Files successfully removed')
|
|
|
53 |
return redirect(url_for('index'))
|
54 |
|
55 |
@app.route('/process', methods=['POST'])
|
@@ -57,58 +73,60 @@ def process_file():
|
|
57 |
uploaded_files = session.get('uploaded_files', [])
|
58 |
if not uploaded_files:
|
59 |
flash('No files selected for processing')
|
|
|
60 |
return redirect(url_for('index'))
|
61 |
|
62 |
# Create a list of file paths for the extracted text function
|
63 |
file_paths = [os.path.join(app.config['UPLOAD_FOLDER'], filename) for filename in uploaded_files]
|
|
|
64 |
|
65 |
-
# Extract text from all images
|
66 |
-
extracted_text,processed_Img = extract_text_from_images(file_paths,RESULT_FOLDER)
|
67 |
-
# Convert PDF to text
|
68 |
-
print("extracted_text----------------------------",extracted_text)
|
69 |
-
print("extracted_text type----------------------------",type(extracted_text))
|
70 |
-
|
71 |
-
print("processed_Img----------------------------",processed_Img)
|
72 |
-
print("processed_Img type----------------------------",type(processed_Img))
|
73 |
-
|
74 |
-
|
75 |
try:
|
|
|
|
|
|
|
|
|
|
|
76 |
# Call the Gemma model API and get the professional data
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
|
|
82 |
except Exception as e:
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
|
98 |
session['processed_data'] = processed_data
|
99 |
session['processed_Img'] = processed_Img
|
100 |
-
flash('Data processed and analyzed successfully
|
|
|
101 |
return redirect(url_for('result'))
|
102 |
|
103 |
@app.route('/result')
|
104 |
def result():
|
105 |
processed_data = session.get('processed_data', {})
|
106 |
processed_Img = session.get('processed_Img', {})
|
107 |
-
|
|
|
108 |
|
109 |
@app.route('/uploads/<filename>')
def uploaded_file(filename):
    """Return the named file from the configured ``UPLOAD_FOLDER``.

    Args:
        filename: Name taken from the ``/uploads/<filename>`` URL segment.
    """
    upload_dir = app.config['UPLOAD_FOLDER']
    return send_from_directory(upload_dir, filename)
|
112 |
|
113 |
if __name__ == '__main__':
|
114 |
-
|
|
|
|
1 |
# libraries
|
2 |
from flask import Flask, render_template, request, redirect, url_for, flash, session, send_from_directory
|
3 |
import os
|
4 |
+
import logging
|
5 |
+
from utility.utils import extract_text_from_images, Data_Extractor, json_to_llm_str, process_extracted_text, process_resume_data
|
6 |
from backup.backup import NER_Model
|
7 |
+
from paddleOCR import
|
8 |
+
|
9 |
+
# Configure logging
# Root logger at INFO level with timestamped records, sent to two handlers:
# a file handler writing to "app.log" and a stream handler with its default
# stream.
logging.basicConfig(
    handlers=[
        logging.FileHandler("app.log"),
        logging.StreamHandler(),
    ],
    format='%(asctime)s %(levelname)s: %(message)s',
    level=logging.INFO,
)
|
14 |
+
|
15 |
# Flask App
app = Flask(__name__)

# Session secret: prefer the SECRET_KEY environment variable so the real key
# never lives in the repository. The historical placeholder is kept as the
# fallback so existing deployments keep working, but its use is logged
# because a publicly known key offers no protection.
app.secret_key = os.environ.get('SECRET_KEY', 'your_secret_key')
if app.secret_key == 'your_secret_key':
    logging.warning("SECRET_KEY is not set; using the insecure placeholder secret key")
|
|
|
28 |
@app.route('/')
def index():
    """Render ``index.html`` with the file names stored in the session.

    Reads the ``uploaded_files`` session entry (empty list when absent), logs
    it at INFO level, and passes it to the template as ``uploaded_files``.
    """
    session_files = session.get('uploaded_files', [])
    logging.info(f"Accessed index page, uploaded files: {session_files}")
    return render_template('index.html', uploaded_files=session_files)
|
33 |
|
34 |
@app.route('/upload', methods=['POST'])
|
35 |
def upload_file():
|
36 |
if 'files' not in request.files:
|
37 |
flash('No file part')
|
38 |
+
logging.warning("No file part found in the request")
|
39 |
return redirect(request.url)
|
40 |
|
41 |
files = request.files.getlist('files') # Get multiple files
|
42 |
if not files or all(file.filename == '' for file in files):
|
43 |
flash('No selected files')
|
44 |
+
logging.warning("No files selected for upload")
|
45 |
return redirect(request.url)
|
46 |
|
47 |
uploaded_files = []
|
|
|
50 |
filename = file.filename
|
51 |
file.save(os.path.join(app.config['UPLOAD_FOLDER'], filename))
|
52 |
uploaded_files.append(filename)
|
53 |
+
logging.info(f"Uploaded file: {filename}")
|
54 |
|
55 |
session['uploaded_files'] = uploaded_files
|
56 |
flash('Files successfully uploaded')
|
57 |
+
logging.info(f"Files successfully uploaded: {uploaded_files}")
|
58 |
return redirect(url_for('index'))
|
59 |
|
60 |
@app.route('/remove_file')
|
|
|
62 |
uploaded_files = session.get('uploaded_files', [])
|
63 |
for filename in uploaded_files:
|
64 |
os.remove(os.path.join(app.config['UPLOAD_FOLDER'], filename))
|
65 |
+
logging.info(f"Removed file: {filename}")
|
66 |
session.pop('uploaded_files', None)
|
67 |
flash('Files successfully removed')
|
68 |
+
logging.info("All uploaded files removed")
|
69 |
return redirect(url_for('index'))
|
70 |
|
71 |
@app.route('/process', methods=['POST'])
|
|
|
73 |
uploaded_files = session.get('uploaded_files', [])
|
74 |
if not uploaded_files:
|
75 |
flash('No files selected for processing')
|
76 |
+
logging.warning("No files selected for processing")
|
77 |
return redirect(url_for('index'))
|
78 |
|
79 |
# Create a list of file paths for the extracted text function
|
80 |
file_paths = [os.path.join(app.config['UPLOAD_FOLDER'], filename) for filename in uploaded_files]
|
81 |
+
logging.info(f"Processing files: {file_paths}")
|
82 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
83 |
try:
|
84 |
+
# Extract text from all images
|
85 |
+
extracted_text, processed_Img = extract_text_from_images(file_paths, RESULT_FOLDER)
|
86 |
+
logging.info(f"Extracted text: {extracted_text}")
|
87 |
+
logging.info(f"Processed images: {processed_Img}")
|
88 |
+
|
89 |
# Call the Gemma model API and get the professional data
|
90 |
+
llmText = json_to_llm_str(extracted_text)
|
91 |
+
logging.info(f"LLM text: {llmText}")
|
92 |
+
|
93 |
+
LLMdata = Data_Extractor(llmText)
|
94 |
+
logging.info(f"LLM data: {LLMdata}")
|
95 |
+
|
96 |
except Exception as e:
|
97 |
+
logging.error(f"Error during LLM processing: {e}")
|
98 |
+
logging.info("Running backup model...")
|
99 |
+
|
100 |
+
# Run the backup model in case of an exception
|
101 |
+
text = json_to_llm_str(extracted_text)
|
102 |
+
LLMdata = NER_Model(text)
|
103 |
+
logging.info(f"NER model data: {LLMdata}")
|
104 |
+
|
105 |
+
cont_data = process_extracted_text(extracted_text)
|
106 |
+
logging.info(f"Contextual data: {cont_data}")
|
107 |
+
|
108 |
+
# Storing the parsed results
|
109 |
+
processed_data = process_resume_data(LLMdata, cont_data, extracted_text)
|
110 |
+
logging.info(f"Processed data: {processed_data}")
|
111 |
|
112 |
session['processed_data'] = processed_data
|
113 |
session['processed_Img'] = processed_Img
|
114 |
+
flash('Data processed and analyzed successfully')
|
115 |
+
logging.info("Data processed and analyzed successfully")
|
116 |
return redirect(url_for('result'))
|
117 |
|
118 |
@app.route('/result')
def result():
    """Render ``result.html`` from the values stored in the session.

    ``processed_data`` and ``processed_Img`` are read from the session (each
    defaulting to an empty dict), logged at INFO level, and handed to the
    template as ``data`` and ``Img`` respectively.
    """
    parsed = session.get('processed_data', {})
    images = session.get('processed_Img', {})
    logging.info(f"Displaying results: Data - {parsed}, Images - {images}")
    return render_template('result.html', data=parsed, Img=images)
|
124 |
|
125 |
@app.route('/uploads/<filename>')
def uploaded_file(filename):
    """Log the request and return the named file from ``UPLOAD_FOLDER``.

    Args:
        filename: Name taken from the ``/uploads/<filename>`` URL segment.
    """
    logging.info(f"Serving file: {filename}")
    upload_dir = app.config['UPLOAD_FOLDER']
    return send_from_directory(upload_dir, filename)
|
129 |
|
130 |
if __name__ == '__main__':
    logging.info("Starting Flask app")
    # Debug mode turns on the interactive debugger, which must not be exposed
    # on a deployed instance. It is therefore opt-in: set FLASK_DEBUG to
    # 1/true/yes for local development; anything else runs with debug off.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in ('1', 'true', 'yes')
    app.run(debug=debug_enabled)
|