Spaces:
Sleeping
Sleeping
WebashalarForML
committed on
Update utility/utils.py
Browse files- utility/utils.py +16 -11
utility/utils.py
CHANGED
@@ -185,20 +185,25 @@ def extract_text_from_images(image_paths):
|
|
185 |
|
186 |
# Function to call the Gemma model and process the output as Json
|
187 |
def Data_Extractor(data, client=client):
|
188 |
-
text = f'''Act as a
|
189 |
-
|
|
|
|
|
190 |
{{
|
191 |
-
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
}}
|
199 |
|
200 |
-
Output:
|
201 |
-
|
|
|
|
|
|
|
202 |
# Call the API for inference
|
203 |
response = client.text_generation(text, max_new_tokens=1000)#, temperature=0.4, top_k=50, top_p=0.9, repetition_penalty=1.2)
|
204 |
|
|
|
185 |
|
186 |
# Function to call the Gemma model and process the output as Json
|
187 |
def Data_Extractor(data, client=client):
|
188 |
+
text = f'''<s>[INST] Act as a Text extractor for the following text given in text: {data}.
|
189 |
+
Your task is to extract specific information and return it in a JSON format as outlined below:
|
190 |
+
|
191 |
+
Please extract the following details:
|
192 |
{{
|
193 |
+
"Name": ["Identify and extract all the person's names from the text."],
|
194 |
+
"Designation": ["Extract all designations or job titles mentioned in the text."],
|
195 |
+
"Company": ["Extract all company or organization names if mentioned."],
|
196 |
+
"Contact": ["Extract all phone numbers, including country codes if present."],
|
197 |
+
"Address": ["Extract all full postal addresses or locations mentioned in the text."],
|
198 |
+
"Email": ["Identify and extract all valid email addresses mentioned in the text; if none are found, return 'Not found'."],
|
199 |
+
"Link": ["Identify and extract any website URLs or social media links present in the text."]
|
200 |
}}
|
201 |
|
202 |
+
Output:
|
203 |
+
[/INST]</s> [INST]
|
204 |
+
Return the extracted information in JSON format as follows:
|
205 |
+
[/INST]'''
|
206 |
+
|
207 |
# Call the API for inference
|
208 |
response = client.text_generation(text, max_new_tokens=1000)#, temperature=0.4, top_k=50, top_p=0.9, repetition_penalty=1.2)
|
209 |
|