WebashalarForML committed on
Commit
bae46d8
·
verified ·
1 Parent(s): 2779ae0

Update utility/utils.py

Browse files
Files changed (1) hide show
  1. utility/utils.py +16 -11
utility/utils.py CHANGED
@@ -185,20 +185,25 @@ def extract_text_from_images(image_paths):
185
 
186
  # Function to call the Gemma model and process the output as Json
187
  def Data_Extractor(data, client=client):
188
- text = f'''Act as a Text extractor for the following text given in text: {data}
189
- Extract text in the following output JSON string:
 
 
190
  {{
191
- "Name": ["Identify and Extract All the person's name from the text."],
192
- "Designation": ["Extract All the designation or job title mentioned in the text."],
193
- "Company": ["Extract All the company or organization name if mentioned."],
194
- "Contact": ["Extract All phone number, including country codes if present."],
195
- "Address": ["Extract All the full postal address or location mentioned in the text."],
196
- "Email": ["Identify and Extract All valid email addresses mentioned in the text else 'Not found'."],
197
- "Link": ["Identify and Extract any website URLs or social media links present in the text."]
198
  }}
199
 
200
- Output:
201
- '''
 
 
 
202
  # Call the API for inference
203
  response = client.text_generation(text, max_new_tokens=1000)#, temperature=0.4, top_k=50, top_p=0.9, repetition_penalty=1.2)
204
 
 
185
 
186
  # Function to call the Gemma model and process the output as Json
187
  def Data_Extractor(data, client=client):
188
+ text = f'''<s>[INST] Act as a Text extractor for the following text given in text: {data}.
189
+ Your task is to extract specific information and return it in a JSON format as outlined below:
190
+
191
+ Please extract the following details:
192
  {{
193
+ "Name": ["Identify and extract all the person's names from the text."],
194
+ "Designation": ["Extract all designations or job titles mentioned in the text."],
195
+ "Company": ["Extract all company or organization names if mentioned."],
196
+ "Contact": ["Extract all phone numbers, including country codes if present."],
197
+ "Address": ["Extract all full postal addresses or locations mentioned in the text."],
198
+ "Email": ["Identify and extract all valid email addresses mentioned in the text; if none are found, return 'Not found'."],
199
+ "Link": ["Identify and extract any website URLs or social media links present in the text."]
200
  }}
201
 
202
+ Output:
203
+ [/INST]</s> [INST]
204
+ Return the extracted information in JSON format as follows:
205
+ [/INST]'''
206
+
207
  # Call the API for inference
208
  response = client.text_generation(text, max_new_tokens=1000)#, temperature=0.4, top_k=50, top_p=0.9, repetition_penalty=1.2)
209