Tonic committed on
Commit
e62dd99
1 Parent(s): bb6e5e5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -9
app.py CHANGED
@@ -4,6 +4,7 @@ import os
4
  import json
5
  import time
6
  import transformers
 
7
  from transformers import AutoTokenizer, AutoModelForCausalLM
8
 
9
  hf_token = os.getenv("HF_AUTH_TOKEN")
@@ -156,21 +157,51 @@ def query_vectara(text):
156
  else:
157
  return f"Error: {response.status_code}"
158
 
159
- # Main function to integrate Vectara, OLMo, and hallucination check
160
- def evaluate_content(user_input):
161
- vectara_summary = query_vectara(user_input)
162
- olmo_output = generate_text(vectara_summary)
163
- hallucination_score = check_hallucination(olmo_output, vectara_summary)
164
- return olmo_output, hallucination_score
165
 
166
- # Create the Gradio interface
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
167
  iface = gr.Interface(
168
  fn=evaluate_content,
169
  inputs=[gr.Textbox(label="User Input")],
170
  outputs=[
171
- gr.Textbox(label="Vectara Summary"),
172
  gr.Textbox(label="Vectara Sources", lines=10),
173
- gr.Textbox(label="Generated Text"),
174
  gr.Textbox(label="Hallucination Score")
175
  ],
176
  live=False,
 
4
  import json
5
  import time
6
  import transformers
7
+ import re
8
  from transformers import AutoTokenizer, AutoModelForCausalLM
9
 
10
  hf_token = os.getenv("HF_AUTH_TOKEN")
 
157
  else:
158
  return f"Error: {response.status_code}"
159
 
160
def clean_text(text):
    r"""Strip punctuation and symbols from ``text``.

    Every character that is neither a word character (``\w``: letters,
    digits, underscore) nor whitespace (``\s``) is removed. Whitespace,
    including newlines, is left untouched.

    Args:
        text: The value to clean. ``None`` is treated as an empty string and
            any other non-string value is converted with ``str()`` first, so
            a JSON ``null`` or numeric field does not raise ``TypeError``.

    Returns:
        The cleaned string.
    """
    if text is None:
        return ""
    return re.sub(r'[^\w\s]', '', str(text))
 
 
164
 
165
def evaluate_content(user_input):
    """Run the Vectara -> OLMo -> hallucination-check pipeline for one query.

    Args:
        user_input: The text passed to ``query_vectara``.

    Returns:
        A 4-tuple ``(summary_clean, sources_info, olmo_output_clean,
        hallucination_score)``, in the same order as the Gradio outputs
        (Vectara Summary, Vectara Sources, Generated Text, Hallucination
        Score). If the Vectara response cannot be decoded as a JSON object,
        the raw response text is returned in the first slot and the other
        three slots are empty strings.
    """
    vectara_response = query_vectara(user_input)

    # query_vectara returns a plain "Error: <status>" string on a failed
    # request, which is not JSON; surface it instead of crashing.
    try:
        vectara_response_json = json.loads(vectara_response)
    except (TypeError, ValueError):
        return str(vectara_response), "", "", ""
    if not isinstance(vectara_response_json, dict):
        return str(vectara_response), "", "", ""

    # `or` also covers keys that are present but null in the JSON payload.
    summary = vectara_response_json.get("summary", "") or ""
    sources = vectara_response_json.get("sources", []) or []

    # Clean summary text
    summary_clean = clean_text(summary)

    # Process sources to extract and clean necessary information.
    # One line per source, each terminated by a newline.
    source_lines = []
    for source in sources:
        title_clean = clean_text(source.get("title", "No title"))
        author_clean = clean_text(source.get("author", "No author"))
        page_number = source.get("page number", "N/A")
        source_lines.append(
            f"Title: {title_clean}, Author: {author_clean}, Page: {page_number}\n"
        )
    sources_info = "".join(source_lines)

    # Generate text based on the cleaned summary
    olmo_output = generate_text(summary_clean)
    olmo_output_clean = clean_text(olmo_output)

    # Check hallucination based on the original (uncleaned) output and summary
    hallucination_score = check_hallucination(olmo_output, summary)

    return summary_clean, sources_info, olmo_output_clean, hallucination_score
196
+
197
+ # Adjust the Gradio interface outputs to match the new structure
198
  iface = gr.Interface(
199
  fn=evaluate_content,
200
  inputs=[gr.Textbox(label="User Input")],
201
  outputs=[
202
+ gr.Textbox(label="Vectara Summary", lines=10),
203
  gr.Textbox(label="Vectara Sources", lines=10),
204
+ gr.Textbox(label="Generated Text", lines=10),
205
  gr.Textbox(label="Hallucination Score")
206
  ],
207
  live=False,