storresbusquets committed
Commit 2c042c1 · 1 Parent(s): 425594a

Update app.py

Files changed (1): app.py (+26 -25)
app.py CHANGED
@@ -201,9 +201,9 @@ class GradioInference:
         progress(0.40, desc="Summarizing")
 
         # Perform summarization on the transcription
-        transcription_summary = self.summarizer(
-            results["text"], max_length=150, min_length=30, do_sample=False
-        )
+        # transcription_summary = self.summarizer(
+        #     results["text"], max_length=150, min_length=30, do_sample=False
+        # )
 
         ########################## CHATGPT TEST #################################
         from langchain.chains.llm import LLMChain
@@ -211,10 +211,11 @@ class GradioInference:
         from langchain.chains.combine_documents.stuff import StuffDocumentsChain
 
         OPENAI_API_KEY = os.environ['OPENAI_API_KEY']
-
+
+        text = results["text"]
         # Define prompt
         prompt_template = """Write a concise summary of the following:
-        "{results["text"]}"
+        "{text}"
         CONCISE SUMMARY:"""
         prompt = PromptTemplate.from_template(prompt_template)
 
@@ -224,7 +225,7 @@ class GradioInference:
 
         # Define StuffDocumentsChain
         stuff_chain = StuffDocumentsChain(
-            llm_chain=llm_chain, document_variable_name="text"
+            llm_chain=llm_chain, document_variable_name=text
         )
 
         docs = loader.load()
@@ -233,28 +234,28 @@ class GradioInference:
         ########################## END CHATGPT TEST #################################
 
         #### Test
-        WHITESPACE_HANDLER = lambda k: re.sub('\s+', ' ', re.sub('\n+', ' ', k.strip()))
+        # WHITESPACE_HANDLER = lambda k: re.sub('\s+', ' ', re.sub('\n+', ' ', k.strip()))
 
-        input_ids_sum = self.tokenizer(
-            [WHITESPACE_HANDLER(results["text"])],
-            return_tensors="pt",
-            padding="max_length",
-            truncation=True,
-            max_length=512
-        )["input_ids"]
+        # input_ids_sum = self.tokenizer(
+        #     [WHITESPACE_HANDLER(results["text"])],
+        #     return_tensors="pt",
+        #     padding="max_length",
+        #     truncation=True,
+        #     max_length=512
+        # )["input_ids"]
 
-        output_ids_sum = self.model.generate(
-            input_ids=input_ids_sum,
-            max_length=130,
-            no_repeat_ngram_size=2,
-            num_beams=4
-        )[0]
+        # output_ids_sum = self.model.generate(
+        #     input_ids=input_ids_sum,
+        #     max_length=130,
+        #     no_repeat_ngram_size=2,
+        #     num_beams=4
+        # )[0]
 
-        summary = self.tokenizer.decode(
-            output_ids_sum,
-            skip_special_tokens=True,
-            clean_up_tokenization_spaces=False
-        )
+        # summary = self.tokenizer.decode(
+        #     output_ids_sum,
+        #     skip_special_tokens=True,
+        #     clean_up_tokenization_spaces=False
+        # )
         #### End test
 
         progress(0.50, desc="Extracting Keywords")
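
Note a pitfall in the new hunk: LangChain's StuffDocumentsChain expects document_variable_name to be the name of the prompt input variable, i.e. the string "text" matching "{text}" in the template, so the added line document_variable_name=text passes the transcription itself where the variable name belongs. The previous template's "{results["text"]}" placeholder had the mirror-image problem, since PromptTemplate cannot parse it as a simple variable. Below is a minimal self-contained sketch of the intended "stuff" summarization chain, not the app's actual code: it assumes the LLM setup hidden at lines 221-223, uses ChatOpenAI as a stand-in for whatever model is built there, and wraps the Whisper transcription in a Document instead of going through the unshown loader:

import os

from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.chains.llm import LLMChain
from langchain.chains.combine_documents.stuff import StuffDocumentsChain
from langchain.docstore.document import Document

OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]

# Prompt with a single input variable named "text"
prompt_template = """Write a concise summary of the following:
"{text}"
CONCISE SUMMARY:"""
prompt = PromptTemplate.from_template(prompt_template)

llm = ChatOpenAI(openai_api_key=OPENAI_API_KEY, temperature=0)  # stand-in LLM
llm_chain = LLMChain(llm=llm, prompt=prompt)

# document_variable_name names the prompt variable that receives the stuffed
# documents; it must be the string "text", not the transcription itself.
stuff_chain = StuffDocumentsChain(llm_chain=llm_chain, document_variable_name="text")

# Wrap the transcription in a Document (the app instead uses loader.load())
docs = [Document(page_content="...Whisper transcription text...")]
summary = stuff_chain.run(docs)
print(summary)

With that one-character-class fix ("text" instead of text), the chain validates, since StuffDocumentsChain checks that document_variable_name appears in the prompt's input variables.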