storresbusquets committed
Commit 8cae8bc · 1 Parent(s): c6818ef

Update app.py

Files changed (1): app.py (+48 −48)

app.py CHANGED
@@ -44,18 +44,18 @@ class GradioInference:
         self.tokenizer = AutoTokenizer.from_pretrained("csebuetnlp/mT5_multilingual_XLSum")
         self.model = AutoModelForSeq2SeqLM.from_pretrained("csebuetnlp/mT5_multilingual_XLSum")
 
-        self.llm_tokenizer = AutoTokenizer.from_pretrained("tiiuae/falcon-7b-instruct")
+        # self.llm_tokenizer = AutoTokenizer.from_pretrained("tiiuae/falcon-7b-instruct")
 
-        self.pipeline = pipeline(
-            "text-generation", #task
-            model="tiiuae/falcon-7b-instruct",
-            tokenizer=self.llm_tokenizer,
-            trust_remote_code=True,
-            do_sample=True,
-            top_k=10,
-            num_return_sequences=1,
-            eos_token_id=self.tokenizer.eos_token_id
-        )
+        # self.pipeline = pipeline(
+        #     "text-generation", #task
+        #     model="tiiuae/falcon-7b-instruct",
+        #     tokenizer=self.llm_tokenizer,
+        #     trust_remote_code=True,
+        #     do_sample=True,
+        #     top_k=10,
+        #     num_return_sequences=1,
+        #     eos_token_id=self.tokenizer.eos_token_id
+        # )
 
 
     def __call__(self, link, lang, size, progress=gr.Progress()):
@@ -214,53 +214,53 @@ class GradioInference:
         progress(0.40, desc="Summarizing")
 
         # Perform summarization on the transcription
-        # transcription_summary = self.summarizer(
-        #     results["text"], max_length=150, min_length=30, do_sample=False
-        # )
+        transcription_summary = self.summarizer(
+            results["text"], max_length=150, min_length=30, do_sample=False
+        )
 
         ########################## LLM TEST #################################
-        from langchain import HuggingFacePipeline, PromptTemplate, LLMChain
+        # from langchain import HuggingFacePipeline, PromptTemplate, LLMChain
 
-        llm = HuggingFacePipeline(pipeline = self.pipeline, model_kwargs = {'temperature':0})
+        # llm = HuggingFacePipeline(pipeline = self.pipeline, model_kwargs = {'temperature':0})
 
-        template = """
-        Write a concise summary of the following text delimited by triple backquotes.
-        ```{text}```
-        CONCISE SUMMARY:
-        """
+        # template = """
+        # Write a concise summary of the following text delimited by triple backquotes.
+        # ```{text}```
+        # CONCISE SUMMARY:
+        # """
 
-        prompt = PromptTemplate(template=template, input_variables=["text"])
+        # prompt = PromptTemplate(template=template, input_variables=["text"])
 
-        llm_chain = LLMChain(prompt=prompt, llm=llm)
+        # llm_chain = LLMChain(prompt=prompt, llm=llm)
 
-        text = results["text"]
+        # text = results["text"]
 
-        summ = llm_chain.run(text)
+        # summ = llm_chain.run(text)
         ########################## END LLM TEST #################################
 
         #### Test
-        # WHITESPACE_HANDLER = lambda k: re.sub('\s+', ' ', re.sub('\n+', ' ', k.strip()))
+        WHITESPACE_HANDLER = lambda k: re.sub('\s+', ' ', re.sub('\n+', ' ', k.strip()))
 
-        # input_ids_sum = self.tokenizer(
-        #     [WHITESPACE_HANDLER(results["text"])],
-        #     return_tensors="pt",
-        #     padding="max_length",
-        #     truncation=True,
-        #     max_length=512
-        # )["input_ids"]
+        input_ids_sum = self.tokenizer(
+            [WHITESPACE_HANDLER(results["text"])],
+            return_tensors="pt",
+            padding="max_length",
+            truncation=True,
+            max_length=512
+        )["input_ids"]
 
-        # output_ids_sum = self.model.generate(
-        #     input_ids=input_ids_sum,
-        #     max_length=130,
-        #     no_repeat_ngram_size=2,
-        #     num_beams=4
-        # )[0]
+        output_ids_sum = self.model.generate(
+            input_ids=input_ids_sum,
+            max_length=130,
+            no_repeat_ngram_size=2,
+            num_beams=4
+        )[0]
 
-        # summary = self.tokenizer.decode(
-        #     output_ids_sum,
-        #     skip_special_tokens=True,
-        #     clean_up_tokenization_spaces=False
-        # )
+        summary = self.tokenizer.decode(
+            output_ids_sum,
+            skip_special_tokens=True,
+            clean_up_tokenization_spaces=False
+        )
         #### End test
 
         progress(0.50, desc="Extracting Keywords")
@@ -303,8 +303,8 @@ class GradioInference:
         if lang == "english":
             return (
                 results["text"],
-                summ,
-                # transcription_summary[0]["summary_text"],
+                # summ,
+                transcription_summary[0]["summary_text"],
                 formatted_keywords,
                 formatted_sentiment,
                 wordcloud_image,
@@ -312,8 +312,8 @@
         else:
            return (
                 results["text"],
-                summ,
-                # summary,
+                # summ,
+                summary,
                 formatted_keywords,
                 formatted_sentiment,
                 wordcloud_image,
 
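For reference, a minimal standalone sketch of the summarization path this commit switches back to. The English branch returns transcription_summary[0]["summary_text"] from self.summarizer, a summarization pipeline constructed elsewhere in app.py (not shown in this diff); the non-English branch runs csebuetnlp/mT5_multilingual_XLSum directly, as below. The tokenizer arguments and generation settings come from the diff; the summarize() wrapper, raw-string regex patterns, and imports are editorial:

import re
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Collapse newlines and runs of whitespace before tokenizing, as in the diff
# (raw strings added here to avoid invalid-escape warnings).
WHITESPACE_HANDLER = lambda k: re.sub(r"\s+", " ", re.sub(r"\n+", " ", k.strip()))

tokenizer = AutoTokenizer.from_pretrained("csebuetnlp/mT5_multilingual_XLSum")
model = AutoModelForSeq2SeqLM.from_pretrained("csebuetnlp/mT5_multilingual_XLSum")

def summarize(text: str) -> str:
    # Clean the transcription, then pad/truncate it to 512 tokens.
    input_ids = tokenizer(
        [WHITESPACE_HANDLER(text)],
        return_tensors="pt",
        padding="max_length",
        truncation=True,
        max_length=512,
    )["input_ids"]

    # Beam search with an n-gram repetition penalty, as configured in the diff.
    output_ids = model.generate(
        input_ids=input_ids,
        max_length=130,
        no_repeat_ngram_size=2,
        num_beams=4,
    )[0]

    return tokenizer.decode(
        output_ids,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )

Note that padding="max_length" pads every input out to the full 512 tokens, and truncation=True means a long transcription is summarized from its first 512 tokens only.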
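And the path the commit disables: Falcon-7B-Instruct driven through LangChain. The sketch below assembles the commented-out pieces from both hunks into one place, assuming a LangChain version contemporary with this commit, in which HuggingFacePipeline, PromptTemplate, and LLMChain were importable from the top-level langchain package and LLMChain.run() was the current API. One apparent bug is worth noting: the original passed eos_token_id=self.tokenizer.eos_token_id, i.e. the mT5 tokenizer's EOS id, into the Falcon pipeline; the sketch uses the Falcon tokenizer's own EOS id, which is presumably what was intended.

from transformers import AutoTokenizer, pipeline
from langchain import HuggingFacePipeline, PromptTemplate, LLMChain

llm_tokenizer = AutoTokenizer.from_pretrained("tiiuae/falcon-7b-instruct")

# Text-generation pipeline with the sampling settings from the diff.
falcon_pipeline = pipeline(
    "text-generation",
    model="tiiuae/falcon-7b-instruct",
    tokenizer=llm_tokenizer,
    trust_remote_code=True,
    do_sample=True,
    top_k=10,
    num_return_sequences=1,
    # The diff used the mT5 tokenizer's EOS id here; the Falcon tokenizer's
    # own EOS id is presumably what was intended.
    eos_token_id=llm_tokenizer.eos_token_id,
)

llm = HuggingFacePipeline(pipeline=falcon_pipeline, model_kwargs={"temperature": 0})

template = """
Write a concise summary of the following text delimited by triple backquotes.
```{text}```
CONCISE SUMMARY:
"""

prompt = PromptTemplate(template=template, input_variables=["text"])
llm_chain = LLMChain(prompt=prompt, llm=llm)

transcription = "..."  # the Whisper transcription; results["text"] in app.py
summ = llm_chain.run(transcription)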