Update functions.py
functions.py  +13 -13
@@ -323,7 +323,7 @@ def sentiment_pipe(earnings_text):
     return earnings_sentiment, earnings_sentences
 
 @st.cache_data
-def chunk_and_preprocess_text(text, model_name):
+def chunk_and_preprocess_text(text, model_name='philschmid/flan-t5-base-samsum'):
 
     '''Chunk and preprocess text for summarization'''
 
@@ -341,22 +341,22 @@ def chunk_and_preprocess_text(text, model_name):
         combined_length = len(tokenizer.tokenize(sentence)) + length # add the no. of sentence tokens to the length counter
 
         if combined_length <= tokenizer.max_len_single_sentence: # if it doesn't exceed
-
-
+            chunk += sentence + " " # add the sentence to the chunk
+            length = combined_length # update the length counter
 
             # if it is the last sentence
-
-
+            if count == len(sentences) - 1:
+                chunks.append(chunk) # save the chunk
 
         else:
-
-
-
-
-
-
-
-
+            chunks.append(chunk) # save the chunk
+            # reset
+            length = 0
+            chunk = ""
+
+            # take care of the overflow sentence
+            chunk += sentence + " "
+            length = len(tokenizer.tokenize(sentence))
 
     return chunks
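With the new default, callers can invoke chunk_and_preprocess_text(text) without naming a model. For context, below is a minimal sketch of how the full updated function could read. Only the lines visible in the diff are taken from this commit; the tokenizer setup, the nltk sentence split, the loop header, and the variable initialisation are assumptions filled in from the names the diff uses (tokenizer, sentences, count, chunk, chunks, length).

import nltk
import streamlit as st
from nltk.tokenize import sent_tokenize
from transformers import AutoTokenizer

nltk.download('punkt', quiet=True)  # assumption: punkt is needed for sent_tokenize

@st.cache_data
def chunk_and_preprocess_text(text, model_name='philschmid/flan-t5-base-samsum'):
    '''Chunk and preprocess text for summarization'''
    # Assumption: the tokenizer is loaded from model_name; the diff only shows
    # tokenizer.tokenize and tokenizer.max_len_single_sentence being used.
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # Assumption: sentences come from an nltk sentence split of the input text.
    sentences = sent_tokenize(text)

    chunks = []
    chunk = ""
    length = 0

    for count, sentence in enumerate(sentences):
        combined_length = len(tokenizer.tokenize(sentence)) + length  # add the no. of sentence tokens to the length counter

        if combined_length <= tokenizer.max_len_single_sentence:  # if it doesn't exceed
            chunk += sentence + " "  # add the sentence to the chunk
            length = combined_length  # update the length counter

            # if it is the last sentence
            if count == len(sentences) - 1:
                chunks.append(chunk)  # save the chunk

        else:
            chunks.append(chunk)  # save the chunk
            # reset
            length = 0
            chunk = ""

            # take care of the overflow sentence
            chunk += sentence + " "
            length = len(tokenizer.tokenize(sentence))

    return chunks

The cap at tokenizer.max_len_single_sentence keeps each chunk within the summarization model's input window (model_max_length minus the special tokens added around a single sequence). One caveat with the logic as shown: if the very last sentence falls into the else branch, the fresh chunk it starts is never appended, so the handling of that case is worth checking against the full original file.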