Update functions.py
Browse files
- functions.py +4 -5
functions.py
CHANGED
@@ -81,7 +81,7 @@ def inference(link, upload, _asr_model):
|
|
81 |
def sentiment_pipe(earnings_text):
|
82 |
'''Determine the sentiment of the text'''
|
83 |
|
84 |
-
earnings_sentences = chunk_long_text(earnings_text,
|
85 |
earnings_sentiment = sent_pipe(earnings_sentences)
|
86 |
|
87 |
return earnings_sentiment, earnings_sentences
|
@@ -99,12 +99,11 @@ def clean_text(text):
|
|
99 |
return text
|
100 |
|
101 |
@st.experimental_memo(suppress_st_warning=True)
|
102 |
-
def chunk_long_text(text,threshold,window_size=3):
|
103 |
'''Preprocess text and chunk for semantic search and sentiment analysis'''
|
104 |
|
105 |
#Convert cleaned text into sentences
|
106 |
sentences = sent_tokenize(text)
|
107 |
-
|
108 |
out = []
|
109 |
|
110 |
#Limit the length of each sentence to a threshold
|
@@ -121,12 +120,12 @@ def chunk_long_text(text,threshold,window_size=3):
|
|
121 |
|
122 |
#Combine sentences into a window of size window_size
|
123 |
for paragraph in [out]:
|
124 |
-
for start_idx in range(0, len(paragraph), window_size):
|
125 |
end_idx = min(start_idx+window_size, len(paragraph))
|
126 |
passages.append(" ".join(paragraph[start_idx:end_idx]))
|
127 |
|
128 |
return passages
|
129 |
-
|
130 |
@st.experimental_memo(suppress_st_warning=True)
|
131 |
def chunk_and_preprocess_text(text,thresh=500):
|
132 |
|
|
|
81 |
def sentiment_pipe(earnings_text):
|
82 |
'''Determine the sentiment of the text'''
|
83 |
|
84 |
+
earnings_sentences = chunk_long_text(earnings_text,150,1,1)
|
85 |
earnings_sentiment = sent_pipe(earnings_sentences)
|
86 |
|
87 |
return earnings_sentiment, earnings_sentences
|
|
|
99 |
return text
|
100 |
|
101 |
@st.experimental_memo(suppress_st_warning=True)
|
102 |
+
def chunk_long_text(text,threshold,window_size=3,stride=2):
|
103 |
'''Preprocess text and chunk for semantic search and sentiment analysis'''
|
104 |
|
105 |
#Convert cleaned text into sentences
|
106 |
sentences = sent_tokenize(text)
|
|
|
107 |
out = []
|
108 |
|
109 |
#Limit the length of each sentence to a threshold
|
|
|
120 |
|
121 |
#Combine sentences into a window of size window_size
|
122 |
for paragraph in [out]:
|
123 |
+
for start_idx in range(0, len(paragraph), stride):
|
124 |
end_idx = min(start_idx+window_size, len(paragraph))
|
125 |
passages.append(" ".join(paragraph[start_idx:end_idx]))
|
126 |
|
127 |
return passages
|
128 |
+
|
129 |
@st.experimental_memo(suppress_st_warning=True)
|
130 |
def chunk_and_preprocess_text(text,thresh=500):
|
131 |
|