Spaces:

valurank
/

keyword-extraction-demo

Build error

App Files Files Community

numBery committed on May 21, 2022

Commit

48029cd

•

1 Parent(s): 94ab8d9

Update app.py

Browse files

Files changed (1) hide show

app.py +25 -15

app.py CHANGED Viewed

@@ -20,19 +20,29 @@ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 HfFolder.save_token(st.secrets["hf-auth-token"])
-# Load KeyBert Model
-tmp_model = SentenceTransformer('valurank/MiniLM-L6-Keyword-Extraction', use_auth_token=True)
-kw_extractor = KeyBERT(tmp_model)
-# Load T5 for Paraphrasing
-t5_model = T5ForConditionalGeneration.from_pretrained('valurank/t5-paraphraser', use_auth_token=True)
-t5_tokenizer = T5Tokenizer.from_pretrained('t5-base')
-t5_model = t5_model.to(device)
 def get_keybert_results_with_vectorizer(text, number_of_results=20):
     keywords = kw_extractor.extract_keywords(text, vectorizer=KeyphraseCountVectorizer(), stop_words=None, top_n=number_of_results)
     return keywords
 def t5_paraphraser(text, number_of_results=5):
     text =  "paraphrase: " + text + " </s>"
     max_len = 2048
@@ -56,9 +66,9 @@ def t5_paraphraser(text, number_of_results=5):
     return final_outputs
-  #### Extract Sentences with Keywords -> Paraphrase multiple versions -> Extract Keywords again
 def extract_paraphrased_sentences(article):
     start1 = time.time()
@@ -71,7 +81,7 @@ def extract_paraphrased_sentences(article):
     start2 = time.time()
-    with st.spinner('Extraction Keywords from Paraphrased Target Sentences...'):
         t5_paraphrasing_keywords = []
         for sent in target_sentences:
@@ -81,7 +91,7 @@ def extract_paraphrased_sentences(article):
             t5_keywords = [(word[0], word[1]) for s in t5_keywords for word in s]
             t5_paraphrasing_keywords.extend(t5_keywords)
-    st.success('Keyword Extraction from araphrased Target Sentences finished in {}'.format(time.time() - start2))
     original_keywords_df = pd.DataFrame(original_keywords, columns=['Keyword', 'Score'])
@@ -105,9 +115,9 @@ if doc:
     st.subheader('\nOriginal Keywords Extracted:\n\n')
     st.dataframe(original_keywords_df)
     st.subheader('\nT5 Keywords Extracted:\n\n')
     st.dataframe(t5_keywords_df)
-    st.subheader('\nT5 Unique New Keywords Extracted:\n\n')
-    st.dataframe(unique_keywords_df)

 HfFolder.save_token(st.secrets["hf-auth-token"])
+@st.cache(allow_output_mutation=True)
+def load_model():
+    # Load KeyBert Model
+    tmp_model = SentenceTransformer('valurank/MiniLM-L6-Keyword-Extraction', use_auth_token=True)
+    kw_extractor = KeyBERT(tmp_model)
+    # Load T5 for Paraphrasing
+    t5_model = T5ForConditionalGeneration.from_pretrained('valurank/t5-paraphraser', use_auth_token=True)
+    t5_tokenizer = T5Tokenizer.from_pretrained('t5-base')
+    t5_model = t5_model.to(device)
+    return kw_extractor, t5_model, t5_tokenizer
+kw_extractor, t5_model, t5_tokenizer = load_model()
+@st.cache()
 def get_keybert_results_with_vectorizer(text, number_of_results=20):
     keywords = kw_extractor.extract_keywords(text, vectorizer=KeyphraseCountVectorizer(), stop_words=None, top_n=number_of_results)
     return keywords
+@st.cache()
 def t5_paraphraser(text, number_of_results=5):
     text =  "paraphrase: " + text + " </s>"
     max_len = 2048
     return final_outputs
+#### Extract Sentences with Keywords -> Paraphrase multiple versions -> Extract Keywords again
+@st.cache()
 def extract_paraphrased_sentences(article):
     start1 = time.time()
     start2 = time.time()
+    with st.spinner('Extracting Keywords from Paraphrased Target Sentences...'):
         t5_paraphrasing_keywords = []
         for sent in target_sentences:
             t5_keywords = [(word[0], word[1]) for s in t5_keywords for word in s]
             t5_paraphrasing_keywords.extend(t5_keywords)
+    st.success('Keyword Extraction from Paraphrased Target Sentences finished in {}'.format(time.time() - start2))
     original_keywords_df = pd.DataFrame(original_keywords, columns=['Keyword', 'Score'])
     st.subheader('\nOriginal Keywords Extracted:\n\n')
     st.dataframe(original_keywords_df)
+    st.subheader('\nT5 Unique New Keywords Extracted:\n\n')
+    st.dataframe(unique_keywords_df)
     st.subheader('\nT5 Keywords Extracted:\n\n')
     st.dataframe(t5_keywords_df)