numBery committed on
Commit
48029cd
1 Parent(s): 94ab8d9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -15
app.py CHANGED
@@ -20,19 +20,29 @@ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
20
 
21
  HfFolder.save_token(st.secrets["hf-auth-token"])
22
 
23
- # Load KeyBert Model
24
- tmp_model = SentenceTransformer('valurank/MiniLM-L6-Keyword-Extraction', use_auth_token=True)
25
- kw_extractor = KeyBERT(tmp_model)
26
 
27
- # Load T5 for Paraphrasing
28
- t5_model = T5ForConditionalGeneration.from_pretrained('valurank/t5-paraphraser', use_auth_token=True)
29
- t5_tokenizer = T5Tokenizer.from_pretrained('t5-base')
30
- t5_model = t5_model.to(device)
 
 
 
 
 
 
 
 
 
 
31
 
 
32
  def get_keybert_results_with_vectorizer(text, number_of_results=20):
33
  keywords = kw_extractor.extract_keywords(text, vectorizer=KeyphraseCountVectorizer(), stop_words=None, top_n=number_of_results)
34
  return keywords
35
 
 
 
36
  def t5_paraphraser(text, number_of_results=5):
37
  text = "paraphrase: " + text + " </s>"
38
  max_len = 2048
@@ -56,9 +66,9 @@ def t5_paraphraser(text, number_of_results=5):
56
 
57
  return final_outputs
58
 
59
-
60
- #### Extract Sentences with Keywords -> Paraphrase multiple versions -> Extract Keywords again
61
-
62
  def extract_paraphrased_sentences(article):
63
 
64
  start1 = time.time()
@@ -71,7 +81,7 @@ def extract_paraphrased_sentences(article):
71
 
72
 
73
  start2 = time.time()
74
- with st.spinner('Extraction Keywords from Paraphrased Target Sentences...'):
75
  t5_paraphrasing_keywords = []
76
 
77
  for sent in target_sentences:
@@ -81,7 +91,7 @@ def extract_paraphrased_sentences(article):
81
  t5_keywords = [(word[0], word[1]) for s in t5_keywords for word in s]
82
 
83
  t5_paraphrasing_keywords.extend(t5_keywords)
84
- st.success('Keyword Extraction from araphrased Target Sentences finished in {}'.format(time.time() - start2))
85
 
86
  original_keywords_df = pd.DataFrame(original_keywords, columns=['Keyword', 'Score'])
87
 
@@ -105,9 +115,9 @@ if doc:
105
  st.subheader('\nOriginal Keywords Extracted:\n\n')
106
  st.dataframe(original_keywords_df)
107
 
 
 
 
108
  st.subheader('\nT5 Keywords Extracted:\n\n')
109
  st.dataframe(t5_keywords_df)
110
 
111
- st.subheader('\nT5 Unique New Keywords Extracted:\n\n')
112
- st.dataframe(unique_keywords_df)
113
-
 
20
 
21
  HfFolder.save_token(st.secrets["hf-auth-token"])
22
 
 
 
 
23
 
24
+ @st.cache(allow_output_mutation=True)
25
+ def load_model():
26
+ # Load KeyBert Model
27
+ tmp_model = SentenceTransformer('valurank/MiniLM-L6-Keyword-Extraction', use_auth_token=True)
28
+ kw_extractor = KeyBERT(tmp_model)
29
+
30
+ # Load T5 for Paraphrasing
31
+ t5_model = T5ForConditionalGeneration.from_pretrained('valurank/t5-paraphraser', use_auth_token=True)
32
+ t5_tokenizer = T5Tokenizer.from_pretrained('t5-base')
33
+ t5_model = t5_model.to(device)
34
+ return kw_extractor, t5_model, t5_tokenizer
35
+
36
+ kw_extractor, t5_model, t5_tokenizer = load_model()
37
+
38
 
39
+ @st.cache()
40
  def get_keybert_results_with_vectorizer(text, number_of_results=20):
41
  keywords = kw_extractor.extract_keywords(text, vectorizer=KeyphraseCountVectorizer(), stop_words=None, top_n=number_of_results)
42
  return keywords
43
 
44
+
45
+ @st.cache()
46
  def t5_paraphraser(text, number_of_results=5):
47
  text = "paraphrase: " + text + " </s>"
48
  max_len = 2048
 
66
 
67
  return final_outputs
68
 
69
+
70
+ #### Extract Sentences with Keywords -> Paraphrase multiple versions -> Extract Keywords again
71
+ @st.cache()
72
  def extract_paraphrased_sentences(article):
73
 
74
  start1 = time.time()
 
81
 
82
 
83
  start2 = time.time()
84
+ with st.spinner('Extracting Keywords from Paraphrased Target Sentences...'):
85
  t5_paraphrasing_keywords = []
86
 
87
  for sent in target_sentences:
 
91
  t5_keywords = [(word[0], word[1]) for s in t5_keywords for word in s]
92
 
93
  t5_paraphrasing_keywords.extend(t5_keywords)
94
+ st.success('Keyword Extraction from Paraphrased Target Sentences finished in {}'.format(time.time() - start2))
95
 
96
  original_keywords_df = pd.DataFrame(original_keywords, columns=['Keyword', 'Score'])
97
 
 
115
  st.subheader('\nOriginal Keywords Extracted:\n\n')
116
  st.dataframe(original_keywords_df)
117
 
118
+ st.subheader('\nT5 Unique New Keywords Extracted:\n\n')
119
+ st.dataframe(unique_keywords_df)
120
+
121
  st.subheader('\nT5 Keywords Extracted:\n\n')
122
  st.dataframe(t5_keywords_df)
123