asoria HF staff committed on
Commit
5a8d02c
·
1 Parent(s): b5ec742

First try: flan-t5-base for representation model

Browse files
Files changed (1) hide show
  1. app.py +8 -5
app.py CHANGED
@@ -6,17 +6,18 @@ import duckdb
6
  import numpy as np
7
  import requests
8
 
 
9
  from torch import cuda
10
  from gradio_huggingfacehub_search import HuggingfaceHubSearch
11
  from bertopic import BERTopic
12
  from bertopic.representation import KeyBERTInspired
 
13
  from cuml.manifold import UMAP
14
  from cuml.cluster import HDBSCAN
15
  from huggingface_hub import HfApi
16
  from sklearn.feature_extraction.text import CountVectorizer
17
  from sentence_transformers import SentenceTransformer
18
-
19
- from dotenv import load_dotenv
20
 
21
  # These imports at the end because of torch/datamapplot issue in Zero GPU
22
  # import spaces
@@ -51,10 +52,12 @@ CHUNK_SIZE = 10_000
51
 
52
  session = requests.Session()
53
  sentence_model = SentenceTransformer("all-MiniLM-L6-v2")
54
- keybert = KeyBERTInspired()
55
- vectorizer_model = CountVectorizer(stop_words="english")
56
 
57
- representation_model = KeyBERTInspired()
 
 
 
 
58
 
59
  global_topic_model = None
60
 
 
6
  import numpy as np
7
  import requests
8
 
9
+ from dotenv import load_dotenv
10
  from torch import cuda
11
  from gradio_huggingfacehub_search import HuggingfaceHubSearch
12
  from bertopic import BERTopic
13
  from bertopic.representation import KeyBERTInspired
14
+ from bertopic.representation import TextGeneration
15
  from cuml.manifold import UMAP
16
  from cuml.cluster import HDBSCAN
17
  from huggingface_hub import HfApi
18
  from sklearn.feature_extraction.text import CountVectorizer
19
  from sentence_transformers import SentenceTransformer
20
+ from transformers import pipeline
 
21
 
22
  # These imports at the end because of torch/datamapplot issue in Zero GPU
23
  # import spaces
 
52
 
53
  session = requests.Session()
54
  sentence_model = SentenceTransformer("all-MiniLM-L6-v2")
 
 
55
 
56
+ prompt = "I have a topic described by the following keywords: [KEYWORDS]. Based on the previous keywords, what is this topic about?"
57
+ generator = pipeline("text2text-generation", model="google/flan-t5-base")
58
+ representation_model = TextGeneration(generator)
59
+
60
+ vectorizer_model = CountVectorizer(stop_words="english")
61
 
62
  global_topic_model = None
63