Spaces:
Sleeping
Sleeping
Move spaces.GPU to generate_topics
Browse files
app.py
CHANGED
@@ -15,8 +15,6 @@ from bertopic import BERTopic
|
|
15 |
from bertopic.representation import KeyBERTInspired
|
16 |
from bertopic.representation import TextGeneration
|
17 |
|
18 |
-
from cuml.manifold import UMAP
|
19 |
-
from cuml.cluster import HDBSCAN
|
20 |
|
21 |
from huggingface_hub import HfApi, SpaceCard
|
22 |
from sklearn.feature_extraction.text import CountVectorizer
|
@@ -126,7 +124,7 @@ def get_docs_from_parquet(parquet_urls, column, offset, limit):
|
|
126 |
return df[column].tolist()
|
127 |
|
128 |
|
129 |
-
@spaces.GPU
|
130 |
def calculate_embeddings(docs):
|
131 |
return sentence_model.encode(docs, show_progress_bar=True, batch_size=32)
|
132 |
|
@@ -137,8 +135,11 @@ def calculate_n_neighbors_and_components(n_rows):
|
|
137 |
return n_neighbors, n_components
|
138 |
|
139 |
|
140 |
-
@spaces.GPU
|
141 |
def fit_model(docs, embeddings, n_neighbors, n_components):
|
|
|
|
|
|
|
142 |
umap_model = UMAP(
|
143 |
n_neighbors=n_neighbors,
|
144 |
n_components=n_components,
|
@@ -234,7 +235,10 @@ datasets:
|
|
234 |
return repo_id
|
235 |
|
236 |
|
|
|
237 |
def generate_topics(dataset, config, split, column, nested_column, plot_type):
|
|
|
|
|
238 |
logging.info(
|
239 |
f"Generating topics for {dataset} with config {config} {split} {column} {nested_column}"
|
240 |
)
|
|
|
15 |
from bertopic.representation import KeyBERTInspired
|
16 |
from bertopic.representation import TextGeneration
|
17 |
|
|
|
|
|
18 |
|
19 |
from huggingface_hub import HfApi, SpaceCard
|
20 |
from sklearn.feature_extraction.text import CountVectorizer
|
|
|
124 |
return df[column].tolist()
|
125 |
|
126 |
|
127 |
+
# @spaces.GPU
|
128 |
def calculate_embeddings(docs):
|
129 |
return sentence_model.encode(docs, show_progress_bar=True, batch_size=32)
|
130 |
|
|
|
135 |
return n_neighbors, n_components
|
136 |
|
137 |
|
138 |
+
# @spaces.GPU
|
139 |
def fit_model(docs, embeddings, n_neighbors, n_components):
|
140 |
+
from cuml.manifold import UMAP
|
141 |
+
from cuml.cluster import HDBSCAN
|
142 |
+
|
143 |
umap_model = UMAP(
|
144 |
n_neighbors=n_neighbors,
|
145 |
n_components=n_components,
|
|
|
235 |
return repo_id
|
236 |
|
237 |
|
238 |
+
@spaces.GPU(duration=600)
|
239 |
def generate_topics(dataset, config, split, column, nested_column, plot_type):
|
240 |
+
from cuml.manifold import UMAP
|
241 |
+
|
242 |
logging.info(
|
243 |
f"Generating topics for {dataset} with config {config} {split} {column} {nested_column}"
|
244 |
)
|