asoria HF staff committed on
Commit
e545887
1 Parent(s): 6cb619c

Move spaces.GPU to generate_topics

Browse files
Files changed (1) hide show
  1. app.py +8 -4
app.py CHANGED
@@ -15,8 +15,6 @@ from bertopic import BERTopic
15
  from bertopic.representation import KeyBERTInspired
16
  from bertopic.representation import TextGeneration
17
 
18
- from cuml.manifold import UMAP
19
- from cuml.cluster import HDBSCAN
20
 
21
  from huggingface_hub import HfApi, SpaceCard
22
  from sklearn.feature_extraction.text import CountVectorizer
@@ -126,7 +124,7 @@ def get_docs_from_parquet(parquet_urls, column, offset, limit):
126
  return df[column].tolist()
127
 
128
 
129
- @spaces.GPU
130
  def calculate_embeddings(docs):
131
  return sentence_model.encode(docs, show_progress_bar=True, batch_size=32)
132
 
@@ -137,8 +135,11 @@ def calculate_n_neighbors_and_components(n_rows):
137
  return n_neighbors, n_components
138
 
139
 
140
- @spaces.GPU
141
  def fit_model(docs, embeddings, n_neighbors, n_components):
 
 
 
142
  umap_model = UMAP(
143
  n_neighbors=n_neighbors,
144
  n_components=n_components,
@@ -234,7 +235,10 @@ datasets:
234
  return repo_id
235
 
236
 
 
237
  def generate_topics(dataset, config, split, column, nested_column, plot_type):
 
 
238
  logging.info(
239
  f"Generating topics for {dataset} with config {config} {split} {column} {nested_column}"
240
  )
 
15
  from bertopic.representation import KeyBERTInspired
16
  from bertopic.representation import TextGeneration
17
 
 
 
18
 
19
  from huggingface_hub import HfApi, SpaceCard
20
  from sklearn.feature_extraction.text import CountVectorizer
 
124
  return df[column].tolist()
125
 
126
 
127
+ # @spaces.GPU
128
  def calculate_embeddings(docs):
129
  return sentence_model.encode(docs, show_progress_bar=True, batch_size=32)
130
 
 
135
  return n_neighbors, n_components
136
 
137
 
138
+ # @spaces.GPU
139
  def fit_model(docs, embeddings, n_neighbors, n_components):
140
+ from cuml.manifold import UMAP
141
+ from cuml.cluster import HDBSCAN
142
+
143
  umap_model = UMAP(
144
  n_neighbors=n_neighbors,
145
  n_components=n_components,
 
235
  return repo_id
236
 
237
 
238
+ @spaces.GPU(duration=600)
239
  def generate_topics(dataset, config, split, column, nested_column, plot_type):
240
+ from cuml.manifold import UMAP
241
+
242
  logging.info(
243
  f"Generating topics for {dataset} with config {config} {split} {column} {nested_column}"
244
  )