Spaces:
Running
on
T4
Running
on
T4
import spacy | |
import os | |
# Download the model if it is not already present | |
if not spacy.util.is_package("en_core_web_sm"): | |
spacy.cli.download("en_core_web_sm") | |
nlp = spacy.load("en_core_web_sm") | |
# It would be possible to remove bolding for stopwords without removing them from the query, | |
# but that would require a java plugin which we didn't want to complicate this sample app with. | |
def filter(text): | |
doc = nlp(text) | |
tokens = [token.text for token in doc if not token.is_stop] | |
if len(tokens) == 0: | |
# if we remove all the words we don't have a query at all, so use the original | |
return text | |
return " ".join(tokens) | |