Spaces:
Sleeping
Sleeping
File size: 1,127 Bytes
7fafac4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 |
from nltk.tokenize import sent_tokenize
# Function to map keywords to sentences with customizable context window size
def map_keywords_to_sentences(text, keywords, context_window_size):
    """Map each keyword to the sentences that mention it, plus leading context.

    ``text`` is split into sentences with ``sent_tokenize``. For every
    sentence that contains a keyword (plain, case-sensitive substring test),
    that sentence and up to ``context_window_size`` sentences immediately
    before it are joined with single spaces to form a context snippet.
    Snippets collected for the same keyword are concatenated, separated by a
    single space, in order of appearance. Overlapping windows are not
    de-duplicated, so a sentence can appear more than once in a value.

    Args:
        text: The text to search.
        keywords: Iterable of keyword strings to look for.
        context_window_size: Number of preceding sentences to include with
            each matching sentence. Negative values are treated as 0.

    Returns:
        A dict mapping each keyword that matched at least one sentence to
        its combined context string. Keywords without a match are absent.
    """
    sentences = sent_tokenize(text)
    # Diagnostic dump of the tokenized sentences.
    print(f"\n\nSentences: {sentences}\n\n")

    # A negative window would move the slice start past the matching
    # sentence and produce empty snippets, so clamp it once up front.
    window = max(0, context_window_size)

    # Gather snippets per keyword in lists and join once at the end instead
    # of growing each value with repeated string concatenation.
    snippets_by_keyword = {}
    for keyword in keywords:
        for i, sentence in enumerate(sentences):
            if keyword in sentence:
                # Only look backwards: the matching sentence closes the window.
                start = max(0, i - window)
                snippet = ' '.join(sentences[start:i + 1])
                snippets_by_keyword.setdefault(keyword, []).append(snippet)

    return {
        keyword: ' '.join(snippets)
        for keyword, snippets in snippets_by_keyword.items()
    }