File size: 1,127 Bytes
7fafac4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
from nltk.tokenize import sent_tokenize

# Function to map keywords to sentences with customizable context window size
def map_keywords_to_sentences(text, keywords, context_window_size):
    """Map each keyword to the sentences of *text* that mention it.

    The text is split into sentences with ``sent_tokenize``. For every
    sentence that contains a keyword (case-sensitive substring match), that
    sentence plus up to ``context_window_size`` sentences immediately before
    it are joined with single spaces into one context string. All contexts
    found for a keyword are concatenated in document order, separated by
    single spaces.

    Args:
        text: The document to search.
        keywords: Iterable of keyword strings. A keyword that appears more
            than once is processed only once, so its context is not
            duplicated in the result.
        context_window_size: Number of preceding sentences to include with
            each matching sentence. Negative values are treated as 0.

    Returns:
        A dict mapping each keyword that occurs at least once to its
        combined context string. Keywords with no match are omitted.
    """
    import logging

    sentences = sent_tokenize(text)
    # Diagnostic output goes through logging so callers control its
    # visibility instead of having stdout written to on every call.
    logging.getLogger(__name__).debug("Sentences: %s", sentences)

    # A negative window would push the slice start past the matching
    # sentence and produce an empty context, so clamp it.
    window = max(0, context_window_size)

    keyword_sentence_mapping = {}
    # dict.fromkeys de-duplicates while keeping the caller's keyword order.
    for keyword in dict.fromkeys(keywords):
        contexts = [
            ' '.join(sentences[max(0, i - window):i + 1])
            for i, sentence in enumerate(sentences)
            if keyword in sentence
        ]
        if contexts:
            keyword_sentence_mapping[keyword] = ' '.join(contexts)
    return keyword_sentence_mapping