|
import spacy
|
|
import unicodedata
|
|
import re
|
|
import streamlit as st
|
|
from sentence_transformers import SentenceTransformer, util
|
|
|
|
|
|
def clean_text(text: str) -> str:
    """Return *text* normalized to Unicode NFKC form.

    NFKC folds compatibility characters into their plain equivalents, so
    ligatures, full-width letters and typographic spaces (including
    NO-BREAK SPACE U+00A0 and HAIR SPACE U+200A) come out as ordinary
    characters.

    Args:
        text: Raw text to normalize.

    Returns:
        The normalized text.
    """
    # U+00A0 and U+200A both have a compatibility decomposition to U+0020,
    # so NFKC already turns them into plain spaces. The explicit
    # ``.replace()`` calls that used to follow could never match anything.
    return unicodedata.normalize('NFKC', text)
|
|
|
|
|
|
# Streamlit re-executes this script on every widget interaction. Without
# caching, each keyword submission would reload the embedding model and
# re-encode every section. The two helpers below keep those expensive results
# alive across reruns.


@st.cache_resource
def _load_model(model_name):
    """Load the sentence-embedding model *model_name* once and reuse it."""
    return SentenceTransformer(model_name)


@st.cache_resource
def _index_sections(section_pairs):
    """Build the title -> content and title -> embedding lookup tables.

    Args:
        section_pairs: ``(title, content)`` tuples as produced by
            ``re.findall`` with ``pattern``.

    Returns:
        A ``(contents, vectors)`` pair of dicts, both keyed by the stripped,
        lower-cased title. ``vectors`` holds the tensor returned by
        ``model.encode`` for the stripped content. If a title occurs more
        than once, the last occurrence wins.
    """
    contents = {}
    vectors = {}
    for title, content in section_pairs:
        cleaned_title = title.strip().lower()
        cleaned_content = content.strip()
        contents[cleaned_title] = cleaned_content
        vectors[cleaned_title] = model.encode(
            cleaned_content, convert_to_tensor=True
        )
    return contents, vectors


model = _load_model('all-MiniLM-L6-v2')

with open('long-story.txt', encoding='utf-8') as f:
    text = f.read()

cleaned_text = clean_text(text)

# A section is: a blank line, a title line made only of uppercase ASCII
# letters and whitespace, then the run of consecutive non-empty lines that
# follows it (the content stops at the next blank line). Because the pattern
# requires two leading newlines, a heading on the very first line of the
# file is not matched.
pattern = r"\n\n([A-Z\s]+)\n([^\n]+(?:\n[^\n]+)*)"

sections = re.findall(pattern, cleaned_text)

# The cached dicts are shared between reruns, so treat them as read-only.
sections_dict, embeddings_dict = _index_sections(sections)
|
|
|
|
|
|
def get_section(user_input, threshold=0.5):
    """Return the stored section whose content best matches *user_input*.

    The query is lower-cased, embedded with the module-level ``model`` and
    compared by cosine similarity against every embedding in
    ``embeddings_dict``. Ties keep the first title in dict order.

    Args:
        user_input: Keyword or phrase typed by the user.
        threshold: The best similarity must be strictly greater than this
            value for a section to be returned. Defaults to 0.5.

    Returns:
        The matching section text from ``sections_dict``, or the string
        ``"No matching section found."`` when the query is blank, no
        sections are indexed, or no score exceeds ``threshold``.
    """
    no_match = "No matching section found."

    query = user_input.strip().lower()
    # Skip the model call when there is nothing to compare.
    if not query or not embeddings_dict:
        return no_match

    query_embedding = model.encode(query, convert_to_tensor=True)

    # pytorch_cos_sim returns a 1x1 matrix here; [0][0].item() extracts the
    # plain float score.
    scores = {
        title: util.pytorch_cos_sim(query_embedding, section_embedding)[0][0].item()
        for title, section_embedding in embeddings_dict.items()
    }

    best_match = max(scores, key=scores.get)
    if scores[best_match] > threshold:
        return sections_dict[best_match]
    return no_match
|
|
|
|
|
|
def chatbot_ui():
    """Draw the page: heading, usage hint, keyword box and lookup result."""
    st.title("Text-Based Chatbot")

    usage_hint = "Ask the chatbot for specific sections from the document by typing keywords like 'productivity', 'life hacks', 'communication skills', 'skill development', 'personal development', 'goal setting' "
    st.write(usage_hint)

    keyword = st.text_input("Enter a keyword", "")
    if not keyword:
        # Nothing typed yet, so there is nothing to look up.
        return

    st.write(get_section(keyword))
|
|
|
|
|
|
# Build the page only when this file is run as the main script, not when it
# is imported as a module.
if __name__ == "__main__":
    chatbot_ui()
|
|
|