"""Streamlit app: semantic search over a CSV corpus of sentences.

The corpus is embedded once with a SentenceTransformer model and the result is
cached on disk; each query typed by the user is embedded and ranked against the
corpus by cosine similarity.
"""
from os.path import exists

import numpy as np
import pandas as pd
import streamlit as st
import torch
from sentence_transformers import SentenceTransformer, util

MODEL_NAME = 'all-MiniLM-L6-v2'
CORPUS_PATH = 'financial-sentences.csv'
EMBEDDINGS_PATH = 'embeddings.npy'


def _load_corpus_embeddings(model, sentences):
    """Return the corpus embeddings as a tensor, reusing the disk cache if valid.

    The cached array is only trusted when it has one row per sentence;
    otherwise (missing file, or a corpus that changed since the cache was
    written) the embeddings are recomputed and the cache is overwritten.
    """
    if exists(EMBEDDINGS_PATH):
        cached = np.load(EMBEDDINGS_PATH)
        if cached.ndim == 2 and cached.shape[0] == len(sentences):
            return torch.from_numpy(cached)

    embeddings = model.encode(
        sentences,
        batch_size=23,
        show_progress_bar=False,
        convert_to_tensor=True,
    )
    np.save(EMBEDDINGS_PATH, embeddings.detach().cpu().numpy())
    return embeddings


st.sidebar.image("./NarrativaLogoBlanco.png")
topK = st.sidebar.slider("Number of results: ", 1, 20, 5, 1)

st.write("# Semantic News Search 🔍📰")

model = SentenceTransformer(MODEL_NAME, device='cpu')

df = pd.read_csv(CORPUS_PATH)
sentences = df['sentences'].to_list()

corpus_embeddings = _load_corpus_embeddings(model, sentences)

sentence = st.text_input('Enter a sentence:')

if sentence:
    embedding = model.encode(sentences=[sentence], convert_to_tensor=True)
    cosine_scores = util.cos_sim(embedding, corpus_embeddings)[0]

    # torch.topk raises if k exceeds the number of candidates, so never ask
    # for more results than the corpus contains.
    k = min(topK, len(sentences))
    top_results = torch.topk(cosine_scores, k=k)

    st.write()
    st.write(" **Query:**", sentence)
    st.write(f"\n **Top {k} most similar sentences in corpus:**\n")

    # Convert to plain Python floats/ints before formatting and indexing.
    scores = top_results.values.tolist()
    indices = top_results.indices.tolist()
    for score, idx in zip(scores, indices):
        st.write(sentences[idx])
        st.write(f"*Score:* {score:.4f}")
        st.write()
    st.write()