Spaces:

vnemala
/

StreamlitHeatmapAndCluster

Runtime error

App Files Files Community

StreamlitHeatmapAndCluster / app.py

vnemala

Create new file

35710be over 2 years ago

raw

history blame contribute delete

2.68 kB

	import streamlit as st
	import nltk
	from transformers import pipeline
	from sentence_transformers import SentenceTransformer
	from scipy.spatial.distance import cosine
	import numpy as np
	import seaborn as sns
	import matplotlib.pyplot as plt
	from sklearn.cluster import KMeans
	import tensorflow as tf
	import tensorflow_hub as hub


	def cluster_examples(messages, embed, nc=3):
	    """Group sentences with K-Means and render each cluster in Streamlit.

	    Args:
	        messages: Sequence of sentence strings; entry ``i`` is assumed to
	            correspond to row ``i`` of ``embed``.
	        embed: Sentence embeddings handed to ``KMeans.fit_predict``.
	        nc: Requested number of clusters. Capped at ``len(messages)``.

	    Returns:
	        None. Output is written to the page via ``st.markdown``.
	    """
	    # Nothing to cluster; K-Means cannot be fitted on zero samples.
	    if len(messages) == 0:
	        return
	    # KMeans raises ValueError when n_clusters exceeds the number of
	    # samples, e.g. when more clusters are requested than there are
	    # sentences. Cap the count instead of crashing the app.
	    nc = min(nc, len(messages))
	    km = KMeans(
	        n_clusters=nc, init='random',
	        n_init=10, max_iter=300,
	        tol=1e-04, random_state=0,
	    )
	    # One integer cluster label per input row.
	    labels = km.fit_predict(embed)
	    for n in range(nc):
	        st.markdown("CLUSTER : %d" % n)
	        for message, label in zip(messages, labels):
	            if label == n:
	                st.markdown(message)


	def plot_heatmap(labels, heatmap, rotation=90):
	    """Render a labelled similarity heatmap in the Streamlit app.

	    Args:
	        labels: Tick labels applied to both the x and y axes.
	        heatmap: 2-D matrix of values to draw; the color scale is fixed
	            to the range [-1, 1].
	        rotation: Rotation, in degrees, of the x tick labels.

	    Returns:
	        None. The figure is emitted with ``st.pyplot``.
	    """
	    sns.set(font_scale=1.2)
	    fig, ax = plt.subplots()
	    # Draw on the axes created above instead of relying on pyplot's
	    # implicit "current axes" global state.
	    g = sns.heatmap(
	        heatmap,
	        xticklabels=labels,
	        yticklabels=labels,
	        vmin=-1,
	        vmax=1,
	        cmap="coolwarm",
	        ax=ax,
	    )
	    g.set_xticklabels(labels, rotation=rotation)
	    g.set_title("Textual Similarity")
	    st.pyplot(fig)
	    # pyplot keeps every figure it creates registered until it is closed;
	    # without this, each call leaves another figure in memory.
	    plt.close(fig)

	# Streamlit input widgets: source text, cluster count and encoder choice.
	_default_text = "Behavior right this is a kind of Heisenberg uncertainty principle situation if I told you, then you behave differently. What would be the impressive thing is you have talked about winning a nobel prize in a system winning a nobel prize. Adjusting it and then making your own. That is when I fell in love with computers. I realized that they were a very magical device. Can go to sleep come back the next day and it is solved. You know that feels magical to me."
	_model_choices = ('Sentence Transformer', 'Universal Sentence Encoder')

	# Free-form text that is later split into sentences.
	text = st.text_area('Enter sentences:', value=_default_text)

	# Number of K-Means clusters requested by the user (1 to 15, default 3).
	nc = st.slider(
	    'Select a number of clusters:',
	    min_value=1,
	    max_value=15,
	    value=3,
	)

	# Which sentence encoder to use; the first option is preselected.
	model_type = st.radio("Choose model:", _model_choices, index=0)

	# Model setup: load the encoder that matches the selected radio option.
	# NOTE(review): this runs at module top level, so the model is presumably
	# reloaded whenever Streamlit re-executes the script; consider caching.
	if model_type == "Universal Sentence Encoder":
	    model_url = "https://tfhub.dev/google/universal-sentence-encoder-large/5"
	    model = hub.load(model_url)
	elif model_type == "Sentence Transformer":
	    model = SentenceTransformer('paraphrase-distilroberta-base-v1')

	# Tokenizer data required by nltk's sentence splitter.
	nltk.download('punkt')

	# Run model
	# Tokenize only when there is input. Whitespace-only text is truthy but
	# yields no sentences, which the embedding and clustering steps below
	# cannot handle, so the guard is on the sentence list rather than the text.
	sentences = nltk.tokenize.sent_tokenize(text) if text else []
	if sentences:
	    # The two encoders expose different call conventions.
	    if model_type == "Sentence Transformer":
	        embed = model.encode(sentences)
	    elif model_type == "Universal Sentence Encoder":
	        embed = model(sentences).numpy()
	    # Pairwise cosine similarity (1 - cosine distance) between embeddings.
	    sim = np.zeros([len(embed), len(embed)])
	    for i, em in enumerate(embed):
	        for j, ea in enumerate(embed):
	            sim[i][j] = 1.0 - cosine(em, ea)
	    st.subheader("Similarity Heatmap")
	    plot_heatmap(sentences, sim)
	    st.subheader("Results from K-Means Clustering")
	    # K-Means cannot form more clusters than there are samples, so cap the
	    # requested count at the number of sentences.
	    cluster_examples(sentences, embed, min(nc, len(sentences)))