HarryLee committed on
Commit
88cace9
1 Parent(s): ce27c03

Add app.py and datasets

Browse files
Files changed (3) hide show
  1. Data/tiktok_utf8.csv +0 -0
  2. app.py +35 -0
  3. requirements.txt +1 -0
Data/tiktok_utf8.csv ADDED
The diff for this file is too large to render. See raw diff
 
app.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Streamlit app that shows the BERTopic topic most similar to a user query.

A BERTopic model is fitted on the ``text`` column of ``Data/tiktok_utf8.csv``.
The text typed into the input box is matched against the fitted topics and the
word list of the closest topic is displayed.
"""
import streamlit as st
from bertopic import BERTopic
import re
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer

DATA_PATH = './Data/tiktok_utf8.csv'
TOP_N_TOPICS = 20


def _cache_resource(func):
    """Return *func* wrapped in whichever Streamlit resource cache exists.

    Newer Streamlit releases expose ``st.cache_resource``; older ones only
    provide ``st.cache``. Looking the attribute up at runtime keeps the app
    working on both.
    """
    modern_cache = getattr(st, "cache_resource", None)
    if modern_cache is not None:
        return modern_cache(func)
    # allow_output_mutation avoids hashing the (large, mutable) fitted model.
    return st.cache(allow_output_mutation=True)(func)


@_cache_resource
def fit_topic_model(csv_path):
    """Fit a BERTopic model on the ``text`` column of the CSV at *csv_path*.

    Streamlit re-executes the whole script on every interaction, so the fit is
    cached: it runs once per process instead of once per rerun.

    Args:
        csv_path: Path of a CSV file that has a ``text`` column.

    Returns:
        The fitted ``BERTopic`` instance.
    """
    frame = pd.read_csv(csv_path)
    # read_csv turns empty cells into NaN; keep only real strings for fitting.
    documents = frame["text"].dropna().astype(str).to_list()

    vectorizer_model = CountVectorizer(stop_words="english")
    topic_model = BERTopic(verbose=True, vectorizer_model=vectorizer_model)
    topic_model.fit_transform(documents)
    return topic_model


st.set_page_config(page_title='eRupt Topic Trendy (e-Commerce x Social Media)', page_icon=None, layout='centered', initial_sidebar_state='auto')

st.markdown("<h1 style='text-align: center;'>Topic Trendy</h1>", unsafe_allow_html=True)

input_text = st.text_area("Enter product topic here")

topic_model = fit_topic_model(DATA_PATH)

if input_text.strip():
    similar_topics, similarity = topic_model.find_topics(input_text, top_n=TOP_N_TOPICS)

    most_similar = similar_topics[0]
    topic_words = topic_model.get_topic(most_similar)

    # Server-side trace of the match, written to the process stdout.
    print(most_similar)
    print("Most Similar Topic Info: \n{}".format(topic_words))
    print("Similarity Score: {}".format(similarity[0]))

    # get_topic does not return a string; cast explicitly for the text widget.
    st.text_area("Most Similar Topic List is Here", str(topic_words), key="topic_list")
else:
    # Nothing typed yet: searching with an empty query gives a meaningless match.
    st.info("Enter a product topic above to see the most similar topic.")

st.image('https://freepngimg.com/download/keyboard/6-2-keyboard-png-file.png', use_column_width=True)
st.markdown("<h6 style='text-align: center; color: #808080;'>Created By LiHE</h6>", unsafe_allow_html=True)
requirements.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ bertopic==0.10.0