NazmusAshrafi committed on
Commit
57f0fa1
1 Parent(s): c303720

updated with larger model

Browse files
Files changed (2) hide show
  1. app.py +24 -11
  2. instructions.txt +7 -0
app.py CHANGED
@@ -6,7 +6,7 @@ from transformers import pipeline, AutoModelForSequenceClassification, AutoToken
6
  def get_model():
7
  tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
8
  pulled_model = AutoModelForSequenceClassification.from_pretrained(
9
- "NazmusAshrafi/stock_twitter_topic_Bert")
10
  return tokenizer, pulled_model
11
 
12
 
@@ -21,21 +21,34 @@ classifier = pipeline("sentiment-analysis",
21
 
22
  st.title("Find the topic of a stock related tweets")
23
  st.subheader(
24
- 'This model can predict 3 topics - :blue[Investment decision], :green[User Volume], :orange[Market crisis] - Entering a topic related tweet will yeild the best results')
25
-
 
 
 
 
 
 
 
 
 
 
26
 
 
 
 
 
27
  st.markdown(
28
- ':blue[Investment decision example: "AAP Dow Theory = Will go down in future, Do not buy"]')
29
  st.markdown(
30
- ':green[User Volume example: "Loosing a lot of volume, user are leaving this platform"]')
31
  st.markdown(
32
- ':orange[Market crisis example: "Market is in danger because of corona virus"]')
 
 
33
 
34
 
35
  st.subheader("", divider='rainbow')
36
- # 0 = Investment decision
37
- # 1 = User volume
38
- # 2 = Market crisis
39
 
40
 
41
  user_input = st.text_area("Enter a tweet about a stock")
@@ -50,5 +63,5 @@ if user_input and button:
50
  st.write("Confidence Score: ", classifier(user_input)[0]['score'])
51
 
52
 
53
- st.markdown(
54
- 'Note: Model may be predicting "Investment decision" way too often, this is because of the data imbalance. The model was trained on 3 topics, but the data was not balanced. The model was trained on a dataset where a large portion of the data was on "Investment decision" topic. This is why the model is biased towards "Investment decision" topic. This is a flaw of this model, and it can be fixed by training the model on a balanced dataset.')
 
6
  def get_model():
7
  tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
8
  pulled_model = AutoModelForSequenceClassification.from_pretrained(
9
+ "NazmusAshrafi/large_dataset_stock_twitter_topic_Bert")
10
  return tokenizer, pulled_model
11
 
12
 
 
21
 
22
  st.title("Find the topic of a stock related tweets")
23
  st.subheader(
24
+ 'This model can predict 5 topics - :blue[Tesla Investment News], :green[Technology stock watchlist], :orange[Esports News], :blue[Apple Market Updates], :green[Amazon Updates] - Entering a topic related tweet will yield the best results')
25
+
26
+ # elif topic == 0:
27
+ # return 'Tesla Investment News'
28
+ # elif topic == 1:
29
+ # return 'Technology stock watchlist'
30
+ # elif topic == 2:
31
+ # return 'Esports News'
32
+ # elif topic == 3:
33
+ # return 'Apple Market Updates'
34
+ # elif topic == 4:
35
+ # return 'Amazon Updates'
36
 
37
+ st.subheader(
38
+ 'Examples:')
39
+ st.markdown(
40
+ ':blue[Tesla Investment News: "Electric cars stock going to go up in future, buy now"]')
41
  st.markdown(
42
+ ':green[Technology stock watchlist: "Keep a look out for that stock, its really good!"]')
43
  st.markdown(
44
+ ':orange[Esports News: "Ninja is going to play in the next tournament, the best is back playing Fortnite."]')
45
  st.markdown(
46
+ ':blue[Apple Market Updates: "$AAPL released a new phone but the looks never change much. Buy anyway!"]')
47
+ st.markdown(
48
+ ':green[Amazon Updates: "Black Fridays at Amazon, hail Jeff Bezos"]')
49
 
50
 
51
  st.subheader("", divider='rainbow')
 
 
 
52
 
53
 
54
  user_input = st.text_area("Enter a tweet about a stock")
 
63
  st.write("Confidence Score: ", classifier(user_input)[0]['score'])
64
 
65
 
66
+ # st.markdown(
67
+ # 'Note: Model may be predicting "Investment decision" way too often, this is because of the data imbalance. The model was trained on 3 topics, but the data was not balanced. The model was trained on a dataset where a large portion of the data was on "Investment decision" topic. This is why the model is biased towards "Investment decision" topic. This is a flaw of this model, and it can be fixed by training the model on a balanced dataset.')
instructions.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ Run using:
2
+ streamlit run app.py
3
+
4
+ Push using:
5
+ git add .
6
+ git commit -m "New feature added"
7
+ git push