Spaces:
Running
Running
Merge branch 'master' of https://github.com/Tymec/projekt-psi
Browse files
- README.md +4 -0
- app/model/__init__.py +0 -0
- app/model/base.py +49 -0
- app/model/tfid_lr.py +35 -0
README.md
CHANGED
@@ -12,6 +12,10 @@ Sentiment Analysis
|
|
12 |
- [IMDb](https://www.kaggle.com/datasets/lakshmi25npathi/imdb-dataset-of-50k-movie-reviews)
|
13 |
- [Amazon Reviews](https://www.kaggle.com/datasets/bittlingmayer/amazonreviews)
|
14 |
|
|
|
|
|
|
|
|
|
15 |
### TODO
|
16 |
- [ ] CLI using `click` (commands: predict, train, evaluate) with settings set via flags or environment variables
|
17 |
- [ ] GUI using `gradio` (tabs: predict, train, evaluate, compare, settings)
|
|
|
12 |
- [IMDb](https://www.kaggle.com/datasets/lakshmi25npathi/imdb-dataset-of-50k-movie-reviews)
|
13 |
- [Amazon Reviews](https://www.kaggle.com/datasets/bittlingmayer/amazonreviews)
|
14 |
|
15 |
+
### Required tools
|
16 |
+
- `just`
|
17 |
+
- `poetry`
|
18 |
+
|
19 |
### TODO
|
20 |
- [ ] CLI using `click` (commands: predict, train, evaluate) with settings set via flags or environment variables
|
21 |
- [ ] GUI using `gradio` (tabs: predict, train, evaluate, compare, settings)
|
app/model/__init__.py
ADDED
File without changes
|
app/model/base.py
ADDED
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from __future__ import annotations
|
2 |
+
|
3 |
+
from abc import ABC, abstractmethod
|
4 |
+
from typing import TYPE_CHECKING
|
5 |
+
|
6 |
+
import joblib
|
7 |
+
|
8 |
+
if TYPE_CHECKING:
|
9 |
+
from pathlib import Path
|
10 |
+
|
11 |
+
from sklearn.pipeline import Pipeline
|
12 |
+
|
13 |
+
|
14 |
+
class Model(ABC):
    """Base class for all models.

    Subclasses provide the ``pipeline`` and ``description`` properties and the
    ``_predict`` hook. This class builds persistence (``from_file`` /
    ``to_file``), training (``train``) and the public ``predict`` entry point
    on top of them.
    """

    @property
    @abstractmethod
    def pipeline(self) -> Pipeline:
        """Pipeline used for the model."""
        ...

    @property
    @abstractmethod
    def description(self) -> str:
        """Description of the architecture."""
        ...

    @abstractmethod
    def _predict(self, text: str) -> int:
        """Predict the sentiment of the given text."""
        ...

    @staticmethod
    def from_file(path: Path) -> Model:
        """Load the model from the given file.

        Args:
            path: File previously written by ``to_file``.

        Returns:
            The deserialized model.

        Raises:
            TypeError: If the file does not contain a ``Model`` instance.
        """
        # SECURITY: joblib.load is pickle-based and can execute arbitrary code
        # while loading; only call this on files from a trusted source.
        loaded = joblib.load(path)
        # Fail here with a clear message instead of letting an unrelated
        # object leak out and break later on the first predict()/train() call.
        if not isinstance(loaded, Model):
            msg = f"Expected a Model instance in {path}, got {type(loaded).__name__}"
            raise TypeError(msg)
        return loaded

    def to_file(self, path: Path) -> None:
        """Save the model to the given file.

        The whole object (not only the pipeline) is dumped with joblib, so it
        can be restored with ``from_file``.
        """
        joblib.dump(self, path)

    def predict(self, text: str) -> int:
        """Perform sentiment analysis on the given text.

        Delegates to the subclass-specific ``_predict`` hook.
        """
        return self._predict(text)

    def train(self, x: list[str], y: list[int]) -> None:
        """Train the model on the given data.

        Args:
            x: Input texts.
            y: Labels passed to the pipeline's ``fit`` alongside ``x``.
        """
        self.pipeline.fit(x, y)
|
app/model/tfid_lr.py
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from __future__ import annotations
|
2 |
+
|
3 |
+
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
|
4 |
+
from sklearn.linear_model import LogisticRegression
|
5 |
+
from sklearn.pipeline import Pipeline
|
6 |
+
|
7 |
+
from .base import Model
|
8 |
+
|
9 |
+
|
10 |
+
class TfidfLR(Model):
    """Sentiment analysis model using TF-IDF and Logistic Regression"""

    def __init__(self, rng: int | None = None, cache: str | None = None) -> None:
        """Build the ``vectorize`` -> ``tfidf`` -> ``clf`` pipeline.

        Args:
            rng: Forwarded as ``random_state`` to ``LogisticRegression``.
                ``None`` keeps scikit-learn's default.
            cache: Forwarded as ``memory`` to ``Pipeline``. ``None`` keeps
                scikit-learn's default (presumably no caching; confirm against
                the scikit-learn docs).
        """
        super().__init__()
        # These attributes must exist before the pipeline below reads them;
        # nothing else in the class or its base defines them.
        self.rng = rng
        self.cache = cache
        self._pipeline = Pipeline(
            [
                (
                    "vectorize",
                    CountVectorizer(stop_words="english", ngram_range=(1, 2), max_features=10000),
                ),
                ("tfidf", TfidfTransformer()),
                ("clf", LogisticRegression(max_iter=1000, random_state=self.rng)),
            ],
            memory=self.cache,
        )

    @property
    def pipeline(self) -> Pipeline:
        """Pipeline built in ``__init__``."""
        return self._pipeline

    @property
    def description(self) -> str:
        """Human-readable description of the architecture."""
        return "TF-IDF with Logistic Regression"

    def _predict(self, text: str) -> int:
        """Predict the label for a single text.

        The pipeline predicts on a batch, so the text is wrapped in a
        one-element list and the single result is unwrapped. The result is
        converted to a built-in ``int`` to match the declared return type
        (the pipeline presumably returns a NumPy scalar).
        """
        return int(self.pipeline.predict([text])[0])
|