Model added
Browse files- .gitattributes +2 -0
- README.md +43 -0
- rmh.w2v.model +3 -0
- rmh.w2v.model.syn1neg.npy +3 -0
- rmh.w2v.model.wv.vectors.npy +3 -0
.gitattributes
CHANGED
@@ -25,3 +25,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
25 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
26 |
*.zstandard filter=lfs diff=lfs merge=lfs -text
|
27 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
25 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
26 |
*.zstandard filter=lfs diff=lfs merge=lfs -text
|
27 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
28 |
+
rmh.w2v.model.syn1neg.npy filter=lfs diff=lfs merge=lfs -text
|
29 |
+
rmh.w2v.model.wv.vectors.npy filter=lfs diff=lfs merge=lfs -text
|
README.md
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# word2vec model trained on Icelandic
|
2 |
+
|
3 |
+
This model is trained on the lemmas of the Icelandic Gigaword Corpus, version 20.05. It was trained using the gensim package, version 4.1.0, with parameters left at their defaults (100 dimensions, window size 5).
|
4 |
+
|
5 |
+
This model cannot be loaded directly since it uses gensim. Clone the repository and run the following to use it.
|
6 |
+
|
7 |
+
```python
|
8 |
+
import gensim
|
9 |
+
model = gensim.models.Word2Vec.load("./rmh.w2v.model")
|
10 |
+
```
|
11 |
+
|
12 |
+
## Example output
|
13 |
+
|
14 |
+
```python
|
15 |
+
In [6]: model.wv.most_similar("england")
|
16 |
+
Out[6]:
|
17 |
+
[('wales', 0.8113704323768616),
|
18 |
+
('skotland', 0.7611601948738098),
|
19 |
+
('bretlandseyjar', 0.7280426621437073),
|
20 |
+
('gateshead', 0.6975484490394592),
|
21 |
+
('ástralía', 0.6963852047920227),
|
22 |
+
('eastbourne', 0.6939234137535095),
|
23 |
+
('englandi', 0.6908402442932129),
|
24 |
+
('bath', 0.6849308013916016),
|
25 |
+
('lynndie', 0.6826340556144714),
|
26 |
+
('glasgow', 0.6815919876098633)]
|
27 |
+
|
28 |
+
In [7]: model.wv.most_similar("ísland")
|
29 |
+
Out[7]:
|
30 |
+
[('norðurlönd', 0.6843729615211487),
|
31 |
+
('land', 0.6696498990058899),
|
32 |
+
('íslendingur', 0.6645756959915161),
|
33 |
+
('íslenskur', 0.6627770662307739),
|
34 |
+
('hérlendis', 0.6609933376312256),
|
35 |
+
('íslandi', 0.6514216661453247),
|
36 |
+
('evrópa', 0.6289927959442139),
|
37 |
+
('fróðskaparsetur', 0.6046777367591858),
|
38 |
+
('evrópuland', 0.5911464095115662),
|
39 |
+
('bandaríkin', 0.5906434655189514)]
|
40 |
+
|
41 |
+
```
|
42 |
+
|
43 |
+
|
rmh.w2v.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e3b0e9c7653c8eb4ab7a6bf662b6b25db878e520c8829c265d5b9fca072a8d5f
|
3 |
+
size 22182144
|
rmh.w2v.model.syn1neg.npy
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0b0050b23d5fd9e44c2b0a8b91584adbd6aa412b6ef7e33df7b7dd08a3ebd8cb
|
3 |
+
size 254173328
|
rmh.w2v.model.wv.vectors.npy
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6ad46c1a37641fdc6dafc536a8668e318aafa46abe3a8a98c2b69240b073c002
|
3 |
+
size 254173328
|