File size: 425 Bytes
16d5400
 
 
 
 
 
 
 
 
 
 
 
e508a7d
16d5400
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
from m_conf import *
from keras.preprocessing.text import Tokenizer
from gensim.models import Word2Vec

# Train a Word2Vec embedding model over token-ID "words" derived from the
# training corpus, then persist it to disk for downstream use.
# `emb_o_dim` is expected to come from the m_conf star import at file top
# — TODO confirm it is defined there.

# Read the raw corpus, one document per line.
# Explicit UTF-8: the default `open()` encoding is platform-dependent, so
# without it the same file can tokenize differently (or fail) across machines.
with open('train.txt', 'r', encoding='utf-8') as file:
    lines = file.readlines()

# Build the vocabulary and map each line to a sequence of integer token IDs.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(lines)
sequences = tokenizer.texts_to_sequences(lines)

# Word2Vec expects sentences as lists of string "words"; stringify the token
# IDs so the learned vectors are keyed by the same IDs the Tokenizer emits.
tokens = [[str(i) for i in seq] for seq in sequences]

# min_count=1 keeps every token (no vocabulary pruning); vector_size must
# match the embedding dimension expected by the model that consumes "w2v.model".
model = Word2Vec(tokens, window=10, min_count=1, vector_size=emb_o_dim)

model.save("w2v.model")