# SuperPunk2077-text-to-image / textToImage.py
# SuperPunk2077's picture
# Create textToImage.py
# 3eabd63
import os, requests, huggingface.dataset
import numpy as np
import tensorflow as tf
# Root directory (under the user's home) that every path below is built from.
_HF_HOME = os.path.expanduser('~/.huggingface')

# Names of the pages that are loaded by the training script.
PAGES = ['xing', 'pacific', 'gsc', 'rrc']
# Forwarded to huggingface.dataset.read_page as ``page_size``.
PAGE_SIZE = 100

# File the Keras weights are saved to and reloaded from.
WEIGHTS_PATH = _HF_HOME + '/model.h5'
# NOTE(review): these two paths are nested *under* the weights file
# (".../model.h5/flattened"); a regular file cannot also be a directory, so
# confirm whether they were meant to hang off the base directory instead.
# The original values are kept unchanged here.
FLATTENED_PATH = WEIGHTS_PATH + '/flattened'
BELIEF_PATH = WEIGHTS_PATH + '/belief'

# Each directory is built in a single expression: the original put the
# "+ '/models/...'" suffix on its own line, which Python treats as a separate
# statement, so the suffix was never appended.
TRAIN_DIR = _HF_HOME + '/models/nlp-train'
DEMO_DIR = _HF_HOME + '/models/nlp-demo'

# One directory per entry of PAGES, in the same order (dashes in a page name
# become underscores in the directory name).
PAGE_DIRS = [
    _HF_HOME + '/text/{0}/pages'.format(page.replace('-', '_'))
    for page in PAGES
]

# Cache filled by add_page: page name -> list of label values for that page.
Y_TO_X = {}
def add_page(page, dirs):
    """Load the samples and the label list for a single page.

    Args:
        page: Page name, one of the entries of ``PAGES``.
        dirs: Where to find the page directories. Either a mapping keyed by
            page name, or a sequence aligned with ``PAGES`` (the form
            ``PAGE_DIRS`` has).

    Returns:
        A tuple ``(X, Y)``: ``X`` is the train and test arrays returned by
        ``huggingface.dataset.read_page`` joined with ``np.hstack``; ``Y`` is
        the list of UTF-8 byte values of ``huggingface.dataset.read_text(page)``,
        cached per page in ``Y_TO_X``.

    Raises:
        AssertionError: If the directory or either split is missing, or a
            split does not have the expected shape.
        ValueError: If ``dirs`` is a sequence and ``page`` is not in ``PAGES``.
    """
    try:
        page_dir = dirs[page]
    except TypeError:
        # A list/tuple cannot be indexed by a page name; PAGE_DIRS is built in
        # PAGES order, so look the directory up by position instead.
        page_dir = dirs[PAGES.index(page)]
    assert page_dir is not None

    train_page, test_page, train_test_ratio, _, _ = huggingface.dataset.read_page(
        page, page_dir, page_size=PAGE_SIZE)
    assert train_page is not None
    assert test_page is not None

    if train_test_ratio == 0.5:
        # NOTE(review): for an even split both halves are expected to be
        # PAGE_SIZE long, unlike the branch below where the two halves add up
        # to PAGE_SIZE -- confirm against read_page.
        assert train_page.shape == (PAGE_SIZE,)
        assert test_page.shape == (PAGE_SIZE,)
    else:
        # The expected length is derived from PAGE_SIZE. The original derived
        # it from train_page.shape[0] itself, which can only hold when the
        # ratio is 1 or the page is empty.
        expected_train = int(PAGE_SIZE * train_test_ratio)
        assert train_page.shape == (expected_train,)
        assert test_page.shape == (PAGE_SIZE - expected_train,)

    X = np.hstack([train_page, test_page])

    # Read and encode the page text only once per page.
    if page not in Y_TO_X:
        Y_TO_X[page] = list(huggingface.dataset.read_text(page).encode('utf8'))
    return X, Y_TO_X[page]
# Load the model, gather the data of every page and train in two phases.
# NOTE(review): `huggingface.model` is used here although only
# `huggingface.dataset` is imported at the top of the file -- confirm that the
# package exposes it.
# tf.compat.v1.Session is used because plain tf.Session only exists in
# TensorFlow 1.x, while the compat alias is available in both 1.x and 2.x.
with tf.compat.v1.Session() as sess:
    # load model
    model = huggingface.model.load(sess, FLATTENED_PATH, PAGE_DIRS)
    model.to(sess)

    # Collect the samples and labels of all pages in one pass. The original
    # called add_page with undefined bare names (xing, pacific, ...), omitted
    # the `dirs` argument inside map(), and overwrote X and Y on every loop
    # iteration so only the last page's labels survived.
    features, labels = [], []
    for page in PAGES:
        page_x, page_y = add_page(page, PAGE_DIRS)
        features.append(page_x)
        labels.extend(page_y)

    # Scale both arrays by 255, then bring them into the shapes used for
    # training.
    X = np.array(features) / 255.0
    Y = np.array(labels) / 255.0
    X = np.reshape(X, (-1, 100, 200, 1))
    Y = np.reshape(Y, (-1, 10))

    # NOTE(review): training is kept inside the session block so the loaded
    # model is still backed by an open session -- confirm this is what
    # huggingface.model.load expects.
    model = tf.keras.models.Model(inputs=model.input, outputs=model.output)

    # Phase 1: categorical cross-entropy.
    model.compile(optimizer=tf.keras.optimizers.Adam(),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    # `epochs` replaces the `nb_epoch` keyword, which tf.keras `fit` does not
    # accept.
    # NOTE(review): validation_data is the training data itself, so the
    # reported validation metrics are not an independent measure.
    model.fit(X, Y, batch_size=100, epochs=1000, verbose=2, validation_data=(X, Y))
    model.save_weights(WEIGHTS_PATH)

    # Phase 2: reload the saved weights and run one more epoch with Huber
    # loss. The class is tf.keras.losses.Huber; `HuberLoss` does not exist.
    model.compile(optimizer=tf.keras.optimizers.Adam(),
                  loss=tf.keras.losses.Huber(delta=0.01))
    model.load_weights(WEIGHTS_PATH)
    model.fit(X, Y, batch_size=10, epochs=1)
# NOTE: To learn why Huber loss is used here instead of mean squared error,
# follow this link (the URL is not included in this copy of the file).
# Good luck!