Spaces:
Runtime error
Runtime error
import os

import huggingface.dataset
import numpy as np
import requests
import tensorflow as tf
# Names of the dataset pages to load, and the page size passed to the reader.
PAGES = ['xing', 'pacific', 'gsc', 'rrc']
PAGE_SIZE = 100

# Root of the local working directory; every path below hangs off it.
_HF_HOME = os.path.expanduser('~/.huggingface')

# File the Keras weights are saved to and reloaded from.
WEIGHTS_PATH = _HF_HOME + '/model.h5'
# NOTE(review): these two paths are built *under* the .h5 weights file path,
# which looks unintended -- confirm the expected on-disk layout.
FLATTENED_PATH = WEIGHTS_PATH + '/flattened'
BELIEF_PATH = WEIGHTS_PATH + '/belief'

# The suffix has to be part of the same expression: a "+ '...'" left on its
# own line is a separate statement (unary plus on a str -> TypeError) and the
# directory silently loses its suffix.
TRAIN_DIR = _HF_HOME + '/models/nlp-train'
DEMO_DIR = _HF_HOME + '/models/nlp-demo'

# One directory per PAGES entry, in the same order. Dashes in a page name are
# replaced by underscores in the directory name.
PAGE_DIRS = [
    _HF_HOME + '/text/{0}/pages'.format(page.replace('-', '_'))
    for page in PAGES
]

# Cache used by add_page: page name -> Y values read for that page.
Y_TO_X = {}
def add_page(page, dirs=None):
    """Read one page's train/test samples and its cached Y values.

    Args:
        page: Page name (an entry of ``PAGES``).
        dirs: Where to find the page's directory. Either a mapping from page
            name to directory, or a list/tuple parallel to ``PAGES`` (the
            layout of ``PAGE_DIRS``). Defaults to ``PAGE_DIRS`` so the
            function can be called with the page name alone.

    Returns:
        A tuple ``(X, Y)``. ``X`` is the train and test arrays joined with
        ``np.hstack``. ``Y`` is ``list(...)`` of the UTF-8 encoded text
        returned by ``huggingface.dataset.read_text(page)``; it is stored in
        ``Y_TO_X`` and reused on later calls for the same page.

    Raises:
        AssertionError: If the directory or either split is ``None``, or the
            split shapes fail the sanity checks below.
        ValueError: If ``dirs`` is a list/tuple and ``page`` is not in ``PAGES``.
    """
    if dirs is None:
        dirs = PAGE_DIRS

    # A list indexed with a page-name string raises TypeError, so resolve the
    # name to its position in PAGES first. Mappings (and integer indices into
    # a list) keep the plain lookup.
    if isinstance(page, str) and isinstance(dirs, (list, tuple)):
        page_dir = dirs[PAGES.index(page)]
    else:
        page_dir = dirs[page]
    assert page_dir is not None

    train_page, test_page, train_test_ratio, _, _ = huggingface.dataset.read_page(
        page, page_dir, page_size=PAGE_SIZE)
    assert train_page is not None
    assert test_page is not None

    if train_test_ratio == 0.5:
        assert train_page.shape == (PAGE_SIZE,)
        assert test_page.shape == (PAGE_SIZE,)
    else:
        # NOTE(review): the expected train length is derived from the train
        # split's own length, so the first check only holds when the scaled
        # length equals the length itself. Presumably PAGE_SIZE was meant --
        # confirm against read_page before changing it.
        expected_train_len = int(train_page.shape[0] * train_test_ratio)
        assert train_page.shape == (expected_train_len,)
        assert test_page.shape == (PAGE_SIZE - expected_train_len,)

    X = np.hstack([train_page, test_page])

    # Read and encode the page text only once per page.
    if page not in Y_TO_X:
        Y_TO_X[page] = list(huggingface.dataset.read_text(page).encode('utf8'))
    return X, Y_TO_X[page]
# Read every page exactly once. add_page looks directories up by page name,
# so give it a name -> directory mapping built from the parallel PAGES and
# PAGE_DIRS lists (page names are strings, not bare identifiers).
page_dir_by_name = dict(zip(PAGES, PAGE_DIRS))
page_samples = []
page_labels = []
for page in PAGES:
    samples, labels = add_page(page, page_dir_by_name)
    page_samples.append(samples)
    page_labels.extend(labels)

# Divide by 255.0 and reshape to the shapes the model is fed with.
# NOTE(review): the target shapes are kept from the original script; confirm
# they match the amount of data the pages actually provide.
X = np.array(page_samples) / 255.0
Y = np.array(page_labels) / 255.0
X = np.reshape(X, (-1, 100, 200, 1))
Y = np.reshape(Y, (-1, 10))

# load model
# NOTE(review): tf.Session is the TensorFlow 1.x API (tf.compat.v1.Session in
# 2.x). Training is kept inside the session on the assumption that the loaded
# model's variables live in it -- confirm.
with tf.Session() as sess:
    model = huggingface.model.load(sess, FLATTENED_PATH, PAGE_DIRS)
    model.to(sess)

    # Stage 1: train with categorical cross-entropy and persist the weights.
    model = tf.keras.models.Model(inputs=model.input, outputs=model.output)
    model.compile(optimizer=tf.keras.optimizers.Adam(),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    # `epochs` is the tf.keras keyword (`nb_epoch` was Keras 1 only).
    # NOTE(review): validation_data is the training data itself, so the
    # reported validation metrics do not measure generalization.
    model.fit(X, Y, batch_size=100, epochs=1000, verbose=2,
              validation_data=(X, Y))
    model.save_weights(WEIGHTS_PATH)

    # Stage 2: recompile with Huber loss, restore the saved weights and run
    # one more epoch. The Keras loss class is `Huber`, not `HuberLoss`.
    model.compile(optimizer=tf.keras.optimizers.Adam(),
                  loss=tf.keras.losses.Huber(delta=0.01))
    model.load_weights(WEIGHTS_PATH)
    model.fit(X, Y, batch_size=10, epochs=1)
NOTE: To learn more about why Huber loss is used here instead of mean squared error, follow this link.
Good luck!