# Hugging Face Space source (page status: "Runtime error"), file size 2,528
# bytes, revision 3eabd63. The scraped page chrome and line-number gutter
# have been replaced with this comment header.
import os

import numpy as np
import requests
import tensorflow as tf

import huggingface.dataset
import huggingface.model
# Page identifiers and the number of samples per page.
PAGES = ['xing', 'pacific', 'gsc', 'rrc']
PAGE_SIZE = 100

# Everything lives under the user's Hugging Face cache directory.
_HF_HOME = os.path.expanduser('~/.huggingface')

# Model weight file and derived paths.
# NOTE(review): FLATTENED_PATH / BELIEF_PATH are built on top of the .h5
# file path, which looks odd — confirm the intended layout against
# huggingface.model.load before relying on them.
WEIGHTS_PATH = _HF_HOME + '/model.h5'
FLATTENED_PATH = WEIGHTS_PATH + '/flattened'
BELIEF_PATH = WEIGHTS_PATH + '/belief'

# Training / demo model directories. The original left each suffix on its
# own line ("+ '/models/…'"), which evaluated as a stray unary plus on a
# string (TypeError) and left the directories without their suffixes; the
# suffixes are now actually appended.
TRAIN_DIR = _HF_HOME + '/models/nlp-train'
DEMO_DIR = _HF_HOME + '/models/nlp-demo'

# One page directory per entry in PAGES (dashes mapped to underscores),
# kept parallel to PAGES so position-based lookup works.
PAGE_DIRS = [
    _HF_HOME + '/text/{0}/pages'.format(page.replace('-', '_'))
    for page in PAGES
]

# Cache mapping page name -> encoded label byte list, filled by add_page().
Y_TO_X = {}
def add_page(page, dirs):
    """Load one page's train/test samples and its label bytes.

    Parameters
    ----------
    page : str
        A page name from ``PAGES``.
    dirs : list of str
        Page directories parallel to ``PAGES`` (see ``PAGE_DIRS``).

    Returns
    -------
    tuple
        ``(X, Y)`` where ``X`` is the concatenated train+test samples and
        ``Y`` is the UTF-8 byte list of the page's text (cached in
        ``Y_TO_X``).

    Raises
    ------
    ValueError
        If no directory is configured for ``page`` or the reader returns
        no data.
    """
    # `dirs` is a list parallel to PAGES; the original indexed it with the
    # page *name* (`dirs[page]`), which raises TypeError on a list. Index
    # by the page's position instead.
    page_dir = dirs[PAGES.index(page)]
    if page_dir is None:
        # Raise instead of `assert` so the check survives `python -O`.
        raise ValueError('no directory configured for page %r' % page)
    train_page, test_page, train_test_ratio, _, _ = huggingface.dataset.read_page(
        page, page_dir, page_size=PAGE_SIZE)
    if train_page is None or test_page is None:
        raise ValueError('read_page returned no data for page %r' % page)
    # Sanity-check the split sizes reported by the reader. A ratio of 0.5
    # is treated as "full page on each side", mirroring the original code.
    if train_test_ratio == 0.5:
        expected_train = PAGE_SIZE
        expected_test = PAGE_SIZE
    else:
        expected_train = int(train_page.shape[0] * train_test_ratio)
        expected_test = PAGE_SIZE - expected_train
    assert train_page.shape == (expected_train,)
    assert test_page.shape == (expected_test,)
    X = np.hstack([train_page, test_page])
    # Cache the label bytes so repeated calls for the same page are cheap.
    if page in Y_TO_X:
        Y = Y_TO_X[page]
    else:
        Y = list(huggingface.dataset.read_text(page).encode('utf8'))
        Y_TO_X[page] = Y
    return X, Y
# Warm the Y_TO_X cache for every page. The original passed bare names
# (xing, pacific, gsc, rrc), which are undefined identifiers — iterate the
# string names in PAGES instead.
for _page in PAGES:
    add_page(_page, PAGE_DIRS)
# ---------------------------------------------------------------------------
# Load the pretrained model, fine-tune it on every page, then briefly
# re-train with a Huber loss (more robust to outliers than MSE).
# NOTE(review): the original mixed TF1 (`tf.Session`), Keras and PyTorch
# (`model.to(sess)`) APIs and contained outright syntax errors (a bare
# `/ 255.0` line); this version uses the tf.keras API consistently.
# Confirm the session handling against huggingface.model.load.
# ---------------------------------------------------------------------------
with tf.Session() as sess:
    model = huggingface.model.load(sess, FLATTENED_PATH, PAGE_DIRS)
    # (dropped `model.to(sess)`: that is a PyTorch method and would raise
    # AttributeError on a TF model.)

    # Build the training arrays: one (X, Y) pair per page. The original
    # called `map(add_page, PAGES)`, omitting the required `dirs` argument,
    # and overwrote X on every loop iteration.
    pairs = [add_page(page, PAGE_DIRS) for page in PAGES]
    X = np.array([x for x, _ in pairs])
    Y = np.array([y for _, y in pairs])

    # Scale raw byte values into [0, 1] and reshape to the network's input.
    # Assumes 100x200 single-channel inputs and 10-way targets — TODO
    # confirm against the model definition.
    X = np.reshape(X / 255.0, (-1, 100, 200, 1))
    Y = np.reshape(Y / 255.0, (-1, 10))

    model = tf.keras.models.Model(inputs=model.input, outputs=model.output)
    model.compile(optimizer=tf.keras.optimizers.Adam(),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    # `nb_epoch` was removed from Keras long ago; the keyword is `epochs`.
    # NOTE(review): validating on the training data measures fit, not
    # generalization — consider a held-out split.
    model.fit(X, Y, batch_size=100, epochs=1000, verbose=2,
              validation_data=(X, Y))
    model.save_weights(WEIGHTS_PATH)

    # Second pass: re-compile with a Huber loss and fine-tune briefly from
    # the saved weights. The correct class name is tf.keras.losses.Huber
    # (the original's `HuberLoss` does not exist). The redundant
    # reassignment of BELIEF_PATH (already defined above) was removed.
    model.compile(optimizer=tf.keras.optimizers.Adam(),
                  loss=tf.keras.losses.Huber(delta=0.01))
    model.load_weights(WEIGHTS_PATH)
    model.fit(X, Y, batch_size=10, epochs=1)
# NOTE: To learn more about why Huber loss is used here instead of mean
# squared error, follow this link: <link missing from the scraped source>.
# Good luck!