import os

import huggingface.dataset
import numpy as np
import requests
import tensorflow as tf

# Names of the pages (datasets) this script works with; PAGE_DIRS below holds
# one directory per entry, in the same order.
PAGES = ['xing', 'pacific', 'gsc', 'rrc']

PAGE_SIZE = 100

# Root directory under the user's home that every path below is derived from.
_HF_HOME = os.path.expanduser('~/.huggingface')

WEIGHTS_PATH = _HF_HOME + '/model.h5'
# NOTE(review): these two paths are nested *under* the model.h5 file path,
# which looks unintended -- confirm before relying on them.
FLATTENED_PATH = WEIGHTS_PATH + '/flattened'
BELIEF_PATH = WEIGHTS_PATH + '/belief'
# The suffix must be part of the same expression: a leading "+" on its own
# line is a separate unary-plus statement on a str and raises TypeError.
TRAIN_DIR = _HF_HOME + '/models/nlp-train'
DEMO_DIR = _HF_HOME + '/models/nlp-demo'

# One pages directory per entry of PAGES (dashes in a page name become
# underscores in the directory name).
PAGE_DIRS = []
for page in PAGES:
    filename = page.replace('-', '_')
    PAGE_DIRS.append(_HF_HOME + '/text/{0}/pages'.format(filename))

# Cache of encoded page text keyed by page, so the text of each page is read
# and encoded at most once across add_page() calls.
Y_TO_X = {}


def _resolve_page_dir(page, dirs):
    """Return the entry of *dirs* that belongs to *page*."""
    if isinstance(page, str) and isinstance(dirs, (list, tuple)):
        # A positional container cannot be indexed by name; treat it as
        # aligned with PAGES (which is how PAGE_DIRS is built).
        return dirs[PAGES.index(page)]
    return dirs[page]


def add_page(page, dirs=None):
    """Load one page and return its samples together with its encoded text.

    Args:
        page: Page name (or an index/key valid for ``dirs``).
        dirs: Page directories. Either a mapping keyed by page, or a
            list/tuple aligned with ``PAGES``. Defaults to ``PAGE_DIRS``.

    Returns:
        A tuple ``(X, Y)`` where ``X`` is the train and test arrays joined
        with ``np.hstack`` and ``Y`` is the list form of the page text
        encoded as UTF-8 (cached in ``Y_TO_X``).

    Raises:
        ValueError: If the page has no directory, the reader returns no
            train/test data, or the returned arrays have unexpected shapes.
    """
    if dirs is None:
        dirs = PAGE_DIRS
    page_dir = _resolve_page_dir(page, dirs)
    if page_dir is None:
        raise ValueError('no directory configured for page {0!r}'.format(page))

    train_page, test_page, train_test_ratio, _, _ = huggingface.dataset.read_page(
        page, page_dir, page_size=PAGE_SIZE)
    if train_page is None or test_page is None:
        raise ValueError('no train/test data read for page {0!r}'.format(page))

    if train_test_ratio == 0.5:
        expected_train = expected_test = PAGE_SIZE
    else:
        # NOTE(review): the previous check compared train_page.shape[0] with a
        # value derived from itself, which can only hold for a ratio of 1.
        # The split is now derived from PAGE_SIZE -- confirm against what
        # read_page actually returns.
        expected_train = int(PAGE_SIZE * train_test_ratio)
        expected_test = PAGE_SIZE - expected_train
    if train_page.shape != (expected_train,):
        raise ValueError('unexpected train shape {0} for page {1!r}; expected ({2},)'.format(
            train_page.shape, page, expected_train))
    if test_page.shape != (expected_test,):
        raise ValueError('unexpected test shape {0} for page {1!r}; expected ({2},)'.format(
            test_page.shape, page, expected_test))

    X = np.hstack([train_page, test_page])
    if page not in Y_TO_X:
        Y_TO_X[page] = list(huggingface.dataset.read_text(page).encode('utf8'))
    return X, Y_TO_X[page]

# Call add_page once per configured page. The return values are discarded;
# the calls populate the Y_TO_X text cache. The pages are referenced by their
# string names from PAGES (bare identifiers such as `xing` are undefined), and
# the directories are passed as a name -> directory mapping so the lookup by
# page name is well defined.
_page_dir_by_name = dict(zip(PAGES, PAGE_DIRS))
for page in PAGES:
    add_page(page, _page_dir_by_name)

# Load the model from FLATTENED_PATH (given PAGE_DIRS) inside a TensorFlow
# session, then hand the session to the model via model.to(sess).
# NOTE(review): tf.Session is the TF1 graph-mode API; under TF2 it is only
# available as tf.compat.v1.Session -- confirm the targeted TF version.
# NOTE(review): only huggingface.dataset is imported at the top of this file;
# confirm huggingface.model is reachable without its own import.
# NOTE(review): the session is closed when this `with` block exits, yet
# `model` is used afterwards -- confirm it remains usable outside the session.
with tf.Session() as sess:
    model = huggingface.model.load(sess, FLATTENED_PATH, PAGE_DIRS)
    model.to(sess)

# Assemble the training arrays from every configured page.
# add_page returns an (X, Y) pair per page, so the two parts are collected
# separately instead of stacking the pairs themselves. Directories are passed
# as a name -> directory mapping so the lookup by page name is well defined.
# NOTE(review): labels are accumulated across all pages; the previous loop
# overwrote Y on every iteration and kept only the last page -- confirm that
# accumulation is the intended behaviour.
_dirs_by_page = dict(zip(PAGES, PAGE_DIRS))
_page_samples, _page_labels = [], []
for page in PAGES:
    _samples, _labels = add_page(page, _dirs_by_page)
    _page_samples.append(_samples)
    _page_labels.extend(_labels)

# Scale both arrays by 1/255 (the division must be on the same line as its
# operand; a leading "/" on its own line is a syntax error).
X = np.array(_page_samples) / 255.0
Y = np.array(_page_labels) / 255.0
# NOTE(review): these reshapes only succeed when the element counts are
# multiples of 100*200 and 10 respectively -- confirm against the real data.
X = np.reshape(X, (-1, 100, 200, 1))
Y = np.reshape(Y, (-1, 10))

# Wrap the loaded model's input/output in a Keras Model, train it with
# categorical cross-entropy, and write the resulting weights to WEIGHTS_PATH.
model = tf.keras.models.Model(inputs=model.input, outputs=model.output)
model.compile(optimizer=tf.keras.optimizers.Adam(),
              loss='categorical_crossentropy',
              metrics=['accuracy'])
# tf.keras Model.fit takes `epochs`; the legacy Keras 1 name `nb_epoch` is not
# accepted and raises TypeError.
# NOTE(review): validation_data is the training data itself, so the reported
# validation metrics do not measure generalisation -- confirm this is intended.
model.fit(X, Y, batch_size=100, epochs=1000, verbose=2, validation_data=(X, Y))
model.save_weights(WEIGHTS_PATH)

# Second pass: recompile with a Huber loss, reload the weights saved to
# WEIGHTS_PATH, and train for one more epoch with a small batch size.
# NOTE(review): BELIEF_PATH is assigned here but not used by this pass.
BELIEF_PATH = WEIGHTS_PATH + '/belief'
# The Keras loss class is tf.keras.losses.Huber; there is no
# tf.keras.losses.HuberLoss attribute.
model.compile(optimizer=tf.keras.optimizers.Adam(),
              loss=tf.keras.losses.Huber(delta=0.01))
model.load_weights(WEIGHTS_PATH)
model.fit(X, Y, batch_size=10, epochs=1)

# NOTE: To learn more about why Huber loss is used here instead of mean squared
# error (MSE), follow the accompanying link (no URL is included in this file).
# Good luck!