SuperPunk2077 committed on
Commit
3eabd63
1 Parent(s): 26f8ecf

Create textToImage.py

Files changed (1)
  1. textToImage.py +95 -0
textToImage.py ADDED
@@ -0,0 +1,95 @@
+ import os
+
+ import numpy as np
+ import tensorflow as tf
+
+ # huggingface.dataset / huggingface.model are assumed to be local helper
+ # modules in this repo, not part of a published package
+ import huggingface.dataset
+ import huggingface.model
+
+ PAGES = ['xing', 'pacific', 'gsc', 'rrc']
+
+ PAGE_SIZE = 100
+ BASE_DIR = os.path.expanduser('~/.huggingface')
+ WEIGHTS_PATH = BASE_DIR + '/model.h5'
+ # checkpoint directories live beside the .h5 file, not inside it
+ FLATTENED_PATH = BASE_DIR + '/flattened'
+ BELIEF_PATH = BASE_DIR + '/belief'
+ TRAIN_DIR = BASE_DIR + '/models/nlp-train'
+ DEMO_DIR = BASE_DIR + '/models/nlp-demo'
+
+ # one page directory per page, e.g. ~/.huggingface/text/xing/pages
+ PAGE_DIRS = []
+ for page in PAGES:
+     filename = page.replace('-', '_')
+     PAGE_DIRS.append(BASE_DIR + '/text/{0}/pages'.format(filename))
+
+ # cache of page name -> label bytes, filled lazily by add_page()
+ Y_TO_X = {}
+
+ def add_page(page, dirs):
+     # dirs is parallel to PAGES, so look the directory up by position
+     page_dir = dirs[PAGES.index(page)]
+     assert page_dir is not None
+     train_page, test_page, train_test_ratio, _, _ = huggingface.dataset.read_page(
+         page, page_dir, page_size=PAGE_SIZE)
+     assert train_page is not None
+     assert test_page is not None
+     if train_test_ratio == 0.5:
+         assert train_page.shape == (PAGE_SIZE,)
+         assert test_page.shape == (PAGE_SIZE,)
+     else:
+         n_train = int(PAGE_SIZE * train_test_ratio)
+         assert train_page.shape == (n_train,)
+         assert test_page.shape == (PAGE_SIZE - n_train,)
+     X = np.hstack([train_page, test_page])
+     # labels are the raw utf-8 bytes of the page text, cached per page
+     if page in Y_TO_X:
+         Y = Y_TO_X[page]
+     else:
+         Y = list(huggingface.dataset.read_text(page).encode('utf8'))
+         Y_TO_X[page] = Y
+     return X, Y
+
+ # prime the Y_TO_X label cache for every page
+ add_page('xing', PAGE_DIRS)
+ add_page('pacific', PAGE_DIRS)
+ add_page('gsc', PAGE_DIRS)
+ add_page('rrc', PAGE_DIRS)
+
+ # load the pre-trained model; huggingface.model is assumed to be the
+ # repo's own loader (tf.keras manages its graph and session internally)
+ model = huggingface.model.load(FLATTENED_PATH, PAGE_DIRS)
+
+ # gather inputs and labels for every page
+ X_parts, Y_parts = [], []
+ for page in PAGES:
+     x, y = add_page(page, PAGE_DIRS)
+     X_parts.append(x)
+     Y_parts.append(y)
+
+ # scale to [0, 1] and shape for the network
+ X = np.array(X_parts) / 255.0
+ Y = np.array(Y_parts) / 255.0
+ X = np.reshape(X, (-1, 100, 200, 1))
+ Y = np.reshape(Y, (-1, 10))
+
+ # rewrap as a plain Keras model and train with cross-entropy
+ model = tf.keras.models.Model(inputs=model.input, outputs=model.output)
+ model.compile(optimizer=tf.keras.optimizers.Adam(),
+               loss='categorical_crossentropy',
+               metrics=['accuracy'])
+ # (validation here simply reuses the training data)
+ model.fit(X, Y, batch_size=100, epochs=1000, verbose=2, validation_data=(X, Y))
+ model.save_weights(WEIGHTS_PATH)
+
+ # fine-tune for one epoch with a Huber loss
+ model.compile(optimizer=tf.keras.optimizers.Adam(),
+               loss=tf.keras.losses.Huber(delta=0.01))
+ model.load_weights(WEIGHTS_PATH)
+ model.fit(X, Y, batch_size=10, epochs=1)
+
+ # NOTE: to learn more about why Huber loss is used here instead of
+ # mean squared error, follow this link
+ # Good luck!
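
For context on the closing NOTE: Huber loss matches mean squared error for residuals below its delta but grows only linearly beyond it, so a handful of outlier targets cannot dominate the gradient the way they do under MSE. A minimal standalone sketch of the difference, with made-up numbers (not part of the committed file):

import numpy as np
import tensorflow as tf

# three well-fit targets plus one large outlier (made-up numbers)
y_true = np.array([0.10, 0.20, 0.30, 5.00], dtype=np.float32)
y_pred = np.array([0.12, 0.18, 0.33, 0.30], dtype=np.float32)

mse = tf.keras.losses.MeanSquaredError()
huber = tf.keras.losses.Huber(delta=0.01)  # same delta the script uses

print('MSE  :', float(mse(y_true, y_pred)))    # ~5.52, dominated by the single outlier
print('Huber:', float(huber(y_true, y_pred)))  # ~0.012, the outlier contributes only linearly

With a delta as small as 0.01, almost every residual lands in the linear regime, so the fine-tuning step above behaves much like training on a scaled mean absolute error.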