microhum committed
Commit b762e56 · 1 Parent(s): 7001a19

add source code into folder

Files changed (31)
  1. DockerFile +1 -1
  2. __init__.py → ThaiVecFont/__init__.py +0 -0
  3. {data_utils → ThaiVecFont/data_utils}/__init__.py +0 -0
  4. {data_utils → ThaiVecFont/data_utils}/augment.py +111 -111
  5. {data_utils → ThaiVecFont/data_utils}/common_utils.py +75 -75
  6. {data_utils → ThaiVecFont/data_utils}/convert_ttf_to_sfd.py +102 -102
  7. {data_utils → ThaiVecFont/data_utils}/relax_rep.py +135 -135
  8. {data_utils → ThaiVecFont/data_utils}/svg_utils.py +1082 -1082
  9. {data_utils → ThaiVecFont/data_utils}/svg_utils_backup.py +1174 -1174
  10. {data_utils → ThaiVecFont/data_utils}/write_data_to_dirs.py +230 -230
  11. {data_utils → ThaiVecFont/data_utils}/write_glyph_imgs.py +180 -180
  12. dataloader.py → ThaiVecFont/dataloader.py +67 -67
  13. {font_sample → ThaiVecFont/font_sample}/Athiti-Regular.ttf +0 -0
  14. {font_sample → ThaiVecFont/font_sample}/SaoChingcha-Bold.otf +0 -0
  15. {font_sample → ThaiVecFont/font_sample}/SaoChingcha-Light.otf +0 -0
  16. {font_sample → ThaiVecFont/font_sample}/SaoChingcha-Regular.otf +0 -0
  17. generate.py → ThaiVecFont/generate.py +143 -143
  18. {inference_model → ThaiVecFont/inference_model}/950_49452.ckpt +0 -0
  19. {models → ThaiVecFont/models}/__init__.py +0 -0
  20. {models → ThaiVecFont/models}/image_decoder.py +48 -48
  21. {models → ThaiVecFont/models}/image_encoder.py +42 -42
  22. {models → ThaiVecFont/models}/modality_fusion.py +64 -64
  23. {models → ThaiVecFont/models}/model_main.py +211 -211
  24. {models → ThaiVecFont/models}/pos_enc.py +20 -20
  25. {models → ThaiVecFont/models}/transformers.py +710 -710
  26. {models → ThaiVecFont/models}/util_funcs.py +95 -95
  27. {models → ThaiVecFont/models}/vgg_perceptual_loss.py +68 -68
  28. options.py → ThaiVecFont/options.py +67 -67
  29. test.py → ThaiVecFont/test.py +59 -59
  30. test_few_shot.py → ThaiVecFont/test_few_shot.py +163 -163
  31. train.py → ThaiVecFont/train.py +216 -216
DockerFile CHANGED
@@ -10,6 +10,6 @@ RUN xargs apt-get install -y <packages.txt
 
 COPY /home/usr/lib/python3/dist-packages/fontforge.cpython-310-x86_64-linux-gnu.so /home/usr/lib/python3.10/dist-packages/
 
-COPY . .
+COPY . /code/
 
 CMD ["streamlit", "app.py", "--server.port", "7860"]
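Note: Streamlit apps are normally launched with "streamlit run app.py ..."; as committed, the CMD omits the "run" subcommand, so the container may fail to start unless the base image provides an equivalent entrypoint.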
__init__.py → ThaiVecFont/__init__.py RENAMED
File without changes
{data_utils → ThaiVecFont/data_utils}/__init__.py RENAMED
File without changes
{data_utils → ThaiVecFont/data_utils}/augment.py RENAMED
import argparse
import multiprocessing as mp
import os
import numpy as np
import math
import cairosvg
import shutil
from data_utils.svg_utils import clockwise, render
# note: the original imported `from common_utils import ...`; unified with the
# `data_utils.` prefix so `python -m data_utils.augment` works from the repo root
from data_utils.common_utils import affine_shear, affine_rotate, affine_scale, trans2_white_bg

def render_svg(svg_str, font_dir, char_idx, aug_idx, img_size):
    svg_html = render(svg_str)
    svg_path = open(f'{font_dir}/aug_svgs/{char_idx}.svg', 'w')
    svg_path.write(svg_html)
    svg_path.close()
    cairosvg.svg2png(url=f'{font_dir}/aug_svgs/{char_idx}.svg',
                     write_to=f'{font_dir}/aug_imgs/{char_idx}_{aug_idx}.png',
                     output_width=img_size, output_height=img_size)
    img_arr = trans2_white_bg(f'{font_dir}/aug_imgs/{char_idx}_{aug_idx}.png')
    return img_arr

def aug_rules(char_seq, aug_idx):
    if aug_idx == 0:
        return clockwise(affine_shear(char_seq, dx=0.2))['sequence']
    elif aug_idx == 1:
        return clockwise(affine_shear(char_seq, dy=-0.1))['sequence']
    elif aug_idx == 2:
        return clockwise(affine_scale(char_seq, 0.8))['sequence']
    elif aug_idx == 3:
        return clockwise(affine_rotate(char_seq, theta=5))['sequence']
    else:
        return clockwise(affine_rotate(char_seq, theta=-5))['sequence']

def copy_others(dir_src, dir_tgt):
    for item in ['class.npy', 'font_id.npy', 'seq_len.npy']:
        shutil.copy(f'{dir_src}/{item}', f'{dir_tgt}/{item}')

def apply_aug(opts):
    """
    Apply data augmentation to every font in the chosen split.
    """
    data_path = os.path.join(opts.output_path, opts.language, opts.split)
    font_dirs_ = os.listdir(data_path)
    font_dirs = []
    for idx in range(len(font_dirs_)):
        # skip directories that already hold augmented copies (they carry a '_k' suffix)
        if '_' not in font_dirs_[idx].split('/')[-1]:
            font_dirs.append(font_dirs_[idx])
    font_dirs.sort()
    num_fonts = len(font_dirs)
    print(f"Number of {opts.split} fonts before processing:", num_fonts)
    num_processes = mp.cpu_count() - 2
    fonts_per_process = num_fonts // num_processes + 1

    def process(process_id):
        for i in range(process_id * fonts_per_process, (process_id + 1) * fonts_per_process):
            if i >= num_fonts:
                break
            font_dir = os.path.join(data_path, font_dirs[i])
            font_seq = np.load(os.path.join(font_dir, 'sequence.npy')).reshape(opts.n_chars, opts.max_len, -1)

            ret_seq_list = []
            ret_img_list = []
            for k in range(opts.n_aug):
                os.makedirs(font_dir + '_' + str(k), exist_ok=True)
                ret_seq_list.append([])
                ret_img_list.append([])

            os.makedirs(f'{font_dir}/aug_svgs', exist_ok=True)
            os.makedirs(f'{font_dir}/aug_imgs', exist_ok=True)

            for j in range(opts.n_chars):
                char_seq = font_seq[j]  # default shape: [71, 12]
                for k in range(opts.n_aug):
                    char_seq_aug = aug_rules(char_seq, k)
                    ret_seq_list[k].append(char_seq_aug)
                    img_arr = render_svg(char_seq_aug, font_dir, j, aug_idx=k, img_size=opts.img_size)
                    ret_img_list[k].append(img_arr)

            for k in range(opts.n_aug):
                ret_seq_list[k] = np.array(ret_seq_list[k]).reshape(opts.n_chars, opts.max_len * 10)
                ret_img_list[k] = np.array(ret_img_list[k]).reshape(opts.n_chars, opts.img_size, opts.img_size)
                np.save(os.path.join(font_dir + '_' + str(k), 'sequence.npy'), ret_seq_list[k])
                np.save(os.path.join(font_dir + '_' + str(k), f'rendered_{opts.img_size}.npy'), ret_img_list[k])
                copy_others(font_dir, font_dir + '_' + str(k))

    processes = [mp.Process(target=process, args=[pid]) for pid in range(num_processes)]

    for p in processes:
        p.start()
    for p in processes:
        p.join()


def main():
    parser = argparse.ArgumentParser(description="data augmentation")
    parser.add_argument("--language", type=str, default='eng', choices=['eng', 'chn', 'tha'])
    parser.add_argument("--output_path", type=str, default='../data/vecfont_dataset_/', help="Path to write the database to")
    parser.add_argument('--max_len', type=int, default=71, help="by default, 51 for English and 71 for Chinese")
    parser.add_argument('--n_aug', type=int, default=5, help="for each font, augment it n_aug times")
    parser.add_argument('--n_chars', type=int, default=52)
    parser.add_argument('--img_size', type=int, default=64, help="the height and width of glyph images")
    parser.add_argument("--split", type=str, default='train')
    parser.add_argument('--debug', type=bool, default=True)
    opts = parser.parse_args()
    apply_aug(opts)

if __name__ == "__main__":
    main()
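A minimal sketch of driving apply_aug programmatically rather than through the CLI; the SimpleNamespace fields mirror the argparse defaults above, and the dataset layout under output_path is assumed to already exist (import as ThaiVecFont.data_utils.augment after this commit's rename):

from types import SimpleNamespace
from data_utils.augment import apply_aug

opts = SimpleNamespace(language='tha', output_path='../data/vecfont_dataset_/',
                       split='train', max_len=71, n_aug=5, n_chars=52,
                       img_size=64, debug=True)
apply_aug(opts)  # writes <font_dir>_0 ... <font_dir>_4 beside each source font dir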
{data_utils → ThaiVecFont/data_utils}/common_utils.py RENAMED
import math
import numpy as np
from PIL import Image

def trans2_white_bg(img_path):
    # cairosvg renders onto a transparent background; inverting the alpha
    # channel yields black glyphs on a white background
    img = Image.open(img_path)
    img_arr = 255 - np.array(img)[:, :, 3]
    img_ = Image.fromarray(img_arr)
    img_.save(img_path)
    return img_arr

def affine_shear(seq, dx=-0.3, dy=0.0):
    mask = ~(seq == 0)
    seq_12 = seq.copy()
    # move the origin to the viewbox center (12, 12), with y pointing up
    seq_12[:, 4] -= 12.0
    seq_12[:, 5] = -seq_12[:, 5] + 12
    seq_12[:, 6] -= 12.0
    seq_12[:, 7] = -seq_12[:, 7] + 12
    seq_12[:, 8] -= 12.0
    seq_12[:, 9] = -seq_12[:, 9] + 12

    seq_args = seq_12[:, 4:]
    seq_args = np.concatenate([seq_args[:, :2], seq_args[:, 2:4], seq_args[:, 4:6]], 0).transpose(1, 0)
    affine_matrix = np.array([[1, dx],
                              [dy, 1]])
    sheared_args = np.dot(affine_matrix, seq_args)
    sheared_args = sheared_args.transpose(1, 0)
    new_args = np.concatenate([sheared_args[:seq.shape[0]], sheared_args[seq.shape[0]:seq.shape[0] * 2], sheared_args[seq.shape[0] * 2:]], -1)
    # move the origin back to the top-left corner of the 24x24 viewbox
    new_args[:, 0] += 12.0
    new_args[:, 1] = -(new_args[:, 1] - 12)
    new_args[:, 2] += 12.0
    new_args[:, 3] = -(new_args[:, 3] - 12)
    new_args[:, 4] += 12.0
    new_args[:, 5] = -(new_args[:, 5] - 12)
    new_seq = np.concatenate([seq[:, :4], new_args], 1)
    new_seq = new_seq * mask
    return new_seq

def affine_scale(seq, scale=0.8):
    mask = ~(seq == 0)
    seq_args = seq[:, 4:] - 12.0
    seq_args *= scale
    seq_args = seq_args + 12.0
    new_seq = np.concatenate([seq[:, :4], seq_args], 1)
    new_seq = new_seq * mask
    return new_seq

def affine_rotate(seq, theta=-5):
    mask = ~(seq == 0)
    seq_12 = seq.copy()
    seq_12[:, 4] -= 12.0
    seq_12[:, 5] = -seq_12[:, 5] + 12
    seq_12[:, 6] -= 12.0
    seq_12[:, 7] = -seq_12[:, 7] + 12
    seq_12[:, 8] -= 12.0
    seq_12[:, 9] = -seq_12[:, 9] + 12

    seq_args = seq_12[:, 4:]  # default shape: [71, 6]
    seq_args = np.concatenate([seq_args[:, :2], seq_args[:, 2:4], seq_args[:, 4:6]], 0).transpose(1, 0)  # shape: [2, 3 * 71]
    theta = math.radians(theta)
    affine_matrix = np.array([[np.cos(theta), -np.sin(theta)], [np.sin(theta), np.cos(theta)]])  # shape: [2, 2]
    rotated_args = np.dot(affine_matrix, seq_args)  # shape: [2, 3 * 71]
    rotated_args = rotated_args.transpose(1, 0)  # shape: [3 * 71, 2]
    new_args = np.concatenate([rotated_args[:seq.shape[0]], rotated_args[seq.shape[0]:seq.shape[0] * 2], rotated_args[seq.shape[0] * 2:]], -1)  # shape: [71, 6]
    new_args[:, 0] += 12.0
    new_args[:, 1] = -(new_args[:, 1] - 12)
    new_args[:, 2] += 12.0
    new_args[:, 3] = -(new_args[:, 3] - 12)
    new_args[:, 4] += 12.0
    new_args[:, 5] = -(new_args[:, 5] - 12)

    new_seq = np.concatenate([seq[:, :4], new_args], 1)
    new_seq = new_seq * mask
    return new_seq
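A quick worked example of the center-at-(12, 12) convention these transforms share (a sketch; assumes the module is importable as data_utils.common_utils). The command one-hots in columns 0-3 pass through untouched, and the mask re-zeroes any entry that was exactly 0 in the input:

import numpy as np
from data_utils.common_utils import affine_scale

row = np.zeros((1, 10))
row[0, 2] = 1.0  # one-hot command bits live in columns 0-3
row[0, 4:] = [6, 6, 12, 12, 18, 18]  # three (x, y) points in the 24x24 viewbox
print(affine_scale(row, scale=0.5)[0, 4:])  # -> [ 9.  9. 12. 12. 15. 15.]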
{data_utils → ThaiVecFont/data_utils}/convert_ttf_to_sfd.py RENAMED
import fontforge  # noqa
import os
import sys
from tqdm import tqdm
import multiprocessing as mp
import argparse


def convert_mp(opts):
    """Use multiprocessing to convert all fonts to sfd files."""

    charset_th = open(f"{opts.data_path}/char_set/{opts.language}.txt", 'r').read()
    charset = charset_th
    if opts.ref_nshot == 52:
        charset_eng = open(f"{opts.data_path}/char_set/eng.txt", 'r').read()
        charset = charset_th + charset_eng
    charset_lenw = len(str(len(charset)))
    fonts_file_path = os.path.join(opts.ttf_path, opts.language)
    sfd_path = os.path.join(opts.sfd_path, opts.language)
    print(os.path.join(fonts_file_path, opts.split))
    for root, dirs, files in os.walk(os.path.join(fonts_file_path, opts.split)):
        ttf_fnames = files
    print(ttf_fnames)

    font_num = len(ttf_fnames)
    process_num = mp.cpu_count() - 1
    font_num_per_process = font_num // process_num + 1

    def process(process_id, font_num_p_process):
        for i in tqdm(range(process_id * font_num_p_process, (process_id + 1) * font_num_p_process)):
            if i >= font_num:
                break

            font_id = ttf_fnames[i].split('.')[0]
            split = opts.split
            font_name = ttf_fnames[i]

            font_file_path = os.path.join(fonts_file_path, split, font_name)
            try:
                cur_font = fontforge.open(font_file_path)
            except Exception as e:
                print('Cannot open', font_name)
                print(e)
                continue

            target_dir = os.path.join(sfd_path, split, "{}".format(font_id))
            if not os.path.exists(target_dir):
                os.makedirs(target_dir)

            for char_id, char in enumerate(charset):
                try:
                    char_description = open(os.path.join(target_dir, '{}_{num:0{width}}.txt'.format(font_id, num=char_id, width=charset_lenw)), 'w')
                    # Thai characters are selected by their 'uniXXXX' glyph name
                    if char in charset_th:
                        char = 'uni' + char.encode("unicode_escape")[2:].decode("utf-8")

                    cur_font.selection.select(char)
                    cur_font.copy()

                    new_font_for_char = fontforge.font()
                    # new_font_for_char.ascent = 750
                    # new_font_for_char.descent = 250
                    # new_font_for_char.em = new_font_for_char.ascent + new_font_for_char.descent
                    # every glyph is pasted into the 'A' slot of a fresh single-glyph font
                    char = 'A'

                    new_font_for_char.selection.select(char)
                    new_font_for_char.paste()
                    new_font_for_char.fontname = "{}_".format(font_id) + font_name

                    new_font_for_char.save(os.path.join(target_dir, '{}_{num:0{width}}.sfd'.format(font_id, num=char_id, width=charset_lenw)))

                    char_description.write(str(ord(char)) + '\n')  # note: char was reassigned to 'A' above, so this always records 65
                    char_description.write(str(new_font_for_char[char].width) + '\n')
                    char_description.write(str(new_font_for_char[char].vwidth) + '\n')
                    char_description.write('{num:0{width}}'.format(num=char_id, width=charset_lenw) + '\n')
                    char_description.write('{}'.format(font_id))
                    # print('{}_{num:0{width}}.sfd'.format(font_id, num=char_id, width=charset_lenw))
                    char_description.close()
                except Exception as e:
                    print("Found Error:", font_id, font_name, char_id, char)
                    print(e)

            cur_font.close()

    processes = [mp.Process(target=process, args=(pid, font_num_per_process)) for pid in range(process_num)]

    for p in processes:
        p.start()
    for p in processes:
        p.join()


def main():
    parser = argparse.ArgumentParser(description="Convert ttf fonts to sfd fonts")
    parser.add_argument("--language", type=str, default='eng', choices=['eng', 'chn', 'tha'])
    parser.add_argument("--data_path", type=str, default='./Font_Dataset', help="Path to Dataset")
    parser.add_argument("--ttf_path", type=str, default='../data/font_ttfs')
    parser.add_argument('--sfd_path', type=str, default='../data/font_sfds')
    parser.add_argument('--split', type=str, default='train')
    # convert_mp reads opts.ref_nshot, but the original parser never defined it;
    # added here (52 = Thai + English reference glyphs is an assumption) so the script can run
    parser.add_argument('--ref_nshot', type=int, default=52)
    opts = parser.parse_args()
    convert_mp(opts)

if __name__ == "__main__":
    main()
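The zero-padded naming scheme above keys each per-glyph .sfd/.txt pair to its index in the charset; a quick illustration with a hypothetical font_id and a 96-character charset:

charset_lenw = len(str(96))  # width 2
print('{}_{num:0{width}}.sfd'.format('0042', num=7, width=charset_lenw))  # -> 0042_07.sfd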
{data_utils → ThaiVecFont/data_utils}/relax_rep.py RENAMED
import argparse
import multiprocessing as mp
import os
import numpy as np
from dataclasses import dataclass
from tqdm import tqdm

def numericalize(cmd, n=64):
    """NOTE: shall only be called after normalization"""
    cmd = ((cmd) / 30 * n).round().clip(min=0, max=n - 1).astype(int)
    return cmd

def denumericalize(cmd, n=64):
    cmd = cmd / n * 30
    return cmd

def cal_aux_bezier_pts(font_seq, opts):
    """
    Calculate auxiliary points along the Bezier curves.
    """
    pts_aux_all = []

    for j in range(opts.char_num):
        char_seq = font_seq[j]  # shape: [opts.max_seq_len, 12]
        pts_aux_char = []
        for k in range(opts.max_seq_len):
            stroke_seq = char_seq[k]
            stroke_cmd = np.argmax(stroke_seq[:4], -1)
            stroke_seq[4:] = denumericalize(numericalize(stroke_seq[4:]))
            p0, p1, p2, p3 = stroke_seq[4:6], stroke_seq[6:8], stroke_seq[8:10], stroke_seq[10:12]
            pts_aux_stroke = []
            if stroke_cmd == 0:  # EOS / padding
                for t in range(6):
                    pts_aux_stroke.append(0)
            elif stroke_cmd == 1:  # move
                for t in [0.25, 0.5, 0.75]:
                    coord_t = p0 + t * (p3 - p0)
                    pts_aux_stroke.append(coord_t[0])
                    pts_aux_stroke.append(coord_t[1])
            elif stroke_cmd == 2:  # line
                for t in [0.25, 0.5, 0.75]:
                    coord_t = p0 + t * (p3 - p0)
                    pts_aux_stroke.append(coord_t[0])
                    pts_aux_stroke.append(coord_t[1])
            elif stroke_cmd == 3:  # curve
                for t in [0.25, 0.5, 0.75]:
                    coord_t = (1 - t) * (1 - t) * (1 - t) * p0 + 3 * t * (1 - t) * (1 - t) * p1 + 3 * t * t * (1 - t) * p2 + t * t * t * p3
                    pts_aux_stroke.append(coord_t[0])
                    pts_aux_stroke.append(coord_t[1])

            pts_aux_stroke = np.array(pts_aux_stroke)
            pts_aux_char.append(pts_aux_stroke)

        pts_aux_char = np.array(pts_aux_char)
        pts_aux_all.append(pts_aux_char)

    pts_aux_all = np.array(pts_aux_all)

    return pts_aux_all


def relax_rep(opts):
    """
    Relax the sequence representation; details are given in the paper.
    """
    data_path = os.path.join(opts.output_path, opts.language, opts.split)
    font_dirs = os.listdir(data_path)
    font_dirs.sort()
    num_fonts = len(font_dirs)
    print(f"Number of {opts.split} fonts before processing:", num_fonts)
    num_processes = mp.cpu_count() - 1
    # num_processes = 1
    fonts_per_process = num_fonts // num_processes + 1

    def process(process_id):

        for i in tqdm(range(process_id * fonts_per_process, (process_id + 1) * fonts_per_process)):
            if i >= num_fonts:
                break

            font_dir = os.path.join(data_path, font_dirs[i])
            font_seq = np.load(os.path.join(font_dir, 'sequence.npy')).reshape(opts.char_num, opts.max_seq_len, -1)
            font_len = np.load(os.path.join(font_dir, 'seq_len.npy')).reshape(-1)
            cmd = font_seq[:, :, :4]
            args = font_seq[:, :, 4:]

            ret = []
            for j in range(opts.char_num):

                char_cmds = cmd[j]
                char_args = args[j]
                char_len = font_len[j]
                new_args = []
                for k in range(char_len):
                    cur_cls = np.argmax(char_cmds[k], -1)
                    cur_arg = char_args[k]
                    if k - 1 > -1:
                        pre_arg = char_args[k - 1]
                    # prepend the previous end point, so every command carries its
                    # own start point (a 'move' starts from its own end point)
                    if cur_cls == 1:  # when k == 0, cur_cls == 1
                        cur_arg = np.concatenate((np.array([cur_arg[-2], cur_arg[-1]]), cur_arg), -1)
                    else:
                        cur_arg = np.concatenate((np.array([pre_arg[-2], pre_arg[-1]]), cur_arg), -1)
                    new_args.append(cur_arg)

                while len(new_args) < opts.max_seq_len:
                    new_args.append(np.array([0, 0, 0, 0, 0, 0, 0, 0]))

                new_args = np.array(new_args)
                new_seq = np.concatenate((char_cmds, new_args), -1)
                ret.append(new_seq)
            ret = np.array(ret)
            # write the relaxed version of sequence.npy
            np.save(os.path.join(font_dir, 'sequence_relaxed.npy'), ret.reshape(opts.char_num, -1))

            pts_aux = cal_aux_bezier_pts(ret, opts)
            np.save(os.path.join(font_dir, 'pts_aux.npy'), pts_aux)

    processes = [mp.Process(target=process, args=[pid]) for pid in range(num_processes)]

    for p in processes:
        p.start()
    for p in processes:
        p.join()


def main():
    parser = argparse.ArgumentParser(description="relax representation")
    parser.add_argument("--language", type=str, default='eng', choices=['eng', 'chn', 'tha'])
    parser.add_argument("--data_path", type=str, default='./Font_Dataset', help="Path to Dataset")
    parser.add_argument("--output_path", type=str, default='../data/vecfont_dataset_/', help="Path to write the database to")
    parser.add_argument("--split", type=str, default='train')
    # relax_rep and cal_aux_bezier_pts read these two options, but the original
    # parser never defined them; defaults below are an assumption (52 glyphs,
    # max sequence length 71) added so the script can run
    parser.add_argument("--char_num", type=int, default=52)
    parser.add_argument("--max_seq_len", type=int, default=71)
    opts = parser.parse_args()
    relax_rep(opts)

if __name__ == "__main__":
    main()
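The t = 0.25/0.5/0.75 samples above are plain cubic Bezier evaluations; a standalone check of the same formula at t = 0.5:

import numpy as np

p0, p1, p2, p3 = map(np.array, ([0., 0.], [0., 8.], [8., 8.], [8., 0.]))
t = 0.5
coord = (1 - t) ** 3 * p0 + 3 * t * (1 - t) ** 2 * p1 + 3 * t ** 2 * (1 - t) * p2 + t ** 3 * p3
print(coord)  # -> [4. 6.], the curve's midpoint for this symmetric control polygon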
{data_utils → ThaiVecFont/data_utils}/svg_utils.py RENAMED
@@ -1,1083 +1,1083 @@
1
- # Copyright 2020 The Magenta Authors.
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
-
15
- # Lint as: python3
16
- """Defines the Material Design Icons Problem."""
17
- import io
18
- import numpy as np
19
- import re
20
-
21
- from PIL import Image
22
- from itertools import zip_longest
23
- from skimage import draw
24
-
25
-
26
- SVG_PREFIX_BIG = ('<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="'
27
- 'http://www.w3.org/1999/xlink" width="256px" height="256px"'
28
- ' style="-ms-transform: rotate(360deg); -webkit-transform:'
29
- ' rotate(360deg); transform: rotate(360deg);" '
30
- 'preserveAspectRatio="xMidYMid meet" viewBox="0 0 24 30">')
31
- PATH_PREFIX_1 = '<path d="'
32
- PATH_POSFIX_1 = '" fill="currentColor"/>'
33
- SVG_POSFIX = '</svg>'
34
-
35
- NUM_ARGS = {'v': 1, 'V': 1, 'h': 1, 'H': 1, 'a': 7, 'A': 7, 'l': 2, 'L': 2,
36
- 't': 2, 'T': 2, 'c': 6, 'C': 6, 'm': 2, 'M': 2, 's': 4, 'S': 4,
37
- 'q': 4, 'Q': 4, 'z': 0}
38
- # in order of arg complexity, with absolutes clustered
39
- # recall we don't handle all commands (see docstring)
40
- CMDS_LIST = 'zHVMLTSQCAhvmltsqca' # was zhvmltsqcaHVMLTSQCA
41
- CMD_MAPPING = {cmd: i for i, cmd in enumerate(CMDS_LIST)}
42
-
43
- FEATURE_DIM = 10
44
-
45
- MAX_SEQ_LEN = 120
46
-
47
- # Manually Change Max Sequence
48
- def change_max_seq_len(param):
49
- global MAX_SEQ_LEN
50
- MAX_SEQ_LEN = param
51
- return MAX_SEQ_LEN
52
-
53
- ############################### GENERAL UTILS #################################
54
- def grouper(iterable, batch_size, fill_value=None):
55
- """Helper method for returning batches of size batch_size of a dataset."""
56
- # grouper('ABCDEF', 3) -> 'ABC', 'DEF'
57
- args = [iter(iterable)] * batch_size
58
- return zip_longest(*args, fillvalue=fill_value)
59
-
60
-
61
- def _map_uni_to_alphanum(uni):
62
- """Maps [0-9 A-Z a-z] to numbers 0-62."""
63
- if 48 <= uni <= 57:
64
- return uni - 48
65
- elif 65 <= uni <= 90:
66
- return uni - 65 + 10
67
- return uni - 97 + 36
68
-
69
-
70
- def _map_uni_to_alpha(uni):
71
- """Maps [A-Z a-z] to numbers 0-52."""
72
- if 65 <= uni <= 90:
73
- return uni - 65
74
- return uni - 97 + 26
75
-
76
-
77
- ############# UTILS FOR CONVERTING SFD/SPLINESETS TO SVG PATHS ################
78
- def _get_spline(sfd):
79
- if 'SplineSet' not in sfd:
80
- return ''
81
- pro = sfd[sfd.index('SplineSet') + 10:] # 10 is the 'SplineSet'
82
- pro = pro[:pro.index('EndSplineSet')]
83
- return pro
84
-
85
-
86
- def _spline_to_path_list(spline, height, replace_with_prev=False):
87
- """Converts SplineSet to a list of tokenized commands in svg path."""
88
- path = []
89
- prev_xy = []
90
- for line in spline.splitlines():
91
- if not line:
92
- continue
93
- tokens = line.split(' ')
94
- cmd = tokens[-2]
95
- if cmd not in 'cml':
96
- # COMMAND NOT RECOGNIZED.
97
- return []
98
- # assert cmd in 'cml', 'Command not recognized: {}'.format(cmd)
99
- args = tokens[:-2]
100
- args = [float(x) for x in args if x]
101
-
102
- if replace_with_prev and cmd in 'c':
103
- args[:2] = prev_xy
104
- prev_xy = args[-2:]
105
-
106
- new_y_args = []
107
- for i, a in enumerate(args):
108
- if i % 2 == 1:
109
- new_y_args.append((height - a))
110
- else:
111
- new_y_args.append((a))
112
-
113
- path.append([cmd.upper()] + new_y_args)
114
- return path
115
-
116
-
117
- def _sfd_to_path_list(single, replace_with_prev=False):
118
- """Converts the given SFD glyph into a path."""
119
- return _spline_to_path_list(_get_spline(single['sfd']), single['vwidth'], replace_with_prev)
120
-
121
-
122
- #################### UTILS FOR PROCESSING TOKENIZED PATHS #####################
123
- def _add_missing_cmds(path, remove_zs=False):
124
- """Adds missing cmd tags to the commands in the svg."""
125
- # For instance, the command 'a' takes 7 arguments, but some SVGs declare:
126
- # a 1 2 3 4 5 6 7 8 9 10 11 12 13 14
127
- # Which is 14 arguments. This function converts the above to the equivalent:
128
- # a 1 2 3 4 5 6 7 a 8 9 10 11 12 13 14
129
- #
130
- # Note: if remove_zs is True, this also removes any occurences of z commands.
131
- new_path = []
132
- for cmd in path:
133
- if not remove_zs or cmd[0] not in 'Zz':
134
- for new_cmd in add_missing_cmd(cmd):
135
- new_path.append(new_cmd)
136
- return new_path
137
-
138
-
139
- def add_missing_cmd(command_list):
140
- """Adds missing cmd tags to the given command list."""
141
- # E.g.: given:
142
- # ['a', '0', '0', '0', '0', '0', '0', '0',
143
- # '0', '0', '0', '0', '0', '0', '0']
144
- # Converts to:
145
- # [['a', '0', '0', '0', '0', '0', '0', '0'],
146
- # ['a', '0', '0', '0', '0', '0', '0', '0']]
147
- # And returns a string that joins these elements with spaces.
148
- cmd_tag = command_list[0]
149
- args = command_list[1:]
150
-
151
- final_cmds = []
152
- for arg_batch in grouper(args, NUM_ARGS[cmd_tag]):
153
- final_cmds.append([cmd_tag] + list(arg_batch))
154
-
155
- if not final_cmds:
156
- # command has no args (e.g.: 'z')
157
- final_cmds = [[cmd_tag]]
158
-
159
- return final_cmds
160
-
161
-
162
- def _normalize_args(arglist, norm, add=None, flip=False):
163
- """Normalize the given args with the given norm value."""
164
- new_arglist = []
165
- for i, arg in enumerate(arglist):
166
- new_arg = float(arg)
167
-
168
- if add is not None:
169
- add_to_x, add_to_y = add
170
-
171
- # This argument is an x-coordinate if even, y-coordinate if odd
172
- # except when flip == True
173
- if i % 2 == 0:
174
- new_arg += add_to_y if flip else add_to_x
175
- else:
176
- new_arg += add_to_x if flip else add_to_y
177
-
178
- new_arglist.append(str(24 * new_arg / norm))
179
- return new_arglist
180
-
181
-
182
- def _normalize_based_on_viewbox(path, viewbox):
183
- """Normalizes all args in a path to a standard 24x24 viewbox."""
184
- # Each SVG lives in a 2D plane. The viewbox determines the region of that
185
- # plane that gets rendered. For instance, some designers may work with a
186
- # viewbox that's 24x24, others with one that's 100x100, etc.
187
-
188
- # Suppose I design the the letter "h" in the Arial style using a 100x100
189
- # viewbox (let's call it icon A). Let's suppose the icon has height 75. Then,
190
- # I design the same character using a 20x20 viewbox (call this icon B), with
191
- # height 15 (=75% of 20). This means that, when rendered, both icons with look
192
- # exactly the same, but the scale of the commands each icon is using is
193
- # different. For instance, if icon A has a command like "lineTo 100 100", the
194
- # equivalent command in icon B will be "lineTo 20 20".
195
-
196
- # In order to avoid this problem and bring all real values to the same scale,
197
- # I scale all icons' commands to use a 24x24 viewbox. This function does this:
198
- # it converts a path that exists in the given viewbox into a standard 24x24
199
- # viewbox.
200
- viewbox = viewbox.split(' ')
201
- norm = max(int(viewbox[-1]), int(viewbox[-2]))
202
-
203
- if int(viewbox[-1]) > int(viewbox[-2]):
204
- add_to_y = 0
205
- add_to_x = abs(int(viewbox[-1]) - int(viewbox[-2])) / 2
206
- else:
207
- add_to_y = abs(int(viewbox[-1]) - int(viewbox[-2])) / 2
208
- add_to_x = 0
209
-
210
- new_path = []
211
- for command in path:
212
- if command[0] == 'a':
213
- new_path.append([command[0]] + _normalize_args(command[1:3], norm)
214
- + command[3:6] + _normalize_args(command[6:], norm))
215
- elif command[0] == 'A':
216
- new_path.append([command[0]] + _normalize_args(command[1:3], norm)
217
- + command[3:6] + _normalize_args(command[6:], norm, add=(add_to_x, add_to_y)))
218
- elif command[0] == 'V':
219
- new_path.append([command[0]] + _normalize_args(command[1:], norm, add=(add_to_x, add_to_y), flip=True))
220
- elif command[0] == command[0].upper():
221
- new_path.append([command[0]] + _normalize_args(command[1:], norm, add=(add_to_x, add_to_y)))
222
- elif command[0] in 'zZ':
223
- new_path.append([command[0]])
224
- else:
225
- new_path.append([command[0]] + _normalize_args(command[1:], norm))
226
-
227
- return new_path
228
-
229
-
230
- def _convert_args(args, curr_pos, cmd):
231
- """Converts given args to relative values."""
232
- # NOTE: glyphs only use a very small subset of commands (L, C, M, and Z -- I
233
- # believe). So I'm not handling A and H for now.
234
- if cmd in 'AH':
235
- raise NotImplementedError('These commands have >6 args (not supported).')
236
-
237
- new_args = []
238
- for i, arg in enumerate(args):
239
- x_or_y = i % 2
240
- if cmd == 'H':
241
- x_or_y = (i + 1) % 2
242
- new_args.append(str(float(arg) - curr_pos[x_or_y]))
243
-
244
- return new_args
245
-
246
-
247
- def _update_curr_pos(curr_pos, cmd, start_of_path):
248
- """Calculate the position of the pen after cmd is applied."""
249
- if cmd[0] in 'ml':
250
- curr_pos = [curr_pos[0] + float(cmd[1]), curr_pos[1] + float(cmd[2])]
251
- if cmd[0] == 'm':
252
- start_of_path = curr_pos
253
- elif cmd[0] in 'z':
254
- curr_pos = start_of_path
255
- elif cmd[0] in 'h':
256
- curr_pos = [curr_pos[0] + float(cmd[1]), curr_pos[1]]
257
- elif cmd[0] in 'v':
258
- curr_pos = [curr_pos[0], curr_pos[1] + float(cmd[1])]
259
- elif cmd[0] in 'ctsqa':
260
- curr_pos = [curr_pos[0] + float(cmd[-2]), curr_pos[1] + float(cmd[-1])]
261
-
262
- return curr_pos, start_of_path
263
-
264
-
265
- def _make_relative(cmds):
266
- """Convert commands in a path to relative positioning."""
267
- curr_pos = (0.0, 0.0)
268
- start_of_path = (0.0, 0.0)
269
- new_cmds = []
270
- for cmd in cmds:
271
- if cmd[0].lower() == cmd[0]:
272
- new_cmd = cmd
273
- elif cmd[0].lower() == 'z':
274
- new_cmd = [cmd[0].lower()]
275
- else:
276
- new_cmd = [cmd[0].lower()] + _convert_args(cmd[1:], curr_pos, cmd=cmd[0])
277
- new_cmds.append(new_cmd)
278
- curr_pos, start_of_path = _update_curr_pos(curr_pos, new_cmd, start_of_path)
279
- return new_cmds
280
-
281
-
282
- def _is_to_left_of(pt1, pt2):
283
- pt1_norm = (pt1[0]**2 + pt1[1]**2)
284
- pt2_norm = (pt2[0]**2 + pt2[1]**2)
285
- return pt1[1] < pt2[1] or (pt1_norm == pt2_norm and pt1[0] < pt2[0])
286
-
287
-
288
- def _get_leftmost_point(path):
289
- """Returns the leftmost, topmost point of the path."""
290
- leftmost = (float('inf'), float('inf'))
291
- idx = -1
292
-
293
- for i, cmd in enumerate(path):
294
- if len(cmd) > 1:
295
- endpoint = cmd[-2:]
296
- if _is_to_left_of(endpoint, leftmost):
297
- leftmost = endpoint
298
- idx = i
299
-
300
- return leftmost, idx
301
-
302
-
303
- def _separate_substructures(path):
304
- """Returns a list of subpaths, each representing substructures the glyph."""
305
- substructures = []
306
- curr = []
307
- for cmd in path:
308
- if cmd[0] in 'mM' and curr:
309
- substructures.append(curr)
310
- curr = []
311
- curr.append(cmd)
312
- if curr:
313
- substructures.append(curr)
314
- return substructures
315
-
316
-
317
- def _is_clockwise(subpath):
318
- """Returns whether the given subpath is clockwise-oriented."""
319
- pts = [cmd[-2:] for cmd in subpath]
320
- det = 0
321
- for i in range(len(pts) - 1):
322
- det += np.linalg.det(pts[i:i + 2])
323
- return det > 0
324
-
325
-
326
- def _make_clockwise(subpath):
327
- """Inverts the cardinality of the given subpath."""
328
- new_path = [subpath[0]]
329
- other_cmds = list(reversed(subpath[1:]))
330
- for i, cmd in enumerate(other_cmds):
331
- if i + 1 == len(other_cmds):
332
- where_we_were = subpath[0][-2:]
333
- else:
334
- where_we_were = other_cmds[i + 1][-2:]
335
-
336
- if len(cmd) > 3:
337
- new_cmd = [cmd[0], cmd[3], cmd[4], cmd[1], cmd[2],
338
- where_we_were[0], where_we_were[1]]
339
- else:
340
- new_cmd = [cmd[0], where_we_were[0], where_we_were[1]]
341
-
342
- new_path.append(new_cmd)
343
- return new_path
344
-
345
-
346
- def _canonicalize(path):
347
- """Makes all paths start at top left, and go clockwise first."""
348
- # convert args to floats
349
- path = [[x[0]] + list(map(float, x[1:])) for x in path]
350
-
351
- # _canonicalize each subpath separately
352
- new_substructures = []
353
- for subpath in _separate_substructures(path):
354
- leftmost_point, leftmost_idx = _get_leftmost_point(subpath)
355
- reordered = ([['M', leftmost_point[0], leftmost_point[1]]] + subpath[leftmost_idx + 1:] + subpath[1:leftmost_idx + 1])
356
- new_substructures.append((reordered, leftmost_point))
357
-
358
- new_path = []
359
- first_substructure_done = False
360
- should_flip_cardinality = False
361
- for sp, _ in sorted(new_substructures, key=lambda x: (x[1][1], x[1][0])):
362
- if not first_substructure_done:
363
- # we're looking at the first substructure now, we can determine whether we
364
- # will flip the cardniality of the whole icon or not
365
- should_flip_cardinality = not _is_clockwise(sp)
366
- first_substructure_done = True
367
-
368
- if should_flip_cardinality:
369
- sp = _make_clockwise(sp)
370
-
371
- new_path.extend(sp)
372
-
373
- # convert args to strs
374
- path = [[x[0]] + list(map(str, x[1:])) for x in new_path]
375
- return path
376
-
377
-
378
- # ######### UTILS FOR CONVERTING TOKENIZED PATHS TO VECTORS ###########
379
- def _path_to_vector(path, categorical=False):
380
- """Converts path's commands to a series of vectors."""
381
- # Notes:
382
- # - The SimpleSVG dataset does not have any 't', 'q', 'Z', 'T', or 'Q'.
383
- # Thus, we don't handle those here.
384
- # - We also removed all 'z's.
385
- # - The x-axis-rotation argument to a commands is always 0 in this
386
- # dataset, so we ignore it
387
-
388
- # Many commands have args that correspond to args in other commands.
389
- # v __,__ _______________ ______________,_________ __,__ __,__ _,y
390
- # h __,__ _______________ ______________,_________ __,__ __,__ x,_
391
- # z __,__ _______________ ______________,_________ __,__ __,__ _,_
392
- # a rx,ry x-axis-rotation large-arc-flag,sweepflag __,__ __,__ x,y
393
- # l __,__ _______________ ______________,_________ __,__ __,__ x,y
394
- # c __,__ _______________ ______________,_________ x1,y1 x2,y2 x,y
395
- # m __,__ _______________ ______________,_________ __,__ __,__ x,y
396
- # s __,__ _______________ ______________,_________ __,__ x2,y2 x,y
397
-
398
- # So each command will be converted to a vector where the dimension is the
399
- # minimal number of arguments to all commands:
400
- # [rx, ry, large-arc-flag, sweepflag, x1, y1, x2, y2, x, y]
401
- # If a command does not output a certain arg, it is set to 0.
402
- # "l 5,5" becomes [0, 0, 0, 0, 0, 0, 0, 0, 5, 5]
403
-
404
- # Also note, as of now we also output an extra dimension at index 0, which
405
- # indicates which command is being outputted (integer).
406
- new_path = []
407
- for cmd in path:
408
- new_path.append(_cmd_to_vector(cmd, categorical=categorical))
409
- return new_path
410
-
411
-
412
- def _cmd_to_vector(cmd_list, categorical=False):
413
- """Converts the given command (given as a list) into a vector."""
414
- # For description of how this conversion happens, see
415
- # _path_to_vector docstring.
416
- cmd = cmd_list[0]
417
- args = cmd_list[1:]
418
-
419
- if not categorical:
420
- # integer, for MSE
421
- command = [float(CMD_MAPPING[cmd])]
422
- else:
423
- # one hot + 1 dim for EOS.
424
- command = [0.0] * (len(CMDS_LIST) + 1)
425
- command[CMD_MAPPING[cmd] + 1] = 1.0
426
-
427
- arguments = [0.0] * 10
428
- if cmd in 'hH':
429
- arguments[8] = float(args[0]) # x
430
- elif cmd in 'vV':
431
- arguments[9] = float(args[0]) # y
432
- elif cmd in 'mMlLtT':
433
- arguments[8] = float(args[0]) # x
434
- arguments[9] = float(args[1]) # y
435
- elif cmd in 'sSqQ':
436
- arguments[6] = float(args[0]) # x2
437
- arguments[7] = float(args[1]) # y2
438
- arguments[8] = float(args[2]) # x
439
- arguments[9] = float(args[3]) # y
440
- elif cmd in 'cC':
441
- arguments[4] = float(args[0]) # x1
442
- arguments[5] = float(args[1]) # y1
443
- arguments[6] = float(args[2]) # x2
444
- arguments[7] = float(args[3]) # y2
445
- arguments[8] = float(args[4]) # x
446
- arguments[9] = float(args[5]) # y
447
- elif cmd in 'aA':
448
- arguments[0] = float(args[0]) # rx
449
- arguments[1] = float(args[1]) # ry
450
- # we skip x-axis-rotation
451
- arguments[2] = float(args[3]) # large-arc-flag
452
- arguments[3] = float(args[4]) # sweep-flag
453
- # a does not have x1, y1, x2, y2 args
454
- arguments[8] = float(args[5]) # x
455
- arguments[9] = float(args[6]) # y
456
-
457
- return command + arguments
458
-
459
-
460
- ################## UTILS FOR RENDERING PATH INTO IMAGE #################
461
- def _cubicbezier(x0, y0, x1, y1, x2, y2, x3, y3, n=40):
462
- """Return n points along cubiz bezier with given control points."""
463
- # from http://rosettacode.org/wiki/Bitmap/B%C3%A9zier_curves/Cubic
464
- pts = []
465
- for i in range(n + 1):
466
- t = float(i) / float(n)
467
- a = (1. - t)**3
468
- b = 3. * t * (1. - t)**2
469
- c = 3.0 * t**2 * (1.0 - t)
470
- d = t**3
471
-
472
- x = float(a * x0 + b * x1 + c * x2 + d * x3)
473
- y = float(a * y0 + b * y1 + c * y2 + d * y3)
474
- pts.append((x, y))
475
- return list(zip(*pts))
476
-
477
-
478
- def _update_pos(curr_pos, end_pos, absolute):
479
- if absolute:
480
- return end_pos
481
- return curr_pos[0] + end_pos[0], curr_pos[1] + end_pos[1]
482
-
483
-
484
- def constant_color(*unused_args):
485
- return np.array([255, 255, 255])
486
-
487
-
488
- def _render_cubic(canvas, curr_pos, c_args, absolute, color):
489
- """Renders a cubic bezier curve in the given canvas."""
490
- if not absolute:
491
- c_args[0] += curr_pos[0]
492
- c_args[1] += curr_pos[1]
493
- c_args[2] += curr_pos[0]
494
- c_args[3] += curr_pos[1]
495
- c_args[4] += curr_pos[0]
496
- c_args[5] += curr_pos[1]
497
- x, y = _cubicbezier(curr_pos[0], curr_pos[1],
498
- c_args[0], c_args[1],
499
- c_args[2], c_args[3],
500
- c_args[4], c_args[5])
501
- max_possible = len(canvas)
502
- x = [int(round(x_)) for x_ in x]
503
- y = [int(round(y_)) for y_ in y]
504
-
505
- def within_range(x):
506
- return 0 <= x < max_possible
507
-
508
- filtered = [(x_, y_) for x_, y_ in zip(x, y)
509
- if within_range(x_) and within_range(y_)]
510
- if not filtered:
511
- return
512
- x, y = list(zip(*filtered))
513
- canvas[y, x, :] = color
514
-
515
-
516
- def _render_line(canvas, curr_pos, l_args, absolute, color):
517
- """Renders a line in the given canvas."""
518
- end_point = l_args
519
- if not absolute:
520
- end_point[0] += curr_pos[0]
521
- end_point[1] += curr_pos[1]
522
- rr, cc, val = draw.line_aa(int(curr_pos[0]), int(curr_pos[1]),
523
- int(end_point[0]), int(end_point[1]))
524
-
525
- max_possible = len(canvas)
526
-
527
- def within_range(x):
528
- return 0 <= x < max_possible
529
-
530
- filtered = [(x, y, v) for x, y, v in zip(rr, cc, val)
531
- if within_range(x) and within_range(y)]
532
- if not filtered:
533
- return
534
- rr, cc, val = list(zip(*filtered))
535
- val = [(v * color) for v in val]
536
- canvas[cc, rr, :] = val
537
-
538
-
539
- def _per_step_render(path, absolute=False, color=constant_color):
540
- """Render the icon's edges, given its path."""
541
- def to_canvas_size(l):
542
- return [float(f) * (64. / 24.) for f in l]
543
-
544
- canvas = np.zeros((64, 64, 3))
545
- curr_pos = (0.0, 0.0)
546
- for i, cmd in enumerate(path):
547
- if not cmd:
548
- continue
549
- if cmd[0] in 'mM':
550
- curr_pos = _update_pos(curr_pos, to_canvas_size(cmd[-2:]), absolute)
551
- elif cmd[0] in 'cC':
552
- _render_cubic(canvas, curr_pos, to_canvas_size(cmd[1:]), absolute, color(i, 55))
553
- curr_pos = _update_pos(curr_pos, to_canvas_size(cmd[-2:]), absolute)
554
- elif cmd[0] in 'lL':
555
- _render_line(canvas, curr_pos, to_canvas_size(cmd[1:]), absolute, color(i, 55))
556
- curr_pos = _update_pos(curr_pos, to_canvas_size(cmd[1:]), absolute)
557
-
558
- return canvas
559
-
560
-
561
- def _zoom_out(path_list, add_baseline=0., per=22):
562
- """Makes glyph slightly smaller in viewbox, makes some descenders visible."""
563
- # assumes tensor is already unnormalized, and in long form
564
- new_path = []
565
- for command in path_list:
566
- args = []
567
- is_even = False
568
- for arg in command[1:]:
569
- if is_even:
570
- args.append(str(float(arg) - ((24. - per) / 24.) * 64. / 4.))
571
- is_even = False
572
- else:
573
- args.append(str(float(arg) - add_baseline))
574
- is_even = True
575
- new_path.append([command[0]] + args)
576
- return new_path
577
-
578
-
579
- ##################### UTILS FOR PROCESSING VECTORS ################
580
- def _append_eos(sample, categorical, feature_dim):
581
- if not categorical:
582
- eos = -1 * np.ones(feature_dim)
583
- else:
584
- eos = np.zeros(feature_dim)
585
- eos[0] = 1.0
586
- sample.append(eos)
587
- return sample
588
-
589
-
590
- def _make_simple_cmds_long(out):
591
- """Converts svg decoder output to format required by some render functions."""
592
- # out has 10 dims
593
- # the first 4 are respectively dims 0, 4, 5, 9 of the full 20-dim onehot vec
594
- # the latter 6 are the 6 last dims of the 10-dim arg vec
595
- shape_minus_dim = list(np.shape(out))[:-1]
596
- return np.concatenate([out[..., :1],
597
- np.zeros(shape_minus_dim + [3]),
598
- out[..., 1:3],
599
- np.zeros(shape_minus_dim + [3]),
600
- out[..., 3:4],
601
- np.zeros(shape_minus_dim + [14]),
602
- out[..., 4:]], -1)
603
-
604
-
605
- ################# UTILS FOR CONVERTING VECTORS TO SVGS ########################
606
- def _vector_to_svg(vectors, stop_at_eos=False, categorical=False):
607
- """Tranforms a given vector to an svg string."""
608
- new_path = []
609
- for vector in vectors:
610
- if stop_at_eos:
611
- if categorical:
612
- try:
613
- is_eos = np.argmax(vector[:len(CMDS_LIST) + 1]) == 0
614
- except Exception:
615
- raise Exception(vector)
616
- else:
617
- is_eos = vector[0] < -0.5
618
-
619
- if is_eos:
620
- break
621
- new_path.append(' '.join(_vector_to_cmd(vector, categorical=categorical)))
622
- new_path = ' '.join(new_path)
623
- return SVG_PREFIX_BIG + PATH_PREFIX_1 + new_path + PATH_POSFIX_1 + SVG_POSFIX
624
-
625
-
626
- def _vector_to_cmd(vector, categorical=False, return_floats=False):
627
- """Does the inverse transformation as _cmd_to_vector()."""
628
- cast_fn = float if return_floats else str
629
- if categorical:
630
- command = vector[:len(CMDS_LIST) + 1]
631
- arguments = vector[len(CMDS_LIST) + 1:]
632
- cmd_idx = np.argmax(command) - 1
633
- else:
634
- command, arguments = vector[:1], vector[1:]
635
- cmd_idx = int(round(command[0]))
636
-
637
- if cmd_idx < -0.5:
638
- # EOS
639
- return []
640
- if cmd_idx >= len(CMDS_LIST):
641
- cmd_idx = len(CMDS_LIST) - 1
642
-
643
- cmd = CMDS_LIST[cmd_idx]
644
- cmd = cmd.upper()
645
- cmd_list = [cmd]
646
-
647
- if cmd in 'hH':
648
- cmd_list.append(cast_fn(arguments[8])) # x
649
- elif cmd in 'vV':
650
- cmd_list.append(cast_fn(arguments[9])) # y
651
- elif cmd in 'mMlLtT':
652
- cmd_list.append(cast_fn(arguments[8])) # x
653
- cmd_list.append(cast_fn(arguments[9])) # y
654
- elif cmd in 'sSqQ':
655
- cmd_list.append(cast_fn(arguments[6])) # x2
656
- cmd_list.append(cast_fn(arguments[7])) # y2
657
- cmd_list.append(cast_fn(arguments[8])) # x
658
- cmd_list.append(cast_fn(arguments[9])) # y
659
- elif cmd in 'cC':
660
- cmd_list.append(cast_fn(arguments[4])) # x1
661
- cmd_list.append(cast_fn(arguments[5])) # y1
662
- cmd_list.append(cast_fn(arguments[6])) # x2
663
- cmd_list.append(cast_fn(arguments[7])) # y2
664
- cmd_list.append(cast_fn(arguments[8])) # x
665
- cmd_list.append(cast_fn(arguments[9])) # y
666
- elif cmd in 'aA':
667
- cmd_list.append(cast_fn(arguments[0])) # rx
668
- cmd_list.append(cast_fn(arguments[1])) # ry
669
- # x-axis-rotation is always 0
670
- cmd_list.append(cast_fn('0'))
671
- # the following two flags are binary.
672
- cmd_list.append(cast_fn(1 if arguments[2] > 0.5 else 0)) # large-arc-flag
673
- cmd_list.append(cast_fn(1 if arguments[3] > 0.5 else 0)) # sweep-flag
674
- cmd_list.append(cast_fn(arguments[8])) # x
675
- cmd_list.append(cast_fn(arguments[9])) # y
676
-
677
- return cmd_list
678
-
679
-
680
- ############## UTILS FOR CONVERTING SVGS/VECTORS TO IMAGES ###################
681
-
682
- # From Infer notebook
683
- start = ("""<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www."""
684
- """w3.org/1999/xlink" width="256px" height="256px" style="-ms-trans"""
685
- """form: rotate(360deg); -webkit-transform: rotate(360deg); transfo"""
686
- """rm: rotate(360deg);" preserveAspectRatio="xMidYMid meet" viewBox"""
687
- """="0 0 24 30"><path d=\"""")
688
- end = """\" fill="currentColor"/></svg>"""
689
-
690
- COMMAND_RX = re.compile("([MmLlHhVvCcSsQqTtAaZz])")
691
- FLOAT_RX = re.compile("[-+]?[0-9]*\.?[0-9]+(?:[eE][-+]?[0-9]+)?") # noqa
692
-
693
-
694
- def svg_html_to_path_string(svg):
695
- return svg.replace(start, '').replace(end, '')
696
-
697
-
698
- def _tokenize(pathdef):
699
- """Returns each svg token from path list."""
700
- # e.g.: 'm0.1-.5c0,6' -> 'm', '0.1', '-.5', 'c', '0', '6'
701
- for x in COMMAND_RX.split(pathdef):
702
- if x != '' and x in 'MmLlHhVvCcSsQqTtAaZz':
703
- yield x
704
- for token in FLOAT_RX.findall(x):
705
- yield token
706
-
707
-
708
- def path_string_to_tokenized_commands(path):
709
- """Tokenizes the given path string.
710
-
711
- E.g.:
712
- Given M 0.5 0.5 l 0.25 0.25 z
713
- Returns [['M', '0.5', '0.5'], ['l', '0.25', '0.25'], ['z']]
714
- """
715
- new_path = []
716
- current_cmd = []
717
- for token in _tokenize(path):
718
- if len(current_cmd) > 0:
719
- if token in 'MmLlHhVvCcSsQqTtAaZz':
720
- # cmd ended, convert to vector and add to new_path
721
- new_path.append(current_cmd)
722
- current_cmd = [token]
723
- else:
724
- # add arg to command
725
- current_cmd.append(token)
726
- else:
727
- # add to start new cmd
728
- current_cmd.append(token)
729
-
730
- if current_cmd:
731
- # process command still unprocessed
732
- new_path.append(current_cmd)
733
-
734
- return new_path
735
-
736
-
737
- def separate_substructures(tokenized_commands):
738
- """Returns a list of SVG substructures."""
739
- # every moveTo command starts a new substructure
740
- # an SVG substructure is a subpath that closes on itself
741
- # such as the outer and the inner edge of the character `o`
742
- substructures = []
743
- curr = []
744
- for cmd in tokenized_commands:
745
- if cmd[0] in 'mM' and len(curr) > 0:
746
- substructures.append(curr)
747
- curr = []
748
- curr.append(cmd)
749
- if len(curr) > 0:
750
- substructures.append(curr)
751
- return substructures
752
-
753
-
754
- def postprocess(svg, dist_thresh=2., skip=False):
755
- path = svg_html_to_path_string(svg)
756
- svg_template = svg.replace(path, '{}')
757
- tokenized_commands = path_string_to_tokenized_commands(path)
758
-
759
- def dist(a, b):
760
- return np.sqrt((float(a[0]) - float(b[0]))**2 + (float(a[1]) - float(b[1]))**2)
761
-
762
- def are_close_together(a, b, t):
763
- return dist(a, b) < t
764
-
765
- # first, go through each start/end point and merge if they're close enough
766
- # together (that is, make end point the same as the start point).
767
- # TODO: there are better ways of doing this, in a way that propagates error
768
- # back (so if total error is 0.2, go through all N commands in this
769
- # substructure and fix each by 0.2/N (unless they have 0 vertical change))
770
- substructures = separate_substructures(tokenized_commands)
771
-
772
- previous_substructure_endpoint = (0., 0.,)
773
- for substructure in substructures:
774
- # first, if the last substructure's endpoint was updated, we must update
775
- # the start point of this one to reflect the opposite update
776
- substructure[0][-2] = str(float(substructure[0][-2]) -
777
- previous_substructure_endpoint[0])
778
- substructure[0][-1] = str(float(substructure[0][-1]) -
779
- previous_substructure_endpoint[1])
780
-
781
- start = list(map(float, substructure[0][-2:]))
782
- curr_pos = (0., 0.)
783
- for cmd in substructure:
784
- curr_pos, _ = _update_curr_pos(curr_pos, cmd, (0., 0.))
785
- if are_close_together(start, curr_pos, dist_thresh):
786
- new_point = np.array(start)
787
- previous_substructure_endpoint = ((new_point[0] - curr_pos[0]),
788
- (new_point[1] - curr_pos[1]))
789
- substructure[-1][-2] = str(float(substructure[-1][-2]) +
790
- (new_point[0] - curr_pos[0]))
791
- substructure[-1][-1] = str(float(substructure[-1][-1]) +
792
- (new_point[1] - curr_pos[1]))
793
- if substructure[-1][0] in 'cC':
794
- substructure[-1][-4] = str(float(substructure[-1][-4]) +
795
- (new_point[0] - curr_pos[0]))
796
- substructure[-1][-3] = str(float(substructure[-1][-3]) +
797
- (new_point[1] - curr_pos[1]))
798
-
799
- if skip:
800
- return svg_template.format(' '.join([' '.join(' '.join(cmd) for cmd in s)
801
- for s in substructures]))
802
-
803
- def cosa(x, y):
804
- return (x[0] * y[0] + x[1] * y[1]) / ((np.sqrt(x[0]**2 + x[1]**2) * np.sqrt(y[0]**2 + y[1]**2)))
805
-
806
- def rotate(a, x, y):
807
- return (x * np.cos(a) - y * np.sin(a), y * np.cos(a) + x * np.sin(a))
808
- # second, we need to find adjacent bezier curves and, if their control points
809
- # are well enough aligned, fully align them
810
- for substructure in substructures:
811
- curr_pos = (0., 0.)
812
- new_curr_pos, _ = _update_curr_pos((0., 0.,), substructure[0], (0., 0.))
813
-
814
- for cmd_idx in range(1, len(substructure)):
815
- prev_cmd = substructure[cmd_idx-1]
816
- cmd = substructure[cmd_idx]
817
-
818
- new_new_curr_pos, _ = _update_curr_pos(
819
- new_curr_pos, cmd, (0., 0.))
820
-
821
- if cmd[0] == 'c':
822
- if prev_cmd[0] == 'c':
823
- # check the vectors and update if needed
824
- # previous control pt wrt new curr point
825
- prev_ctr_point = (curr_pos[0] + float(prev_cmd[3]) - new_curr_pos[0],
826
- curr_pos[1] + float(prev_cmd[4]) - new_curr_pos[1])
827
- ctr_point = (float(cmd[1]), float(cmd[2]))
828
-
829
- if -1. < cosa(prev_ctr_point, ctr_point) < -0.95:
830
- # calculate exact angle between the two vectors
831
- angle_diff = (np.pi - np.arccos(cosa(prev_ctr_point, ctr_point)))/2
832
-
833
- # rotate each vector by angle/2 in the correct direction for each.
834
- sign = np.sign(np.cross(prev_ctr_point, ctr_point))
835
- new_ctr_point = rotate(sign * angle_diff, *ctr_point)
836
- new_prev_ctr_point = rotate(-sign * angle_diff, *prev_ctr_point)
837
-
838
- # override the previous control points
839
- # (which has to be wrt previous curr position)
840
- substructure[cmd_idx-1][3] = str(new_prev_ctr_point[0] -
841
- curr_pos[0] + new_curr_pos[0])
842
- substructure[cmd_idx-1][4] = str(new_prev_ctr_point[1] -
843
- curr_pos[1] + new_curr_pos[1])
844
- substructure[cmd_idx][1] = str(new_ctr_point[0])
845
- substructure[cmd_idx][2] = str(new_ctr_point[1])
846
-
847
- curr_pos = new_curr_pos
848
- new_curr_pos = new_new_curr_pos
849
-
850
- return svg_template.format(' '.join([' '.join(' '.join(cmd) for cmd in s)
851
- for s in substructures]))
852
-
853
-
854
- # def get_means_stdevs(data_dir):
855
- # """Returns the means and stdev saved in data_dir."""
856
- # if data_dir not in means_stdevs:
857
- # with tf.gfile.Open(os.path.join(data_dir, 'mean.npz'), 'r') as f:
858
- # mean_npz = np.load(f)
859
- # with tf.gfile.Open(os.path.join(data_dir, 'stdev.npz'), 'r') as f:
860
- # stdev_npz = np.load(f)
861
- # means_stdevs[data_dir] = (mean_npz, stdev_npz)
862
- # return means_stdevs[data_dir]
863
-
864
-
865
- def render(tensor, data_dir=None):
866
- """Converts SVG decoder output into HTML svg."""
867
- # undo normalization
868
- # mean_npz, stdev_npz = get_means_stdevs(data_dir)
869
- # tensor = (tensor * stdev_npz) + mean_npz
870
-
871
- # convert to html
872
- tensor = _make_simple_cmds_long(tensor)
873
- # vector = np.squeeze(np.squeeze(tensor, 0), 2)
874
- html = _vector_to_svg(tensor, stop_at_eos=True, categorical=True)
875
-
876
- # some aesthetic postprocessing
877
- html = postprocess(html)
878
- html = html.replace('256px', '50px')
879
-
880
- return html
881
-
882
- ###############
883
-
884
-
885
- def convert_to_svg(decoder_output, categorical=False):
886
- converted = []
887
- for example in decoder_output:
888
- converted.append(_vector_to_svg(example, True, categorical=categorical))
889
- return np.array(converted)
890
-
891
-
892
- def create_image_conversion_fn(max_outputs, categorical=False):
893
- """Binds the number of outputs to the image conversion fn (to svg or png)."""
894
- def convert_to_svg(decoder_output):
895
- converted = []
896
- for example in decoder_output:
897
- if len(converted) == max_outputs:
898
- break
899
- converted.append(_vector_to_svg(example, True, categorical=categorical))
900
- return np.array(converted)
901
-
902
- return convert_to_svg
903
-
904
-
905
- ################### UTILS FOR CREATING TF SUMMARIES ##########################
906
- def _make_encoded_image(img_tensor):
907
- pil_img = Image.fromarray(np.squeeze(img_tensor * 255).astype(np.uint8), mode='L')
908
- buff = io.BytesIO()
909
- pil_img.save(buff, format='png')
910
- encoded_image = buff.getvalue()
911
- return encoded_image
912
-
913
-
914
- ################### CHECK GLYPH/PATH VALID ##############################################
915
- def is_valid_glyph(g):
916
- is_09 = 48 <= g['uni'] <= 57
917
- is_capital_az = 65 <= g['uni'] <= 90
918
- is_az = 97 <= g['uni'] <= 122
919
- is_valid_dims = g['width'] != 0 and g['vwidth'] != 0
920
- return (is_09 or is_capital_az or is_az) and is_valid_dims
921
-
922
-
923
- def is_valid_path(pathunibfp):
924
- return pathunibfp[0] and len(pathunibfp[0]) <= MAX_SEQ_LEN
925
-
926
-
927
- ################### DATASET PROCESSING #######################################
928
- def convert_to_path(g):
929
- """Converts SplineSet in SFD font to str path."""
930
- path = _sfd_to_path_list(g)
931
- path = _add_missing_cmds(path, remove_zs=False)
932
- path = _normalize_based_on_viewbox(path, '0 0 {} {}'.format(g['width'], g['vwidth']))
933
- return path, g['uni'], g['binary_fp']
934
-
935
-
936
- def create_example(pathunibfp):
937
- """Bulk of dataset processing. Converts str path to np array"""
938
- path, uni, binary_fp = pathunibfp
939
- final = {}
940
-
941
- # zoom out
942
- path = _zoom_out(path)
943
- # make clockwise
944
- path = _canonicalize(path)
945
-
946
- # render path for training
947
- final['rendered'] = _per_step_render(path, absolute=True)
948
-
949
- # make path relative
950
- # path = _make_relative(path)
951
- # convert to vector
952
- vector = _path_to_vector(path, categorical=True)
953
- # make simple vector
954
- vector = np.array(vector)
955
- vector = np.concatenate([np.take(vector, [0, 4, 5, 9], axis=-1), vector[..., -6:]], axis=-1)
956
-
957
- # count some stats
958
- final['seq_len'] = np.shape(vector)[0]
959
- # final['class'] = int(_map_uni_to_alphanum(uni))
960
- final['class'] = int(_map_uni_to_alpha(uni)) # be advised that the class is useless because it is all 0
961
- final['binary_fp'] = str(binary_fp)
962
-
963
- # append eos
964
- vector = _append_eos(vector.tolist(), True, 10)
965
-
966
- # pad path to MAX_SEQ_LEN + 1 (with eos)
967
- final['sequence'] = np.concatenate((vector, np.zeros(((MAX_SEQ_LEN - final['seq_len']), 10))), 0)
968
-
969
- # make pure list:
970
- # use last channel only
971
- final['rendered'] = np.reshape(final['rendered'][..., 0], [64 * 64]).astype(np.float32).tolist()
972
- final['sequence'] = np.reshape(final['sequence'], [(MAX_SEQ_LEN + 1) * 10]).astype(np.float32).tolist()
973
- final['class'] = np.reshape(final['class'], [1]).astype(np.int64).tolist()
974
- final['seq_len'] = np.reshape(final['seq_len'], [1]).astype(np.int64).tolist()
975
- return final
976
-
977
-
978
- def mean_to_example(mean_stdev):
979
- """Converts the found mean and stdev to example."""
980
- # mean_stdev is a dict
981
- mean_stdev['mean'] = np.reshape(mean_stdev['mean'], [10]).astype(np.float32).tolist()
982
- mean_stdev['variance'] = np.reshape(mean_stdev['variance'], [10]).astype(np.float32).tolist()
983
- mean_stdev['stddev'] = np.reshape(mean_stdev['stddev'], [10]).astype(np.float32).tolist()
984
- mean_stdev['count'] = np.reshape(mean_stdev['count'], [1]).astype(np.int64).tolist()
985
- return mean_stdev
986
-
987
-
988
- def convert_simple_vector_to_path(seq):
989
- path=[]
990
- for i in range(seq.shape[0]):
991
- path_i=[]
992
- cmd = np.argmax(seq[i][:4])
993
- p0 = seq[i][4:6]
994
- p1 = seq[i][6:8]
995
- p2 = seq[i][8:10]
996
- if cmd == 0:
997
- break
998
- elif cmd == 1:
999
- path_i.append('M')
1000
- path_i.append(str(p2[0]))
1001
- path_i.append(str(p2[1]))
1002
- elif cmd == 2:
1003
- path_i.append('L')
1004
- path_i.append(str(p2[0]))
1005
- path_i.append(str(p2[1]))
1006
- elif cmd == 3:
1007
- path_i.append('C')
1008
- path_i.append(str(p0[0]))
1009
- path_i.append(str(p0[1]))
1010
- path_i.append(str(p1[0]))
1011
- path_i.append(str(p1[1]))
1012
- path_i.append(str(p2[0]))
1013
- path_i.append(str(p2[1]))
1014
- else:
1015
- print("wrong!!! to path")
1016
- path.append(path_i)
1017
- return path
1018
-
1019
- def clockwise(seq):
1020
- path = convert_simple_vector_to_path(seq)
1021
- path = _canonicalize(path)
1022
- ret = {}
1023
- vector = _path_to_vector(path, categorical=True)
1024
- vector = np.array(vector)
1025
- vector = np.concatenate([np.take(vector, [0, 4, 5, 9], axis=-1), vector[..., -6:]], axis=-1)
1026
- ret['seq_len'] = np.shape(vector)[0]
1027
- vector = _append_eos(vector.tolist(), True, 10)
1028
- ret['sequence'] = np.concatenate((vector, np.zeros(((MAX_SEQ_LEN - ret['seq_len']), 10))), 0)
1029
- return ret
1030
-
1031
- ################### CHECK VALID ##############################################
1032
- class MeanStddev:
1033
- """Accumulator to compute the mean/stdev of svg commands."""
1034
-
1035
- def create_accumulator(self):
1036
- curr_sum = np.zeros([10])
1037
- sum_sq = np.zeros([10])
1038
- return (curr_sum, sum_sq, 0) # x, x^2, count
1039
-
1040
- def add_input(self, sum_count, new_input):
1041
- (curr_sum, sum_sq, count) = sum_count
1042
- # new_input is a dict with keys = ['seq_len', 'sequence']
1043
- new_seq_len = new_input['seq_len'][0] # Line #754 'seq_len' is a list of one int
1044
- assert isinstance(new_seq_len, int), print(type(new_seq_len))
1045
-
1046
- # remove padding and eos from sequence
1047
- assert isinstance(new_input['sequence'], list), print(type(new_input['sequence']))
1048
- new_input_np = np.reshape(np.array(new_input['sequence']), [-1, 10])
1049
- assert isinstance(new_input_np, np.ndarray), print(type(new_input_np))
1050
- assert new_input_np.shape[0] >= new_seq_len
1051
- new_input_np = new_input_np[:new_seq_len, :]
1052
-
1053
- # accumulate new_sum and new_sum_sq
1054
- new_sum = np.sum([curr_sum, np.sum(new_input_np, axis=0)], axis=0)
1055
- new_sum_sq = np.sum([sum_sq, np.sum(np.power(new_input_np, 2), axis=0)],
1056
- axis=0)
1057
- return new_sum, new_sum_sq, count + new_seq_len
1058
-
1059
- def merge_accumulators(self, accumulators):
1060
- curr_sums, sum_sqs, counts = list(zip(*accumulators))
1061
- return np.sum(curr_sums, axis=0), np.sum(sum_sqs, axis=0), np.sum(counts)
1062
-
1063
- def extract_output(self, sum_count):
1064
- (curr_sum, curr_sum_sq, count) = sum_count
1065
- if count:
1066
- mean = np.divide(curr_sum, count)
1067
- variance = np.divide(curr_sum_sq, count) - np.power(mean, 2)
1068
- # -ve value could happen due to rounding
1069
- variance = np.max([variance, np.zeros(np.shape(variance))], axis=0)
1070
- stddev = np.sqrt(variance)
1071
- return {
1072
- 'mean': mean,
1073
- 'variance': variance,
1074
- 'stddev': stddev,
1075
- 'count': count
1076
- }
1077
- else:
1078
- return {
1079
- 'mean': float('NaN'),
1080
- 'variance': float('NaN'),
1081
- 'stddev': float('NaN'),
1082
- 'count': 0
1083
  }
 
1
+ # Copyright 2020 The Magenta Authors.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ # Lint as: python3
16
+ """Defines the Material Design Icons Problem."""
17
+ import io
18
+ import numpy as np
19
+ import re
20
+
21
+ from PIL import Image
22
+ from itertools import zip_longest
23
+ from skimage import draw
24
+
25
+
26
+ SVG_PREFIX_BIG = ('<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="'
27
+ 'http://www.w3.org/1999/xlink" width="256px" height="256px"'
28
+ ' style="-ms-transform: rotate(360deg); -webkit-transform:'
29
+ ' rotate(360deg); transform: rotate(360deg);" '
30
+ 'preserveAspectRatio="xMidYMid meet" viewBox="0 0 24 30">')
31
+ PATH_PREFIX_1 = '<path d="'
32
+ PATH_POSFIX_1 = '" fill="currentColor"/>'
33
+ SVG_POSFIX = '</svg>'
34
+
35
+ NUM_ARGS = {'v': 1, 'V': 1, 'h': 1, 'H': 1, 'a': 7, 'A': 7, 'l': 2, 'L': 2,
36
+ 't': 2, 'T': 2, 'c': 6, 'C': 6, 'm': 2, 'M': 2, 's': 4, 'S': 4,
37
+ 'q': 4, 'Q': 4, 'z': 0}
38
+ # in order of arg complexity, with absolutes clustered
39
+ # recall we don't handle all commands (see docstring)
40
+ CMDS_LIST = 'zHVMLTSQCAhvmltsqca' # was zhvmltsqcaHVMLTSQCA
41
+ CMD_MAPPING = {cmd: i for i, cmd in enumerate(CMDS_LIST)}
42
+
43
+ FEATURE_DIM = 10
44
+
45
+ MAX_SEQ_LEN = 120
46
+
47
+ # Manually Change Max Sequence
48
+ def change_max_seq_len(param):
49
+ global MAX_SEQ_LEN
50
+ MAX_SEQ_LEN = param
51
+ return MAX_SEQ_LEN
52
+
53
+ ############################### GENERAL UTILS #################################
54
+ def grouper(iterable, batch_size, fill_value=None):
55
+ """Helper method for returning batches of size batch_size of a dataset."""
56
+ # grouper('ABCDEF', 3) -> 'ABC', 'DEF'
57
+ args = [iter(iterable)] * batch_size
58
+ return zip_longest(*args, fillvalue=fill_value)
59
+
60
+
61
+ def _map_uni_to_alphanum(uni):
62
+ """Maps [0-9 A-Z a-z] to numbers 0-62."""
63
+ if 48 <= uni <= 57:
64
+ return uni - 48
65
+ elif 65 <= uni <= 90:
66
+ return uni - 65 + 10
67
+ return uni - 97 + 36
68
+
69
+
70
+ def _map_uni_to_alpha(uni):
71
+ """Maps [A-Z a-z] to numbers 0-52."""
72
+ if 65 <= uni <= 90:
73
+ return uni - 65
74
+ return uni - 97 + 26
75
+
76
+
77
+ ############# UTILS FOR CONVERTING SFD/SPLINESETS TO SVG PATHS ################
78
+ def _get_spline(sfd):
79
+ if 'SplineSet' not in sfd:
80
+ return ''
81
+ pro = sfd[sfd.index('SplineSet') + 10:] # skips 'SplineSet' (9 chars) plus one separator char
82
+ pro = pro[:pro.index('EndSplineSet')]
83
+ return pro
84
+
85
+
86
+ def _spline_to_path_list(spline, height, replace_with_prev=False):
87
+ """Converts SplineSet to a list of tokenized commands in svg path."""
88
+ path = []
89
+ prev_xy = []
90
+ for line in spline.splitlines():
91
+ if not line:
92
+ continue
93
+ tokens = line.split(' ')
94
+ cmd = tokens[-2]
95
+ if cmd not in 'cml':
96
+ # COMMAND NOT RECOGNIZED.
97
+ return []
98
+ # assert cmd in 'cml', 'Command not recognized: {}'.format(cmd)
99
+ args = tokens[:-2]
100
+ args = [float(x) for x in args if x]
101
+
102
+ if replace_with_prev and cmd in 'c':
103
+ args[:2] = prev_xy
104
+ prev_xy = args[-2:]
105
+
106
+ new_y_args = []
107
+ for i, a in enumerate(args):
108
+ if i % 2 == 1:
109
+ new_y_args.append((height - a))
110
+ else:
111
+ new_y_args.append((a))
112
+
113
+ path.append([cmd.upper()] + new_y_args)
114
+ return path
115
+
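# A minimal sketch of the SplineSet line convention handled above: the
# command letter is the second-to-last token and y-coordinates are flipped
# against the glyph height (the values below are illustrative).
print(_spline_to_path_list('100 200 m 0', 800))
# -> [['M', 100.0, 600.0]]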
116
+
117
+ def _sfd_to_path_list(single, replace_with_prev=False):
118
+ """Converts the given SFD glyph into a path."""
119
+ return _spline_to_path_list(_get_spline(single['sfd']), single['vwidth'], replace_with_prev)
120
+
121
+
122
+ #################### UTILS FOR PROCESSING TOKENIZED PATHS #####################
123
+ def _add_missing_cmds(path, remove_zs=False):
124
+ """Adds missing cmd tags to the commands in the svg."""
125
+ # For instance, the command 'a' takes 7 arguments, but some SVGs declare:
126
+ # a 1 2 3 4 5 6 7 8 9 10 11 12 13 14
127
+ # Which is 14 arguments. This function converts the above to the equivalent:
128
+ # a 1 2 3 4 5 6 7 a 8 9 10 11 12 13 14
129
+ #
130
+ # Note: if remove_zs is True, this also removes any occurrences of z commands.
131
+ new_path = []
132
+ for cmd in path:
133
+ if not remove_zs or cmd[0] not in 'Zz':
134
+ for new_cmd in add_missing_cmd(cmd):
135
+ new_path.append(new_cmd)
136
+ return new_path
137
+
138
+
139
+ def add_missing_cmd(command_list):
140
+ """Adds missing cmd tags to the given command list."""
141
+ # E.g.: given:
142
+ # ['a', '0', '0', '0', '0', '0', '0', '0',
143
+ # '0', '0', '0', '0', '0', '0', '0']
144
+ # Converts to:
145
+ # [['a', '0', '0', '0', '0', '0', '0', '0'],
146
+ # ['a', '0', '0', '0', '0', '0', '0', '0']]
147
+ # And returns a string that joins these elements with spaces.
148
+ cmd_tag = command_list[0]
149
+ args = command_list[1:]
150
+
151
+ final_cmds = []
152
+ for arg_batch in grouper(args, NUM_ARGS[cmd_tag]):
153
+ final_cmds.append([cmd_tag] + list(arg_batch))
154
+
155
+ if not final_cmds:
156
+ # command has no args (e.g.: 'z')
157
+ final_cmds = [[cmd_tag]]
158
+
159
+ return final_cmds
160
+
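# A quick sketch of the splitting described above, assuming the helpers in
# this module are in scope: a lineto carrying two coordinate pairs becomes
# two explicit lineto commands.
print(add_missing_cmd(['l', '1', '2', '3', '4']))
# -> [['l', '1', '2'], ['l', '3', '4']]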
161
+
162
+ def _normalize_args(arglist, norm, add=None, flip=False):
163
+ """Normalize the given args with the given norm value."""
164
+ new_arglist = []
165
+ for i, arg in enumerate(arglist):
166
+ new_arg = float(arg)
167
+
168
+ if add is not None:
169
+ add_to_x, add_to_y = add
170
+
171
+ # This argument is an x-coordinate if even, y-coordinate if odd
172
+ # except when flip == True
173
+ if i % 2 == 0:
174
+ new_arg += add_to_y if flip else add_to_x
175
+ else:
176
+ new_arg += add_to_x if flip else add_to_y
177
+
178
+ new_arglist.append(str(24 * new_arg / norm))
179
+ return new_arglist
180
+
181
+
182
+ def _normalize_based_on_viewbox(path, viewbox):
183
+ """Normalizes all args in a path to a standard 24x24 viewbox."""
184
+ # Each SVG lives in a 2D plane. The viewbox determines the region of that
185
+ # plane that gets rendered. For instance, some designers may work with a
186
+ # viewbox that's 24x24, others with one that's 100x100, etc.
187
+
188
+ # Suppose I design the letter "h" in the Arial style using a 100x100
189
+ # viewbox (let's call it icon A). Let's suppose the icon has height 75. Then,
190
+ # I design the same character using a 20x20 viewbox (call this icon B), with
191
+ # height 15 (=75% of 20). This means that, when rendered, both icons will look
192
+ # exactly the same, but the scale of the commands each icon is using is
193
+ # different. For instance, if icon A has a command like "lineTo 100 100", the
194
+ # equivalent command in icon B will be "lineTo 20 20".
195
+
196
+ # In order to avoid this problem and bring all real values to the same scale,
197
+ # I scale all icons' commands to use a 24x24 viewbox. This function does this:
198
+ # it converts a path that exists in the given viewbox into a standard 24x24
199
+ # viewbox.
200
+ viewbox = viewbox.split(' ')
201
+ norm = max(int(viewbox[-1]), int(viewbox[-2]))
202
+
203
+ if int(viewbox[-1]) > int(viewbox[-2]):
204
+ add_to_y = 0
205
+ add_to_x = abs(int(viewbox[-1]) - int(viewbox[-2])) / 2
206
+ else:
207
+ add_to_y = abs(int(viewbox[-1]) - int(viewbox[-2])) / 2
208
+ add_to_x = 0
209
+
210
+ new_path = []
211
+ for command in path:
212
+ if command[0] == 'a':
213
+ new_path.append([command[0]] + _normalize_args(command[1:3], norm)
214
+ + command[3:6] + _normalize_args(command[6:], norm))
215
+ elif command[0] == 'A':
216
+ new_path.append([command[0]] + _normalize_args(command[1:3], norm)
217
+ + command[3:6] + _normalize_args(command[6:], norm, add=(add_to_x, add_to_y)))
218
+ elif command[0] == 'V':
219
+ new_path.append([command[0]] + _normalize_args(command[1:], norm, add=(add_to_x, add_to_y), flip=True))
220
+ elif command[0] == command[0].upper():
221
+ new_path.append([command[0]] + _normalize_args(command[1:], norm, add=(add_to_x, add_to_y)))
222
+ elif command[0] in 'zZ':
223
+ new_path.append([command[0]])
224
+ else:
225
+ new_path.append([command[0]] + _normalize_args(command[1:], norm))
226
+
227
+ return new_path
228
+
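# Worked example of the rescaling above (a sketch; the square 100x100
# viewbox means no centering offset is added): each coordinate is mapped
# onto the standard 24-unit scale via 24 * value / 100.
print(_normalize_based_on_viewbox([['L', '50', '75']], '0 0 100 100'))
# -> [['L', '12.0', '18.0']]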
229
+
230
+ def _convert_args(args, curr_pos, cmd):
231
+ """Converts given args to relative values."""
232
+ # NOTE: glyphs only use a very small subset of commands (L, C, M, and Z -- I
233
+ # believe). So I'm not handling A and H for now.
234
+ if cmd in 'AH':
235
+ raise NotImplementedError('These commands have >6 args (not supported).')
236
+
237
+ new_args = []
238
+ for i, arg in enumerate(args):
239
+ x_or_y = i % 2
240
+ if cmd == 'H':
241
+ x_or_y = (i + 1) % 2
242
+ new_args.append(str(float(arg) - curr_pos[x_or_y]))
243
+
244
+ return new_args
245
+
246
+
247
+ def _update_curr_pos(curr_pos, cmd, start_of_path):
248
+ """Calculate the position of the pen after cmd is applied."""
249
+ if cmd[0] in 'ml':
250
+ curr_pos = [curr_pos[0] + float(cmd[1]), curr_pos[1] + float(cmd[2])]
251
+ if cmd[0] == 'm':
252
+ start_of_path = curr_pos
253
+ elif cmd[0] in 'z':
254
+ curr_pos = start_of_path
255
+ elif cmd[0] in 'h':
256
+ curr_pos = [curr_pos[0] + float(cmd[1]), curr_pos[1]]
257
+ elif cmd[0] in 'v':
258
+ curr_pos = [curr_pos[0], curr_pos[1] + float(cmd[1])]
259
+ elif cmd[0] in 'ctsqa':
260
+ curr_pos = [curr_pos[0] + float(cmd[-2]), curr_pos[1] + float(cmd[-1])]
261
+
262
+ return curr_pos, start_of_path
263
+
264
+
265
+ def _make_relative(cmds):
266
+ """Convert commands in a path to relative positioning."""
267
+ curr_pos = (0.0, 0.0)
268
+ start_of_path = (0.0, 0.0)
269
+ new_cmds = []
270
+ for cmd in cmds:
271
+ if cmd[0].lower() == cmd[0]:
272
+ new_cmd = cmd
273
+ elif cmd[0].lower() == 'z':
274
+ new_cmd = [cmd[0].lower()]
275
+ else:
276
+ new_cmd = [cmd[0].lower()] + _convert_args(cmd[1:], curr_pos, cmd=cmd[0])
277
+ new_cmds.append(new_cmd)
278
+ curr_pos, start_of_path = _update_curr_pos(curr_pos, new_cmd, start_of_path)
279
+ return new_cmds
280
+
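# A small sketch of the relative conversion, assuming absolute input
# commands: each coordinate is re-expressed as an offset from the pen
# position left by the previous command.
print(_make_relative([['M', '1', '1'], ['L', '3', '4']]))
# -> [['m', '1.0', '1.0'], ['l', '2.0', '3.0']]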
281
+
282
+ def _is_to_left_of(pt1, pt2):
283
+ pt1_norm = (pt1[0]**2 + pt1[1]**2)
284
+ pt2_norm = (pt2[0]**2 + pt2[1]**2)
285
+ return pt1[1] < pt2[1] or (pt1_norm == pt2_norm and pt1[0] < pt2[0])
286
+
287
+
288
+ def _get_leftmost_point(path):
289
+ """Returns the leftmost, topmost point of the path."""
290
+ leftmost = (float('inf'), float('inf'))
291
+ idx = -1
292
+
293
+ for i, cmd in enumerate(path):
294
+ if len(cmd) > 1:
295
+ endpoint = cmd[-2:]
296
+ if _is_to_left_of(endpoint, leftmost):
297
+ leftmost = endpoint
298
+ idx = i
299
+
300
+ return leftmost, idx
301
+
302
+
303
+ def _separate_substructures(path):
304
+ """Returns a list of subpaths, each representing substructures the glyph."""
305
+ substructures = []
306
+ curr = []
307
+ for cmd in path:
308
+ if cmd[0] in 'mM' and curr:
309
+ substructures.append(curr)
310
+ curr = []
311
+ curr.append(cmd)
312
+ if curr:
313
+ substructures.append(curr)
314
+ return substructures
315
+
316
+
317
+ def _is_clockwise(subpath):
318
+ """Returns whether the given subpath is clockwise-oriented."""
319
+ pts = [cmd[-2:] for cmd in subpath]
320
+ det = 0
321
+ for i in range(len(pts) - 1):
322
+ det += np.linalg.det(pts[i:i + 2])
323
+ return det > 0
324
+
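# Orientation sketch: the loop accumulates 2x2 determinants of consecutive
# endpoints (a shoelace-style signed area), so a closed unit square
# traversed (0,0)->(0,1)->(1,1)->(1,0) sums to -2 and reads as
# counterclockwise under this convention.
square = [['M', 0., 0.], ['L', 0., 1.], ['L', 1., 1.], ['L', 1., 0.], ['L', 0., 0.]]
print(_is_clockwise(square))  # -> False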
325
+
326
+ def _make_clockwise(subpath):
327
+ """Inverts the cardinality of the given subpath."""
328
+ new_path = [subpath[0]]
329
+ other_cmds = list(reversed(subpath[1:]))
330
+ for i, cmd in enumerate(other_cmds):
331
+ if i + 1 == len(other_cmds):
332
+ where_we_were = subpath[0][-2:]
333
+ else:
334
+ where_we_were = other_cmds[i + 1][-2:]
335
+
336
+ if len(cmd) > 3:
337
+ new_cmd = [cmd[0], cmd[3], cmd[4], cmd[1], cmd[2],
338
+ where_we_were[0], where_we_were[1]]
339
+ else:
340
+ new_cmd = [cmd[0], where_we_were[0], where_we_were[1]]
341
+
342
+ new_path.append(new_cmd)
343
+ return new_path
344
+
345
+
346
+ def _canonicalize(path):
347
+ """Makes all paths start at top left, and go clockwise first."""
348
+ # convert args to floats
349
+ path = [[x[0]] + list(map(float, x[1:])) for x in path]
350
+
351
+ # _canonicalize each subpath separately
352
+ new_substructures = []
353
+ for subpath in _separate_substructures(path):
354
+ leftmost_point, leftmost_idx = _get_leftmost_point(subpath)
355
+ reordered = ([['M', leftmost_point[0], leftmost_point[1]]] + subpath[leftmost_idx + 1:] + subpath[1:leftmost_idx + 1])
356
+ new_substructures.append((reordered, leftmost_point))
357
+
358
+ new_path = []
359
+ first_substructure_done = False
360
+ should_flip_cardinality = False
361
+ for sp, _ in sorted(new_substructures, key=lambda x: (x[1][1], x[1][0])):
362
+ if not first_substructure_done:
363
+ # we're looking at the first substructure now, so we can determine whether we
364
+ # will flip the cardinality of the whole icon or not
365
+ should_flip_cardinality = not _is_clockwise(sp)
366
+ first_substructure_done = True
367
+
368
+ if should_flip_cardinality:
369
+ sp = _make_clockwise(sp)
370
+
371
+ new_path.extend(sp)
372
+
373
+ # convert args to strs
374
+ path = [[x[0]] + list(map(str, x[1:])) for x in new_path]
375
+ return path
376
+
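# End-to-end canonicalization sketch on a closed counterclockwise square
# (string args are fine; they are cast to floats internally): the winding
# is flipped so the subpath runs clockwise from its leftmost point.
ccw = [['M', '0', '0'], ['L', '0', '1'], ['L', '1', '1'], ['L', '1', '0'], ['L', '0', '0']]
print(_canonicalize(ccw))
# -> [['M', '0.0', '0.0'], ['L', '1.0', '0.0'], ['L', '1.0', '1.0'],
#     ['L', '0.0', '1.0'], ['L', '0.0', '0.0']]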
377
+
378
+ # ######### UTILS FOR CONVERTING TOKENIZED PATHS TO VECTORS ###########
379
+ def _path_to_vector(path, categorical=False):
380
+ """Converts path's commands to a series of vectors."""
381
+ # Notes:
382
+ # - The SimpleSVG dataset does not have any 't', 'q', 'Z', 'T', or 'Q'.
383
+ # Thus, we don't handle those here.
384
+ # - We also removed all 'z's.
385
+ # - The x-axis-rotation argument to a commands is always 0 in this
386
+ # dataset, so we ignore it
387
+
388
+ # Many commands have args that correspond to args in other commands.
389
+ # v __,__ _______________ ______________,_________ __,__ __,__ _,y
390
+ # h __,__ _______________ ______________,_________ __,__ __,__ x,_
391
+ # z __,__ _______________ ______________,_________ __,__ __,__ _,_
392
+ # a rx,ry x-axis-rotation large-arc-flag,sweepflag __,__ __,__ x,y
393
+ # l __,__ _______________ ______________,_________ __,__ __,__ x,y
394
+ # c __,__ _______________ ______________,_________ x1,y1 x2,y2 x,y
395
+ # m __,__ _______________ ______________,_________ __,__ __,__ x,y
396
+ # s __,__ _______________ ______________,_________ __,__ x2,y2 x,y
397
+
398
+ # So each command will be converted to a vector where the dimension is the
399
+ # minimal number of arguments to all commands:
400
+ # [rx, ry, large-arc-flag, sweepflag, x1, y1, x2, y2, x, y]
401
+ # If a command does not output a certain arg, it is set to 0.
402
+ # "l 5,5" becomes [0, 0, 0, 0, 0, 0, 0, 0, 5, 5]
403
+
404
+ # Also note, as of now we also output an extra dimension at index 0, which
405
+ # indicates which command is being outputted (integer).
406
+ new_path = []
407
+ for cmd in path:
408
+ new_path.append(_cmd_to_vector(cmd, categorical=categorical))
409
+ return new_path
410
+
411
+
412
+ def _cmd_to_vector(cmd_list, categorical=False):
413
+ """Converts the given command (given as a list) into a vector."""
414
+ # For description of how this conversion happens, see
415
+ # _path_to_vector docstring.
416
+ cmd = cmd_list[0]
417
+ args = cmd_list[1:]
418
+
419
+ if not categorical:
420
+ # integer, for MSE
421
+ command = [float(CMD_MAPPING[cmd])]
422
+ else:
423
+ # one hot + 1 dim for EOS.
424
+ command = [0.0] * (len(CMDS_LIST) + 1)
425
+ command[CMD_MAPPING[cmd] + 1] = 1.0
426
+
427
+ arguments = [0.0] * 10
428
+ if cmd in 'hH':
429
+ arguments[8] = float(args[0]) # x
430
+ elif cmd in 'vV':
431
+ arguments[9] = float(args[0]) # y
432
+ elif cmd in 'mMlLtT':
433
+ arguments[8] = float(args[0]) # x
434
+ arguments[9] = float(args[1]) # y
435
+ elif cmd in 'sSqQ':
436
+ arguments[6] = float(args[0]) # x2
437
+ arguments[7] = float(args[1]) # y2
438
+ arguments[8] = float(args[2]) # x
439
+ arguments[9] = float(args[3]) # y
440
+ elif cmd in 'cC':
441
+ arguments[4] = float(args[0]) # x1
442
+ arguments[5] = float(args[1]) # y1
443
+ arguments[6] = float(args[2]) # x2
444
+ arguments[7] = float(args[3]) # y2
445
+ arguments[8] = float(args[4]) # x
446
+ arguments[9] = float(args[5]) # y
447
+ elif cmd in 'aA':
448
+ arguments[0] = float(args[0]) # rx
449
+ arguments[1] = float(args[1]) # ry
450
+ # we skip x-axis-rotation
451
+ arguments[2] = float(args[3]) # large-arc-flag
452
+ arguments[3] = float(args[4]) # sweep-flag
453
+ # a does not have x1, y1, x2, y2 args
454
+ arguments[8] = float(args[5]) # x
455
+ arguments[9] = float(args[6]) # y
456
+
457
+ return command + arguments
458
+
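# Encoding sketch matching the table above: "L 5 5" fills only the final
# (x, y) slots of the 10-dim argument vector, with the command id prepended
# in the non-categorical case ('L' is index 4 in CMDS_LIST).
print(_cmd_to_vector(['L', '5', '5']))
# -> [4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, 5.0]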
459
+
460
+ ################## UTILS FOR RENDERING PATH INTO IMAGE #################
461
+ def _cubicbezier(x0, y0, x1, y1, x2, y2, x3, y3, n=40):
462
+ """Return n points along cubiz bezier with given control points."""
463
+ # from http://rosettacode.org/wiki/Bitmap/B%C3%A9zier_curves/Cubic
464
+ pts = []
465
+ for i in range(n + 1):
466
+ t = float(i) / float(n)
467
+ a = (1. - t)**3
468
+ b = 3. * t * (1. - t)**2
469
+ c = 3.0 * t**2 * (1.0 - t)
470
+ d = t**3
471
+
472
+ x = float(a * x0 + b * x1 + c * x2 + d * x3)
473
+ y = float(a * y0 + b * y1 + c * y2 + d * y3)
474
+ pts.append((x, y))
475
+ return list(zip(*pts))
476
+
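# Sampling sketch: the points come back transposed as (xs, ys), n + 1
# samples each; collinear control points degenerate to the straight
# segment between the endpoints.
xs, ys = _cubicbezier(0, 0, 1, 1, 2, 2, 3, 3, n=2)
print(xs, ys)  # -> (0.0, 1.5, 3.0) (0.0, 1.5, 3.0)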
477
+
478
+ def _update_pos(curr_pos, end_pos, absolute):
479
+ if absolute:
480
+ return end_pos
481
+ return curr_pos[0] + end_pos[0], curr_pos[1] + end_pos[1]
482
+
483
+
484
+ def constant_color(*unused_args):
485
+ return np.array([255, 255, 255])
486
+
487
+
488
+ def _render_cubic(canvas, curr_pos, c_args, absolute, color):
489
+ """Renders a cubic bezier curve in the given canvas."""
490
+ if not absolute:
491
+ c_args[0] += curr_pos[0]
492
+ c_args[1] += curr_pos[1]
493
+ c_args[2] += curr_pos[0]
494
+ c_args[3] += curr_pos[1]
495
+ c_args[4] += curr_pos[0]
496
+ c_args[5] += curr_pos[1]
497
+ x, y = _cubicbezier(curr_pos[0], curr_pos[1],
498
+ c_args[0], c_args[1],
499
+ c_args[2], c_args[3],
500
+ c_args[4], c_args[5])
501
+ max_possible = len(canvas)
502
+ x = [int(round(x_)) for x_ in x]
503
+ y = [int(round(y_)) for y_ in y]
504
+
505
+ def within_range(x):
506
+ return 0 <= x < max_possible
507
+
508
+ filtered = [(x_, y_) for x_, y_ in zip(x, y)
509
+ if within_range(x_) and within_range(y_)]
510
+ if not filtered:
511
+ return
512
+ x, y = list(zip(*filtered))
513
+ canvas[y, x, :] = color
514
+
515
+
516
+ def _render_line(canvas, curr_pos, l_args, absolute, color):
517
+ """Renders a line in the given canvas."""
518
+ end_point = l_args
519
+ if not absolute:
520
+ end_point[0] += curr_pos[0]
521
+ end_point[1] += curr_pos[1]
522
+ rr, cc, val = draw.line_aa(int(curr_pos[0]), int(curr_pos[1]),
523
+ int(end_point[0]), int(end_point[1]))
524
+
525
+ max_possible = len(canvas)
526
+
527
+ def within_range(x):
528
+ return 0 <= x < max_possible
529
+
530
+ filtered = [(x, y, v) for x, y, v in zip(rr, cc, val)
531
+ if within_range(x) and within_range(y)]
532
+ if not filtered:
533
+ return
534
+ rr, cc, val = list(zip(*filtered))
535
+ val = [(v * color) for v in val]
536
+ canvas[cc, rr, :] = val
537
+
538
+
539
+ def _per_step_render(path, absolute=False, color=constant_color):
540
+ """Render the icon's edges, given its path."""
541
+ def to_canvas_size(l):
542
+ return [float(f) * (64. / 24.) for f in l]
543
+
544
+ canvas = np.zeros((64, 64, 3))
545
+ curr_pos = (0.0, 0.0)
546
+ for i, cmd in enumerate(path):
547
+ if not cmd:
548
+ continue
549
+ if cmd[0] in 'mM':
550
+ curr_pos = _update_pos(curr_pos, to_canvas_size(cmd[-2:]), absolute)
551
+ elif cmd[0] in 'cC':
552
+ _render_cubic(canvas, curr_pos, to_canvas_size(cmd[1:]), absolute, color(i, 55))
553
+ curr_pos = _update_pos(curr_pos, to_canvas_size(cmd[-2:]), absolute)
554
+ elif cmd[0] in 'lL':
555
+ _render_line(canvas, curr_pos, to_canvas_size(cmd[1:]), absolute, color(i, 55))
556
+ curr_pos = _update_pos(curr_pos, to_canvas_size(cmd[1:]), absolute)
557
+
558
+ return canvas
559
+
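# Rasterization sketch: coordinates are scaled from the 24-unit system onto
# the 64x64 canvas (factor 64/24) and only edges are drawn, so a single
# line yields a 64x64x3 array with bright pixels along the segment.
canvas = _per_step_render([['M', 3, 3], ['L', 12, 12]], absolute=True)
print(canvas.shape, canvas.max())  # -> (64, 64, 3) 255.0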
560
+
561
+ def _zoom_out(path_list, add_baseline=0., per=22):
562
+ """Makes glyph slightly smaller in viewbox, makes some descenders visible."""
563
+ # assumes tensor is already unnormalized, and in long form
564
+ new_path = []
565
+ for command in path_list:
566
+ args = []
567
+ is_even = False
568
+ for arg in command[1:]:
569
+ if is_even:
570
+ args.append(str(float(arg) - ((24. - per) / 24.) * 64. / 4.))
571
+ is_even = False
572
+ else:
573
+ args.append(str(float(arg) - add_baseline))
574
+ is_even = True
575
+ new_path.append([command[0]] + args)
576
+ return new_path
577
+
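# Numeric sketch of the shift applied above: each y-coordinate moves by
# ((24 - per) / 24) * 64 / 4 canvas units (8.0 for per=12 below), while
# x-coordinates only move when add_baseline is nonzero.
print(_zoom_out([['L', '12', '12']], per=12))
# -> [['L', '12.0', '4.0']]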
578
+
579
+ ##################### UTILS FOR PROCESSING VECTORS ################
580
+ def _append_eos(sample, categorical, feature_dim):
581
+ if not categorical:
582
+ eos = -1 * np.ones(feature_dim)
583
+ else:
584
+ eos = np.zeros(feature_dim)
585
+ eos[0] = 1.0
586
+ sample.append(eos)
587
+ return sample
588
+
589
+
590
+ def _make_simple_cmds_long(out):
591
+ """Converts svg decoder output to format required by some render functions."""
592
+ # out has 10 dims
593
+ # the first 4 are respectively dims 0, 4, 5, 9 of the full 20-dim onehot vec
594
+ # the latter 6 are the 6 last dims of the 10-dim arg vec
595
+ shape_minus_dim = list(np.shape(out))[:-1]
596
+ return np.concatenate([out[..., :1],
597
+ np.zeros(shape_minus_dim + [3]),
598
+ out[..., 1:3],
599
+ np.zeros(shape_minus_dim + [3]),
600
+ out[..., 3:4],
601
+ np.zeros(shape_minus_dim + [14]),
602
+ out[..., 4:]], -1)
603
+
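# Shape sketch: the simplified 10-dim rows (4 command dims + 6 args) are
# padded back out to the 30-dim layout (20-dim command one-hot + 10 args)
# consumed by _vector_to_svg; the 4 command dims land at one-hot slots
# 0 (EOS), 4 (M), 5 (L) and 9 (C).
print(_make_simple_cmds_long(np.zeros((51, 10))).shape)  # -> (51, 30)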
604
+
605
+ ################# UTILS FOR CONVERTING VECTORS TO SVGS ########################
606
+ def _vector_to_svg(vectors, stop_at_eos=False, categorical=False):
607
+ """Tranforms a given vector to an svg string."""
608
+ new_path = []
609
+ for vector in vectors:
610
+ if stop_at_eos:
611
+ if categorical:
612
+ try:
613
+ is_eos = np.argmax(vector[:len(CMDS_LIST) + 1]) == 0
614
+ except Exception:
615
+ raise Exception(vector)
616
+ else:
617
+ is_eos = vector[0] < -0.5
618
+
619
+ if is_eos:
620
+ break
621
+ new_path.append(' '.join(_vector_to_cmd(vector, categorical=categorical)))
622
+ new_path = ' '.join(new_path)
623
+ return SVG_PREFIX_BIG + PATH_PREFIX_1 + new_path + PATH_POSFIX_1 + SVG_POSFIX
624
+
625
+
626
+ def _vector_to_cmd(vector, categorical=False, return_floats=False):
627
+ """Does the inverse transformation as _cmd_to_vector()."""
628
+ cast_fn = float if return_floats else str
629
+ if categorical:
630
+ command = vector[:len(CMDS_LIST) + 1]
631
+ arguments = vector[len(CMDS_LIST) + 1:]
632
+ cmd_idx = np.argmax(command) - 1
633
+ else:
634
+ command, arguments = vector[:1], vector[1:]
635
+ cmd_idx = int(round(command[0]))
636
+
637
+ if cmd_idx < -0.5:
638
+ # EOS
639
+ return []
640
+ if cmd_idx >= len(CMDS_LIST):
641
+ cmd_idx = len(CMDS_LIST) - 1
642
+
643
+ cmd = CMDS_LIST[cmd_idx]
644
+ cmd = cmd.upper()
645
+ cmd_list = [cmd]
646
+
647
+ if cmd in 'hH':
648
+ cmd_list.append(cast_fn(arguments[8])) # x
649
+ elif cmd in 'vV':
650
+ cmd_list.append(cast_fn(arguments[9])) # y
651
+ elif cmd in 'mMlLtT':
652
+ cmd_list.append(cast_fn(arguments[8])) # x
653
+ cmd_list.append(cast_fn(arguments[9])) # y
654
+ elif cmd in 'sSqQ':
655
+ cmd_list.append(cast_fn(arguments[6])) # x2
656
+ cmd_list.append(cast_fn(arguments[7])) # y2
657
+ cmd_list.append(cast_fn(arguments[8])) # x
658
+ cmd_list.append(cast_fn(arguments[9])) # y
659
+ elif cmd in 'cC':
660
+ cmd_list.append(cast_fn(arguments[4])) # x1
661
+ cmd_list.append(cast_fn(arguments[5])) # y1
662
+ cmd_list.append(cast_fn(arguments[6])) # x2
663
+ cmd_list.append(cast_fn(arguments[7])) # y2
664
+ cmd_list.append(cast_fn(arguments[8])) # x
665
+ cmd_list.append(cast_fn(arguments[9])) # y
666
+ elif cmd in 'aA':
667
+ cmd_list.append(cast_fn(arguments[0])) # rx
668
+ cmd_list.append(cast_fn(arguments[1])) # ry
669
+ # x-axis-rotation is always 0
670
+ cmd_list.append(cast_fn('0'))
671
+ # the following two flags are binary.
672
+ cmd_list.append(cast_fn(1 if arguments[2] > 0.5 else 0)) # large-arc-flag
673
+ cmd_list.append(cast_fn(1 if arguments[3] > 0.5 else 0)) # sweep-flag
674
+ cmd_list.append(cast_fn(arguments[8])) # x
675
+ cmd_list.append(cast_fn(arguments[9])) # y
676
+
677
+ return cmd_list
678
+
679
+
680
+ ############## UTILS FOR CONVERTING SVGS/VECTORS TO IMAGES ###################
681
+
682
+ # From Infer notebook
683
+ start = ("""<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www."""
684
+ """w3.org/1999/xlink" width="256px" height="256px" style="-ms-trans"""
685
+ """form: rotate(360deg); -webkit-transform: rotate(360deg); transfo"""
686
+ """rm: rotate(360deg);" preserveAspectRatio="xMidYMid meet" viewBox"""
687
+ """="0 0 24 30"><path d=\"""")
688
+ end = """\" fill="currentColor"/></svg>"""
689
+
690
+ COMMAND_RX = re.compile("([MmLlHhVvCcSsQqTtAaZz])")
691
+ FLOAT_RX = re.compile("[-+]?[0-9]*\.?[0-9]+(?:[eE][-+]?[0-9]+)?") # noqa
692
+
693
+
694
+ def svg_html_to_path_string(svg):
695
+ return svg.replace(start, '').replace(end, '')
696
+
697
+
698
+ def _tokenize(pathdef):
699
+ """Returns each svg token from path list."""
700
+ # e.g.: 'm0.1-.5c0,6' -> 'm', '0.1', '-.5', 'c', '0', '6'
701
+ for x in COMMAND_RX.split(pathdef):
702
+ if x != '' and x in 'MmLlHhVvCcSsQqTtAaZz':
703
+ yield x
704
+ for token in FLOAT_RX.findall(x):
705
+ yield token
706
+
707
+
708
+ def path_string_to_tokenized_commands(path):
709
+ """Tokenizes the given path string.
710
+
711
+ E.g.:
712
+ Given M 0.5 0.5 l 0.25 0.25 z
713
+ Returns [['M', '0.5', '0.5'], ['l', '0.25', '0.25'], ['z']]
714
+ """
715
+ new_path = []
716
+ current_cmd = []
717
+ for token in _tokenize(path):
718
+ if len(current_cmd) > 0:
719
+ if token in 'MmLlHhVvCcSsQqTtAaZz':
720
+ # cmd ended, convert to vector and add to new_path
721
+ new_path.append(current_cmd)
722
+ current_cmd = [token]
723
+ else:
724
+ # add arg to command
725
+ current_cmd.append(token)
726
+ else:
727
+ # add to start new cmd
728
+ current_cmd.append(token)
729
+
730
+ if current_cmd:
731
+ # process command still unprocessed
732
+ new_path.append(current_cmd)
733
+
734
+ return new_path
735
+
736
+
737
+ def separate_substructures(tokenized_commands):
738
+ """Returns a list of SVG substructures."""
739
+ # every moveTo command starts a new substructure
740
+ # an SVG substructure is a subpath that closes on itself
741
+ # such as the outer and the inner edge of the character `o`
742
+ substructures = []
743
+ curr = []
744
+ for cmd in tokenized_commands:
745
+ if cmd[0] in 'mM' and len(curr) > 0:
746
+ substructures.append(curr)
747
+ curr = []
748
+ curr.append(cmd)
749
+ if len(curr) > 0:
750
+ substructures.append(curr)
751
+ return substructures
752
+
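# Substructure split sketch: every moveTo opens a new subpath, so a
# two-contour glyph (like 'o') tokenizes into two substructures.
cmds = [['M', '0', '0'], ['l', '1', '1'], ['M', '5', '5'], ['l', '1', '1']]
print(separate_substructures(cmds))
# -> [[['M', '0', '0'], ['l', '1', '1']], [['M', '5', '5'], ['l', '1', '1']]]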
753
+
754
+ def postprocess(svg, dist_thresh=2., skip=False):
755
+ path = svg_html_to_path_string(svg)
756
+ svg_template = svg.replace(path, '{}')
757
+ tokenized_commands = path_string_to_tokenized_commands(path)
758
+
759
+ def dist(a, b):
760
+ return np.sqrt((float(a[0]) - float(b[0]))**2 + (float(a[1]) - float(b[1]))**2)
761
+
762
+ def are_close_together(a, b, t):
763
+ return dist(a, b) < t
764
+
765
+ # first, go through each start/end point and merge if they're close enough
766
+ # together (that is, make end point the same as the start point).
767
+ # TODO: there are better ways of doing this, in a way that propagates error
768
+ # back (so if total error is 0.2, go through all N commands in this
769
+ # substructure and fix each by 0.2/N (unless they have 0 vertical change))
770
+ substructures = separate_substructures(tokenized_commands)
771
+
772
+ previous_substructure_endpoint = (0., 0.,)
773
+ for substructure in substructures:
774
+ # first, if the last substructure's endpoint was updated, we must update
775
+ # the start point of this one to reflect the opposite update
776
+ substructure[0][-2] = str(float(substructure[0][-2]) -
777
+ previous_substructure_endpoint[0])
778
+ substructure[0][-1] = str(float(substructure[0][-1]) -
779
+ previous_substructure_endpoint[1])
780
+
781
+ start = list(map(float, substructure[0][-2:]))
782
+ curr_pos = (0., 0.)
783
+ for cmd in substructure:
784
+ curr_pos, _ = _update_curr_pos(curr_pos, cmd, (0., 0.))
785
+ if are_close_together(start, curr_pos, dist_thresh):
786
+ new_point = np.array(start)
787
+ previous_substructure_endpoint = ((new_point[0] - curr_pos[0]),
788
+ (new_point[1] - curr_pos[1]))
789
+ substructure[-1][-2] = str(float(substructure[-1][-2]) +
790
+ (new_point[0] - curr_pos[0]))
791
+ substructure[-1][-1] = str(float(substructure[-1][-1]) +
792
+ (new_point[1] - curr_pos[1]))
793
+ if substructure[-1][0] in 'cC':
794
+ substructure[-1][-4] = str(float(substructure[-1][-4]) +
795
+ (new_point[0] - curr_pos[0]))
796
+ substructure[-1][-3] = str(float(substructure[-1][-3]) +
797
+ (new_point[1] - curr_pos[1]))
798
+
799
+ if skip:
800
+ return svg_template.format(' '.join([' '.join(' '.join(cmd) for cmd in s)
801
+ for s in substructures]))
802
+
803
+ def cosa(x, y):
804
+ return (x[0] * y[0] + x[1] * y[1]) / ((np.sqrt(x[0]**2 + x[1]**2) * np.sqrt(y[0]**2 + y[1]**2)))
805
+
806
+ def rotate(a, x, y):
807
+ return (x * np.cos(a) - y * np.sin(a), y * np.cos(a) + x * np.sin(a))
808
+ # second, we need to find adjacent bezier curves and, if their control points
809
+ # are well enough aligned, fully align them
810
+ for substructure in substructures:
811
+ curr_pos = (0., 0.)
812
+ new_curr_pos, _ = _update_curr_pos((0., 0.,), substructure[0], (0., 0.))
813
+
814
+ for cmd_idx in range(1, len(substructure)):
815
+ prev_cmd = substructure[cmd_idx-1]
816
+ cmd = substructure[cmd_idx]
817
+
818
+ new_new_curr_pos, _ = _update_curr_pos(
819
+ new_curr_pos, cmd, (0., 0.))
820
+
821
+ if cmd[0] == 'c':
822
+ if prev_cmd[0] == 'c':
823
+ # check the vectors and update if needed
824
+ # previous control pt wrt new curr point
825
+ prev_ctr_point = (curr_pos[0] + float(prev_cmd[3]) - new_curr_pos[0],
826
+ curr_pos[1] + float(prev_cmd[4]) - new_curr_pos[1])
827
+ ctr_point = (float(cmd[1]), float(cmd[2]))
828
+
829
+ if -1. < cosa(prev_ctr_point, ctr_point) < -0.95:
830
+ # calculate exact angle between the two vectors
831
+ angle_diff = (np.pi - np.arccos(cosa(prev_ctr_point, ctr_point)))/2
832
+
833
+ # rotate each vector by angle/2 in the correct direction for each.
834
+ sign = np.sign(np.cross(prev_ctr_point, ctr_point))
835
+ new_ctr_point = rotate(sign * angle_diff, *ctr_point)
836
+ new_prev_ctr_point = rotate(-sign * angle_diff, *prev_ctr_point)
837
+
838
+ # override the previous control points
839
+ # (which has to be wrt previous curr position)
840
+ substructure[cmd_idx-1][3] = str(new_prev_ctr_point[0] -
841
+ curr_pos[0] + new_curr_pos[0])
842
+ substructure[cmd_idx-1][4] = str(new_prev_ctr_point[1] -
843
+ curr_pos[1] + new_curr_pos[1])
844
+ substructure[cmd_idx][1] = str(new_ctr_point[0])
845
+ substructure[cmd_idx][2] = str(new_ctr_point[1])
846
+
847
+ curr_pos = new_curr_pos
848
+ new_curr_pos = new_new_curr_pos
849
+
850
+ return svg_template.format(' '.join([' '.join(' '.join(cmd) for cmd in s)
851
+ for s in substructures]))
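# Note on the local helpers above (a sketch; cosa and rotate are closures,
# so they are not callable from outside postprocess): rotate applies the
# standard 2D rotation (x cos a - y sin a, y cos a + x sin a), e.g. a
# quarter turn maps (1, 0) to roughly (0, 1) up to floating-point error.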
852
+
853
+
854
+ # def get_means_stdevs(data_dir):
855
+ # """Returns the means and stdev saved in data_dir."""
856
+ # if data_dir not in means_stdevs:
857
+ # with tf.gfile.Open(os.path.join(data_dir, 'mean.npz'), 'r') as f:
858
+ # mean_npz = np.load(f)
859
+ # with tf.gfile.Open(os.path.join(data_dir, 'stdev.npz'), 'r') as f:
860
+ # stdev_npz = np.load(f)
861
+ # means_stdevs[data_dir] = (mean_npz, stdev_npz)
862
+ # return means_stdevs[data_dir]
863
+
864
+
865
+ def render(tensor, data_dir=None):
866
+ """Converts SVG decoder output into HTML svg."""
867
+ # undo normalization
868
+ # mean_npz, stdev_npz = get_means_stdevs(data_dir)
869
+ # tensor = (tensor * stdev_npz) + mean_npz
870
+
871
+ # convert to html
872
+ tensor = _make_simple_cmds_long(tensor)
873
+ # vector = np.squeeze(np.squeeze(tensor, 0), 2)
874
+ html = _vector_to_svg(tensor, stop_at_eos=True, categorical=True)
875
+
876
+ # some aesthetic postprocessing
877
+ html = postprocess(html)
878
+ html = html.replace('256px', '50px')
879
+
880
+ return html
881
+
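# End-to-end sketch of the decoder-output format this consumes, assuming
# simplified 10-dim rows [eos, m, l, c, x1, y1, x2, y2, x, y]: a moveTo,
# one lineTo, then an EOS row.
seq = np.array([[0, 1, 0, 0, 0, 0, 0, 0, 6, 6],
                [0, 0, 1, 0, 0, 0, 0, 0, 18, 18],
                [1, 0, 0, 0, 0, 0, 0, 0, 0, 0]], dtype=np.float32)
html = render(seq)  # '<svg ...><path d="M 6.0 6.0 L 18.0 18.0" .../></svg>'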
882
+ ###############
883
+
884
+
885
+ def convert_to_svg(decoder_output, categorical=False):
886
+ converted = []
887
+ for example in decoder_output:
888
+ converted.append(_vector_to_svg(example, True, categorical=categorical))
889
+ return np.array(converted)
890
+
891
+
892
+ def create_image_conversion_fn(max_outputs, categorical=False):
893
+ """Binds the number of outputs to the image conversion fn (to svg or png)."""
894
+ def convert_to_svg(decoder_output):
895
+ converted = []
896
+ for example in decoder_output:
897
+ if len(converted) == max_outputs:
898
+ break
899
+ converted.append(_vector_to_svg(example, True, categorical=categorical))
900
+ return np.array(converted)
901
+
902
+ return convert_to_svg
903
+
904
+
905
+ ################### UTILS FOR CREATING TF SUMMARIES ##########################
906
+ def _make_encoded_image(img_tensor):
907
+ pil_img = Image.fromarray(np.squeeze(img_tensor * 255).astype(np.uint8), mode='L')
908
+ buff = io.BytesIO()
909
+ pil_img.save(buff, format='png')
910
+ encoded_image = buff.getvalue()
911
+ return encoded_image
912
+
913
+
914
+ ################### CHECK GLYPH/PATH VALID ##############################################
915
+ def is_valid_glyph(g):
916
+ is_09 = 48 <= g['uni'] <= 57
917
+ is_capital_az = 65 <= g['uni'] <= 90
918
+ is_az = 97 <= g['uni'] <= 122
919
+ is_valid_dims = g['width'] != 0 and g['vwidth'] != 0
920
+ return (is_09 or is_capital_az or is_az) and is_valid_dims
921
+
922
+
923
+ def is_valid_path(pathunibfp):
924
+ return pathunibfp[0] and len(pathunibfp[0]) <= MAX_SEQ_LEN
925
+
926
+
927
+ ################### DATASET PROCESSING #######################################
928
+ def convert_to_path(g):
929
+ """Converts SplineSet in SFD font to str path."""
930
+ path = _sfd_to_path_list(g)
931
+ path = _add_missing_cmds(path, remove_zs=False)
932
+ path = _normalize_based_on_viewbox(path, '0 0 {} {}'.format(g['width'], g['vwidth']))
933
+ return path, g['uni'], g['binary_fp']
934
+
935
+
936
+ def create_example(pathunibfp):
937
+ """Bulk of dataset processing. Converts str path to np array"""
938
+ path, uni, binary_fp = pathunibfp
939
+ final = {}
940
+
941
+ # zoom out
942
+ path = _zoom_out(path)
943
+ # make clockwise
944
+ path = _canonicalize(path)
945
+
946
+ # render path for training
947
+ final['rendered'] = _per_step_render(path, absolute=True)
948
+
949
+ # make path relative
950
+ # path = _make_relative(path)
951
+ # convert to vector
952
+ vector = _path_to_vector(path, categorical=True)
953
+ # make simple vector
954
+ vector = np.array(vector)
955
+ vector = np.concatenate([np.take(vector, [0, 4, 5, 9], axis=-1), vector[..., -6:]], axis=-1)
956
+
957
+ # count some stats
958
+ final['seq_len'] = np.shape(vector)[0]
959
+ # final['class'] = int(_map_uni_to_alphanum(uni))
960
+ final['class'] = int(_map_uni_to_alpha(uni)) # be advised that the class is useless because it is all 0
961
+ final['binary_fp'] = str(binary_fp)
962
+
963
+ # append eos
964
+ vector = _append_eos(vector.tolist(), True, 10)
965
+
966
+ # pad path to MAX_SEQ_LEN + 1 (with eos)
967
+ final['sequence'] = np.concatenate((vector, np.zeros(((MAX_SEQ_LEN - final['seq_len']), 10))), 0)
968
+
969
+ # make pure list:
970
+ # use last channel only
971
+ final['rendered'] = np.reshape(final['rendered'][..., 0], [64 * 64]).astype(np.float32).tolist()
972
+ final['sequence'] = np.reshape(final['sequence'], [(MAX_SEQ_LEN + 1) * 10]).astype(np.float32).tolist()
973
+ final['class'] = np.reshape(final['class'], [1]).astype(np.int64).tolist()
974
+ final['seq_len'] = np.reshape(final['seq_len'], [1]).astype(np.int64).tolist()
975
+ return final
976
+
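# Shape sketch of the flattened example above (with the default
# MAX_SEQ_LEN = 120): 'rendered' holds 64 * 64 = 4096 floats from the last
# image channel, and 'sequence' holds (120 + 1) * 10 = 1210 floats, i.e.
# the EOS-terminated command sequence padded to MAX_SEQ_LEN + 1 rows.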
977
+
978
+ def mean_to_example(mean_stdev):
979
+ """Converts the found mean and stdev to example."""
980
+ # mean_stdev is a dict
981
+ mean_stdev['mean'] = np.reshape(mean_stdev['mean'], [10]).astype(np.float32).tolist()
982
+ mean_stdev['variance'] = np.reshape(mean_stdev['variance'], [10]).astype(np.float32).tolist()
983
+ mean_stdev['stddev'] = np.reshape(mean_stdev['stddev'], [10]).astype(np.float32).tolist()
984
+ mean_stdev['count'] = np.reshape(mean_stdev['count'], [1]).astype(np.int64).tolist()
985
+ return mean_stdev
986
+
987
+
988
+ def convert_simple_vector_to_path(seq):
989
+ path=[]
990
+ for i in range(seq.shape[0]):
991
+ path_i=[]
992
+ cmd = np.argmax(seq[i][:4])
993
+ p0 = seq[i][4:6]
994
+ p1 = seq[i][6:8]
995
+ p2 = seq[i][8:10]
996
+ if cmd == 0:
997
+ break
998
+ elif cmd == 1:
999
+ path_i.append('M')
1000
+ path_i.append(str(p2[0]))
1001
+ path_i.append(str(p2[1]))
1002
+ elif cmd == 2:
1003
+ path_i.append('L')
1004
+ path_i.append(str(p2[0]))
1005
+ path_i.append(str(p2[1]))
1006
+ elif cmd == 3:
1007
+ path_i.append('C')
1008
+ path_i.append(str(p0[0]))
1009
+ path_i.append(str(p0[1]))
1010
+ path_i.append(str(p1[0]))
1011
+ path_i.append(str(p1[1]))
1012
+ path_i.append(str(p2[0]))
1013
+ path_i.append(str(p2[1]))
1014
+ else:
1015
+ print("wrong!!! to path")
1016
+ path.append(path_i)
1017
+ return path
1018
+
1019
+ def clockwise(seq):
1020
+ path = convert_simple_vector_to_path(seq)
1021
+ path = _canonicalize(path)
1022
+ ret = {}
1023
+ vector = _path_to_vector(path, categorical=True)
1024
+ vector = np.array(vector)
1025
+ vector = np.concatenate([np.take(vector, [0, 4, 5, 9], axis=-1), vector[..., -6:]], axis=-1)
1026
+ ret['seq_len'] = np.shape(vector)[0]
1027
+ vector = _append_eos(vector.tolist(), True, 10)
1028
+ ret['sequence'] = np.concatenate((vector, np.zeros(((MAX_SEQ_LEN - ret['seq_len']), 10))), 0)
1029
+ return ret
1030
+
1031
+ ################### CHECK VALID ##############################################
1032
+ class MeanStddev:
1033
+ """Accumulator to compute the mean/stdev of svg commands."""
1034
+
1035
+ def create_accumulator(self):
1036
+ curr_sum = np.zeros([10])
1037
+ sum_sq = np.zeros([10])
1038
+ return (curr_sum, sum_sq, 0) # x, x^2, count
1039
+
1040
+ def add_input(self, sum_count, new_input):
1041
+ (curr_sum, sum_sq, count) = sum_count
1042
+ # new_input is a dict with keys = ['seq_len', 'sequence']
1043
+ new_seq_len = new_input['seq_len'][0] # Line #754 'seq_len' is a list of one int
1044
+ assert isinstance(new_seq_len, int), print(type(new_seq_len))
1045
+
1046
+ # remove padding and eos from sequence
1047
+ assert isinstance(new_input['sequence'], list), print(type(new_input['sequence']))
1048
+ new_input_np = np.reshape(np.array(new_input['sequence']), [-1, 10])
1049
+ assert isinstance(new_input_np, np.ndarray), print(type(new_input_np))
1050
+ assert new_input_np.shape[0] >= new_seq_len
1051
+ new_input_np = new_input_np[:new_seq_len, :]
1052
+
1053
+ # accumulate new_sum and new_sum_sq
1054
+ new_sum = np.sum([curr_sum, np.sum(new_input_np, axis=0)], axis=0)
1055
+ new_sum_sq = np.sum([sum_sq, np.sum(np.power(new_input_np, 2), axis=0)],
1056
+ axis=0)
1057
+ return new_sum, new_sum_sq, count + new_seq_len
1058
+
1059
+ def merge_accumulators(self, accumulators):
1060
+ curr_sums, sum_sqs, counts = list(zip(*accumulators))
1061
+ return np.sum(curr_sums, axis=0), np.sum(sum_sqs, axis=0), np.sum(counts)
1062
+
1063
+ def extract_output(self, sum_count):
1064
+ (curr_sum, curr_sum_sq, count) = sum_count
1065
+ if count:
1066
+ mean = np.divide(curr_sum, count)
1067
+ variance = np.divide(curr_sum_sq, count) - np.power(mean, 2)
1068
+ # -ve value could happen due to rounding
1069
+ variance = np.max([variance, np.zeros(np.shape(variance))], axis=0)
1070
+ stddev = np.sqrt(variance)
1071
+ return {
1072
+ 'mean': mean,
1073
+ 'variance': variance,
1074
+ 'stddev': stddev,
1075
+ 'count': count
1076
+ }
1077
+ else:
1078
+ return {
1079
+ 'mean': float('NaN'),
1080
+ 'variance': float('NaN'),
1081
+ 'stddev': float('NaN'),
1082
+ 'count': 0
1083
  }
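+
+ # Usage sketch (hypothetical values): the accumulator implements the plain
+ # moment estimator mean = sum(x)/n, var = sum(x^2)/n - mean^2, stddev = sqrt(var).
+ # >>> ms = MeanStddev()
+ # >>> acc = ms.add_input(ms.create_accumulator(),
+ # ...                    {'seq_len': [2], 'sequence': [1.0] * 20})
+ # >>> ms.extract_output(acc)['count']
+ # 2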
{data_utils → ThaiVecFont/data_utils}/svg_utils_backup.py RENAMED
@@ -1,1174 +1,1174 @@
1
-
2
- # Copyright 2020 The Magenta Authors.
3
- #
4
- # Licensed under the Apache License, Version 2.0 (the "License");
5
- # you may not use this file except in compliance with the License.
6
- # You may obtain a copy of the License at
7
- #
8
- # http://www.apache.org/licenses/LICENSE-2.0
9
- #
10
- # Unless required by applicable law or agreed to in writing, software
11
- # distributed under the License is distributed on an "AS IS" BASIS,
12
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
- # See the License for the specific language governing permissions and
14
- # limitations under the License.
15
- import pdb
16
- # Lint as: python3
17
- """Defines the Material Design Icons Problem."""
18
- import io
19
- import numpy as np
20
- import re
21
-
22
- from PIL import Image
23
- from itertools import zip_longest
24
- from skimage import draw
25
- import sys
26
-
27
- SVG_PREFIX_BIG = ('<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="'
28
- 'http://www.w3.org/1999/xlink" width="256px" height="256px"'
29
- ' style="-ms-transform: rotate(360deg); -webkit-transform:'
30
- ' rotate(360deg); transform: rotate(360deg);" '
31
- 'preserveAspectRatio="xMidYMid meet" viewBox="0 0 24 24">')
32
- PATH_PREFIX_1 = '<path d="'
33
- PATH_POSFIX_1 = '" fill="currentColor"/>'
34
- SVG_POSFIX = '</svg>'
35
-
36
- NUM_ARGS = {'v': 1, 'V': 1, 'h': 1, 'H': 1, 'a': 7, 'A': 7, 'l': 2, 'L': 2,
37
- 't': 2, 'T': 2, 'c': 6, 'C': 6, 'm': 2, 'M': 2, 's': 4, 'S': 4,
38
- 'q': 4, 'Q': 4, 'z': 0}
39
- # in order of arg complexity, with absolutes clustered
40
- # recall we don't handle all commands (see docstring)
41
-
42
- #note args:
43
- # v, h: vertical / horizontal lines
44
- # a: elliptical arc
45
- # l: lineto
46
- # t: smooth quadratic Bézier curveto
47
- # c: curveto
48
- # m: moveto
49
- # s: smooth curveto
50
- # Q: quadratic Bézier curve
51
- # z: closepath
52
- #CMDS_LIST = 'zhvmltsqcaHVMLTSQCA'
53
- CMDS_LIST = 'zHVMLTSQCAhvmltsqca'
54
- CMD_MAPPING = {cmd: i for i, cmd in enumerate(CMDS_LIST)}
55
-
56
- FEATURE_DIM = 10
57
-
58
-
59
- ############################### GENERAL UTILS #################################
60
- def grouper(iterable, batch_size, fill_value=None):
61
- """Helper method for returning batches of size batch_size of a dataset."""
62
- # grouper('ABCDEF', 3) -> 'ABC', 'DEF'
63
- args = [iter(iterable)] * batch_size
64
- return zip_longest(*args, fillvalue=fill_value)
65
-
66
-
67
- def _map_uni_to_alphanum(uni):
68
- """Maps [0-9 A-Z a-z] to numbers 0-62."""
69
- if 48 <= uni <= 57:
70
- return uni - 48
71
- elif 65 <= uni <= 90:
72
- return uni - 65 + 10
73
- return uni - 97 + 36
74
-
75
-
76
- def _map_uni_to_alpha(uni):
77
- """Maps [A-Z a-z] to numbers 0-52."""
78
- if 65 <= uni <= 90:
79
- return uni - 65
80
- return uni - 97 + 26
81
-
82
-
83
- ############# UTILS FOR CONVERTING SFD/SPLINESETS TO SVG PATHS ################
84
- def _get_spline(sfd):
85
- if 'SplineSet' not in sfd:
86
- return ''
87
- pro = sfd[sfd.index('SplineSet') + 10:] # skip 'SplineSet' (9 chars) plus the following newline
88
- pro = pro[:pro.index('EndSplineSet')]
89
- return pro
90
-
91
-
92
- def _spline_to_path_list(spline, height, replace_with_prev=False):
93
- """Converts SplineSet to a list of tokenized commands in svg path."""
94
- path = []
95
- prev_xy = []
96
- for line in spline.splitlines():
97
- if not line:
98
- continue
99
- tokens = line.split(' ')
100
- cmd = tokens[-2]
101
- if cmd not in 'cml':
102
- # COMMAND NOT RECOGNIZED.
103
- return []
104
- # assert cmd in 'cml', 'Command not recognized: {}'.format(cmd)
105
- args = tokens[:-2]
106
- args = [float(x) for x in args if x]
107
-
108
- if replace_with_prev and cmd in 'c':
109
- args[:2] = prev_xy
110
- prev_xy = args[-2:]
111
-
112
- new_y_args = []
113
- for i, a in enumerate(args):
114
- if i % 2 == 1:
115
- new_y_args.append((height - a))
116
- else:
117
- new_y_args.append((a))
118
-
119
- path.append([cmd.upper()] + new_y_args)
120
- return path
121
-
122
-
123
- def _sfd_to_path_list(single, replace_with_prev=False):
124
- """Converts the given SFD glyph into a path."""
125
- return _spline_to_path_list(_get_spline(single['sfd']), single['vwidth'], replace_with_prev)
126
-
127
-
128
- #################### UTILS FOR PROCESSING TOKENIZED PATHS #####################
129
- def _add_missing_cmds(path, remove_zs=False):
130
- """Adds missing cmd tags to the commands in the svg."""
131
- # For instance, the command 'a' takes 7 arguments, but some SVGs declare:
132
- # a 1 2 3 4 5 6 7 8 9 10 11 12 13 14
133
- # Which is 14 arguments. This function converts the above to the equivalent:
134
- # a 1 2 3 4 5 6 7 a 8 9 10 11 12 13 14
135
- #
136
- # Note: if remove_zs is True, this also removes any occurrences of z commands.
137
- new_path = []
138
- for cmd in path:
139
- if not remove_zs or cmd[0] not in 'Zz':
140
- for new_cmd in add_missing_cmd(cmd):
141
- new_path.append(new_cmd)
142
- return new_path
143
-
144
-
145
- def add_missing_cmd(command_list):
146
- """Adds missing cmd tags to the given command list."""
147
- # E.g.: given:
148
- # ['a', '0', '0', '0', '0', '0', '0', '0',
149
- # '0', '0', '0', '0', '0', '0', '0']
150
- # Converts to:
151
- # [['a', '0', '0', '0', '0', '0', '0', '0'],
152
- # ['a', '0', '0', '0', '0', '0', '0', '0']]
153
- # And returns the list of completed commands.
154
- cmd_tag = command_list[0]
155
- args = command_list[1:]
156
-
157
- final_cmds = []
158
- for arg_batch in grouper(args, NUM_ARGS[cmd_tag]):
159
- final_cmds.append([cmd_tag] + list(arg_batch))
160
-
161
- if not final_cmds:
162
- # command has no args (e.g.: 'z')
163
- final_cmds = [[cmd_tag]]
164
-
165
- return final_cmds
166
-
167
-
168
- def _normalize_args(arglist, norm, add=None, flip=False):
169
- """Normalize the given args with the given norm value."""
170
- new_arglist = []
171
- for i, arg in enumerate(arglist):
172
- new_arg = float(arg)
173
-
174
- if add is not None:
175
- add_to_x, add_to_y = add
176
-
177
- # This argument is an x-coordinate if even, y-coordinate if odd
178
- # except when flip == True
179
- if i % 2 == 0:
180
- new_arg += add_to_y if flip else add_to_x
181
- else:
182
- new_arg += add_to_x if flip else add_to_y
183
-
184
- new_arglist.append(str(24 * new_arg / norm))
185
- return new_arglist
186
-
187
-
188
- def _normalize_based_on_viewbox(path, viewbox):
189
- """Normalizes all args in a path to a standard 24x24 viewbox."""
190
- # Each SVG lives in a 2D plane. The viewbox determines the region of that
191
- # plane that gets rendered. For instance, some designers may work with a
192
- # viewbox that's 24x24, others with one that's 100x100, etc.
193
-
194
- # Suppose I design the letter "h" in the Arial style using a 100x100
195
- # viewbox (let's call it icon A). Let's suppose the icon has height 75. Then,
196
- # I design the same character using a 20x20 viewbox (call this icon B), with
197
- # height 15 (=75% of 20). This means that, when rendered, both icons will look
198
- # exactly the same, but the scale of the commands each icon is using is
199
- # different. For instance, if icon A has a command like "lineTo 100 100", the
200
- # equivalent command in icon B will be "lineTo 20 20".
201
-
202
- # In order to avoid this problem and bring all real values to the same scale,
203
- # I scale all icons' commands to use a 24x24 viewbox. This function does this:
204
- # it converts a path that exists in the given viewbox into a standard 24x24
205
- # viewbox.
206
- viewbox = viewbox.split(' ')
207
- norm = max(int(viewbox[-1]), int(viewbox[-2]))
208
-
209
- if int(viewbox[-1]) > int(viewbox[-2]):
210
- add_to_y = 0
211
- add_to_x = abs(int(viewbox[-1]) - int(viewbox[-2])) / 2
212
- else:
213
- add_to_y = abs(int(viewbox[-1]) - int(viewbox[-2])) / 2
214
- add_to_x = 0
215
-
216
- new_path = []
217
- for command in path:
218
- if command[0] == 'a':
219
- new_path.append([command[0]] + _normalize_args(command[1:3], norm)
220
- + command[3:6] + _normalize_args(command[6:], norm))
221
- elif command[0] == 'A':
222
- new_path.append([command[0]] + _normalize_args(command[1:3], norm)
223
- + command[3:6] + _normalize_args(command[6:], norm, add=(add_to_x, add_to_y)))
224
- elif command[0] == 'V':
225
- new_path.append([command[0]] + _normalize_args(command[1:], norm, add=(add_to_x, add_to_y), flip=True))
226
- elif command[0] == command[0].upper():
227
- new_path.append([command[0]] + _normalize_args(command[1:], norm, add=(add_to_x, add_to_y)))
228
- elif command[0] in 'zZ':
229
- new_path.append([command[0]])
230
- else:
231
- new_path.append([command[0]] + _normalize_args(command[1:], norm))
232
-
233
- return new_path
234
-
235
-
236
- def _convert_args(args, curr_pos, cmd):
237
- """Converts given args to relative values."""
238
- # NOTE: glyphs only use a very small subset of commands (L, C, M, and Z -- I
239
- # believe). So I'm not handling A and H for now.
240
- if cmd in 'AH':
241
- raise NotImplementedError('These commands have >6 args (not supported).')
242
-
243
- new_args = []
244
- for i, arg in enumerate(args):
245
- x_or_y = i % 2
246
- if cmd == 'H':
247
- x_or_y = (i + 1) % 2
248
- new_args.append(str(float(arg) - curr_pos[x_or_y]))
249
-
250
- return new_args
251
-
252
-
253
- def _update_curr_pos(curr_pos, cmd, start_of_path):
254
- """Calculate the position of the pen after cmd is applied."""
255
- if cmd[0] in 'ml':
256
- curr_pos = [curr_pos[0] + float(cmd[1]), curr_pos[1] + float(cmd[2])]
257
- if cmd[0] == 'm':
258
- start_of_path = curr_pos
259
- elif cmd[0] in 'z':
260
- curr_pos = start_of_path
261
- elif cmd[0] in 'h':
262
- curr_pos = [curr_pos[0] + float(cmd[1]), curr_pos[1]]
263
- elif cmd[0] in 'v':
264
- curr_pos = [curr_pos[0], curr_pos[1] + float(cmd[1])]
265
- elif cmd[0] in 'ctsqa':
266
- curr_pos = [curr_pos[0] + float(cmd[-2]), curr_pos[1] + float(cmd[-1])]
267
-
268
- return curr_pos, start_of_path
269
-
270
-
271
- def _make_relative(cmds):
272
- """Convert commands in a path to relative positioning."""
273
- curr_pos = (0.0, 0.0)
274
- start_of_path = (0.0, 0.0)
275
- new_cmds = []
276
- for cmd in cmds:
277
- if cmd[0].lower() == cmd[0]:
278
- new_cmd = cmd
279
- elif cmd[0].lower() == 'z':
280
- new_cmd = [cmd[0].lower()]
281
- else:
282
- new_cmd = [cmd[0].lower()] + _convert_args(cmd[1:], curr_pos, cmd=cmd[0])
283
- new_cmds.append(new_cmd)
284
- curr_pos, start_of_path = _update_curr_pos(curr_pos, new_cmd, start_of_path)
285
- return new_cmds
286
-
287
-
288
- def _is_to_left_of(pt1, pt2):
289
- pt1_norm = (pt1[0]**2 + pt1[1]**2)
290
- pt2_norm = (pt2[0]**2 + pt2[1]**2)
291
- return pt1[1] < pt2[1] or (pt1_norm == pt2_norm and pt1[0] < pt2[0])
292
-
293
-
294
- def _get_leftmost_point(path):
295
- """Returns the leftmost, topmost point of the path."""
296
- leftmost = (float('inf'), float('inf'))
297
- idx = -1
298
-
299
- for i, cmd in enumerate(path):
300
- if len(cmd) > 1:
301
- endpoint = cmd[-2:]
302
- if _is_to_left_of(endpoint, leftmost):
303
- leftmost = endpoint
304
- idx = i
305
-
306
- return leftmost, idx
307
-
308
-
309
- def _separate_substructures(path):
310
- """Returns a list of subpaths, each representing substructures the glyph."""
311
- substructures = []
312
- curr = []
313
- for cmd in path:
314
- if cmd[0] in 'mM' and curr:
315
- substructures.append(curr)
316
- curr = []
317
- curr.append(cmd)
318
- if curr:
319
- substructures.append(curr)
320
- return substructures
321
-
322
-
323
- def _is_clockwise(subpath):
324
- """Returns whether the given subpath is clockwise-oriented."""
325
- pts = [cmd[-2:] for cmd in subpath]
326
- det = 0
327
- for i in range(len(pts) - 1):
328
- det += np.linalg.det(pts[i:i + 2])
329
- return det > 0
330
-
331
-
332
- def _make_clockwise(subpath):
333
- """Inverts the cardinality of the given subpath."""
334
- new_path = [subpath[0]]
335
- other_cmds = list(reversed(subpath[1:]))
336
- for i, cmd in enumerate(other_cmds):
337
- if i + 1 == len(other_cmds):
338
- where_we_were = subpath[0][-2:]
339
- else:
340
- where_we_were = other_cmds[i + 1][-2:]
341
-
342
- if len(cmd) > 3:
343
- new_cmd = [cmd[0], cmd[3], cmd[4], cmd[1], cmd[2],
344
- where_we_were[0], where_we_were[1]]
345
- else:
346
- new_cmd = [cmd[0], where_we_were[0], where_we_were[1]]
347
-
348
- new_path.append(new_cmd)
349
- return new_path
350
-
351
-
352
- def _canonicalize(path):
353
- """Makes all paths start at top left, and go clockwise first."""
354
- # convert args to floats
355
- #print(len(path),path)
356
-
357
- path = [[x[0]] + list(map(float, x[1:])) for x in path]
358
- # print(len(path),path)
359
-
360
- # _canonicalize each subpath separately
361
- #pdb.set_trace()
362
-
363
- new_substructures = []
364
- for subpath in _separate_substructures(path):
365
- # print(subpath,"\n")
366
- leftmost_point, leftmost_idx = _get_leftmost_point(subpath)
367
- reordered = ([['M', leftmost_point[0], leftmost_point[1]]] + subpath[leftmost_idx + 1:] + subpath[1:leftmost_idx + 1])
368
- new_substructures.append((reordered, leftmost_point))
369
-
370
- # sys.exit()
371
- new_path = []
372
- first_substructure_done = False
373
- should_flip_cardinality = False
374
- for sp, _ in sorted(new_substructures, key=lambda x: (x[1][1], x[1][0])):
375
- if not first_substructure_done:
376
- # we're looking at the first substructure now, so we can determine whether we
377
- # will flip the cardinality of the whole icon or not
378
- should_flip_cardinality = not _is_clockwise(sp)
379
- first_substructure_done = True
380
-
381
- if should_flip_cardinality:
382
- sp = _make_clockwise(sp)
383
-
384
- new_path.extend(sp)
385
-
386
- # convert args to strs
387
- path = [[x[0]] + list(map(str, x[1:])) for x in new_path]
388
- return path
389
-
390
-
391
- # ######### UTILS FOR CONVERTING TOKENIZED PATHS TO VECTORS ###########
392
- def _path_to_vector(path, categorical=False):
393
- """Converts path's commands to a series of vectors."""
394
- # Notes:
395
- # - The SimpleSVG dataset does not have any 't', 'q', 'Z', 'T', or 'Q'.
396
- # Thus, we don't handle those here.
397
- # - We also removed all 'z's.
398
- # - The x-axis-rotation argument to a commands is always 0 in this
399
- # dataset, so we ignore it
400
-
401
- # Many commands have args that correspond to args in other commands.
402
- # v __,__ _______________ ______________,_________ __,__ __,__ _,y
403
- # h __,__ _______________ ______________,_________ __,__ __,__ x,_
404
- # z __,__ _______________ ______________,_________ __,__ __,__ _,_
405
- # a rx,ry x-axis-rotation large-arc-flag,sweepflag __,__ __,__ x,y
406
- # l __,__ _______________ ______________,_________ __,__ __,__ x,y
407
- # c __,__ _______________ ______________,_________ x1,y1 x2,y2 x,y
408
- # m __,__ _______________ ______________,_________ __,__ __,__ x,y
409
- # s __,__ _______________ ______________,_________ __,__ x2,y2 x,y
410
-
411
- # So each command will be converted to a vector where the dimension is the
412
- # minimal number of arguments to all commands:
413
- # [rx, ry, large-arc-flag, sweepflag, x1, y1, x2, y2, x, y]
414
- # If a command does not output a certain arg, it is set to 0.
415
- # "l 5,5" becomes [0, 0, 0, 0, 0, 0, 0, 0, 5, 5]
416
-
417
- # Also note, as of now we also output an extra dimension at index 0, which
418
- # indicates which command is being outputted (integer).
419
- new_path = []
420
- for cmd in path:
421
- new_path.append(_cmd_to_vector(cmd, categorical=categorical))
422
- return new_path
423
-
424
-
425
- def _cmd_to_vector(cmd_list, categorical=False):
426
- """Converts the given command (given as a list) into a vector.
427
- NUM_ARGS = {'v': 1, 'V': 1, 'h': 1, 'H': 1, 'a': 7, 'A': 7, 'l': 2, 'L': 2,
428
- 't': 2, 'T': 2, 'c': 6, 'C': 6, 'm': 2, 'M': 2, 's': 4, 'S': 4,
429
- 'q': 4, 'Q': 4, 'z': 0}
430
-
431
- CMDS_LIST = 'zhvmltsqcaHVMLTSQCA'
432
- CMD_MAPPING = {cmd: i for i, cmd in enumerate(CMDS_LIST)}
433
- """
434
- # For description of how this conversion happens, see
435
- # _path_to_vector docstring.
436
- cmd = cmd_list[0]
437
- args = cmd_list[1:]
438
-
439
- if not categorical:
440
- # integer, for MSE
441
- command = [float(CMD_MAPPING[cmd])]
442
- else:
443
- # one hot + 1 dim for EOS.
444
- command = [0.0] * (len(CMDS_LIST) + 1) # one-hot over the 19 commands plus a leading EOS slot
445
- command[CMD_MAPPING[cmd] + 1] = 1.0
446
-
447
- arguments = [0.0] * 10
448
- if cmd in 'hH':
449
- arguments[8] = float(args[0]) # x
450
- elif cmd in 'vV':
451
- arguments[9] = float(args[0]) # y
452
- elif cmd in 'mMlLtT':
453
- arguments[8] = float(args[0]) # x
454
- arguments[9] = float(args[1]) # y
455
- elif cmd in 'sSqQ':
456
- arguments[6] = float(args[0]) # x2
457
- arguments[7] = float(args[1]) # y2
458
- arguments[8] = float(args[2]) # x
459
- arguments[9] = float(args[3]) # y
460
- elif cmd in 'cC':
461
- arguments[4] = float(args[0]) # x1
462
- arguments[5] = float(args[1]) # y1
463
- arguments[6] = float(args[2]) # x2
464
- arguments[7] = float(args[3]) # y2
465
- arguments[8] = float(args[4]) # x
466
- arguments[9] = float(args[5]) # y
467
- elif cmd in 'aA':
468
- arguments[0] = float(args[0]) # rx
469
- arguments[1] = float(args[1]) # ry
470
- # we skip x-axis-rotation
471
- arguments[2] = float(args[3]) # large-arc-flag
472
- arguments[3] = float(args[4]) # sweep-flag
473
- # a does not have x1, y1, x2, y2 args
474
- arguments[8] = float(args[5]) # x
475
- arguments[9] = float(args[6]) # y
476
-
477
- return command + arguments
478
-
479
-
480
- ################## UTILS FOR RENDERING PATH INTO IMAGE #################
481
- def _cubicbezier(x0, y0, x1, y1, x2, y2, x3, y3, n=40):
482
- """Return n points along cubiz bezier with given control points."""
483
- # from http://rosettacode.org/wiki/Bitmap/B%C3%A9zier_curves/Cubic
484
- pts = []
485
- for i in range(n + 1):
486
- t = float(i) / float(n)
487
- a = (1. - t)**3
488
- b = 3. * t * (1. - t)**2
489
- c = 3.0 * t**2 * (1.0 - t)
490
- d = t**3
491
-
492
- x = float(a * x0 + b * x1 + c * x2 + d * x3)
493
- y = float(a * y0 + b * y1 + c * y2 + d * y3)
494
- pts.append((x, y))
495
- return list(zip(*pts))
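-
- # The loop above samples the standard Bernstein form of a cubic Bézier,
- # B(t) = (1-t)^3*P0 + 3t(1-t)^2*P1 + 3t^2(1-t)*P2 + t^3*P3,
- # at n + 1 evenly spaced values of t in [0, 1].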
496
-
497
-
498
- def _update_pos(curr_pos, end_pos, absolute):
499
- if absolute:
500
- return end_pos
501
- return curr_pos[0] + end_pos[0], curr_pos[1] + end_pos[1]
502
-
503
-
504
- def constant_color(*unused_args):
505
- return np.array([255, 255, 255])
506
-
507
-
508
- def _render_cubic(canvas, curr_pos, c_args, absolute, color):
509
- """Renders a cubic bezier curve in the given canvas."""
510
- if not absolute:
511
- c_args[0] += curr_pos[0]
512
- c_args[1] += curr_pos[1]
513
- c_args[2] += curr_pos[0]
514
- c_args[3] += curr_pos[1]
515
- c_args[4] += curr_pos[0]
516
- c_args[5] += curr_pos[1]
517
- x, y = _cubicbezier(curr_pos[0], curr_pos[1],
518
- c_args[0], c_args[1],
519
- c_args[2], c_args[3],
520
- c_args[4], c_args[5])
521
- max_possible = len(canvas)
522
- x = [int(round(x_)) for x_ in x]
523
- y = [int(round(y_)) for y_ in y]
524
-
525
- def within_range(x):
526
- return 0 <= x < max_possible
527
-
528
- filtered = [(x_, y_) for x_, y_ in zip(x, y)
529
- if within_range(x_) and within_range(y_)]
530
- if not filtered:
531
- return
532
- x, y = list(zip(*filtered))
533
- canvas[y, x, :] = color
534
-
535
-
536
- def _render_line(canvas, curr_pos, l_args, absolute, color):
537
- """Renders a line in the given canvas."""
538
- end_point = l_args
539
- if not absolute:
540
- end_point[0] += curr_pos[0]
541
- end_point[1] += curr_pos[1]
542
- rr, cc, val = draw.line_aa(int(curr_pos[0]), int(curr_pos[1]),
543
- int(end_point[0]), int(end_point[1]))
544
-
545
- max_possible = len(canvas)
546
-
547
- def within_range(x):
548
- return 0 <= x < max_possible
549
-
550
- filtered = [(x, y, v) for x, y, v in zip(rr, cc, val)
551
- if within_range(x) and within_range(y)]
552
- if not filtered:
553
- return
554
- rr, cc, val = list(zip(*filtered))
555
- val = [(v * color) for v in val]
556
- canvas[cc, rr, :] = val
557
-
558
-
559
- def _per_step_render(path, absolute=False, color=constant_color):
560
- """Render the icon's edges, given its path."""
561
- def to_canvas_size(l):
562
- return [float(f) * (64. / 24.) for f in l]
563
-
564
- canvas = np.zeros((64, 64, 3))
565
- curr_pos = (0.0, 0.0)
566
- for i, cmd in enumerate(path):
567
- if not cmd:
568
- continue
569
- if cmd[0] in 'mM':
570
- curr_pos = _update_pos(curr_pos, to_canvas_size(cmd[-2:]), absolute)
571
- elif cmd[0] in 'cC':
572
- _render_cubic(canvas, curr_pos, to_canvas_size(cmd[1:]), absolute, color(i, 55))
573
- curr_pos = _update_pos(curr_pos, to_canvas_size(cmd[-2:]), absolute)
574
- elif cmd[0] in 'lL':
575
- _render_line(canvas, curr_pos, to_canvas_size(cmd[1:]), absolute, color(i, 55))
576
- curr_pos = _update_pos(curr_pos, to_canvas_size(cmd[1:]), absolute)
577
-
578
- return canvas
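-
- # Contract sketch: path coordinates are expected in the 24x24 viewbox, so
- # to_canvas_size rescales them by 64/24; the result is a 64x64x3 canvas with
- # glyph edges drawn in color over a zero (black) background.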
579
-
580
-
581
- def _zoom_out(path_list, add_baseline=0., per=22):
582
- """Makes glyph slightly smaller in viewbox, makes some descenders visible."""
583
- # assumes tensor is already unnormalized, and in long form
584
- new_path = []
585
- for command in path_list:
586
- args = []
587
- is_even = False
588
- for arg in command[1:]:
589
- if is_even:
590
- args.append(str(float(arg) - ((24. - per) / 24.) * 64. / 4.))
591
- is_even = False
592
- else:
593
- args.append(str(float(arg) - add_baseline))
594
- is_even = True
595
- new_path.append([command[0]] + args)
596
- return new_path
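-
- # Worked numbers for the default per=22 (a sketch, assuming coordinates in
- # 64-unit canvas space): each y-coordinate shifts by ((24 - 22) / 24) * 64 / 4
- # = 4/3 of a unit, while x only moves by the optional add_baseline.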
597
-
598
-
599
- ##################### UTILS FOR PROCESSING VECTORS ################
600
- def _append_eos(sample, categorical, feature_dim):
601
- if not categorical:
602
- eos = -1 * np.ones(feature_dim)
603
- else:
604
- eos = np.zeros(feature_dim)
605
- eos[0] = 1.0
606
- sample.append(eos)
607
- return sample
608
-
609
-
610
- def _make_simple_cmds_long(out):
611
- """Converts svg decoder output to format required by some render functions."""
612
- # out has 10 dims
613
- # the first 4 are respectively dims 0, 4, 5, 9 of the full 20-dim onehot vec
614
- # the latter 6 are the 6 last dims of the 10-dim arg vec
615
- shape_minus_dim = list(np.shape(out))[:-1]
616
- # print("make? ",shape_minus_dim ) # [51]
617
-
618
- return np.concatenate([out[..., :1], # [51,1] first command bit of each of the 51 steps
619
- np.zeros(shape_minus_dim + [3]),# [51,3]
620
- out[..., 1:3], #[51,2]
621
- np.zeros(shape_minus_dim + [3]),# [51,3]
622
- out[..., 3:4],# [51,1]
623
- np.zeros(shape_minus_dim + [14]),# [51,14]
624
- out[..., 4:]], -1)# [51,6] the final 6 drawing arguments
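-
- # Shape sketch for a (51, 10) input: the 4 command bits are scattered to
- # positions [0, 4, 5, 9] of the 20-dim one-hot block and the 6 drawing args
- # fill the last 6 of the 10 argument slots, giving a (51, 30) output.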
625
-
626
- def render(tensor, data_dir=None):
627
- """Converts SVG decoder output into HTML svg."""
628
- # undo normalization
629
- # mean_npz, stdev_npz = get_means_stdevs(data_dir)
630
- # tensor = (tensor * stdev_npz) + mean_npz
631
-
632
- # convert to html
633
- #print("before",tensor.shape)# 51, 10)
634
- tensor = _make_simple_cmds_long(tensor)
635
- # print("after",tensor.shape)#(51, 30)
636
- # vector = np.squeeze(np.squeeze(tensor, 0), 2)
637
- # print("1",tensor[0,:5])# (51, 30)
638
- html = _vector_to_svg(tensor, stop_at_eos=True, categorical=True)
639
- # print(html.shape)
640
- # some aesthetic postprocessing
641
- html = postprocess(html)
642
- html = html.replace('256px', '50px')
643
-
644
- return html
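-
- # Usage sketch: render() takes one decoded (seq_len, 10) command tensor and
- # returns a standalone <svg> string (50px wide after the size rewrite above),
- # ready to be written to an .svg or .html file.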
645
-
646
- ################# UTILS FOR CONVERTING VECTORS TO SVGS ########################
647
- #note: transform the decoded trg_seq into the common SVG format: the decoded sequence becomes an HTML <svg> string; commands build on one another sequentially and all use relative positions.
648
- def _vector_to_svg(vectors, stop_at_eos=False, categorical=False):
649
- """Tranforms a given vector to an svg string.
650
-
651
- """
652
- new_path = []
653
- for vector in vectors:
654
- if stop_at_eos:
655
- if categorical:
656
- try:
657
- is_eos = np.argmax(vector[:len(CMDS_LIST) + 1]) == 0
658
- except Exception:
659
- raise Exception(vector)
660
- else:
661
- is_eos = vector[0] < -0.5
662
-
663
- if is_eos:
664
- break
665
- new_path.append(' '.join(_vector_to_cmd(vector, categorical=categorical)))
666
- new_path = ' '.join(new_path) # join the commands into one path string, separated by spaces
667
- return SVG_PREFIX_BIG + PATH_PREFIX_1 + new_path + PATH_POSFIX_1 + SVG_POSFIX
668
-
669
- def _vector_to_path(vectors):
670
- new_path = []
671
- for vector in vectors:
672
- #print(vector,"???")
673
- new_path.append(_vector_to_cmd(vector,categorical=True)) #
674
- #print(_vector_to_cmd(vector),"hhh")
675
- # new_path = ' '.join(new_path) # join the commands into one path string, separated by spaces
676
- return new_path
677
-
678
- def _vector_to_cmd(vector, categorical=False, return_floats=False):
679
- """Does the inverse transformation as _cmd_to_vector().
680
- NUM_ARGS = {'v': 1, 'V': 1, 'h': 1, 'H': 1, 'a': 7, 'A': 7, 'l': 2, 'L': 2,
681
- 't': 2, 'T': 2, 'c': 6, 'C': 6, 'm': 2, 'M': 2, 's': 4, 'S': 4,
682
- 'q': 4, 'Q': 4, 'z': 0}
683
-
684
- CMDS_LIST = 'zhvmltsqcaHVMLTSQCA'
685
- CMD_MAPPING = {cmd: i for i, cmd in enumerate(CMDS_LIST)}
686
-
687
- """
688
- cast_fn = float if return_floats else str
689
- if categorical:
690
- # print(vector.shape,vector)# 30
691
- #print("??",len(CMDS_LIST)) # 19
692
- command = vector[:len(CMDS_LIST) + 1] # the first 20 dims (command one-hot)
693
- arguments = vector[len(CMDS_LIST) + 1:] # the last 10 dims (arguments)
694
- cmd_idx = np.argmax(command) - 1 # which drawing command this step encodes
695
-
696
- else:
697
-
698
- command, arguments = vector[:1], vector[1:]
699
- cmd_idx = int(round(command[0]))
700
-
701
- if cmd_idx < -0.5:
702
- # EOS
703
- return []
704
- if cmd_idx >= len(CMDS_LIST):
705
- cmd_idx = len(CMDS_LIST) - 1
706
-
707
- cmd = CMDS_LIST[cmd_idx]
708
- cmd = cmd.upper()
709
- cmd_list = [cmd]
710
-
711
- if cmd in 'hH': # horizontal line: only x is used
712
- cmd_list.append(cast_fn(arguments[8])) # x
713
- elif cmd in 'vV': # vertical line: only y is used
714
- cmd_list.append(cast_fn(arguments[9])) # y
715
- elif cmd in 'mMlLtT':
716
- cmd_list.append(cast_fn(arguments[8])) # x
717
- cmd_list.append(cast_fn(arguments[9])) # y
718
- elif cmd in 'sSqQ':
719
- cmd_list.append(cast_fn(arguments[6])) # x2
720
- cmd_list.append(cast_fn(arguments[7])) # y2
721
- cmd_list.append(cast_fn(arguments[8])) # x
722
- cmd_list.append(cast_fn(arguments[9])) # y
723
- elif cmd in 'cC':
724
- cmd_list.append(cast_fn(arguments[4])) # x1
725
- cmd_list.append(cast_fn(arguments[5])) # y1
726
- cmd_list.append(cast_fn(arguments[6])) # x2
727
- cmd_list.append(cast_fn(arguments[7])) # y2
728
- cmd_list.append(cast_fn(arguments[8])) # x
729
- cmd_list.append(cast_fn(arguments[9])) # y
730
- elif cmd in 'aA':
731
- cmd_list.append(cast_fn(arguments[0])) # rx
732
- cmd_list.append(cast_fn(arguments[1])) # ry
733
- # x-axis-rotation is always 0
734
- cmd_list.append(cast_fn('0'))
735
- # the following two flags are binary.
736
- cmd_list.append(cast_fn(1 if arguments[2] > 0.5 else 0)) # large-arc-flag
737
- cmd_list.append(cast_fn(1 if arguments[3] > 0.5 else 0)) # sweep-flag
738
- cmd_list.append(cast_fn(arguments[8])) # x
739
- cmd_list.append(cast_fn(arguments[9])) # y
740
-
741
- return cmd_list
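-
- # Decoding sketch (hypothetical values): a 30-dim vector whose one-hot block
- # marks 'M' (CMD_MAPPING['M'] == 3, i.e. index 4 after the EOS slot) and whose
- # last two argument slots hold x and y decodes to ['M', '12.0', '12.0'].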
742
-
743
-
744
- ############## UTILS FOR CONVERTING SVGS/VECTORS TO IMAGES ###################
745
-
746
- # From Infer notebook
747
- start = ("""<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www."""
748
- """w3.org/1999/xlink" width="256px" height="256px" style="-ms-trans"""
749
- """form: rotate(360deg); -webkit-transform: rotate(360deg); transfo"""
750
- """rm: rotate(360deg);" preserveAspectRatio="xMidYMid meet" viewBox"""
751
- """="0 0 24 24"><path d=\"""")
752
- end = """\" fill="currentColor"/></svg>"""
753
-
754
- COMMAND_RX = re.compile("([MmLlHhVvCcSsQqTtAaZz])")
755
- FLOAT_RX = re.compile("[-+]?[0-9]*\.?[0-9]+(?:[eE][-+]?[0-9]+)?") # noqa
756
-
757
-
758
- def svg_html_to_path_string(svg):
759
- return svg.replace(start, '').replace(end, '')
760
-
761
-
762
- def _tokenize(pathdef):
763
- """Returns each svg token from path list."""
764
- # e.g.: 'm0.1-.5c0,6' -> 'm', '0.1', '-.5', 'c', '0', '6'
765
- for x in COMMAND_RX.split(pathdef):
766
- if x != '' and x in 'MmLlHhVvCcSsQqTtAaZz':
767
- yield x
768
- for token in FLOAT_RX.findall(x):
769
- yield token
770
-
771
-
772
- def path_string_to_tokenized_commands(path):
773
- """Tokenizes the given path string.
774
-
775
- E.g.:
776
- Given M 0.5 0.5 l 0.25 0.25 z
777
- Returns [['M', '0.5', '0.5'], ['l', '0.25', '0.25'], ['z']]
778
- """
779
- new_path = []
780
- current_cmd = []
781
- for token in _tokenize(path):
782
- if len(current_cmd) > 0:
783
- if token in 'MmLlHhVvCcSsQqTtAaZz':
784
- # cmd ended, convert to vector and add to new_path
785
- new_path.append(current_cmd)
786
- current_cmd = [token]
787
- else:
788
- # add arg to command
789
- current_cmd.append(token)
790
- else:
791
- # add to start new cmd
792
- current_cmd.append(token)
793
-
794
- if current_cmd:
795
- # process command still unprocessed
796
- new_path.append(current_cmd)
797
-
798
- return new_path
799
-
800
-
801
- def separate_substructures(tokenized_commands):
802
- """Returns a list of SVG substructures."""
803
- # every moveTo command starts a new substructure
804
- # an SVG substructure is a subpath that closes on itself
805
- # such as the outer and the inner edge of the character `o`
806
- substructures = []
807
- curr = []
808
- for cmd in tokenized_commands:
809
- if cmd[0] in 'mM' and len(curr) > 0:
810
- substructures.append(curr)
811
- curr = []
812
- curr.append(cmd)
813
- if len(curr) > 0:
814
- substructures.append(curr)
815
- return substructures
816
-
817
-
818
- def postprocess(svg, dist_thresh=2., skip=False):
819
- path = svg_html_to_path_string(svg)
820
- #print(svg)
821
- svg_template = svg.replace(path, '{}')
822
- tokenized_commands = path_string_to_tokenized_commands(path)
823
-
824
- def dist(a, b):
825
- return np.sqrt((float(a[0]) - float(b[0]))**2 + (float(a[1]) - float(b[1]))**2)
826
-
827
- def are_close_together(a, b, t):
828
- return dist(a, b) < t
829
-
830
- # first, go through each start/end point and merge if they're close enough
831
- # together (that is, make end point the same as the start point).
832
- # TODO: there are better ways of doing this, in a way that propagates errors
833
- # back (so if total error is 0.2, go through all N commands in this
834
- # substructure and fix each by 0.2/N (unless they have 0 vertical change))
835
- # NOTE: this is the same.
836
- substructures = separate_substructures(tokenized_commands)
837
- # print(len(substructures))# 7578
838
-
839
- previous_substructure_endpoint = (0., 0.,)
840
- for substructure in substructures:
841
- # first, if the last substructure's endpoint was updated, we must update
842
- # the start point of this one to reflect the opposite update
843
- substructure[0][-2] = str(float(substructure[0][-2]) -
844
- previous_substructure_endpoint[0])
845
- substructure[0][-1] = str(float(substructure[0][-1]) -
846
- previous_substructure_endpoint[1])
847
-
848
- start = list(map(float, substructure[0][-2:]))
849
- curr_pos = (0., 0.)
850
- for cmd in substructure:
851
- curr_pos, _ = _update_curr_pos(curr_pos, cmd, (0., 0.))
852
- if are_close_together(start, curr_pos, dist_thresh):
853
- new_point = np.array(start)
854
- previous_substructure_endpoint = ((new_point[0] - curr_pos[0]),
855
- (new_point[1] - curr_pos[1]))
856
- substructure[-1][-2] = str(float(substructure[-1][-2]) +
857
- (new_point[0] - curr_pos[0]))
858
- substructure[-1][-1] = str(float(substructure[-1][-1]) +
859
- (new_point[1] - curr_pos[1]))
860
- if substructure[-1][0] in 'cC':
861
- substructure[-1][-4] = str(float(substructure[-1][-4]) +
862
- (new_point[0] - curr_pos[0]))
863
- substructure[-1][-3] = str(float(substructure[-1][-3]) +
864
- (new_point[1] - curr_pos[1]))
865
-
866
- if skip:
867
- return svg_template.format(' '.join([' '.join(' '.join(cmd) for cmd in s)
868
- for s in substructures]))
869
-
870
- def cosa(x, y):
871
- return (x[0] * y[0] + x[1] * y[1]) / ((np.sqrt(x[0]**2 + x[1]**2) * np.sqrt(y[0]**2 + y[1]**2)))
872
-
873
- def rotate(a, x, y):
874
- return (x * np.cos(a) - y * np.sin(a), y * np.cos(a) + x * np.sin(a))
875
- # second, find adjacent Bézier curves and, if their control points
876
- # are well enough aligned, fully align them
877
- for substructure in substructures:
878
- curr_pos = (0., 0.)
879
- new_curr_pos, _ = _update_curr_pos((0., 0.,), substructure[0], (0., 0.))
880
-
881
- for cmd_idx in range(1, len(substructure)):
882
- prev_cmd = substructure[cmd_idx-1]
883
- cmd = substructure[cmd_idx]
884
-
885
- new_new_curr_pos, _ = _update_curr_pos(new_curr_pos, cmd, (0., 0.))
886
-
887
- if cmd[0] == 'c':
888
- if prev_cmd[0] == 'c':
889
- # check the vectors and update if needed
890
- # previous control pt wrt new curr point
891
- prev_ctr_point = (curr_pos[0] + float(prev_cmd[3]) - new_curr_pos[0],
892
- curr_pos[1] + float(prev_cmd[4]) - new_curr_pos[1])
893
- ctr_point = (float(cmd[1]), float(cmd[2]))
894
-
895
- if -1. < cosa(prev_ctr_point, ctr_point) < -0.95:
896
- # calculate exact angle between the two vectors
897
- angle_diff = (np.pi - np.arccos(cosa(prev_ctr_point, ctr_point)))/2
898
-
899
- # rotate each vector by angle/2 in the correct direction for each.
900
- sign = np.sign(np.cross(prev_ctr_point, ctr_point))
901
- new_ctr_point = rotate(sign * angle_diff, *ctr_point)
902
- new_prev_ctr_point = rotate(-sign * angle_diff, *prev_ctr_point)
903
-
904
- # override the previous control points
905
- # (which has to be wrt previous curr position)
906
- substructure[cmd_idx-1][3] = str(new_prev_ctr_point[0] -
907
- curr_pos[0] + new_curr_pos[0])
908
- substructure[cmd_idx-1][4] = str(new_prev_ctr_point[1] -
909
- curr_pos[1] + new_curr_pos[1])
910
- substructure[cmd_idx][1] = str(new_ctr_point[0])
911
- substructure[cmd_idx][2] = str(new_ctr_point[1])
912
-
913
- curr_pos = new_curr_pos
914
- new_curr_pos = new_new_curr_pos
915
-
916
- # print('0',substructures)
917
- return svg_template.format(' '.join([' '.join(' '.join(cmd) for cmd in s)
918
- for s in substructures]))
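-
- # In short, postprocess makes two cosmetic passes: (1) snap each subpath's
- # endpoint back onto its start point when the two are within dist_thresh, and
- # (2) mirror-align adjacent cubic control points whose directions are nearly
- # opposite (cosine in (-1, -0.95)), which smooths out visible kinks.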
919
-
920
-
921
-
922
-
923
-
924
- # def get_means_stdevs(data_dir):
925
- # """Returns the means and stdev saved in data_dir."""
926
- # if data_dir not in means_stdevs:
927
- # with tf.gfile.Open(os.path.join(data_dir, 'mean.npz'), 'r') as f:
928
- # mean_npz = np.load(f)
929
- # with tf.gfile.Open(os.path.join(data_dir, 'stdev.npz'), 'r') as f:
930
- # stdev_npz = np.load(f)
931
- # means_stdevs[data_dir] = (mean_npz, stdev_npz)
932
- # return means_stdevs[data_dir]
933
-
934
-
935
-
936
-
937
- ###############
938
-
939
-
940
- def convert_to_svg(decoder_output, categorical=False):
941
- converted = []
942
- for example in decoder_output:
943
- converted.append(_vector_to_svg(example, True, categorical=categorical))
944
- return np.array(converted)
945
-
946
-
947
- def create_image_conversion_fn(max_outputs, categorical=False):
948
- """Binds the number of outputs to the image conversion fn (to svg or png)."""
949
- def convert_to_svg(decoder_output):
950
- converted = []
951
- for example in decoder_output:
952
- if len(converted) == max_outputs:
953
- break
954
- converted.append(_vector_to_svg(example, True, categorical=categorical))
955
- return np.array(converted)
956
-
957
- return convert_to_svg
958
-
959
-
960
- ################### UTILS FOR CREATING TF SUMMARIES ##########################
961
- def _make_encoded_image(img_tensor):
962
- pil_img = Image.fromarray(np.squeeze(img_tensor * 255).astype(np.uint8), mode='L')
963
- buff = io.BytesIO()
964
- pil_img.save(buff, format='png')
965
- encoded_image = buff.getvalue()
966
- return encoded_image
967
-
968
-
969
- ################### CHECK GLYPH/PATH VALID ##############################################
970
- def is_valid_glyph(g):
971
- is_09 = 48 <= g['uni'] <= 57
972
- is_capital_az = 65 <= g['uni'] <= 90
973
- is_az = 97 <= g['uni'] <= 122
974
- is_valid_dims = g['width'] != 0 and g['vwidth'] != 0
975
- return (is_09 or is_capital_az or is_az) and is_valid_dims
976
-
977
-
978
- def is_valid_path(pathunibfp):
979
- # print(len(pathunibfp[0]))
980
- if len(pathunibfp[0])>70:
981
- print("!!!more than 400",len(pathunibfp[0]))
982
- # sys.exit()
983
- return pathunibfp[0] and len(pathunibfp[0]) <= 70,len(pathunibfp[0])
984
-
985
-
986
- ################### DATASET PROCESSING #######################################
987
- def convert_to_path(g):
988
- """Converts SplineSet in SFD font to str path."""
989
- path = _sfd_to_path_list(g)
990
- path = _add_missing_cmds(path, remove_zs=False)
991
- path = _normalize_based_on_viewbox(path, '0 0 {} {}'.format(g['width'], g['vwidth']))
992
- return path, g['uni'], g['binary_fp']
993
- def convert_simple_vector_to_path(seq):
994
- path=[]
995
- for i in range(seq.shape[0]):
996
- # seq_i = seq[i]
997
- path_i=[]
998
- cmd = np.argmax(seq[i][:4])
999
- # args = seq[i][4:]
1000
- p0 = seq[i][4:6]
1001
- p1 = seq[i][6:8]
1002
- p2 = seq[i][8:10]
1003
- if cmd == 0:
1004
- break
1005
- elif cmd==1:
1006
- path_i.append('M')
1007
- path_i.append(str(p2[0]))
1008
- path_i.append(str(p2[1]))
1009
- elif cmd==2:
1010
- path_i.append('L')
1011
- path_i.append(str(p2[0]))
1012
- path_i.append(str(p2[1]))
1013
- elif cmd==3:
1014
- path_i.append('C')
1015
- path_i.append(str(p0[0]))
1016
- path_i.append(str(p0[1]))
1017
- path_i.append(str(p1[0]))
1018
- path_i.append(str(p1[1]))
1019
- path_i.append(str(p2[0]))
1020
- path_i.append(str(p2[1]))
1021
- else:
1022
- print("wrong!!! to path")
1023
- sys.exit()
1024
- path.append(path_i)
1025
- return path
1026
- # print("jjj")
1027
- def clockwise(seq):
1028
- #pdb.set_trace()
1029
- path=convert_simple_vector_to_path(seq)
1030
- path = _canonicalize(path)
1031
- final = {}
1032
- final['rendered'] = _per_step_render(path, absolute=True)
1033
- vector = _path_to_vector(path, categorical=True)
1034
- vector = np.array(vector)
1035
- # print(vector.shape,vector[:,9])# note vector: 12,30
1036
-
1037
- vector = np.concatenate([np.take(vector, [0, 4, 5, 9], axis=-1), vector[..., -6:]], axis=-1)
1038
- final['seq_len'] = np.shape(vector)[0]
1039
- vector = _append_eos(vector.tolist(), True, 10)
1040
- final['sequence'] = np.concatenate((vector, np.zeros(((70 - final['seq_len']), 10))), 0)
1041
- final['rendered'] = np.reshape(final['rendered'][..., 0], [64 * 64]).astype(np.float32).tolist()
1042
- return final
1043
-
1044
-
1045
- def create_example(pathunibfp):
1046
- """Bulk of dataset processing. Converts str path to np array"""
1047
- path, uni, binary_fp = pathunibfp
1048
- final = {}
1049
-
1050
- # zoom out
1051
- path = _zoom_out(path)
1052
- # make clockwise
1053
- path = _canonicalize(path)
1054
-
1055
- # render path for training
1056
- final['rendered'] = _per_step_render(path, absolute=True)
1057
-
1058
- # make path relative
1059
- #path = _make_relative(path) # note: skip the relative conversion, coordinates stay absolute
1060
- # convert to vector
1061
- vector = _path_to_vector(path, categorical=True)
1062
-
1063
-
1064
-
1065
- # path2 = _vector_to_path(vector)# note vector转成path
1066
- #print(path2)
1067
- #print(path==path2)
1068
-
1069
- vector = np.array(vector)
1070
- # print(vector.shape,vector[:,9])# note vector: 12,30
1071
- vector = np.concatenate([np.take(vector, [0, 4, 5, 9], axis=-1), vector[..., -6:]], axis=-1)
1072
-
1073
-
1074
-
1075
- #path2 = _vector_to_path(vector)
1076
- #print(path,"\nhhh",path2)
1077
-
1078
-
1079
- # print("hhh",vector)
1080
- # print(render(vector))
1081
- # sys.exit()
1082
- # count some stats
1083
- final['seq_len'] = np.shape(vector)[0]
1084
- # final['class'] = int(_map_uni_to_alphanum(uni))
1085
- final['class'] = int(_map_uni_to_alpha(uni))
1086
- final['binary_fp'] = str(binary_fp)
1087
-
1088
- # append eos
1089
- vector = _append_eos(vector.tolist(), True, 10)
1090
-
1091
- # pad path to 70 + 1 (with eos)
1092
- # pdb.set_trace()
1093
- # if final['seq_len']>50:
1094
-
1095
- # print( final['seq_len'])
1096
- final['sequence'] = np.concatenate((vector, np.zeros(((70 - final['seq_len']), 10))), 0)
1097
- #seq = final['sequence']
1098
-
1099
- # new_path = convert_simple_vector_to_path(seq)
1100
- # print(new_path,path2==new_path)
1101
-
1102
- # sys.exit()
1103
- # make pure list:
1104
- # use last channel only
1105
- final['rendered'] = np.reshape(final['rendered'][..., 0], [64 * 64]).astype(np.float32).tolist()
1106
- final['sequence'] = np.reshape(final['sequence'], [71 * 10]).astype(np.float32).tolist()
1107
- final['class'] = np.reshape(final['class'], [1]).astype(np.int64).tolist()
1108
- final['seq_len'] = np.reshape(final['seq_len'], [1]).astype(np.int64).tolist()
1109
- return final
1110
-
1111
-
1112
- def mean_to_example(mean_stdev):
1113
- """Converts the found mean and stdev to example."""
1114
- # mean_stdev is a dict
1115
- mean_stdev['mean'] = np.reshape(mean_stdev['mean'], [10]).astype(np.float32).tolist()
1116
- mean_stdev['variance'] = np.reshape(mean_stdev['variance'], [10]).astype(np.float32).tolist()
1117
- mean_stdev['stddev'] = np.reshape(mean_stdev['stddev'], [10]).astype(np.float32).tolist()
1118
- mean_stdev['count'] = np.reshape(mean_stdev['count'], [1]).astype(np.int64).tolist()
1119
- return mean_stdev
1120
-
1121
-
1122
- ################### MEAN/STDEV ACCUMULATOR ##################################
1123
- class MeanStddev:
1124
- """Accumulator to compute the mean/stdev of svg commands."""
1125
-
1126
- def create_accumulator(self):
1127
- curr_sum = np.zeros([10])
1128
- sum_sq = np.zeros([10])
1129
- return (curr_sum, sum_sq, 0) # x, x^2, count
1130
-
1131
- def add_input(self, sum_count, new_input):
1132
- (curr_sum, sum_sq, count) = sum_count
1133
- # new_input is a dict with keys = ['seq_len', 'sequence']
1134
- new_seq_len = new_input['seq_len'][0] # 'seq_len' is a list holding a single int
1135
- assert isinstance(new_seq_len, int), print(type(new_seq_len))
1136
-
1137
- # remove padding and eos from sequence
1138
- assert isinstance(new_input['sequence'], list), print(type(new_input['sequence']))
1139
- new_input_np = np.reshape(np.array(new_input['sequence']), [-1, 10])
1140
- assert isinstance(new_input_np, np.ndarray), print(type(new_input_np))
1141
- assert new_input_np.shape[0] >= new_seq_len
1142
- new_input_np = new_input_np[:new_seq_len, :]
1143
-
1144
- # accumulate new_sum and new_sum_sq
1145
- new_sum = np.sum([curr_sum, np.sum(new_input_np, axis=0)], axis=0)
1146
- new_sum_sq = np.sum([sum_sq, np.sum(np.power(new_input_np, 2), axis=0)],
1147
- axis=0)
1148
- return new_sum, new_sum_sq, count + new_seq_len
1149
-
1150
- def merge_accumulators(self, accumulators):
1151
- curr_sums, sum_sqs, counts = list(zip(*accumulators))
1152
- return np.sum(curr_sums, axis=0), np.sum(sum_sqs, axis=0), np.sum(counts)
1153
-
1154
- def extract_output(self, sum_count):
1155
- (curr_sum, curr_sum_sq, count) = sum_count
1156
- if count:
1157
- mean = np.divide(curr_sum, count)
1158
- variance = np.divide(curr_sum_sq, count) - np.power(mean, 2)
1159
- # -ve value could happen due to rounding
1160
- variance = np.max([variance, np.zeros(np.shape(variance))], axis=0)
1161
- stddev = np.sqrt(variance)
1162
- return {
1163
- 'mean': mean,
1164
- 'variance': variance,
1165
- 'stddev': stddev,
1166
- 'count': count
1167
- }
1168
- else:
1169
- return {
1170
- 'mean': float('NaN'),
1171
- 'variance': float('NaN'),
1172
- 'stddev': float('NaN'),
1173
- 'count': 0
1174
- }
 
1
+
2
+ # Copyright 2020 The Magenta Authors.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ import pdb
16
+ # Lint as: python3
17
+ """Defines the Material Design Icons Problem."""
18
+ import io
19
+ import numpy as np
20
+ import re
21
+
22
+ from PIL import Image
23
+ from itertools import zip_longest
24
+ from skimage import draw
25
+ import sys
26
+
27
+ SVG_PREFIX_BIG = ('<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="'
28
+ 'http://www.w3.org/1999/xlink" width="256px" height="256px"'
29
+ ' style="-ms-transform: rotate(360deg); -webkit-transform:'
30
+ ' rotate(360deg); transform: rotate(360deg);" '
31
+ 'preserveAspectRatio="xMidYMid meet" viewBox="0 0 24 24">')
32
+ PATH_PREFIX_1 = '<path d="'
33
+ PATH_POSFIX_1 = '" fill="currentColor"/>'
34
+ SVG_POSFIX = '</svg>'
35
+
36
+ NUM_ARGS = {'v': 1, 'V': 1, 'h': 1, 'H': 1, 'a': 7, 'A': 7, 'l': 2, 'L': 2,
37
+ 't': 2, 'T': 2, 'c': 6, 'C': 6, 'm': 2, 'M': 2, 's': 4, 'S': 4,
38
+ 'q': 4, 'Q': 4, 'z': 0}
39
+ # in order of arg complexity, with absolutes clustered
40
+ # recall we don't handle all commands (see docstring)
41
+
42
+ #note args:
43
+ # v, h: vertical / horizontal lines
44
+ # a: elliptical arc
45
+ # l: lineto
46
+ # t: smooth quadratic Bézier curveto
47
+ # c: curveto
48
+ # m: moveto
49
+ # s: smooth curveto
50
+ # Q: quadratic Bézier curve
51
+ # z: closepath
52
+ #CMDS_LIST = 'zhvmltsqcaHVMLTSQCA'
53
+ CMDS_LIST = 'zHVMLTSQCAhvmltsqca'
54
+ CMD_MAPPING = {cmd: i for i, cmd in enumerate(CMDS_LIST)}
55
+
56
+ FEATURE_DIM = 10
57
+
58
+
59
+ ############################### GENERAL UTILS #################################
60
+ def grouper(iterable, batch_size, fill_value=None):
61
+ """Helper method for returning batches of size batch_size of a dataset."""
62
+ # grouper('ABCDEF', 3) -> 'ABC', 'DEF'
63
+ args = [iter(iterable)] * batch_size
64
+ return zip_longest(*args, fillvalue=fill_value)
65
+
66
+
67
+ def _map_uni_to_alphanum(uni):
68
+ """Maps [0-9 A-Z a-z] to numbers 0-62."""
69
+ if 48 <= uni <= 57:
70
+ return uni - 48
71
+ elif 65 <= uni <= 90:
72
+ return uni - 65 + 10
73
+ return uni - 97 + 36
74
+
75
+
76
+ def _map_uni_to_alpha(uni):
77
+ """Maps [A-Z a-z] to numbers 0-52."""
78
+ if 65 <= uni <= 90:
79
+ return uni - 65
80
+ return uni - 97 + 26
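+
+ # e.g. _map_uni_to_alpha(ord('A')) == 0, _map_uni_to_alpha(ord('Z')) == 25,
+ # _map_uni_to_alpha(ord('a')) == 26, _map_uni_to_alpha(ord('z')) == 51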
81
+
82
+
83
+ ############# UTILS FOR CONVERTING SFD/SPLINESETS TO SVG PATHS ################
84
+ def _get_spline(sfd):
85
+ if 'SplineSet' not in sfd:
86
+ return ''
87
+ pro = sfd[sfd.index('SplineSet') + 10:] # skip 'SplineSet' (9 chars) plus the following newline
88
+ pro = pro[:pro.index('EndSplineSet')]
89
+ return pro
90
+
91
+
92
+ def _spline_to_path_list(spline, height, replace_with_prev=False):
93
+ """Converts SplineSet to a list of tokenized commands in svg path."""
94
+ path = []
95
+ prev_xy = []
96
+ for line in spline.splitlines():
97
+ if not line:
98
+ continue
99
+ tokens = line.split(' ')
100
+ cmd = tokens[-2]
101
+ if cmd not in 'cml':
102
+ # COMMAND NOT RECOGNIZED.
103
+ return []
104
+ # assert cmd in 'cml', 'Command not recognized: {}'.format(cmd)
105
+ args = tokens[:-2]
106
+ args = [float(x) for x in args if x]
107
+
108
+ if replace_with_prev and cmd in 'c':
109
+ args[:2] = prev_xy
110
+ prev_xy = args[-2:]
111
+
112
+ new_y_args = []
113
+ for i, a in enumerate(args):
114
+ if i % 2 == 1:
115
+ new_y_args.append((height - a))
116
+ else:
117
+ new_y_args.append((a))
118
+
119
+ path.append([cmd.upper()] + new_y_args)
120
+ return path
121
+
122
+
123
+ def _sfd_to_path_list(single, replace_with_prev=False):
124
+ """Converts the given SFD glyph into a path."""
125
+ return _spline_to_path_list(_get_spline(single['sfd']), single['vwidth'], replace_with_prev)
126
+
127
+
128
+ #################### UTILS FOR PROCESSING TOKENIZED PATHS #####################
129
+ def _add_missing_cmds(path, remove_zs=False):
130
+ """Adds missing cmd tags to the commands in the svg."""
131
+ # For instance, the command 'a' takes 7 arguments, but some SVGs declare:
132
+ # a 1 2 3 4 5 6 7 8 9 10 11 12 13 14
133
+ # Which is 14 arguments. This function converts the above to the equivalent:
134
+ # a 1 2 3 4 5 6 7 a 8 9 10 11 12 13 14
135
+ #
136
+ # Note: if remove_zs is True, this also removes any occurrences of z commands.
137
+ new_path = []
138
+ for cmd in path:
139
+ if not remove_zs or cmd[0] not in 'Zz':
140
+ for new_cmd in add_missing_cmd(cmd):
141
+ new_path.append(new_cmd)
142
+ return new_path
143
+
144
+
145
+ def add_missing_cmd(command_list):
146
+ """Adds missing cmd tags to the given command list."""
147
+ # E.g.: given:
148
+ # ['a', '0', '0', '0', '0', '0', '0', '0',
149
+ # '0', '0', '0', '0', '0', '0', '0']
150
+ # Converts to:
151
+ # [['a', '0', '0', '0', '0', '0', '0', '0'],
152
+ # ['a', '0', '0', '0', '0', '0', '0', '0']]
153
+ # And returns the list of completed commands.
154
+ cmd_tag = command_list[0]
155
+ args = command_list[1:]
156
+
157
+ final_cmds = []
158
+ for arg_batch in grouper(args, NUM_ARGS[cmd_tag]):
159
+ final_cmds.append([cmd_tag] + list(arg_batch))
160
+
161
+ if not final_cmds:
162
+ # command has no args (e.g.: 'z')
163
+ final_cmds = [[cmd_tag]]
164
+
165
+ return final_cmds
166
+
167
+
168
+ def _normalize_args(arglist, norm, add=None, flip=False):
169
+ """Normalize the given args with the given norm value."""
170
+ new_arglist = []
171
+ for i, arg in enumerate(arglist):
172
+ new_arg = float(arg)
173
+
174
+ if add is not None:
175
+ add_to_x, add_to_y = add
176
+
177
+ # This argument is an x-coordinate if even, y-coordinate if odd
178
+ # except when flip == True
179
+ if i % 2 == 0:
180
+ new_arg += add_to_y if flip else add_to_x
181
+ else:
182
+ new_arg += add_to_x if flip else add_to_y
183
+
184
+ new_arglist.append(str(24 * new_arg / norm))
185
+ return new_arglist
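+
+ # Worked number (a sketch): with norm=1000 (a typical em size) and no offset,
+ # an argument of 500 becomes str(24 * 500 / 1000) == '12.0', i.e. halfway
+ # across the 24-unit target viewbox.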
186
+
187
+
188
+ def _normalize_based_on_viewbox(path, viewbox):
189
+ """Normalizes all args in a path to a standard 24x24 viewbox."""
190
+ # Each SVG lives in a 2D plane. The viewbox determines the region of that
191
+ # plane that gets rendered. For instance, some designers may work with a
192
+ # viewbox that's 24x24, others with one that's 100x100, etc.
193
+
194
+ # Suppose I design the letter "h" in the Arial style using a 100x100
195
+ # viewbox (let's call it icon A). Let's suppose the icon has height 75. Then,
196
+ # I design the same character using a 20x20 viewbox (call this icon B), with
197
+ # height 15 (=75% of 20). This means that, when rendered, both icons will look
198
+ # exactly the same, but the scale of the commands each icon is using is
199
+ # different. For instance, if icon A has a command like "lineTo 100 100", the
200
+ # equivalent command in icon B will be "lineTo 20 20".
201
+
202
+ # In order to avoid this problem and bring all real values to the same scale,
203
+ # I scale all icons' commands to use a 24x24 viewbox. This function does this:
204
+ # it converts a path that exists in the given viewbox into a standard 24x24
205
+ # viewbox.
206
+ viewbox = viewbox.split(' ')
207
+ norm = max(int(viewbox[-1]), int(viewbox[-2]))
208
+
209
+ if int(viewbox[-1]) > int(viewbox[-2]):
210
+ add_to_y = 0
211
+ add_to_x = abs(int(viewbox[-1]) - int(viewbox[-2])) / 2
212
+ else:
213
+ add_to_y = abs(int(viewbox[-1]) - int(viewbox[-2])) / 2
214
+ add_to_x = 0
215
+
216
+ new_path = []
217
+ for command in path:
218
+ if command[0] == 'a':
219
+ new_path.append([command[0]] + _normalize_args(command[1:3], norm)
220
+ + command[3:6] + _normalize_args(command[6:], norm))
221
+ elif command[0] == 'A':
222
+ new_path.append([command[0]] + _normalize_args(command[1:3], norm)
223
+ + command[3:6] + _normalize_args(command[6:], norm, add=(add_to_x, add_to_y)))
224
+ elif command[0] == 'V':
225
+ new_path.append([command[0]] + _normalize_args(command[1:], norm, add=(add_to_x, add_to_y), flip=True))
226
+ elif command[0] == command[0].upper():
227
+ new_path.append([command[0]] + _normalize_args(command[1:], norm, add=(add_to_x, add_to_y)))
228
+ elif command[0] in 'zZ':
229
+ new_path.append([command[0]])
230
+ else:
231
+ new_path.append([command[0]] + _normalize_args(command[1:], norm))
232
+
233
+ return new_path
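+
+ # Centering sketch: for a '0 0 1000 1500' viewbox, norm = 1500 and the shorter
+ # x-axis is padded by abs(1500 - 1000) / 2 = 250 source units before scaling,
+ # keeping the glyph centered inside the square 24x24 box.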
234
+
235
+
236
+ def _convert_args(args, curr_pos, cmd):
237
+ """Converts given args to relative values."""
238
+ # NOTE: glyphs only use a very small subset of commands (L, C, M, and Z -- I
239
+ # believe). So I'm not handling A and H for now.
240
+ if cmd in 'AH':
241
+ raise NotImplementedError('These commands have >6 args (not supported).')
242
+
243
+ new_args = []
244
+ for i, arg in enumerate(args):
245
+ x_or_y = i % 2
246
+ if cmd == 'H':
247
+ x_or_y = (i + 1) % 2
248
+ new_args.append(str(float(arg) - curr_pos[x_or_y]))
249
+
250
+ return new_args
251
+
252
+
253
+ def _update_curr_pos(curr_pos, cmd, start_of_path):
254
+ """Calculate the position of the pen after cmd is applied."""
255
+ if cmd[0] in 'ml':
256
+ curr_pos = [curr_pos[0] + float(cmd[1]), curr_pos[1] + float(cmd[2])]
257
+ if cmd[0] == 'm':
258
+ start_of_path = curr_pos
259
+ elif cmd[0] in 'z':
260
+ curr_pos = start_of_path
261
+ elif cmd[0] in 'h':
262
+ curr_pos = [curr_pos[0] + float(cmd[1]), curr_pos[1]]
263
+ elif cmd[0] in 'v':
264
+ curr_pos = [curr_pos[0], curr_pos[1] + float(cmd[1])]
265
+ elif cmd[0] in 'ctsqa':
266
+ curr_pos = [curr_pos[0] + float(cmd[-2]), curr_pos[1] + float(cmd[-1])]
267
+
268
+ return curr_pos, start_of_path
269
+
270
+
271
+ def _make_relative(cmds):
272
+ """Convert commands in a path to relative positioning."""
273
+ curr_pos = (0.0, 0.0)
274
+ start_of_path = (0.0, 0.0)
275
+ new_cmds = []
276
+ for cmd in cmds:
277
+ if cmd[0].lower() == cmd[0]:
278
+ new_cmd = cmd
279
+ elif cmd[0].lower() == 'z':
280
+ new_cmd = [cmd[0].lower()]
281
+ else:
282
+ new_cmd = [cmd[0].lower()] + _convert_args(cmd[1:], curr_pos, cmd=cmd[0])
283
+ new_cmds.append(new_cmd)
284
+ curr_pos, start_of_path = _update_curr_pos(curr_pos, new_cmd, start_of_path)
285
+ return new_cmds
286
+
287
+
288
+ def _is_to_left_of(pt1, pt2):
289
+ pt1_norm = (pt1[0]**2 + pt1[1]**2)
290
+ pt2_norm = (pt2[0]**2 + pt2[1]**2)
291
+ return pt1[1] < pt2[1] or (pt1_norm == pt2_norm and pt1[0] < pt2[0])
292
+
293
+
294
+ def _get_leftmost_point(path):
295
+ """Returns the leftmost, topmost point of the path."""
296
+ leftmost = (float('inf'), float('inf'))
297
+ idx = -1
298
+
299
+ for i, cmd in enumerate(path):
300
+ if len(cmd) > 1:
301
+ endpoint = cmd[-2:]
302
+ if _is_to_left_of(endpoint, leftmost):
303
+ leftmost = endpoint
304
+ idx = i
305
+
306
+ return leftmost, idx
307
+
308
+
309
+ def _separate_substructures(path):
310
+ """Returns a list of subpaths, each representing substructures the glyph."""
311
+ substructures = []
312
+ curr = []
313
+ for cmd in path:
314
+ if cmd[0] in 'mM' and curr:
315
+ substructures.append(curr)
316
+ curr = []
317
+ curr.append(cmd)
318
+ if curr:
319
+ substructures.append(curr)
320
+ return substructures
321
+
322
+
323
+ def _is_clockwise(subpath):
324
+ """Returns whether the given subpath is clockwise-oriented."""
325
+ pts = [cmd[-2:] for cmd in subpath]
326
+ det = 0
327
+ for i in range(len(pts) - 1):
328
+ det += np.linalg.det(pts[i:i + 2])
329
+ return det > 0
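+
+ # Shoelace-style orientation test: each 2x2 determinant is the cross product
+ # x_i*y_{i+1} - x_{i+1}*y_i, so det approximates twice the signed area of the
+ # polygon traced by the command endpoints; with the y-down glyph coordinates
+ # used here, a positive total is treated as clockwise.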
330
+
331
+
332
+ def _make_clockwise(subpath):
333
+ """Inverts the cardinality of the given subpath."""
334
+ new_path = [subpath[0]]
335
+ other_cmds = list(reversed(subpath[1:]))
336
+ for i, cmd in enumerate(other_cmds):
337
+ if i + 1 == len(other_cmds):
338
+ where_we_were = subpath[0][-2:]
339
+ else:
340
+ where_we_were = other_cmds[i + 1][-2:]
341
+
342
+ if len(cmd) > 3:
343
+ new_cmd = [cmd[0], cmd[3], cmd[4], cmd[1], cmd[2],
344
+ where_we_were[0], where_we_were[1]]
345
+ else:
346
+ new_cmd = [cmd[0], where_we_were[0], where_we_were[1]]
347
+
348
+ new_path.append(new_cmd)
349
+ return new_path
350
+
351
+
352
+ def _canonicalize(path):
353
+ """Makes all paths start at top left, and go clockwise first."""
354
+ # convert args to floats
355
+ #print(len(path),path)
356
+
357
+ path = [[x[0]] + list(map(float, x[1:])) for x in path]
358
+ # print(len(path),path)
359
+
360
+ # _canonicalize each subpath separately
361
+ #pdb.set_trace()
362
+
363
+ new_substructures = []
364
+ for subpath in _separate_substructures(path):
365
+ # print(subpath,"\n")
366
+ leftmost_point, leftmost_idx = _get_leftmost_point(subpath)
367
+ reordered = ([['M', leftmost_point[0], leftmost_point[1]]] + subpath[leftmost_idx + 1:] + subpath[1:leftmost_idx + 1])
368
+ new_substructures.append((reordered, leftmost_point))
369
+
370
+ # sys.exit()
371
+ new_path = []
372
+ first_substructure_done = False
373
+ should_flip_cardinality = False
374
+ for sp, _ in sorted(new_substructures, key=lambda x: (x[1][1], x[1][0])):
375
+ if not first_substructure_done:
376
+ # we're looking at the first substructure now, we can determine whether we
377
+ # will flip the cardinality (winding direction) of the whole icon or not
378
+ should_flip_cardinality = not _is_clockwise(sp)
379
+ first_substructure_done = True
380
+
381
+ if should_flip_cardinality:
382
+ sp = _make_clockwise(sp)
383
+
384
+ new_path.extend(sp)
385
+
386
+ # convert args to strs
387
+ path = [[x[0]] + list(map(str, x[1:])) for x in new_path]
388
+ return path
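+ # after canonicalization every glyph therefore has a deterministic command
+ # order: substructures sorted top-to-bottom (then left-to-right), each one
+ # starting with an absolute M at its topmost-leftmost point and wound clockwise.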
389
+
390
+
391
+ # ######### UTILS FOR CONVERTING TOKENIZED PATHS TO VECTORS ###########
392
+ def _path_to_vector(path, categorical=False):
393
+ """Converts path's commands to a series of vectors."""
394
+ # Notes:
395
+ # - The SimpleSVG dataset does not have any 't', 'q', 'Z', 'T', or 'Q'.
396
+ # Thus, we don't handle those here.
397
+ # - We also removed all 'z's.
398
+ # - The x-axis-rotation argument to a commands is always 0 in this
399
+ # dataset, so we ignore it
400
+
401
+ # Many commands have args that correspond to args in other commands.
402
+ # v __,__ _______________ ______________,_________ __,__ __,__ _,y
403
+ # h __,__ _______________ ______________,_________ __,__ __,__ x,_
404
+ # z __,__ _______________ ______________,_________ __,__ __,__ _,_
405
+ # a rx,ry x-axis-rotation large-arc-flag,sweepflag __,__ __,__ x,y
406
+ # l __,__ _______________ ______________,_________ __,__ __,__ x,y
407
+ # c __,__ _______________ ______________,_________ x1,y1 x2,y2 x,y
408
+ # m __,__ _______________ ______________,_________ __,__ __,__ x,y
409
+ # s __,__ _______________ ______________,_________ __,__ x2,y2 x,y
410
+
411
+ # So each command will be converted to a vector where the dimension is the
412
+ # minimal number of arguments to all commands:
413
+ # [rx, ry, large-arc-flag, sweepflag, x1, y1, x2, y2, x, y]
414
+ # If a command does not output a certain arg, it is set to 0.
415
+ # "l 5,5" becomes [0, 0, 0, 0, 0, 0, 0, 0, 5, 5]
416
+
417
+ # Also note, as of now we also output an extra dimension at index 0, which
418
+ # indicates which command is being outputted (integer).
419
+ new_path = []
420
+ for cmd in path:
421
+ new_path.append(_cmd_to_vector(cmd, categorical=categorical))
422
+ return new_path
423
+
424
+
425
+ def _cmd_to_vector(cmd_list, categorical=False):
426
+ """Converts the given command (given as a list) into a vector.
427
+ NUM_ARGS = {'v': 1, 'V': 1, 'h': 1, 'H': 1, 'a': 7, 'A': 7, 'l': 2, 'L': 2,
428
+ 't': 2, 'T': 2, 'c': 6, 'C': 6, 'm': 2, 'M': 2, 's': 4, 'S': 4,
429
+ 'q': 4, 'Q': 4, 'z': 0}
430
+
431
+ CMDS_LIST = 'zhvmltsqcaHVMLTSQCA'
432
+ CMD_MAPPING = {cmd: i for i, cmd in enumerate(CMDS_LIST)}
433
+ """
434
+ # For description of how this conversion happens, see
435
+ # _path_to_vector docstring.
436
+ cmd = cmd_list[0]
437
+ args = cmd_list[1:]
438
+
439
+ if not categorical:
440
+ # integer, for MSE
441
+ command = [float(CMD_MAPPING[cmd])]
442
+ else:
443
+ # one hot + 1 dim for EOS.
444
+ command = [0.0] * (len(CMDS_LIST) + 1) # one hot over the 19 commands, plus 1 dim for EOS
445
+ command[CMD_MAPPING[cmd] + 1] = 1.0
446
+
447
+ arguments = [0.0] * 10
448
+ if cmd in 'hH':
449
+ arguments[8] = float(args[0]) # x
450
+ elif cmd in 'vV':
451
+ arguments[9] = float(args[0]) # y
452
+ elif cmd in 'mMlLtT':
453
+ arguments[8] = float(args[0]) # x
454
+ arguments[9] = float(args[1]) # y
455
+ elif cmd in 'sSqQ':
456
+ arguments[6] = float(args[0]) # x2
457
+ arguments[7] = float(args[1]) # y2
458
+ arguments[8] = float(args[2]) # x
459
+ arguments[9] = float(args[3]) # y
460
+ elif cmd in 'cC':
461
+ arguments[4] = float(args[0]) # x1
462
+ arguments[5] = float(args[1]) # y1
463
+ arguments[6] = float(args[2]) # x2
464
+ arguments[7] = float(args[3]) # y2
465
+ arguments[8] = float(args[4]) # x
466
+ arguments[9] = float(args[5]) # y
467
+ elif cmd in 'aA':
468
+ arguments[0] = float(args[0]) # rx
469
+ arguments[1] = float(args[1]) # ry
470
+ # we skip x-axis-rotation
471
+ arguments[2] = float(args[3]) # large-arc-flag
472
+ arguments[3] = float(args[4]) # sweep-flag
473
+ # a does not have x1, y1, x2, y2 args
474
+ arguments[8] = float(args[5]) # x
475
+ arguments[9] = float(args[6]) # y
476
+
477
+ return command + arguments
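+ # e.g. with categorical=False, ['l', '5', '5'] maps to
+ # [4.0, 0, 0, 0, 0, 0, 0, 0, 0, 5.0, 5.0]: CMD_MAPPING['l'] == 4 in
+ # CMDS_LIST, followed by the 10 argument slots with only x and y filled.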
478
+
479
+
480
+ ################## UTILS FOR RENDERING PATH INTO IMAGE #################
481
+ def _cubicbezier(x0, y0, x1, y1, x2, y2, x3, y3, n=40):
482
+ """Return n points along cubiz bezier with given control points."""
483
+ # from http://rosettacode.org/wiki/Bitmap/B%C3%A9zier_curves/Cubic
484
+ pts = []
485
+ for i in range(n + 1):
486
+ t = float(i) / float(n)
487
+ a = (1. - t)**3
488
+ b = 3. * t * (1. - t)**2
489
+ c = 3.0 * t**2 * (1.0 - t)
490
+ d = t**3
491
+
492
+ x = float(a * x0 + b * x1 + c * x2 + d * x3)
493
+ y = float(a * y0 + b * y1 + c * y2 + d * y3)
494
+ pts.append((x, y))
495
+ return list(zip(*pts))
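+ # a, b, c, d above are the Bernstein basis of the cubic bezier
+ # B(t) = (1-t)^3 P0 + 3t(1-t)^2 P1 + 3t^2(1-t) P2 + t^3 P3,
+ # sampled at n + 1 evenly spaced values of t in [0, 1].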
496
+
497
+
498
+ def _update_pos(curr_pos, end_pos, absolute):
499
+ if absolute:
500
+ return end_pos
501
+ return curr_pos[0] + end_pos[0], curr_pos[1] + end_pos[1]
502
+
503
+
504
+ def constant_color(*unused_args):
505
+ return np.array([255, 255, 255])
506
+
507
+
508
+ def _render_cubic(canvas, curr_pos, c_args, absolute, color):
509
+ """Renders a cubic bezier curve in the given canvas."""
510
+ if not absolute:
511
+ c_args[0] += curr_pos[0]
512
+ c_args[1] += curr_pos[1]
513
+ c_args[2] += curr_pos[0]
514
+ c_args[3] += curr_pos[1]
515
+ c_args[4] += curr_pos[0]
516
+ c_args[5] += curr_pos[1]
517
+ x, y = _cubicbezier(curr_pos[0], curr_pos[1],
518
+ c_args[0], c_args[1],
519
+ c_args[2], c_args[3],
520
+ c_args[4], c_args[5])
521
+ max_possible = len(canvas)
522
+ x = [int(round(x_)) for x_ in x]
523
+ y = [int(round(y_)) for y_ in y]
524
+
525
+ def within_range(x):
526
+ return 0 <= x < max_possible
527
+
528
+ filtered = [(x_, y_) for x_, y_ in zip(x, y)
529
+ if within_range(x_) and within_range(y_)]
530
+ if not filtered:
531
+ return
532
+ x, y = list(zip(*filtered))
533
+ canvas[y, x, :] = color
534
+
535
+
536
+ def _render_line(canvas, curr_pos, l_args, absolute, color):
537
+ """Renders a line in the given canvas."""
538
+ end_point = l_args
539
+ if not absolute:
540
+ end_point[0] += curr_pos[0]
541
+ end_point[1] += curr_pos[1]
542
+ rr, cc, val = draw.line_aa(int(curr_pos[0]), int(curr_pos[1]),
543
+ int(end_point[0]), int(end_point[1]))
544
+
545
+ max_possible = len(canvas)
546
+
547
+ def within_range(x):
548
+ return 0 <= x < max_possible
549
+
550
+ filtered = [(x, y, v) for x, y, v in zip(rr, cc, val)
551
+ if within_range(x) and within_range(y)]
552
+ if not filtered:
553
+ return
554
+ rr, cc, val = list(zip(*filtered))
555
+ val = [(v * color) for v in val]
556
+ canvas[cc, rr, :] = val
557
+
558
+
559
+ def _per_step_render(path, absolute=False, color=constant_color):
560
+ """Render the icon's edges, given its path."""
561
+ def to_canvas_size(l):
562
+ return [float(f) * (64. / 24.) for f in l]
563
+
564
+ canvas = np.zeros((64, 64, 3))
565
+ curr_pos = (0.0, 0.0)
566
+ for i, cmd in enumerate(path):
567
+ if not cmd:
568
+ continue
569
+ if cmd[0] in 'mM':
570
+ curr_pos = _update_pos(curr_pos, to_canvas_size(cmd[-2:]), absolute)
571
+ elif cmd[0] in 'cC':
572
+ _render_cubic(canvas, curr_pos, to_canvas_size(cmd[1:]), absolute, color(i, 55))
573
+ curr_pos = _update_pos(curr_pos, to_canvas_size(cmd[-2:]), absolute)
574
+ elif cmd[0] in 'lL':
575
+ _render_line(canvas, curr_pos, to_canvas_size(cmd[1:]), absolute, color(i, 55))
576
+ curr_pos = _update_pos(curr_pos, to_canvas_size(cmd[1:]), absolute)
577
+
578
+ return canvas
579
+
580
+
581
+ def _zoom_out(path_list, add_baseline=0., per=22):
582
+ """Makes glyph slightly smaller in viewbox, makes some descenders visible."""
583
+ # assumes tensor is already unnormalized, and in long form
584
+ new_path = []
585
+ for command in path_list:
586
+ args = []
587
+ is_even = False
588
+ for arg in command[1:]:
589
+ if is_even:
590
+ args.append(str(float(arg) - ((24. - per) / 24.) * 64. / 4.))
591
+ is_even = False
592
+ else:
593
+ args.append(str(float(arg) - add_baseline))
594
+ is_even = True
595
+ new_path.append([command[0]] + args)
596
+ return new_path
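+ # note: despite the name this is a pure translation; for the m/l/c commands
+ # used here every second argument (the y coordinates) shifts by
+ # ((24 - per) / 24) * 64 / 4, i.e. 4/3 px for the default per=22, while the
+ # x coordinates only move by add_baseline.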
597
+
598
+
599
+ ##################### UTILS FOR PROCESSING VECTORS ################
600
+ def _append_eos(sample, categorical, feature_dim):
601
+ if not categorical:
602
+ eos = -1 * np.ones(feature_dim)
603
+ else:
604
+ eos = np.zeros(feature_dim)
605
+ eos[0] = 1.0
606
+ sample.append(eos)
607
+ return sample
608
+
609
+
610
+ def _make_simple_cmds_long(out):
611
+ """Converts svg decoder output to format required by some render functions."""
612
+ # out has 10 dims
613
+ # the first 4 are respectively dims 0, 4, 5, 9 of the full 20-dim onehot vec
614
+ # the latter 6 are the 6 last dims of the 10-dim arg vec
615
+ shape_minus_dim = list(np.shape(out))[:-1]
616
+ # print("make? ",shape_minus_dim ) # [51]
617
+
618
+ return np.concatenate([out[..., :1], # [51,1] first feature dim of each of the 51 steps
619
+ np.zeros(shape_minus_dim + [3]),# [51,3]
620
+ out[..., 1:3], #[51,2]
621
+ np.zeros(shape_minus_dim + [3]),# [51,3]
622
+ out[..., 3:4],# [51,1]
623
+ np.zeros(shape_minus_dim + [14]),# [51,14]
624
+ out[..., 4:]], -1)# [51,6] the last 6 drawing arguments
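+ # this expands the 10-dim decoder output back to the 30-dim representation:
+ # 20 one-hot dims (only the EOS/m/l/c slots can be nonzero) followed by 10
+ # argument dims, of which the last six are x1, y1, x2, y2, x, y.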
625
+
626
+ def render(tensor, data_dir=None):
627
+ """Converts SVG decoder output into HTML svg."""
628
+ # undo normalization
629
+ # mean_npz, stdev_npz = get_means_stdevs(data_dir)
630
+ # tensor = (tensor * stdev_npz) + mean_npz
631
+
632
+ # convert to html
633
+ #print("before",tensor.shape)# 51, 10)
634
+ tensor = _make_simple_cmds_long(tensor)
635
+ # print("after",tensor.shape)#(51, 30)
636
+ # vector = np.squeeze(np.squeeze(tensor, 0), 2)
637
+ # print("1",tensor[0,:5])# (51, 30)
638
+ html = _vector_to_svg(tensor, stop_at_eos=True, categorical=True)
639
+ # print(html.shape)
640
+ # some aesthetic postprocessing
641
+ html = postprocess(html)
642
+ html = html.replace('256px', '50px')
643
+
644
+ return html
645
+
646
+ ################# UTILS FOR CONVERTING VECTORS TO SVGS ########################
647
+ # note: transform the decoded trg_seq into the common svg format: the decoded sequence becomes an HTML svg whose commands are ordered and all relatively positioned.
648
+ def _vector_to_svg(vectors, stop_at_eos=False, categorical=False):
649
+ """Tranforms a given vector to an svg string.
650
+
651
+ """
652
+ new_path = []
653
+ for vector in vectors:
654
+ if stop_at_eos:
655
+ if categorical:
656
+ try:
657
+ is_eos = np.argmax(vector[:len(CMDS_LIST) + 1]) == 0
658
+ except Exception:
659
+ raise Exception(vector)
660
+ else:
661
+ is_eos = vector[0] < -0.5
662
+
663
+ if is_eos:
664
+ break
665
+ new_path.append(' '.join(_vector_to_cmd(vector, categorical=categorical))) #
666
+ new_path = ' '.join(new_path) # join all commands into one path string, separated by spaces
667
+ return SVG_PREFIX_BIG + PATH_PREFIX_1 + new_path + PATH_POSFIX_1 + SVG_POSFIX
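+ # decoding stops at the first EOS step (one-hot index 0), so padded steps
+ # after the end of the glyph never reach the svg string.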
668
+
669
+ def _vector_to_path(vectors):
670
+ new_path = []
671
+ for vector in vectors:
672
+ #print(vector,"???")
673
+ new_path.append(_vector_to_cmd(vector,categorical=True)) #
674
+ #print(_vector_to_cmd(vector),"hhh")
675
+ # new_path = ' '.join(new_path) # join all commands into one path string, separated by spaces
676
+ return new_path
677
+
678
+ def _vector_to_cmd(vector, categorical=False, return_floats=False):
679
+ """Does the inverse transformation as _cmd_to_vector().
680
+ NUM_ARGS = {'v': 1, 'V': 1, 'h': 1, 'H': 1, 'a': 7, 'A': 7, 'l': 2, 'L': 2,
681
+ 't': 2, 'T': 2, 'c': 6, 'C': 6, 'm': 2, 'M': 2, 's': 4, 'S': 4,
682
+ 'q': 4, 'Q': 4, 'z': 0}
683
+
684
+ CMDS_LIST = 'zhvmltsqcaHVMLTSQCA'
685
+ CMD_MAPPING = {cmd: i for i, cmd in enumerate(CMDS_LIST)}
686
+
687
+ """
688
+ cast_fn = float if return_floats else str
689
+ if categorical:
690
+ # print(vector.shape,vector)# 30
691
+ #print("??",len(CMDS_LIST)) # 19
692
+ command = vector[:len(CMDS_LIST) + 1] # first 20 dims (one-hot command)
693
+ arguments = vector[len(CMDS_LIST) + 1:] # last 10 dims (arguments)
694
+ cmd_idx = np.argmax(command) - 1 # which command class this step belongs to
695
+
696
+ else:
697
+
698
+ command, arguments = vector[:1], vector[1:]
699
+ cmd_idx = int(round(command[0]))
700
+
701
+ if cmd_idx < -0.5:
702
+ # EOS
703
+ return []
704
+ if cmd_idx >= len(CMDS_LIST):
705
+ cmd_idx = len(CMDS_LIST) - 1
706
+
707
+ cmd = CMDS_LIST[cmd_idx]
708
+ cmd = cmd.upper()
709
+ cmd_list = [cmd]
710
+
711
+ if cmd in 'hH': # horizontal line: only an x coordinate
712
+ cmd_list.append(cast_fn(arguments[8])) # x
713
+ elif cmd in 'vV': # vertical line: only a y coordinate
714
+ cmd_list.append(cast_fn(arguments[9])) # y
715
+ elif cmd in 'mMlLtT':
716
+ cmd_list.append(cast_fn(arguments[8])) # x
717
+ cmd_list.append(cast_fn(arguments[9])) # y
718
+ elif cmd in 'sSqQ':
719
+ cmd_list.append(cast_fn(arguments[6])) # x2
720
+ cmd_list.append(cast_fn(arguments[7])) # y2
721
+ cmd_list.append(cast_fn(arguments[8])) # x
722
+ cmd_list.append(cast_fn(arguments[9])) # y
723
+ elif cmd in 'cC':
724
+ cmd_list.append(cast_fn(arguments[4])) # x1
725
+ cmd_list.append(cast_fn(arguments[5])) # y1
726
+ cmd_list.append(cast_fn(arguments[6])) # x2
727
+ cmd_list.append(cast_fn(arguments[7])) # y2
728
+ cmd_list.append(cast_fn(arguments[8])) # x
729
+ cmd_list.append(cast_fn(arguments[9])) # y
730
+ elif cmd in 'aA':
731
+ cmd_list.append(cast_fn(arguments[0])) # rx
732
+ cmd_list.append(cast_fn(arguments[1])) # ry
733
+ # x-axis-rotation is always 0
734
+ cmd_list.append(cast_fn('0'))
735
+ # the following two flags are binary.
736
+ cmd_list.append(cast_fn(1 if arguments[2] > 0.5 else 0)) # large-arc-flag
737
+ cmd_list.append(cast_fn(1 if arguments[3] > 0.5 else 0)) # sweep-flag
738
+ cmd_list.append(cast_fn(arguments[8])) # x
739
+ cmd_list.append(cast_fn(arguments[9])) # y
740
+
741
+ return cmd_list
742
+
743
+
744
+ ############## UTILS FOR CONVERTING SVGS/VECTORS TO IMAGES ###################
745
+
746
+ # From Infer notebook
747
+ start = ("""<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www."""
748
+ """w3.org/1999/xlink" width="256px" height="256px" style="-ms-trans"""
749
+ """form: rotate(360deg); -webkit-transform: rotate(360deg); transfo"""
750
+ """rm: rotate(360deg);" preserveAspectRatio="xMidYMid meet" viewBox"""
751
+ """="0 0 24 24"><path d=\"""")
752
+ end = """\" fill="currentColor"/></svg>"""
753
+
754
+ COMMAND_RX = re.compile("([MmLlHhVvCcSsQqTtAaZz])")
755
+ FLOAT_RX = re.compile("[-+]?[0-9]*\.?[0-9]+(?:[eE][-+]?[0-9]+)?") # noqa
756
+
757
+
758
+ def svg_html_to_path_string(svg):
759
+ return svg.replace(start, '').replace(end, '')
760
+
761
+
762
+ def _tokenize(pathdef):
763
+ """Returns each svg token from path list."""
764
+ # e.g.: 'm0.1-.5c0,6' -> 'm', '0.1', '-.5', 'c', '0', '6'
765
+ for x in COMMAND_RX.split(pathdef):
766
+ if x != '' and x in 'MmLlHhVvCcSsQqTtAaZz':
767
+ yield x
768
+ for token in FLOAT_RX.findall(x):
769
+ yield token
770
+
771
+
772
+ def path_string_to_tokenized_commands(path):
773
+ """Tokenizes the given path string.
774
+
775
+ E.g.:
776
+ Given M 0.5 0.5 l 0.25 0.25 z
777
+ Returns [['M', '0.5', '0.5'], ['l', '0.25', '0.25'], ['z']]
778
+ """
779
+ new_path = []
780
+ current_cmd = []
781
+ for token in _tokenize(path):
782
+ if len(current_cmd) > 0:
783
+ if token in 'MmLlHhVvCcSsQqTtAaZz':
784
+ # cmd ended, convert to vector and add to new_path
785
+ new_path.append(current_cmd)
786
+ current_cmd = [token]
787
+ else:
788
+ # add arg to command
789
+ current_cmd.append(token)
790
+ else:
791
+ # add to start new cmd
792
+ current_cmd.append(token)
793
+
794
+ if current_cmd:
795
+ # process command still unprocessed
796
+ new_path.append(current_cmd)
797
+
798
+ return new_path
799
+
800
+
801
+ def separate_substructures(tokenized_commands):
802
+ """Returns a list of SVG substructures."""
803
+ # every moveTo command starts a new substructure
804
+ # an SVG substructure is a subpath that closes on itself
805
+ # such as the outer and the inner edge of the character `o`
806
+ substructures = []
807
+ curr = []
808
+ for cmd in tokenized_commands:
809
+ if cmd[0] in 'mM' and len(curr) > 0:
810
+ substructures.append(curr)
811
+ curr = []
812
+ curr.append(cmd)
813
+ if len(curr) > 0:
814
+ substructures.append(curr)
815
+ return substructures
816
+
817
+
818
+ def postprocess(svg, dist_thresh=2., skip=False):
819
+ path = svg_html_to_path_string(svg)
820
+ #print(svg)
821
+ svg_template = svg.replace(path, '{}')
822
+ tokenized_commands = path_string_to_tokenized_commands(path)
823
+
824
+ def dist(a, b):
825
+ return np.sqrt((float(a[0]) - float(b[0]))**2 + (float(a[1]) - float(b[1]))**2)
826
+
827
+ def are_close_together(a, b, t):
828
+ return dist(a, b) < t
829
+
830
+ # first, go through each start/end point and merge if they're close enough
831
+ # together (that is, make end point the same as the start point).
832
+ # TODO: there are better ways of doing this, in a way that propagates errors.
833
+ # back (so if total error is 0.2, go through all N commands in this
834
+ # substructure and fix each by 0.2/N (unless they have 0 vertical change))
835
+ # NOTE: this is the same.
836
+ substructures = separate_substructures(tokenized_commands)
837
+ # print(len(substructures))# 7578
838
+
839
+ previous_substructure_endpoint = (0., 0.,)
840
+ for substructure in substructures:
841
+ # first, if the last substructure's endpoint was updated, we must update
842
+ # the start point of this one to reflect the opposite update
843
+ substructure[0][-2] = str(float(substructure[0][-2]) -
844
+ previous_substructure_endpoint[0])
845
+ substructure[0][-1] = str(float(substructure[0][-1]) -
846
+ previous_substructure_endpoint[1])
847
+
848
+ start = list(map(float, substructure[0][-2:]))
849
+ curr_pos = (0., 0.)
850
+ for cmd in substructure:
851
+ curr_pos, _ = _update_curr_pos(curr_pos, cmd, (0., 0.))
852
+ if are_close_together(start, curr_pos, dist_thresh):
853
+ new_point = np.array(start)
854
+ previous_substructure_endpoint = ((new_point[0] - curr_pos[0]),
855
+ (new_point[1] - curr_pos[1]))
856
+ substructure[-1][-2] = str(float(substructure[-1][-2]) +
857
+ (new_point[0] - curr_pos[0]))
858
+ substructure[-1][-1] = str(float(substructure[-1][-1]) +
859
+ (new_point[1] - curr_pos[1]))
860
+ if substructure[-1][0] in 'cC':
861
+ substructure[-1][-4] = str(float(substructure[-1][-4]) +
862
+ (new_point[0] - curr_pos[0]))
863
+ substructure[-1][-3] = str(float(substructure[-1][-3]) +
864
+ (new_point[1] - curr_pos[1]))
865
+
866
+ if skip:
867
+ return svg_template.format(' '.join([' '.join(' '.join(cmd) for cmd in s)
868
+ for s in substructures]))
869
+
870
+ def cosa(x, y):
871
+ return (x[0] * y[0] + x[1] * y[1]) / ((np.sqrt(x[0]**2 + x[1]**2) * np.sqrt(y[0]**2 + y[1]**2)))
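+ # cosa is the cosine of the angle between the two control-point vectors;
+ # values close to -1 mean the incoming and outgoing handles are almost
+ # exactly opposed, i.e. the joint is nearly smooth and can be snapped.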
872
+
873
+ def rotate(a, x, y):
874
+ return (x * np.cos(a) - y * np.sin(a), y * np.cos(a) + x * np.sin(a))
875
+ # second, gotta find adjacent bezier curves and, if their control points
876
+ # are well enough aligned, fully align them
877
+ for substructure in substructures:
878
+ curr_pos = (0., 0.)
879
+ new_curr_pos, _ = _update_curr_pos((0., 0.,), substructure[0], (0., 0.))
880
+
881
+ for cmd_idx in range(1, len(substructure)):
882
+ prev_cmd = substructure[cmd_idx-1]
883
+ cmd = substructure[cmd_idx]
884
+
885
+ new_new_curr_pos, _ = _update_curr_pos(new_curr_pos, cmd, (0., 0.))
886
+
887
+ if cmd[0] == 'c':
888
+ if prev_cmd[0] == 'c':
889
+ # check the vectors and update if needed
890
+ # previous control pt wrt new curr point
891
+ prev_ctr_point = (curr_pos[0] + float(prev_cmd[3]) - new_curr_pos[0],
892
+ curr_pos[1] + float(prev_cmd[4]) - new_curr_pos[1])
893
+ ctr_point = (float(cmd[1]), float(cmd[2]))
894
+
895
+ if -1. < cosa(prev_ctr_point, ctr_point) < -0.95:
896
+ # calculate exact angle between the two vectors
897
+ angle_diff = (np.pi - np.arccos(cosa(prev_ctr_point, ctr_point)))/2
898
+
899
+ # rotate each vector by angle/2 in the correct direction for each.
900
+ sign = np.sign(np.cross(prev_ctr_point, ctr_point))
901
+ new_ctr_point = rotate(sign * angle_diff, *ctr_point)
902
+ new_prev_ctr_point = rotate(-sign * angle_diff, *prev_ctr_point)
903
+
904
+ # override the previous control points
905
+ # (which has to be wrt previous curr position)
906
+ substructure[cmd_idx-1][3] = str(new_prev_ctr_point[0] -
907
+ curr_pos[0] + new_curr_pos[0])
908
+ substructure[cmd_idx-1][4] = str(new_prev_ctr_point[1] -
909
+ curr_pos[1] + new_curr_pos[1])
910
+ substructure[cmd_idx][1] = str(new_ctr_point[0])
911
+ substructure[cmd_idx][2] = str(new_ctr_point[1])
912
+
913
+ curr_pos = new_curr_pos
914
+ new_curr_pos = new_new_curr_pos
915
+
916
+ # print('0',substructures)
917
+ return svg_template.format(' '.join([' '.join(' '.join(cmd) for cmd in s)
918
+ for s in substructures]))
919
+
920
+
921
+
922
+
923
+
924
+ # def get_means_stdevs(data_dir):
925
+ # """Returns the means and stdev saved in data_dir."""
926
+ # if data_dir not in means_stdevs:
927
+ # with tf.gfile.Open(os.path.join(data_dir, 'mean.npz'), 'r') as f:
928
+ # mean_npz = np.load(f)
929
+ # with tf.gfile.Open(os.path.join(data_dir, 'stdev.npz'), 'r') as f:
930
+ # stdev_npz = np.load(f)
931
+ # means_stdevs[data_dir] = (mean_npz, stdev_npz)
932
+ # return means_stdevs[data_dir]
933
+
934
+
935
+
936
+
937
+ ###############
938
+
939
+
940
+ def convert_to_svg(decoder_output, categorical=False):
941
+ converted = []
942
+ for example in decoder_output:
943
+ converted.append(_vector_to_svg(example, True, categorical=categorical))
944
+ return np.array(converted)
945
+
946
+
947
+ def create_image_conversion_fn(max_outputs, categorical=False):
948
+ """Binds the number of outputs to the image conversion fn (to svg or png)."""
949
+ def convert_to_svg(decoder_output):
950
+ converted = []
951
+ for example in decoder_output:
952
+ if len(converted) == max_outputs:
953
+ break
954
+ converted.append(_vector_to_svg(example, True, categorical=categorical))
955
+ return np.array(converted)
956
+
957
+ return convert_to_svg
958
+
959
+
960
+ ################### UTILS FOR CREATING TF SUMMARIES ##########################
961
+ def _make_encoded_image(img_tensor):
962
+ pil_img = Image.fromarray(np.squeeze(img_tensor * 255).astype(np.uint8), mode='L')
963
+ buff = io.BytesIO()
964
+ pil_img.save(buff, format='png')
965
+ encoded_image = buff.getvalue()
966
+ return encoded_image
967
+
968
+
969
+ ################### CHECK GLYPH/PATH VALID ##############################################
970
+ def is_valid_glyph(g):
971
+ is_09 = 48 <= g['uni'] <= 57
972
+ is_capital_az = 65 <= g['uni'] <= 90
973
+ is_az = 97 <= g['uni'] <= 122
974
+ is_valid_dims = g['width'] != 0 and g['vwidth'] != 0
975
+ return (is_09 or is_capital_az or is_az) and is_valid_dims
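+ # 48-57, 65-90 and 97-122 are the ASCII codepoints of 0-9, A-Z and a-z;
+ # a Thai charset would presumably need the U+0E00 block checked here instead.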
976
+
977
+
978
+ def is_valid_path(pathunibfp):
979
+ # print(len(pathunibfp[0]))
980
+ if len(pathunibfp[0]) > 70:
981
+ print("!!!more than 400",len(pathunibfp[0]))
982
+ # sys.exit()
983
+ return pathunibfp[0] and len(pathunibfp[0]) <= 70, len(pathunibfp[0])
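+ # note: this returns an (is_valid, seq_len) tuple, so callers must unpack it
+ # or index [0] rather than test the tuple itself for truthiness.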
984
+
985
+
986
+ ################### DATASET PROCESSING #######################################
987
+ def convert_to_path(g):
988
+ """Converts SplineSet in SFD font to str path."""
989
+ path = _sfd_to_path_list(g)
990
+ path = _add_missing_cmds(path, remove_zs=False)
991
+ path = _normalize_based_on_viewbox(path, '0 0 {} {}'.format(g['width'], g['vwidth']))
992
+ return path, g['uni'], g['binary_fp']
993
+ def convert_simple_vector_to_path(seq):
994
+ path = []
995
+ for i in range(seq.shape[0]):
996
+ # seq_i = seq[i]
997
+ path_i = []
998
+ cmd = np.argmax(seq[i][:4])
999
+ # args = seq[i][4:]
1000
+ p0 = seq[i][4:6]
1001
+ p1 = seq[i][6:8]
1002
+ p2 = seq[i][8:10]
1003
+ if cmd == 0:
1004
+ break
1005
+ elif cmd == 1:
1006
+ path_i.append('M')
1007
+ path_i.append(str(p2[0]))
1008
+ path_i.append(str(p2[1]))
1009
+ elif cmd == 2:
1010
+ path_i.append('L')
1011
+ path_i.append(str(p2[0]))
1012
+ path_i.append(str(p2[1]))
1013
+ elif cmd == 3:
1014
+ path_i.append('C')
1015
+ path_i.append(str(p0[0]))
1016
+ path_i.append(str(p0[1]))
1017
+ path_i.append(str(p1[0]))
1018
+ path_i.append(str(p1[1]))
1019
+ path_i.append(str(p2[0]))
1020
+ path_i.append(str(p2[1]))
1021
+ else:
1022
+ print("wrong!!! to path")
1023
+ sys.exit()
1024
+ path.append(path_i)
1025
+ return path
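+ # e.g. a step whose one-hot argmax is 2 with p2 = (0.5, 0.25) becomes
+ # ['L', '0.5', '0.25']; an argmax of 0 (EOS) terminates the path.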
1026
+ # print("jjj")
1027
+ def clockwise(seq):
1028
+ #pdb.set_trace()
1029
+ path=convert_simple_vector_to_path(seq)
1030
+ path = _canonicalize(path)
1031
+ final = {}
1032
+ final['rendered'] = _per_step_render(path, absolute=True)
1033
+ vector = _path_to_vector(path, categorical=True)
1034
+ vector = np.array(vector)
1035
+ # print(vector.shape,vector[:,9])# note vector: 12,30
1036
+
1037
+ vector = np.concatenate([np.take(vector, [0, 4, 5, 9], axis=-1), vector[..., -6:]], axis=-1)
1038
+ final['seq_len'] = np.shape(vector)[0]
1039
+ vector = _append_eos(vector.tolist(), True, 10)
1040
+ final['sequence'] = np.concatenate((vector, np.zeros(((70 - final['seq_len']), 10))), 0)
1041
+ final['rendered'] = np.reshape(final['rendered'][..., 0], [64 * 64]).astype(np.float32).tolist()
1042
+ return final
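+ # clockwise() thus re-canonicalizes a generated sequence: every subpath is
+ # made to start at its topmost-leftmost point and wind clockwise, then the
+ # result is re-padded to the fixed 71-step (70 commands + EOS) layout.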
1043
+
1044
+
1045
+ def create_example(pathunibfp):
1046
+ """Bulk of dataset processing. Converts str path to np array"""
1047
+ path, uni, binary_fp = pathunibfp
1048
+ final = {}
1049
+
1050
+ # zoom out
1051
+ path = _zoom_out(path)
1052
+ # make clockwise
1053
+ path = _canonicalize(path)
1054
+
1055
+ # render path for training
1056
+ final['rendered'] = _per_step_render(path, absolute=True)
1057
+
1058
+ # make path relative
1059
+ # path = _make_relative(path) # note: not made relative; coordinates stay absolute
1060
+ # convert to vector
1061
+ vector = _path_to_vector(path, categorical=True)
1062
+
1063
+
1064
+
1065
+ # path2 = _vector_to_path(vector) # note: converts the vector back to a path
1066
+ #print(path2)
1067
+ #print(path==path2)
1068
+
1069
+ vector = np.array(vector)
1070
+ # print(vector.shape,vector[:,9])# note vector: 12,30
1071
+ vector = np.concatenate([np.take(vector, [0, 4, 5, 9], axis=-1), vector[..., -6:]], axis=-1)
1072
+
1073
+
1074
+
1075
+ #path2 = _vector_to_path(vector)
1076
+ #print(path,"\nhhh",path2)
1077
+
1078
+
1079
+ # print("hhh",vector)
1080
+ # print(render(vector))
1081
+ # sys.exit()
1082
+ # count some stats
1083
+ final['seq_len'] = np.shape(vector)[0]
1084
+ # final['class'] = int(_map_uni_to_alphanum(uni))
1085
+ final['class'] = int(_map_uni_to_alpha(uni))
1086
+ final['binary_fp'] = str(binary_fp)
1087
+
1088
+ # append eos
1089
+ vector = _append_eos(vector.tolist(), True, 10)
1090
+
1091
+ # pad path to 51 (with eos)
1092
+ # pdb.set_trace()
1093
+ # if final['seq_len']>50:
1094
+
1095
+ # print( final['seq_len'])
1096
+ final['sequence'] = np.concatenate((vector, np.zeros(((70 - final['seq_len']), 10))), 0)
1097
+ #seq = final['sequence']
1098
+
1099
+ # new_path = convert_simple_vector_to_path(seq)
1100
+ # print(new_path,path2==new_path)
1101
+
1102
+ # sys.exit()
1103
+ # make pure list:
1104
+ # use last channel only
1105
+ final['rendered'] = np.reshape(final['rendered'][..., 0], [64 * 64]).astype(np.float32).tolist()
1106
+ final['sequence'] = np.reshape(final['sequence'], [71 * 10]).astype(np.float32).tolist()
1107
+ final['class'] = np.reshape(final['class'], [1]).astype(np.int64).tolist()
1108
+ final['seq_len'] = np.reshape(final['seq_len'], [1]).astype(np.int64).tolist()
1109
+ return final
1110
+
1111
+
1112
+ def mean_to_example(mean_stdev):
1113
+ """Converts the found mean and stdev to example."""
1114
+ # mean_stdev is a dict
1115
+ mean_stdev['mean'] = np.reshape(mean_stdev['mean'], [10]).astype(np.float32).tolist()
1116
+ mean_stdev['variance'] = np.reshape(mean_stdev['variance'], [10]).astype(np.float32).tolist()
1117
+ mean_stdev['stddev'] = np.reshape(mean_stdev['stddev'], [10]).astype(np.float32).tolist()
1118
+ mean_stdev['count'] = np.reshape(mean_stdev['count'], [1]).astype(np.int64).tolist()
1119
+ return mean_stdev
1120
+
1121
+
1122
+ ################### CHECK VALID ##############################################
1123
+ class MeanStddev:
1124
+ """Accumulator to compute the mean/stdev of svg commands."""
1125
+
1126
+ def create_accumulator(self):
1127
+ curr_sum = np.zeros([10])
1128
+ sum_sq = np.zeros([10])
1129
+ return (curr_sum, sum_sq, 0) # x, x^2, count
1130
+
1131
+ def add_input(self, sum_count, new_input):
1132
+ (curr_sum, sum_sq, count) = sum_count
1133
+ # new_input is a dict with keys = ['seq_len', 'sequence']
1134
+ new_seq_len = new_input['seq_len'][0] # Line #754 'seq_len' is a list of one int
1135
+ assert isinstance(new_seq_len, int), print(type(new_seq_len))
1136
+
1137
+ # remove padding and eos from sequence
1138
+ assert isinstance(new_input['sequence'], list), print(type(new_input['sequence']))
1139
+ new_input_np = np.reshape(np.array(new_input['sequence']), [-1, 10])
1140
+ assert isinstance(new_input_np, np.ndarray), print(type())
1141
+ assert new_input_np.shape[0] >= new_seq_len
1142
+ new_input_np = new_input_np[:new_seq_len, :]
1143
+
1144
+ # accumulate new_sum and new_sum_sq
1145
+ new_sum = np.sum([curr_sum, np.sum(new_input_np, axis=0)], axis=0)
1146
+ new_sum_sq = np.sum([sum_sq, np.sum(np.power(new_input_np, 2), axis=0)],
1147
+ axis=0)
1148
+ return new_sum, new_sum_sq, count + new_seq_len
1149
+
1150
+ def merge_accumulators(self, accumulators):
1151
+ curr_sums, sum_sqs, counts = list(zip(*accumulators))
1152
+ return np.sum(curr_sums, axis=0), np.sum(sum_sqs, axis=0), np.sum(counts)
1153
+
1154
+ def extract_output(self, sum_count):
1155
+ (curr_sum, curr_sum_sq, count) = sum_count
1156
+ if count:
1157
+ mean = np.divide(curr_sum, count)
1158
+ variance = np.divide(curr_sum_sq, count) - np.power(mean, 2)
1159
+ # -ve value could happen due to rounding
1160
+ variance = np.max([variance, np.zeros(np.shape(variance))], axis=0)
1161
+ stddev = np.sqrt(variance)
1162
+ return {
1163
+ 'mean': mean,
1164
+ 'variance': variance,
1165
+ 'stddev': stddev,
1166
+ 'count': count
1167
+ }
1168
+ else:
1169
+ return {
1170
+ 'mean': float('NaN'),
1171
+ 'variance': float('NaN'),
1172
+ 'stddev': float('NaN'),
1173
+ 'count': 0
1174
+ }
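+ # variance is computed as E[x^2] - E[x]^2 from the two running sums, which
+ # is why a small negative value can appear from rounding and is clamped to
+ # zero before taking the square root.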
{data_utils → ThaiVecFont/data_utils}/write_data_to_dirs.py RENAMED
@@ -1,231 +1,231 @@
1
- import argparse
2
- import multiprocessing as mp
3
- import os
4
- import pickle
5
- import numpy as np
6
- from data_utils import svg_utils
7
- from tqdm import tqdm
8
-
9
- def exist_empty_imgs(imgs_array, num_chars):
10
- for char_id in range(num_chars):
11
13
- if np.max(imgs_array[char_id]) == 0:
14
- return True
15
- return False
16
-
17
- def create_db(opts, output_path, log_path):
18
- charset = open(f"{opts.data_path}/char_set/{opts.language}.txt", 'r').read()
19
- print("Process sfd to npy files in dirs....")
20
- sdf_path = os.path.join(opts.sfd_path, opts.language, opts.split)
21
- all_font_ids = sorted(os.listdir(sdf_path))
22
- num_fonts = len(all_font_ids)
23
- num_fonts_w = len(str(num_fonts))
24
- print(f"Number {opts.split} fonts before processing", num_fonts)
25
- num_processes = mp.cpu_count() - 1
26
- fonts_per_process = num_fonts // num_processes + 1
27
- num_chars = len(charset)
28
- num_chars_w = len(str(num_chars))
29
-
30
-
31
- # import ipdb; ipdb.set_trace()
32
-
33
- def process(process_id):
34
- valid_chars = []
35
- invalid_path = []
36
- invalid_glypts = []
37
-
38
- cur_process_log_file = open(os.path.join(log_path, f'log_{opts.split}_{process_id}.txt'), 'w')
39
- for i in tqdm(range(process_id * fonts_per_process, (process_id + 1) * fonts_per_process)):
40
- if i >= num_fonts:
41
- break
42
-
43
- font_id = all_font_ids[i]
44
- cur_font_sfd_dir = os.path.join(sdf_path, font_id)
45
- cur_font_glyphs = []
46
-
47
- if not os.path.exists(os.path.join(cur_font_sfd_dir, 'imgs_' + str(opts.img_size) + '.npy')):
48
- continue
49
-
50
- # a whole font as an entry
51
- for char_id in range(num_chars):
52
- # print('char_id :',char_id)
53
- if not os.path.exists(os.path.join(cur_font_sfd_dir, '{}_{num:0{width}}.sfd'.format(font_id, num=char_id, width=num_chars_w))):
54
- break
55
-
56
- char_desp_f = open(os.path.join(cur_font_sfd_dir, '{}_{num:0{width}}.txt'.format(font_id, num=char_id, width=num_chars_w)), 'r')
57
- char_desp = char_desp_f.readlines()
58
- sfd_f = open(os.path.join(cur_font_sfd_dir, '{}_{num:0{width}}.sfd'.format(font_id, num=char_id, width=num_chars_w)), 'r')
59
- sfd = sfd_f.read()
60
-
61
- uni = int(char_desp[0].strip())
62
- width = int(char_desp[1].strip())
63
- vwidth = int(char_desp[2].strip())
64
- char_idx = char_desp[3].strip()
65
- font_idx = char_desp[4].strip()
66
-
67
- cur_glyph = {}
68
- cur_glyph['uni'] = uni
69
- cur_glyph['width'] = width
70
- cur_glyph['vwidth'] = vwidth
71
- cur_glyph['sfd'] = sfd
72
- cur_glyph['id'] = char_idx
73
- cur_glyph['binary_fp'] = font_idx
74
-
75
- if not svg_utils.is_valid_glyph(cur_glyph):
76
- msg = f"font {font_idx}, char {char_idx} is not a valid glyph\n"
77
- invalid_glypts.append([font_idx, int(char_idx), charset[int(char_idx)]])
78
- cur_process_log_file.write(msg)
79
- char_desp_f.close()
80
- sfd_f.close()
81
- # only keep fonts in which every glyph is valid
82
- break
83
- pathunibfp = svg_utils.convert_to_path(cur_glyph)
84
-
85
- if not svg_utils.is_valid_path(pathunibfp)[0]:
86
- msg = f"font {font_idx}, char {char_idx}'s sfd is not a valid path\n"
87
- invalid_path.append([font_idx, int(char_idx), charset[int(char_idx)]])
88
- cur_process_log_file.write(msg)
89
- char_desp_f.close()
90
- sfd_f.close()
91
- break
92
- valid_chars.append([font_idx, int(char_idx), charset[int(char_idx)]])
93
- example = svg_utils.create_example(pathunibfp)
94
-
95
- cur_font_glyphs.append(example)
96
- char_desp_f.close()
97
- sfd_f.close()
98
-
99
- if len(cur_font_glyphs) == num_chars:
100
- # only keep fonts in which every glyph is valid
101
- # merge the whole font
102
-
103
- rendered = np.load(os.path.join(cur_font_sfd_dir, 'imgs_' + str(opts.img_size) + '.npy'))
104
-
105
- if (rendered[0] == rendered[1]).all():
106
- continue
107
-
108
- sequence = []
109
- seq_len = []
110
- binaryfp = []
111
- char_class = []
112
- for char_id in range(num_chars):
113
- example = cur_font_glyphs[char_id]
114
- sequence.append(example['sequence'])
115
- seq_len.append(example['seq_len'])
116
- char_class.append(example['class'])
117
- binaryfp = example['binary_fp']
118
- if not os.path.exists(os.path.join(output_path, '{num:0{width}}'.format(num=i, width=num_fonts_w))):
119
- os.mkdir(os.path.join(output_path, '{num:0{width}}'.format(num=i, width=num_fonts_w)))
120
-
121
- np.save(os.path.join(output_path, '{num:0{width}}'.format(num=i, width=num_fonts_w), 'sequence.npy'), np.array(sequence))
122
- np.save(os.path.join(output_path, '{num:0{width}}'.format(num=i, width=num_fonts_w), 'seq_len.npy'), np.array(seq_len))
123
- np.save(os.path.join(output_path, '{num:0{width}}'.format(num=i, width=num_fonts_w), 'class.npy'), np.array(char_class))
124
- np.save(os.path.join(output_path, '{num:0{width}}'.format(num=i, width=num_fonts_w), 'font_id.npy'), np.array(binaryfp))
125
- np.save(os.path.join(output_path, '{num:0{width}}'.format(num=i, width=num_fonts_w), 'rendered_' + str(opts.img_size) + '.npy'), rendered)
126
-
127
- print("valid_chars", len(valid_chars))
128
- print("invalid_path:", invalid_path)
129
- print("invalid_glypts:",invalid_glypts)
130
-
131
- processes = [mp.Process(target=process, args=[pid]) for pid in range(num_processes)]
132
-
133
- for p in processes:
134
- p.start()
135
- for p in processes:
136
- p.join()
137
-
138
- print("Finished processing all sfd files, logs (invalid glyphs and paths) are saved to", log_path)
139
-
140
-
141
- def cal_mean_stddev(opts, output_path):
142
- print("Calculating all glyphs' mean stddev ....")
143
- charset = open(f"{opts.data_path}/char_set/{opts.language}.txt", 'r').read()
144
- font_paths = []
145
- for root, dirs, files in os.walk(output_path):
146
- for dir_name in dirs:
147
- font_paths.append(os.path.join(output_path, dir_name))
148
-
149
- font_paths.sort()
150
- num_fonts = len(font_paths)
151
- num_processes = mp.cpu_count() - 1
152
- fonts_per_process = num_fonts // num_processes + 1
153
- num_chars = len(charset)
154
- manager = mp.Manager()
155
- return_dict = manager.dict()
156
- main_stddev_accum = svg_utils.MeanStddev()
157
- print(main_stddev_accum)
158
-
159
- def process(process_id, return_dict):
160
- mean_stddev_accum = svg_utils.MeanStddev()
161
- cur_sum_count = mean_stddev_accum.create_accumulator()
162
- for i in range(process_id * fonts_per_process, (process_id + 1) * fonts_per_process):
163
- if i >= num_fonts:
164
- break
165
- cur_font_path = font_paths[i]
166
- for charid in range(num_chars):
167
- cur_font_char = {}
168
- cur_font_char['seq_len'] = np.load(os.path.join(cur_font_path, 'seq_len.npy')).tolist()[charid]
169
- cur_font_char['sequence'] = np.load(os.path.join(cur_font_path, 'sequence.npy')).tolist()[charid]
170
- # print(cur_font_char)
171
- cur_sum_count = mean_stddev_accum.add_input(cur_sum_count, cur_font_char)
172
- return_dict[process_id] = cur_sum_count
173
- processes = [mp.Process(target=process, args=[pid, return_dict]) for pid in range(num_processes)]
174
-
175
- for p in processes:
176
- p.start()
177
- for p in processes:
178
- p.join()
179
-
180
- merged_sum_count = main_stddev_accum.merge_accumulators(return_dict.values())
181
- output = main_stddev_accum.extract_output(merged_sum_count)
182
- print('output :', output)
183
- mean = output['mean']
184
- stdev = output['stddev']
185
- print('mean :', mean)
186
- mean = np.concatenate((np.zeros([4]), mean[4:]), axis=0)
187
- stdev = np.concatenate((np.ones([4]), stdev[4:]), axis=0)
188
- # finally, save the mean and stddev files
189
- output_path_ = os.path.join(opts.output_path, opts.language)
190
- np.save(os.path.join(output_path_, 'mean'), mean)
191
- np.save(os.path.join(output_path_, 'stdev'), stdev)
192
-
193
- # rename npy to npz, don't mind about it, just some legacy issue
194
- os.rename(os.path.join(output_path_, 'mean.npy'), os.path.join(output_path_, 'mean.npz'))
195
- os.rename(os.path.join(output_path_, 'stdev.npy'), os.path.join(output_path_, 'stdev.npz'))
196
-
197
-
198
- def main():
199
- parser = argparse.ArgumentParser(description="LMDB creation")
200
- parser.add_argument("--language", type=str, default='eng', choices=['eng', 'chn', 'tha'])
201
- parser.add_argument("--data_path", type=str, default='./Font_Dataset', help="Path to Dataset")
202
- parser.add_argument("--ttf_path", type=str, default='../data/font_ttfs')
203
- parser.add_argument('--sfd_path', type=str, default='../data/font_sfds')
204
- parser.add_argument("--output_path", type=str, default='../data/vecfont_dataset_/', help="Path to write the database to")
205
- parser.add_argument('--img_size', type=int, default=64, help="the height and width of glyph images")
206
- parser.add_argument("--split", type=str, default='train')
207
- # parser.add_argument("--log_dir", type=str, default='../data/font_sfds/log/')
208
- parser.add_argument("--phase", type=int, default=0, choices=[0, 1, 2],
209
- help="0 all, 1 create db, 2 cal stddev")
210
-
211
- opts = parser.parse_args()
212
- assert os.path.exists(opts.sfd_path), "specified sfd glyphs path does not exist"
213
-
214
- output_path = os.path.join(opts.output_path, opts.language, opts.split)
215
- log_path = os.path.join(opts.sfd_path, opts.language, 'log')
216
-
217
- if not os.path.exists(output_path):
218
- os.makedirs(output_path)
219
-
220
- if not os.path.exists(log_path):
221
- os.makedirs(log_path)
222
-
223
- if opts.phase <= 1:
224
- create_db(opts, output_path, log_path)
225
-
226
- if opts.phase <= 2 and opts.split == 'train':
227
- cal_mean_stddev(opts, output_path)
228
-
229
-
230
- if __name__ == "__main__":
231
  main()
 
1
+ import argparse
2
+ import multiprocessing as mp
3
+ import os
4
+ import pickle
5
+ import numpy as np
6
+ from data_utils import svg_utils
7
+ from tqdm import tqdm
8
+
9
+ def exist_empty_imgs(imgs_array, num_chars):
10
+ for char_id in range(num_chars):
11
13
+ if np.max(imgs_array[char_id]) == 0:
14
+ return True
15
+ return False
16
+
17
+ def create_db(opts, output_path, log_path):
18
+ charset = open(f"{opts.data_path}/char_set/{opts.language}.txt", 'r').read()
19
+ print("Process sfd to npy files in dirs....")
20
+ sdf_path = os.path.join(opts.sfd_path, opts.language, opts.split)
21
+ all_font_ids = sorted(os.listdir(sdf_path))
22
+ num_fonts = len(all_font_ids)
23
+ num_fonts_w = len(str(num_fonts))
24
+ print(f"Number {opts.split} fonts before processing", num_fonts)
25
+ num_processes = mp.cpu_count() - 1
26
+ fonts_per_process = num_fonts // num_processes + 1
27
+ num_chars = len(charset)
28
+ num_chars_w = len(str(num_chars))
29
+
30
+
31
+ # import ipdb; ipdb.set_trace()
32
+
33
+ def process(process_id):
34
+ valid_chars = []
35
+ invalid_path = []
36
+ invalid_glypts = []
37
+
38
+ cur_process_log_file = open(os.path.join(log_path, f'log_{opts.split}_{process_id}.txt'), 'w')
39
+ for i in tqdm(range(process_id * fonts_per_process, (process_id + 1) * fonts_per_process)):
40
+ if i >= num_fonts:
41
+ break
42
+
43
+ font_id = all_font_ids[i]
44
+ cur_font_sfd_dir = os.path.join(sdf_path, font_id)
45
+ cur_font_glyphs = []
46
+
47
+ if not os.path.exists(os.path.join(cur_font_sfd_dir, 'imgs_' + str(opts.img_size) + '.npy')):
48
+ continue
49
+
50
+ # a whole font as an entry
51
+ for char_id in range(num_chars):
52
+ # print('char_id :',char_id)
53
+ if not os.path.exists(os.path.join(cur_font_sfd_dir, '{}_{num:0{width}}.sfd'.format(font_id, num=char_id, width=num_chars_w))):
54
+ break
55
+
56
+ char_desp_f = open(os.path.join(cur_font_sfd_dir, '{}_{num:0{width}}.txt'.format(font_id, num=char_id, width=num_chars_w)), 'r')
57
+ char_desp = char_desp_f.readlines()
58
+ sfd_f = open(os.path.join(cur_font_sfd_dir, '{}_{num:0{width}}.sfd'.format(font_id, num=char_id, width=num_chars_w)), 'r')
59
+ sfd = sfd_f.read()
60
+
61
+ uni = int(char_desp[0].strip())
62
+ width = int(char_desp[1].strip())
63
+ vwidth = int(char_desp[2].strip())
64
+ char_idx = char_desp[3].strip()
65
+ font_idx = char_desp[4].strip()
66
+
67
+ cur_glyph = {}
68
+ cur_glyph['uni'] = uni
69
+ cur_glyph['width'] = width
70
+ cur_glyph['vwidth'] = vwidth
71
+ cur_glyph['sfd'] = sfd
72
+ cur_glyph['id'] = char_idx
73
+ cur_glyph['binary_fp'] = font_idx
74
+
75
+ if not svg_utils.is_valid_glyph(cur_glyph):
76
+ msg = f"font {font_idx}, char {char_idx} is not a valid glyph\n"
77
+ invalid_glypts.append([font_idx, int(char_idx), charset[int(char_idx)]])
78
+ cur_process_log_file.write(msg)
79
+ char_desp_f.close()
80
+ sfd_f.close()
81
+ # only keep fonts in which every glyph is valid
82
+ break
83
+ pathunibfp = svg_utils.convert_to_path(cur_glyph)
84
+
85
+ if not svg_utils.is_valid_path(pathunibfp)[0]:
86
+ msg = f"font {font_idx}, char {char_idx}'s sfd is not a valid path\n"
87
+ invalid_path.append([font_idx, int(char_idx), charset[int(char_idx)]])
88
+ cur_process_log_file.write(msg)
89
+ char_desp_f.close()
90
+ sfd_f.close()
91
+ break
92
+ valid_chars.append([font_idx, int(char_idx), charset[int(char_idx)]])
93
+ example = svg_utils.create_example(pathunibfp)
94
+
95
+ cur_font_glyphs.append(example)
96
+ char_desp_f.close()
97
+ sfd_f.close()
98
+
99
+ if len(cur_font_glyphs) == num_chars:
100
+ # only keep fonts in which every glyph is valid
101
+ # merge the whole font
102
+
103
+ rendered = np.load(os.path.join(cur_font_sfd_dir, 'imgs_' + str(opts.img_size) + '.npy'))
104
+
105
+ if (rendered[0] == rendered[1]).all():
106
+ continue
107
+
108
+ sequence = []
109
+ seq_len = []
110
+ binaryfp = []
111
+ char_class = []
112
+ for char_id in range(num_chars):
113
+ example = cur_font_glyphs[char_id]
114
+ sequence.append(example['sequence'])
115
+ seq_len.append(example['seq_len'])
116
+ char_class.append(example['class'])
117
+ binaryfp = example['binary_fp']
118
+ if not os.path.exists(os.path.join(output_path, '{num:0{width}}'.format(num=i, width=num_fonts_w))):
119
+ os.mkdir(os.path.join(output_path, '{num:0{width}}'.format(num=i, width=num_fonts_w)))
120
+
121
+ np.save(os.path.join(output_path, '{num:0{width}}'.format(num=i, width=num_fonts_w), 'sequence.npy'), np.array(sequence))
122
+ np.save(os.path.join(output_path, '{num:0{width}}'.format(num=i, width=num_fonts_w), 'seq_len.npy'), np.array(seq_len))
123
+ np.save(os.path.join(output_path, '{num:0{width}}'.format(num=i, width=num_fonts_w), 'class.npy'), np.array(char_class))
124
+ np.save(os.path.join(output_path, '{num:0{width}}'.format(num=i, width=num_fonts_w), 'font_id.npy'), np.array(binaryfp))
125
+ np.save(os.path.join(output_path, '{num:0{width}}'.format(num=i, width=num_fonts_w), 'rendered_' + str(opts.img_size) + '.npy'), rendered)
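+ # each accepted font directory now holds five npy files: the padded command
+ # sequences, their lengths, the per-glyph class ids, the font id, and the
+ # rendered img_size x img_size glyph bitmaps.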
126
+
127
+ print("valid_chars", len(valid_chars))
128
+ print("invalid_path:", invalid_path)
129
+ print("invalid_glypts:",invalid_glypts)
130
+
131
+ processes = [mp.Process(target=process, args=[pid]) for pid in range(num_processes)]
132
+
133
+ for p in processes:
134
+ p.start()
135
+ for p in processes:
136
+ p.join()
137
+
138
+ print("Finished processing all sfd files, logs (invalid glyphs and paths) are saved to", log_path)
139
+
140
+
141
+ def cal_mean_stddev(opts, output_path):
142
+ print("Calculating all glyphs' mean stddev ....")
143
+ charset = open(f"{opts.data_path}/char_set/{opts.language}.txt", 'r').read()
144
+ font_paths = []
145
+ for root, dirs, files in os.walk(output_path):
146
+ for dir_name in dirs:
147
+ font_paths.append(os.path.join(output_path, dir_name))
148
+
149
+ font_paths.sort()
150
+ num_fonts = len(font_paths)
151
+ num_processes = mp.cpu_count() - 1
152
+ fonts_per_process = num_fonts // num_processes + 1
153
+ num_chars = len(charset)
154
+ manager = mp.Manager()
155
+ return_dict = manager.dict()
156
+ main_stddev_accum = svg_utils.MeanStddev()
157
+ print(main_stddev_accum)
158
+
159
+ def process(process_id, return_dict):
160
+ mean_stddev_accum = svg_utils.MeanStddev()
161
+ cur_sum_count = mean_stddev_accum.create_accumulator()
162
+ for i in range(process_id * fonts_per_process, (process_id + 1) * fonts_per_process):
163
+ if i >= num_fonts:
164
+ break
165
+ cur_font_path = font_paths[i]
166
+ for charid in range(num_chars):
167
+ cur_font_char = {}
168
+ cur_font_char['seq_len'] = np.load(os.path.join(cur_font_path, 'seq_len.npy')).tolist()[charid]
169
+ cur_font_char['sequence'] = np.load(os.path.join(cur_font_path, 'sequence.npy')).tolist()[charid]
170
+ # print(cur_font_char)
171
+ cur_sum_count = mean_stddev_accum.add_input(cur_sum_count, cur_font_char)
172
+ return_dict[process_id] = cur_sum_count
173
+ processes = [mp.Process(target=process, args=[pid, return_dict]) for pid in range(num_processes)]
174
+
175
+ for p in processes:
176
+ p.start()
177
+ for p in processes:
178
+ p.join()
179
+
180
+ merged_sum_count = main_stddev_accum.merge_accumulators(return_dict.values())
181
+ output = main_stddev_accum.extract_output(merged_sum_count)
182
+ print('output :', output)
183
+ mean = output['mean']
184
+ stdev = output['stddev']
185
+ print('mean :', mean)
186
+ mean = np.concatenate((np.zeros([4]), mean[4:]), axis=0)
187
+ stdev = np.concatenate((np.ones([4]), stdev[4:]), axis=0)
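+ # the first 4 dims are the EOS/m/l/c one-hot bits, so they are left
+ # un-normalized (mean 0, stdev 1); only the 6 coordinate dims get whitened.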
188
+ # finally, save the mean and stddev files
189
+ output_path_ = os.path.join(opts.output_path, opts.language)
190
+ np.save(os.path.join(output_path_, 'mean'), mean)
191
+ np.save(os.path.join(output_path_, 'stdev'), stdev)
192
+
193
+ # rename npy to npz, don't mind about it, just some legacy issue
194
+ os.rename(os.path.join(output_path_, 'mean.npy'), os.path.join(output_path_, 'mean.npz'))
195
+ os.rename(os.path.join(output_path_, 'stdev.npy'), os.path.join(output_path_, 'stdev.npz'))
196
+
197
+
198
+ def main():
199
+ parser = argparse.ArgumentParser(description="LMDB creation")
200
+ parser.add_argument("--language", type=str, default='eng', choices=['eng', 'chn', 'tha'])
201
+ parser.add_argument("--data_path", type=str, default='./Font_Dataset', help="Path to Dataset")
202
+ parser.add_argument("--ttf_path", type=str, default='../data/font_ttfs')
203
+ parser.add_argument('--sfd_path', type=str, default='../data/font_sfds')
204
+ parser.add_argument("--output_path", type=str, default='../data/vecfont_dataset_/', help="Path to write the database to")
205
+ parser.add_argument('--img_size', type=int, default=64, help="the height and width of glyph images")
206
+ parser.add_argument("--split", type=str, default='train')
207
+ # parser.add_argument("--log_dir", type=str, default='../data/font_sfds/log/')
208
+ parser.add_argument("--phase", type=int, default=0, choices=[0, 1, 2],
209
+ help="0 all, 1 create db, 2 cal stddev")
210
+
211
+ opts = parser.parse_args()
212
+ assert os.path.exists(opts.sfd_path), "specified sfd glyphs path does not exist"
213
+
214
+ output_path = os.path.join(opts.output_path, opts.language, opts.split)
215
+ log_path = os.path.join(opts.sfd_path, opts.language, 'log')
216
+
217
+ if not os.path.exists(output_path):
218
+ os.makedirs(output_path)
219
+
220
+ if not os.path.exists(log_path):
221
+ os.makedirs(log_path)
222
+
223
+ if opts.phase <= 1:
224
+ create_db(opts, output_path, log_path)
225
+
226
+ if opts.phase <= 2 and opts.split == 'train':
227
+ cal_mean_stddev(opts, output_path)
228
+
229
+
230
+ if __name__ == "__main__":
231
  main()
{data_utils → ThaiVecFont/data_utils}/write_glyph_imgs.py RENAMED
@@ -1,180 +1,180 @@
1
- from PIL import Image
3
- from PIL import ImageDraw
4
- from PIL import ImageFont
5
- import argparse
6
- import numpy as np
7
- import os
8
- import multiprocessing as mp
9
- from tqdm import tqdm
10
- char_error = 0
11
-
12
- def get_bbox(img):
13
- img = 255 - np.array(img)
14
- sum_x = np.sum(img, axis=0)
15
- sum_y = np.sum(img, axis=1)
16
- range_x = np.where(sum_x > 0)
17
- width = range_x[0][-1] - range_x[0][0]
18
- range_y = np.where(sum_y > 0)
19
- height = range_y[0][-1] - range_y[0][0]
20
- return width, height
21
-
22
- def write_glyph_imgs_mp(opts):
23
- """Useing multiprocessing to render glyph images"""
24
- charset = open(f"{opts.data_path}/char_set/{opts.language}.txt", 'r').read()
25
- fonts_file_path = os.path.join(opts.ttf_path, opts.language)
26
- sfd_path = os.path.join(opts.sfd_path, opts.language)
27
- for root, dirs, files in os.walk(os.path.join(fonts_file_path, opts.split)):
28
- ttf_names = files
29
- # ttf_names = ['08343.aspx_id=299524532']
30
- ttf_names.sort()
31
- font_num = len(ttf_names)
32
- charset_lenw = len(str(len(charset)))
33
- process_nums = mp.cpu_count() - 1
34
- font_num_per_process = font_num // process_nums + 1
35
-
36
- def process(process_id, font_num_p_process):
37
- for i in tqdm(range(process_id * font_num_p_process, (process_id + 1) * font_num_p_process)):
38
- if i >= font_num:
39
- break
40
-
41
- fontname = ttf_names[i].split('.')[0]
42
- # print(fontname)
43
-
44
- if not os.path.exists(os.path.join(sfd_path, opts.split, fontname)):
45
- continue
46
-
47
- ttf_file_path = os.path.join(fonts_file_path, opts.split, ttf_names[i])
48
-
49
- try:
50
- font = ImageFont.truetype(ttf_file_path, int(opts.img_size*opts.FONT_SIZE), encoding="unic")
51
- except:
52
- print('cant open ' + fontname)
53
- continue
54
-
55
- fontimgs_array = np.zeros((len(charset), opts.img_size, opts.img_size), np.uint8)
56
- fontimgs_array[:, :, :] = 255
57
-
58
- flag_success = True
59
-
60
- for charid in range(len(charset)):
61
- # read the meta file
62
- txt_fpath = os.path.join(sfd_path, opts.split, fontname, fontname + '_' + '{num:0{width}}'.format(num=charid, width=charset_lenw) + '.txt')
63
- try:
64
- txt_lines = open(txt_fpath,'r').read().split('\n')
65
- except:
66
- print('cannot read text file')
67
- flag_success = False
68
- break
69
- if len(txt_lines) < 5:
70
- flag_success = False
71
- break # should be empty file
72
- # the offsets are calculated according to the rules in data_utils/svg_utils.py
73
- vbox_w = float(txt_lines[1])
74
- vbox_h = float(txt_lines[2])
75
- norm = max(int(vbox_w), int(vbox_h))
76
-
77
- if int(vbox_h) > int(vbox_w):
78
- add_to_y = 0
79
- add_to_x = abs(int(vbox_h) - int(vbox_w)) / 2
80
- add_to_x = add_to_x * (float(opts.img_size) / norm)
81
- else:
82
- add_to_y = abs(int(vbox_h) - int(vbox_w)) / 2
83
- add_to_y = add_to_y * (float(opts.img_size) / norm)
84
- add_to_x = 0
85
-
86
- char = charset[charid]
87
-
88
- array = np.ndarray((opts.img_size, opts.img_size), np.uint8)
89
- array[:, :] = 255
90
- image = Image.fromarray(array)
91
- draw = ImageDraw.Draw(image)
92
- try:
93
- font_width, font_height = font.getsize(char)
94
- except Exception as e:
95
- print('cant calculate height and width ' + "%04d"%i + '_' + '{num:0{width}}'.format(num=charid, width=charset_lenw))
96
- flag_success = False
97
- break
98
-
99
- try:
100
- ascent, descent = font.getmetrics()
101
- except:
102
- print('cannot get ascent, descent')
103
- flag_success = False
104
- break
105
-
106
- draw_pos_x = add_to_x
107
- #if opts.language == 'eng':
108
- thai_characters_long = ["ญ","ฎ","ฏ","ฐ"]
109
-
110
- if char in thai_characters_long:
111
- draw_pos_y = add_to_y + opts.img_size - ascent - descent - int((opts.img_size / 24.0) * (10.0 / 3.0))
112
- else:
113
- draw_pos_y = add_to_y + opts.img_size - ascent - int((opts.img_size / 24.0) * (10.0 / 3.0))
114
- #else:
115
- # draw_pos_y = add_to_y + opts.img_size - ascent - int((opts.img_size / 24.0) * (10.0 / 3.0))
116
-
117
- draw.text((draw_pos_x, draw_pos_y), char, (0), font=font)
118
-
119
- if opts.debug:
120
- image.save(os.path.join(sfd_path, opts.split, fontname, str(charid) + '_' + str(opts.img_size) + '.png'))
121
-
122
- try:
123
- char_w, char_h = get_bbox(image)
124
- # print(charid, char_w, char_h)
125
- except Exception as e:
126
- print("cannot get bbox")
127
- print(e)
128
- flag_success = False
129
- break
130
-
131
- # Detect large font
132
- problem = []
133
- if font_width > 59:
134
- problem.append("width")
135
-
136
- if font_height > 93:
137
- problem.append("height")
138
-
139
- if problem:
140
- print(problem,fontname, charid, font_width, font_height, char_w, char_h)
141
- flag_success = False
142
- break
143
-
144
- # Detect Small Font
145
- if (char_w < opts.img_size * 0.15) and (char_h < opts.img_size * 0.15):
146
- flag_success = False
147
- break
148
-
149
- fontimgs_array[charid] = np.array(image)
150
-
151
- if flag_success:
152
- np.save(os.path.join(sfd_path, opts.split, fontname, 'imgs_' + str(opts.img_size) + '.npy'), fontimgs_array)
153
- else:
154
- global char_error # Count char flag not success
155
- char_error += 1
156
- print("flag on", fontname, charid, 'imgs_' + str(opts.img_size) + '.npy', " Not Succeed")
157
-
158
- processes = [mp.Process(target=process, args=(pid, font_num_per_process)) for pid in range(process_nums)]
159
-
160
- for p in processes:
161
- p.start()
162
- for p in processes:
163
- p.join()
164
-
165
- def main():
166
- parser = argparse.ArgumentParser(description="Write glyph images")
167
- parser.add_argument("--language", type=str, default='eng', choices=['eng', 'chn', 'tha'])
168
- parser.add_argument("--data_path", type=str, default='./Font_Dataset', help="Path to Dataset")
169
- parser.add_argument("--ttf_path", type=str, default='../data/font_ttfs')
170
- parser.add_argument('--sfd_path', type=str, default='../data/font_sfds')
171
- parser.add_argument('--img_size', type=int, default=64)
172
- parser.add_argument('--split', type=str, default='train')
173
- parser.add_argument('--FONT_SIZE', type=float, default=1)
174
- parser.add_argument('--debug', type=bool, default=False)
175
- opts = parser.parse_args()
176
- write_glyph_imgs_mp(opts)
177
-
178
-
179
- if __name__ == "__main__":
180
- main()
 
1
+ from PIL import Image
3
+ from PIL import ImageDraw
4
+ from PIL import ImageFont
5
+ import argparse
6
+ import numpy as np
7
+ import os
8
+ import multiprocessing as mp
9
+ from tqdm import tqdm
10
+ char_error = 0
11
+
12
+ def get_bbox(img):
13
+ img = 255 - np.array(img)
14
+ sum_x = np.sum(img, axis=0)
15
+ sum_y = np.sum(img, axis=1)
16
+ range_x = np.where(sum_x > 0)
17
+ width = range_x[0][-1] - range_x[0][0]
18
+ range_y = np.where(sum_y > 0)
19
+ height = range_y[0][-1] - range_y[0][0]
20
+ return width, height
21
+
22
+ def write_glyph_imgs_mp(opts):
23
+ """Useing multiprocessing to render glyph images"""
24
+ charset = open(f"{opts.data_path}/char_set/{opts.language}.txt", 'r').read()
25
+ fonts_file_path = os.path.join(opts.ttf_path, opts.language)
26
+ sfd_path = os.path.join(opts.sfd_path, opts.language)
27
+ for root, dirs, files in os.walk(os.path.join(fonts_file_path, opts.split)):
28
+ ttf_names = files
29
+ # ttf_names = ['08343.aspx_id=299524532']
30
+ ttf_names.sort()
31
+ font_num = len(ttf_names)
32
+ charset_lenw = len(str(len(charset)))
33
+ process_nums = mp.cpu_count() - 1
34
+ font_num_per_process = font_num // process_nums + 1
35
+
36
+ def process(process_id, font_num_p_process):
37
+ for i in tqdm(range(process_id * font_num_p_process, (process_id + 1) * font_num_p_process)):
38
+ if i >= font_num:
39
+ break
40
+
41
+ fontname = ttf_names[i].split('.')[0]
42
+ # print(fontname)
43
+
44
+ if not os.path.exists(os.path.join(sfd_path, opts.split, fontname)):
45
+ continue
46
+
47
+ ttf_file_path = os.path.join(fonts_file_path, opts.split, ttf_names[i])
48
+
49
+ try:
50
+ font = ImageFont.truetype(ttf_file_path, int(opts.img_size*opts.FONT_SIZE), encoding="unic")
51
+ except:
52
+ print("can't open " + fontname)
53
+ continue
54
+
55
+ fontimgs_array = np.zeros((len(charset), opts.img_size, opts.img_size), np.uint8)
56
+ fontimgs_array[:, :, :] = 255
57
+
58
+ flag_success = True
59
+
60
+ for charid in range(len(charset)):
61
+ # read the meta file
62
+ txt_fpath = os.path.join(sfd_path, opts.split, fontname, fontname + '_' + '{num:0{width}}'.format(num=charid, width=charset_lenw) + '.txt')
63
+ try:
64
+ txt_lines = open(txt_fpath,'r').read().split('\n')
65
+ except:
66
+ print('cannot read text file')
67
+ flag_success = False
68
+ break
69
+ if len(txt_lines) < 5:
70
+ flag_success = False
71
+ break # fewer than 5 lines: treat as an empty/invalid meta file
72
+ # the offsets are calculated according to the rules in data_utils/svg_utils.py
73
+ vbox_w = float(txt_lines[1])
74
+ vbox_h = float(txt_lines[2])
75
+ norm = max(int(vbox_w), int(vbox_h))
76
+
77
+ if int(vbox_h) > int(vbox_w):
78
+ add_to_y = 0
79
+ add_to_x = abs(int(vbox_h) - int(vbox_w)) / 2
80
+ add_to_x = add_to_x * (float(opts.img_size) / norm)
81
+ else:
82
+ add_to_y = abs(int(vbox_h) - int(vbox_w)) / 2
83
+ add_to_y = add_to_y * (float(opts.img_size) / norm)
84
+ add_to_x = 0
85
+
86
+ char = charset[charid]
87
+
88
+ array = np.ndarray((opts.img_size, opts.img_size), np.uint8)
89
+ array[:, :] = 255
90
+ image = Image.fromarray(array)
91
+ draw = ImageDraw.Draw(image)
92
+ try:
93
+ font_width, font_height = font.getsize(char)
94
+ except Exception as e:
95
+ print("can't calculate height and width " + "%04d"%i + '_' + '{num:0{width}}'.format(num=charid, width=charset_lenw))
96
+ flag_success = False
97
+ break
98
+
99
+ try:
100
+ ascent, descent = font.getmetrics()
101
+ except:
102
+ print('cannot get ascent, descent')
103
+ flag_success = False
104
+ break
105
+
106
+ draw_pos_x = add_to_x
107
+ #if opts.language == 'eng':
108
+ thai_characters_long = ["ญ","ฎ","ฏ","ฐ"]
109
+
110
+ if char in thai_characters_long:
111
+ draw_pos_y = add_to_y + opts.img_size - ascent - descent - int((opts.img_size / 24.0) * (10.0 / 3.0))
112
+ else:
113
+ draw_pos_y = add_to_y + opts.img_size - ascent - int((opts.img_size / 24.0) * (10.0 / 3.0))
114
+ #else:
115
+ # draw_pos_y = add_to_y + opts.img_size - ascent - int((opts.img_size / 24.0) * (10.0 / 3.0))
116
+
117
+ draw.text((draw_pos_x, draw_pos_y), char, (0), font=font)
118
+
119
+ if opts.debug:
120
+ image.save(os.path.join(sfd_path, opts.split, fontname, str(charid) + '_' + str(opts.img_size) + '.png'))
121
+
122
+ try:
123
+ char_w, char_h = get_bbox(image)
124
+ # print(charid, char_w, char_h)
125
+ except Exception as e:
126
+ print("cannot get bbox")
127
+ print(e)
128
+ flag_success = False
129
+ break
130
+
131
+ # Detect large font
132
+ problem = []
133
+ if font_width > 59:
134
+ problem.append("width")
135
+
136
+ if font_height > 93:
137
+ problem.append("height")
138
+
139
+ if problem:
140
+ print(problem,fontname, charid, font_width, font_height, char_w, char_h)
141
+ flag_success = False
142
+ break
143
+
144
+ # Detect Small Font
145
+ if (char_w < opts.img_size * 0.15) and (char_h < opts.img_size * 0.15):
146
+ flag_success = False
147
+ break
148
+
149
+ fontimgs_array[charid] = np.array(image)
150
+
151
+ if flag_success:
152
+ np.save(os.path.join(sfd_path, opts.split, fontname, 'imgs_' + str(opts.img_size) + '.npy'), fontimgs_array)
153
+ else:
154
+ global char_error # Count char flag not success
155
+ char_error += 1
156
+ print("flag on", fontname, charid, 'imgs_' + str(opts.img_size) + '.npy', " Not Succeed")
157
+
158
+ processes = [mp.Process(target=process, args=(pid, font_num_per_process)) for pid in range(process_nums)]
159
+
160
+ for p in processes:
161
+ p.start()
162
+ for p in processes:
163
+ p.join()
164
+
165
+ def main():
166
+ parser = argparse.ArgumentParser(description="Write glyph images")
167
+ parser.add_argument("--language", type=str, default='eng', choices=['eng', 'chn', 'tha'])
168
+ parser.add_argument("--data_path", type=str, default='./Font_Dataset', help="Path to Dataset")
169
+ parser.add_argument("--ttf_path", type=str, default='../data/font_ttfs')
170
+ parser.add_argument('--sfd_path', type=str, default='../data/font_sfds')
171
+ parser.add_argument('--img_size', type=int, default=64)
172
+ parser.add_argument('--split', type=str, default='train')
173
+ parser.add_argument('--FONT_SIZE', type=float, default=1)
174
+ parser.add_argument('--debug', type=bool, default=False)
175
+ opts = parser.parse_args()
176
+ write_glyph_imgs_mp(opts)
177
+
178
+
179
+ if __name__ == "__main__":
180
+ main()
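Note: a minimal sketch of what get_bbox computes on a white-background glyph image (the synthetic rectangle stands in for rendered ink; users on Pillow >= 10 should also note that font.getsize, used above, was removed in favor of getbbox/getlength):

import numpy as np
from PIL import Image, ImageDraw

img = Image.new('L', (64, 64), 255)                       # blank white canvas
ImageDraw.Draw(img).rectangle([10, 20, 40, 50], fill=0)   # fake glyph ink
arr = 255 - np.array(img)                                 # ink becomes positive
xs = np.where(arr.sum(axis=0) > 0)[0]                     # columns containing ink
ys = np.where(arr.sum(axis=1) > 0)[0]                     # rows containing ink
assert (xs[-1] - xs[0], ys[-1] - ys[0]) == (30, 30)       # (width, height), as get_bbox returns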
dataloader.py → ThaiVecFont/dataloader.py RENAMED
@@ -1,67 +1,67 @@
1
- # data loader for training main model
2
- import os
3
- import pickle
4
- import torch
5
- import torch.utils.data as data
6
- import torchvision.transforms as T
7
- import sys
8
- import numpy as np
9
- torch.multiprocessing.set_sharing_strategy('file_system')
10
-
11
-
12
- class SVGDataset(data.Dataset):
13
- def __init__(self, root_path, img_size=128, lang='eng', char_num=52, max_seq_len=51, dim_seq=10, transform=None, mode='train'):
14
- super().__init__()
15
- self.mode = mode
16
- self.img_size = img_size
17
- self.char_num = char_num
18
- self.max_seq_len = max_seq_len
19
- self.dim_seq = dim_seq
20
- self.trans = transform
21
- self.font_paths = []
22
- self.dir_path = os.path.join(root_path, lang, self.mode)
23
- for root, dirs, files in os.walk(self.dir_path):
24
- depth = root.count('/') - self.dir_path.count('/')
25
- if depth == 0:
26
- for dir_name in dirs:
27
- self.font_paths.append(os.path.join(self.dir_path, dir_name))
28
- self.font_paths.sort()
29
- print(f"Finished loading {mode} paths, number: {str(len(self.font_paths))}")
30
-
31
- def __getitem__(self, index):
32
- item = {}
33
- font_path = self.font_paths[index]
34
- # load the per-font arrays written by the preprocessing pipeline
35
- item['class'] = torch.LongTensor(np.load(os.path.join(font_path, 'class.npy')))
36
- item['seq_len'] = torch.LongTensor(np.load(os.path.join(font_path, 'seq_len.npy')))
37
- item['sequence'] = torch.FloatTensor(np.load(os.path.join(font_path, 'sequence_relaxed.npy'))).view(self.char_num, self.max_seq_len, self.dim_seq)
38
- item['pts_aux'] = torch.FloatTensor(np.load(os.path.join(font_path, 'pts_aux.npy')))
39
- item['rendered'] = torch.FloatTensor(np.load(os.path.join(font_path, 'rendered_' + str(self.img_size) + '.npy'))).view(self.char_num, self.img_size, self.img_size) / 255.
40
- item['rendered'] = self.trans(item['rendered'])
41
- item['font_id'] = torch.FloatTensor(np.load(os.path.join(font_path, 'font_id.npy')).astype(np.float32))
42
- return item
43
-
44
- def __len__(self):
45
- return len(self.font_paths)
46
-
47
-
48
- def get_loader(root_path, img_size, lang, char_num, max_seq_len, dim_seq, batch_size, mode='train'):
49
- SetRange = T.Lambda(lambda X: 1. - X) # invert grayscale: map X in [0, 1] to 1 - X
50
- transform = T.Compose([SetRange])
51
- dataset = SVGDataset(root_path, img_size, lang, char_num, max_seq_len, dim_seq, transform, mode)
52
- dataloader = data.DataLoader(dataset, batch_size, shuffle=(mode == 'train'), num_workers=batch_size)
53
- return dataloader
54
-
55
- if __name__ == '__main__':
56
- root_path = 'data/new_data'
57
- max_seq_len = 51
58
- dim_seq = 10
59
- batch_size = 1
60
- char_num = 52
61
-
62
- loader = get_loader(root_path, img_size=64, lang='eng', char_num=char_num, max_seq_len=max_seq_len, dim_seq=dim_seq, batch_size=batch_size, mode='train') # img_size/lang assumed; the original call misordered the positional args
63
- fout = open('train_id_record_old.txt','w')
64
- for idx, batch in enumerate(loader):
65
- binary_fp = batch['font_id'].numpy()[0][0]
66
- fout.write("%05d"%int(binary_fp) + '\n')
67
-
 
1
+ # data loader for training main model
2
+ import os
3
+ import pickle
4
+ import torch
5
+ import torch.utils.data as data
6
+ import torchvision.transforms as T
7
+ import sys
8
+ import numpy as np
9
+ torch.multiprocessing.set_sharing_strategy('file_system')
10
+
11
+
12
+ class SVGDataset(data.Dataset):
13
+ def __init__(self, root_path, img_size=128, lang='eng', char_num=52, max_seq_len=51, dim_seq=10, transform=None, mode='train'):
14
+ super().__init__()
15
+ self.mode = mode
16
+ self.img_size = img_size
17
+ self.char_num = char_num
18
+ self.max_seq_len = max_seq_len
19
+ self.dim_seq = dim_seq
20
+ self.trans = transform
21
+ self.font_paths = []
22
+ self.dir_path = os.path.join(root_path, lang, self.mode)
23
+ for root, dirs, files in os.walk(self.dir_path):
24
+ depth = root.count('/') - self.dir_path.count('/')
25
+ if depth == 0:
26
+ for dir_name in dirs:
27
+ self.font_paths.append(os.path.join(self.dir_path, dir_name))
28
+ self.font_paths.sort()
29
+ print(f"Finished loading {mode} paths, number: {str(len(self.font_paths))}")
30
+
31
+ def __getitem__(self, index):
32
+ item = {}
33
+ font_path = self.font_paths[index]
34
+ # load the per-font arrays written by the preprocessing pipeline
35
+ item['class'] = torch.LongTensor(np.load(os.path.join(font_path, 'class.npy')))
36
+ item['seq_len'] = torch.LongTensor(np.load(os.path.join(font_path, 'seq_len.npy')))
37
+ item['sequence'] = torch.FloatTensor(np.load(os.path.join(font_path, 'sequence_relaxed.npy'))).view(self.char_num, self.max_seq_len, self.dim_seq)
38
+ item['pts_aux'] = torch.FloatTensor(np.load(os.path.join(font_path, 'pts_aux.npy')))
39
+ item['rendered'] = torch.FloatTensor(np.load(os.path.join(font_path, 'rendered_' + str(self.img_size) + '.npy'))).view(self.char_num, self.img_size, self.img_size) / 255.
40
+ item['rendered'] = self.trans(item['rendered'])
41
+ item['font_id'] = torch.FloatTensor(np.load(os.path.join(font_path, 'font_id.npy')).astype(np.float32))
42
+ return item
43
+
44
+ def __len__(self):
45
+ return len(self.font_paths)
46
+
47
+
48
+ def get_loader(root_path, img_size, lang, char_num, max_seq_len, dim_seq, batch_size, mode='train'):
49
+ SetRange = T.Lambda(lambda X: 1. - X) # invert grayscale: map X in [0, 1] to 1 - X
50
+ transform = T.Compose([SetRange])
51
+ dataset = SVGDataset(root_path, img_size, lang, char_num, max_seq_len, dim_seq, transform, mode)
52
+ dataloader = data.DataLoader(dataset, batch_size, shuffle=(mode == 'train'), num_workers=batch_size)
53
+ return dataloader
54
+
55
+ if __name__ == '__main__':
56
+ root_path = 'data/new_data'
57
+ max_seq_len = 51
58
+ dim_seq = 10
59
+ batch_size = 1
60
+ char_num = 52
61
+
62
+ loader = get_loader(root_path, img_size=64, lang='eng', char_num=char_num, max_seq_len=max_seq_len, dim_seq=dim_seq, batch_size=batch_size, mode='train') # img_size/lang assumed; the original call misordered the positional args
63
+ fout = open('train_id_record_old.txt','w')
64
+ for idx, batch in enumerate(loader):
65
+ binary_fp = batch['font_id'].numpy()[0][0]
66
+ fout.write("%05d"%int(binary_fp) + '\n')
67
+
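Note: a hedged usage sketch for the relocated dataloader; the ThaiVecFont import path follows this rename, and the dataset directory is an assumption:

from ThaiVecFont.dataloader import get_loader

loader = get_loader('data/vecfont_dataset', img_size=64, lang='tha',
                    char_num=44, max_seq_len=121, dim_seq=10,
                    batch_size=2, mode='train')
batch = next(iter(loader))
print(batch['rendered'].shape)   # torch.Size([2, 44, 64, 64]), inverted so ink = 1
print(batch['sequence'].shape)   # torch.Size([2, 44, 121, 10])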
{font_sample → ThaiVecFont/font_sample}/Athiti-Regular.ttf RENAMED
File without changes
{font_sample → ThaiVecFont/font_sample}/SaoChingcha-Bold.otf RENAMED
File without changes
{font_sample → ThaiVecFont/font_sample}/SaoChingcha-Light.otf RENAMED
File without changes
{font_sample → ThaiVecFont/font_sample}/SaoChingcha-Regular.otf RENAMED
File without changes
generate.py → ThaiVecFont/generate.py RENAMED
@@ -1,143 +1,143 @@
1
- import fontTools
2
- import os
3
- import shutil
4
- import typing
5
- import PIL
6
- from PIL import Image, ImageDraw, ImageFont
7
- from data_utils.convert_ttf_to_sfd import convert_mp
8
- from data_utils.write_glyph_imgs import write_glyph_imgs_mp
9
- from data_utils.write_data_to_dirs import create_db
10
- from data_utils.relax_rep import relax_rep
11
- from test_few_shot import test_main_model
12
- from options import get_parser_main_model
13
-
14
- opts = get_parser_main_model().parse_args()
15
-
16
- # Config on opts
17
- # Inference opts
18
- opts.mode = "test"
19
- opts.language = "tha"
20
- opts.char_num = 44
21
- opts.ref_nshot = 8
22
- opts.batch_size = 1 # inference rule
23
- opts.img_size = 64
24
- opts.max_seq_len = 121
25
- opts.name_ckpt = ""
26
- opts.model_path = "./inference_model/950_49452.ckpt"
27
- opts.ref_char_ids = "0,1,2,3,4,5,6,7"
28
- opts.dir_res = "./inference"
29
- opts.data_root = "./inference/vecfont_dataset/"
30
-
31
- # Data preprocessing opts
32
- opts.data_path = './inference'
33
- opts.sfd_path = f'{opts.data_path}/font_sfds'
34
- opts.ttf_path = f'{opts.data_path}/font_ttfs'
35
- opts.split = "test"
36
- opts.debug = True # Save Image On write_glyph_imgs_mp
37
- opts.output_path = f'{opts.data_path}/vecfont_dataset/'
38
- opts.phase = 0
39
- opts.FONT_SIZE = 1
40
-
41
- opts.streamlit = True
42
-
43
- # Glyph IDs:
44
- # [(0, 'A'), (1, 'B'), (2, 'C'), (3, 'D'), (4, 'E')]
45
- # [(5, 'F'), (6, 'G'), (7, 'H'), (8, 'I'), (9, 'J')]
46
- # [(10, 'K'), (11, 'L'), (12, 'M'), (13, 'N'), (14, 'O')]
47
- # [(15, 'P'), (16, 'Q'), (17, 'R'), (18, 'S'), (19, 'T')]
48
- # [(20, 'U'), (21, 'V'), (22, 'W'), (23, 'X'), (24, 'Y')]
49
- # [(25, 'Z'), (26, 'a'), (27, 'b'), (28, 'c'), (29, 'd')]
50
- # [(30, 'e'), (31, 'f'), (32, 'g'), (33, 'h'), (34, 'i')]
51
- # [(35, 'j'), (36, 'k'), (37, 'l'), (38, 'm'), (39, 'n')]
52
- # [(40, 'o'), (41, 'p'), (42, 'q'), (43, 'r'), (44, 's')]
53
- # [(45, 't'), (46, 'u'), (47, 'v'), (48, 'w'), (49, 'x')]
54
- # [(50, 'y'), (51, 'z'), (52, 'ก'), (53, 'ข'), (54, 'ฃ')]
55
- # [(55, 'ค'), (56, 'ฅ'), (57, 'ฆ'), (58, 'ง'), (59, 'จ')]
56
- # [(60, 'ฉ'), (61, 'ช'), (62, 'ซ'), (63, 'ฌ'), (64, 'ญ')]
57
- # [(65, 'ฎ'), (66, 'ฏ'), (67, 'ฐ'), (68, 'ฑ'), (69, 'ฒ')]
58
- # [(70, 'ณ'), (71, 'ด'), (72, 'ต'), (73, 'ถ'), (74, 'ท')]
59
- # [(75, 'ธ'), (76, 'น'), (77, 'บ'), (78, 'ป'), (79, 'ผ')]
60
- # [(80, 'ฝ'), (81, 'พ'), (82, 'ฟ'), (83, 'ภ'), (84, 'ม')]
61
- # [(85, 'ย'), (86, 'ร'), (87, 'ล'), (88, 'ว'), (89, 'ศ')]
62
- # [(90, 'ษ'), (91, 'ส'), (92, 'ห'), (93, 'ฬ'), (94, 'อ')]
63
- # [(95, 'ฮ')]
64
-
65
- import string
66
- import pythainlp
67
-
68
- thai_digits = [*pythainlp.thai_digits]
69
- thai_characters = [*pythainlp.thai_consonants]
70
- eng_characters = [*string.ascii_letters]
71
- thai_floating = [*pythainlp.thai_vowels]
72
-
73
- directories = [
74
- "inference",
75
- "inference/char_set",
76
- "inference/font_sfds",
77
- "inference/font_ttfs",
78
- "inference/vecfont_dataset",
79
- "inference/font_ttfs/tha/test",
80
- ]
81
-
82
-
83
- # Data Preprocessing
84
- def preprocessing(ttf_file) -> str:
85
- shutil.rmtree("inference", ignore_errors=True)
86
- for directory in directories:
87
- os.makedirs(directory, exist_ok=True)
88
-
89
- # Save File / Copy File
90
- if isinstance(ttf_file, memoryview):
91
- with open(f"{opts.data_path}/font_ttfs/tha/test/0000.ttf", 'wb') as f:
92
- f.write(ttf_file)
93
- elif isinstance(ttf_file, str):
94
- shutil.copy(ttf_file, f"{opts.data_path}/font_ttfs/tha/test/0000.ttf")
95
-
96
- glypts = sorted(set(thai_characters))
97
- print("Glypts:",len(glypts))
98
- print("".join(glypts))
99
- f = open("inference/char_set/tha.txt", "w")
100
- f.write("".join(glypts))
101
- f.close()
102
-
103
- # Preprocess Pipeline
104
- convert_mp(opts)
105
- write_glyph_imgs_mp(opts)
106
- output_path = os.path.join(opts.output_path, opts.language, opts.split)
107
- log_path = os.path.join(opts.sfd_path, opts.language, 'log')
108
- if not os.path.exists(output_path):
109
- os.makedirs(output_path)
110
- if not os.path.exists(log_path):
111
- os.makedirs(log_path)
112
- create_db(opts, output_path, log_path)
113
- relax_rep(opts)
114
-
115
- print("Finished making a data", ttf_file)
116
- print("Saved at", output_path)
117
- return output_path
118
-
119
- def inference_model(n_samples, ref_char_ids, version):
120
- opts.n_samples = n_samples
121
- opts.ref_char_ids = ref_char_ids
122
-
123
- # Select Model
124
- if version == "TH2TH":
125
- opts.model_path = "./inference_model/950_49452.ckpt"
126
- elif version == "ENG2TH":
127
- opts.model_path = "./inference_model/950_49452.ckpt"
128
- else:
129
- raise NotImplementedError
130
-
131
- return test_main_model(opts)
132
-
133
- def ttf_to_image(ttf_file, n_samples=10, ref_char_ids="1,2,3,4,5,6,7,8", version="TH2TH"):
134
- preprocessing(ttf_file) # Make Data
135
- merge_svg_img = inference_model(n_samples, ref_char_ids, version) # Inference
136
- return merge_svg_img
137
-
138
- def main():
139
- print(opts.mode)
140
- ttf_to_image("font_sample/SaoChingcha-Regular.otf")
141
-
142
- if __name__ == "__main__":
143
- main()
 
1
+ import fontTools
2
+ import os
3
+ import shutil
4
+ import typing
5
+ import PIL
6
+ from PIL import Image, ImageDraw, ImageFont
7
+ from data_utils.convert_ttf_to_sfd import convert_mp
8
+ from data_utils.write_glyph_imgs import write_glyph_imgs_mp
9
+ from data_utils.write_data_to_dirs import create_db
10
+ from data_utils.relax_rep import relax_rep
11
+ from test_few_shot import test_main_model
12
+ from options import get_parser_main_model
13
+
14
+ opts = get_parser_main_model().parse_args()
15
+
16
+ # Config on opts
17
+ # Inference opts
18
+ opts.mode = "test"
19
+ opts.language = "tha"
20
+ opts.char_num = 44
21
+ opts.ref_nshot = 8
22
+ opts.batch_size = 1 # inference rule
23
+ opts.img_size = 64
24
+ opts.max_seq_len = 121
25
+ opts.name_ckpt = ""
26
+ opts.model_path = "./inference_model/950_49452.ckpt"
27
+ opts.ref_char_ids = "0,1,2,3,4,5,6,7"
28
+ opts.dir_res = "./inference"
29
+ opts.data_root = "./inference/vecfont_dataset/"
30
+
31
+ # Data preprocessing opts
32
+ opts.data_path = './inference'
33
+ opts.sfd_path = f'{opts.data_path}/font_sfds'
34
+ opts.ttf_path = f'{opts.data_path}/font_ttfs'
35
+ opts.split = "test"
36
+ opts.debug = True # Save Image On write_glyph_imgs_mp
37
+ opts.output_path = f'{opts.data_path}/vecfont_dataset/'
38
+ opts.phase = 0
39
+ opts.FONT_SIZE = 1
40
+
41
+ opts.streamlit = True
42
+
43
+ # Glyph IDs:
44
+ # [(0, 'A'), (1, 'B'), (2, 'C'), (3, 'D'), (4, 'E')]
45
+ # [(5, 'F'), (6, 'G'), (7, 'H'), (8, 'I'), (9, 'J')]
46
+ # [(10, 'K'), (11, 'L'), (12, 'M'), (13, 'N'), (14, 'O')]
47
+ # [(15, 'P'), (16, 'Q'), (17, 'R'), (18, 'S'), (19, 'T')]
48
+ # [(20, 'U'), (21, 'V'), (22, 'W'), (23, 'X'), (24, 'Y')]
49
+ # [(25, 'Z'), (26, 'a'), (27, 'b'), (28, 'c'), (29, 'd')]
50
+ # [(30, 'e'), (31, 'f'), (32, 'g'), (33, 'h'), (34, 'i')]
51
+ # [(35, 'j'), (36, 'k'), (37, 'l'), (38, 'm'), (39, 'n')]
52
+ # [(40, 'o'), (41, 'p'), (42, 'q'), (43, 'r'), (44, 's')]
53
+ # [(45, 't'), (46, 'u'), (47, 'v'), (48, 'w'), (49, 'x')]
54
+ # [(50, 'y'), (51, 'z'), (52, 'ก'), (53, 'ข'), (54, 'ฃ')]
55
+ # [(55, 'ค'), (56, 'ฅ'), (57, 'ฆ'), (58, 'ง'), (59, 'จ')]
56
+ # [(60, 'ฉ'), (61, 'ช'), (62, 'ซ'), (63, 'ฌ'), (64, 'ญ')]
57
+ # [(65, 'ฎ'), (66, 'ฏ'), (67, 'ฐ'), (68, 'ฑ'), (69, 'ฒ')]
58
+ # [(70, 'ณ'), (71, 'ด'), (72, 'ต'), (73, 'ถ'), (74, 'ท')]
59
+ # [(75, 'ธ'), (76, 'น'), (77, 'บ'), (78, 'ป'), (79, 'ผ')]
60
+ # [(80, 'ฝ'), (81, 'พ'), (82, 'ฟ'), (83, 'ภ'), (84, 'ม')]
61
+ # [(85, 'ย'), (86, 'ร'), (87, 'ล'), (88, 'ว'), (89, 'ศ')]
62
+ # [(90, 'ษ'), (91, 'ส'), (92, 'ห'), (93, 'ฬ'), (94, 'อ')]
63
+ # [(95, 'ฮ')]
64
+
65
+ import string
66
+ import pythainlp
67
+
68
+ thai_digits = [*pythainlp.thai_digits]
69
+ thai_characters = [*pythainlp.thai_consonants]
70
+ eng_characters = [*string.ascii_letters]
71
+ thai_floating = [*pythainlp.thai_vowels]
72
+
73
+ directories = [
74
+ "inference",
75
+ "inference/char_set",
76
+ "inference/font_sfds",
77
+ "inference/font_ttfs",
78
+ "inference/vecfont_dataset",
79
+ "inference/font_ttfs/tha/test",
80
+ ]
81
+
82
+
83
+ # Data Preprocessing
84
+ def preprocessing(ttf_file) -> str:
85
+ shutil.rmtree("inference", ignore_errors=True)
86
+ for directory in directories:
87
+ os.makedirs(directory, exist_ok=True)
88
+
89
+ # Save File / Copy File
90
+ if isinstance(ttf_file, memoryview):
91
+ with open(f"{opts.data_path}/font_ttfs/tha/test/0000.ttf", 'wb') as f:
92
+ f.write(ttf_file)
93
+ elif isinstance(ttf_file, str):
94
+ shutil.copy(ttf_file, f"{opts.data_path}/font_ttfs/tha/test/0000.ttf")
95
+
96
+ glypts = sorted(set(thai_characters))
97
+ print("Glypts:",len(glypts))
98
+ print("".join(glypts))
99
+ f = open("inference/char_set/tha.txt", "w")
100
+ f.write("".join(glypts))
101
+ f.close()
102
+
103
+ # Preprocess Pipeline
104
+ convert_mp(opts)
105
+ write_glyph_imgs_mp(opts)
106
+ output_path = os.path.join(opts.output_path, opts.language, opts.split)
107
+ log_path = os.path.join(opts.sfd_path, opts.language, 'log')
108
+ if not os.path.exists(output_path):
109
+ os.makedirs(output_path)
110
+ if not os.path.exists(log_path):
111
+ os.makedirs(log_path)
112
+ create_db(opts, output_path, log_path)
113
+ relax_rep(opts)
114
+
115
+ print("Finished making a data", ttf_file)
116
+ print("Saved at", output_path)
117
+ return output_path
118
+
119
+ def inference_model(n_samples, ref_char_ids, version):
120
+ opts.n_samples = n_samples
121
+ opts.ref_char_ids = ref_char_ids
122
+
123
+ # Select Model
124
+ if version == "TH2TH":
125
+ opts.model_path = "./inference_model/950_49452.ckpt"
126
+ elif version == "ENG2TH":
127
+ opts.model_path = "./inference_model/950_49452.ckpt"
128
+ else:
129
+ raise NotImplementedError
130
+
131
+ return test_main_model(opts)
132
+
133
+ def ttf_to_image(ttf_file, n_samples=10, ref_char_ids="1,2,3,4,5,6,7,8", version="TH2TH"):
134
+ preprocessing(ttf_file) # Make Data
135
+ merge_svg_img = inference_model(n_samples, ref_char_ids, version) # Inference
136
+ return merge_svg_img
137
+
138
+ def main():
139
+ print(opts.mode)
140
+ ttf_to_image("font_sample/SaoChingcha-Regular.otf")
141
+
142
+ if __name__ == "__main__":
143
+ main()
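Note: a usage sketch mirroring main() above; the font path exists in this repo, while n_samples and the id string are illustrative (ids index the glyph table in the comment block):

from ThaiVecFont.generate import ttf_to_image

# eight reference ids to match ref_nshot = 8
merged = ttf_to_image("font_sample/SaoChingcha-Regular.otf",
                      n_samples=10,
                      ref_char_ids="0,1,2,3,4,5,6,7",
                      version="TH2TH")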
{inference_model → ThaiVecFont/inference_model}/950_49452.ckpt RENAMED
File without changes
{models → ThaiVecFont/models}/__init__.py RENAMED
File without changes
{models → ThaiVecFont/models}/image_decoder.py RENAMED
@@ -1,48 +1,48 @@
1
- import torch
2
- import torch.nn as nn
3
- import torch.nn.functional as F
4
- import math
5
-
6
- class ImageDecoder(nn.Module):
7
- def __init__(self, img_size, input_nc, output_nc, ngf=16, norm_layer=nn.LayerNorm):
8
-
9
- super(ImageDecoder, self).__init__()
10
- n_upsampling = int(math.log(img_size, 2))
11
- ks_list = [3] * (n_upsampling // 3) + [5] * (n_upsampling - n_upsampling // 3)
12
- stride_list = [2] * n_upsampling
13
- decoder = []
14
-
15
- chn_mult = []
16
- for i in range(n_upsampling):
17
- chn_mult.append(2 ** (n_upsampling - i - 1))
18
-
19
- decoder += [nn.ConvTranspose2d(input_nc, chn_mult[0] * ngf,
20
- kernel_size=ks_list[0], stride=stride_list[0],
21
- padding=ks_list[0] // 2, output_padding=stride_list[0]-1),
22
- norm_layer([chn_mult[0] * ngf, 2, 2]),
23
- nn.ReLU(True)]
24
-
25
- for i in range(1, n_upsampling): # add upsampling layers
26
- chn_prev = chn_mult[i - 1] * ngf
27
- chn_next = chn_mult[i] * ngf
28
- decoder += [nn.ConvTranspose2d(chn_prev, chn_next, kernel_size=ks_list[i], stride=stride_list[i], padding=ks_list[i] // 2, output_padding=stride_list[i]-1),
29
- norm_layer([chn_next, 2 ** (i+1) , 2 ** (i+1)]),
30
- nn.ReLU(True)]
31
-
32
- decoder += [nn.Conv2d(chn_mult[-1] * ngf, output_nc, kernel_size=7, padding=7 // 2)]
33
- decoder += [nn.Sigmoid()]
34
- self.decode = nn.Sequential(*decoder)
35
-
36
- def forward(self, latent_feat, trg_char, trg_img=None):
37
- """Standard forward"""
38
- dec_input = torch.cat((latent_feat, trg_char),-1)
39
- dec_input = dec_input.view(dec_input.size(0), dec_input.size(1), 1, 1)
40
- dec_out = self.decode(dec_input)
41
- output = {}
42
- output['gen_imgs'] = dec_out
43
- if trg_img is not None:
44
- output['img_l1loss'] = F.l1_loss(dec_out, trg_img)
45
-
46
- return output
47
-
48
-
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+ import math
5
+
6
+ class ImageDecoder(nn.Module):
7
+ def __init__(self, img_size, input_nc, output_nc, ngf=16, norm_layer=nn.LayerNorm):
8
+
9
+ super(ImageDecoder, self).__init__()
10
+ n_upsampling = int(math.log(img_size, 2))
11
+ ks_list = [3] * (n_upsampling // 3) + [5] * (n_upsampling - n_upsampling // 3)
12
+ stride_list = [2] * n_upsampling
13
+ decoder = []
14
+
15
+ chn_mult = []
16
+ for i in range(n_upsampling):
17
+ chn_mult.append(2 ** (n_upsampling - i - 1))
18
+
19
+ decoder += [nn.ConvTranspose2d(input_nc, chn_mult[0] * ngf,
20
+ kernel_size=ks_list[0], stride=stride_list[0],
21
+ padding=ks_list[0] // 2, output_padding=stride_list[0]-1),
22
+ norm_layer([chn_mult[0] * ngf, 2, 2]),
23
+ nn.ReLU(True)]
24
+
25
+ for i in range(1, n_upsampling): # add upsampling layers
26
+ chn_prev = chn_mult[i - 1] * ngf
27
+ chn_next = chn_mult[i] * ngf
28
+ decoder += [nn.ConvTranspose2d(chn_prev, chn_next, kernel_size=ks_list[i], stride=stride_list[i], padding=ks_list[i] // 2, output_padding=stride_list[i]-1),
29
+ norm_layer([chn_next, 2 ** (i+1) , 2 ** (i+1)]),
30
+ nn.ReLU(True)]
31
+
32
+ decoder += [nn.Conv2d(chn_mult[-1] * ngf, output_nc, kernel_size=7, padding=7 // 2)]
33
+ decoder += [nn.Sigmoid()]
34
+ self.decode = nn.Sequential(*decoder)
35
+
36
+ def forward(self, latent_feat, trg_char, trg_img=None):
37
+ """Standard forward"""
38
+ dec_input = torch.cat((latent_feat, trg_char),-1)
39
+ dec_input = dec_input.view(dec_input.size(0), dec_input.size(1), 1, 1)
40
+ dec_out = self.decode(dec_input)
41
+ output = {}
42
+ output['gen_imgs'] = dec_out
43
+ if trg_img is not None:
44
+ output['img_l1loss'] = F.l1_loss(dec_out, trg_img)
45
+
46
+ return output
47
+
48
+
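Note: a quick shape sanity-check for the decoder; a sketch assuming the Thai inference settings used elsewhere in this repo (bottleneck_bits = 512, char_num = 44, default ngf = 16):

import torch
from ThaiVecFont.models.image_decoder import ImageDecoder

dec = ImageDecoder(img_size=64, input_nc=512 + 44, output_nc=1)
z = torch.randn(2, 512)                     # latent code
trg_char = torch.zeros(2, 44)               # one-hot target class
trg_char[:, 3] = 1.
print(dec(z, trg_char)['gen_imgs'].shape)   # torch.Size([2, 1, 64, 64])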
{models → ThaiVecFont/models}/image_encoder.py RENAMED
@@ -1,42 +1,42 @@
1
- import torch
2
- import torch.nn as nn
3
- import torch.nn.functional as F
4
- import math
5
-
6
- class ImageEncoder(nn.Module):
7
-
8
- def __init__(self, img_size, input_nc, ngf=16, norm_layer=nn.LayerNorm):
9
-
10
- super(ImageEncoder, self).__init__()
11
- n_downsampling = int(math.log(img_size, 2))
12
- ks_list = [5] * (n_downsampling - n_downsampling // 3) + [3] * (n_downsampling // 3)
13
- stride_list = [2] * n_downsampling
14
-
15
- chn_mult = []
16
- for i in range(n_downsampling):
17
- chn_mult.append(2 ** (i + 1))
18
-
19
- encoder = [nn.Conv2d(input_nc, ngf, kernel_size=7, padding=7 // 2, bias=True, padding_mode='replicate'),
20
- norm_layer([ngf, 2 ** n_downsampling, 2 ** n_downsampling]),
21
- nn.ReLU(True)]
22
- for i in range(n_downsampling): # add downsampling layers
23
- if i == 0:
24
- chn_prev = ngf
25
- else:
26
- chn_prev = ngf * chn_mult[i - 1]
27
- chn_next = ngf * chn_mult[i]
28
-
29
- encoder += [nn.Conv2d(chn_prev, chn_next, kernel_size=ks_list[i], stride=stride_list[i], padding=ks_list[i] // 2, padding_mode='replicate'),
30
- norm_layer([chn_next, 2 ** (n_downsampling - 1 - i), 2 ** (n_downsampling - 1 - i)]),
31
- nn.ReLU(True)]
32
-
33
- self.encode = nn.Sequential(*encoder)
34
- self.flatten = nn.Flatten()
35
-
36
- def forward(self, input):
37
- """Standard forward"""
38
- ret = self.encode(input)
39
- img_feat = self.flatten(ret)
40
- output = {}
41
- output['img_feat'] = img_feat
42
- return output
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+ import math
5
+
6
+ class ImageEncoder(nn.Module):
7
+
8
+ def __init__(self, img_size, input_nc, ngf=16, norm_layer=nn.LayerNorm):
9
+
10
+ super(ImageEncoder, self).__init__()
11
+ n_downsampling = int(math.log(img_size, 2))
12
+ ks_list = [5] * (n_downsampling - n_downsampling // 3) + [3] * (n_downsampling // 3)
13
+ stride_list = [2] * n_downsampling
14
+
15
+ chn_mult = []
16
+ for i in range(n_downsampling):
17
+ chn_mult.append(2 ** (i + 1))
18
+
19
+ encoder = [nn.Conv2d(input_nc, ngf, kernel_size=7, padding=7 // 2, bias=True, padding_mode='replicate'),
20
+ norm_layer([ngf, 2 ** n_downsampling, 2 ** n_downsampling]),
21
+ nn.ReLU(True)]
22
+ for i in range(n_downsampling): # add downsampling layers
23
+ if i == 0:
24
+ chn_prev = ngf
25
+ else:
26
+ chn_prev = ngf * chn_mult[i - 1]
27
+ chn_next = ngf * chn_mult[i]
28
+
29
+ encoder += [nn.Conv2d(chn_prev, chn_next, kernel_size=ks_list[i], stride=stride_list[i], padding=ks_list[i] // 2, padding_mode='replicate'),
30
+ norm_layer([chn_next, 2 ** (n_downsampling - 1 - i), 2 ** (n_downsampling - 1 - i)]),
31
+ nn.ReLU(True)]
32
+
33
+ self.encode = nn.Sequential(*encoder)
34
+ self.flatten = nn.Flatten()
35
+
36
+ def forward(self, input):
37
+ """Standard forward"""
38
+ ret = self.encode(input)
39
+ img_feat = self.flatten(ret)
40
+ output = {}
41
+ output['img_feat'] = img_feat
42
+ return output
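Note: the mirror-image check for the encoder (a sketch; the eight input channels correspond to ref_nshot = 8):

import torch
from ThaiVecFont.models.image_encoder import ImageEncoder

enc = ImageEncoder(img_size=64, input_nc=8)
feat = enc(torch.rand(2, 8, 64, 64))['img_feat']
print(feat.shape)   # torch.Size([2, 1024]): ngf=16 * 2**6 channels at 1x1, flattened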
{models → ThaiVecFont/models}/modality_fusion.py RENAMED
@@ -1,64 +1,64 @@
1
- import torch
2
- import torch.nn as nn
3
- import torch.nn.functional as F
4
- import math
5
- from options import get_parser_main_model
6
- opts = get_parser_main_model().parse_args()
7
-
8
- def init_weights(m):
9
- for name, param in m.named_parameters():
10
- nn.init.uniform_(param.data, -0.08, 0.08)
11
-
12
-
13
- class ModalityFusion(nn.Module):
14
- def __init__(self, img_size=64, ref_nshot=4, bottleneck_bits=512, ngf=32, seq_latent_dim=512, mode='train'):
15
- super().__init__()
16
- self.mode = mode
17
- self.bottleneck_bits = bottleneck_bits
18
- self.ref_nshot = ref_nshot
19
- self.mode = mode
20
- self.fc_merge = nn.Linear(seq_latent_dim * opts.ref_nshot, 512)
21
- n_downsampling = int(math.log(img_size, 2))
22
- mult_max = 2 ** (n_downsampling)
23
- self.fc_fusion = nn.Linear(ngf * mult_max + seq_latent_dim, opts.bottleneck_bits * 2, bias=True) # the max multiplier for img feat channels is 2 ** n_downsampling
24
-
25
- def forward(self, seq_feat, img_feat, ref_pad_mask=None):
26
-
27
-
28
- cls_one_pad = torch.ones((1,1,1)).to(seq_feat.device).repeat(seq_feat.size(0),1,1)
29
- ref_pad_mask = torch.cat([cls_one_pad,ref_pad_mask],dim=-1)
30
-
31
- seq_feat = seq_feat * (ref_pad_mask.transpose(1, 2))
32
- seq_feat_ = seq_feat.view(seq_feat.size(0) // self.ref_nshot, self.ref_nshot,seq_feat.size(-2) , seq_feat.size(-1))
33
- seq_feat_ = seq_feat_.transpose(1, 2)
34
- seq_feat_ = seq_feat_.contiguous().view(seq_feat_.size(0), seq_feat_.size(1), seq_feat_.size(2) * seq_feat_.size(3))
35
- seq_feat_ = self.fc_merge(seq_feat_)
36
- seq_feat_cls = seq_feat_[:, 0]
37
-
38
- feat_cat = torch.cat((img_feat, seq_feat_cls),-1)
39
- dist_param = self.fc_fusion(feat_cat)
40
-
41
- output = {}
42
- mu = dist_param[..., :self.bottleneck_bits]
43
- log_sigma = dist_param[..., self.bottleneck_bits:]
44
-
45
- if self.mode == 'train':
46
- # calculate the kl loss and reparamerize latent code
47
- epsilon = torch.randn(*mu.size(), device=mu.device)
48
- z = mu + torch.exp(log_sigma / 2) * epsilon
49
- kl = 0.5 * torch.mean(torch.exp(log_sigma) + torch.square(mu) - 1. - log_sigma)
50
- output['latent'] = z
51
- output['kl_loss'] = kl
52
- seq_feat_[:, 0] = z
53
- latent_feat_seq = seq_feat_
54
-
55
- else:
56
- output['latent'] = mu
57
- output['kl_loss'] = 0.0
58
- seq_feat_[:, 0] = mu
59
- latent_feat_seq = seq_feat_
60
-
61
-
62
- return output, latent_feat_seq
63
-
64
-
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+ import math
5
+ from options import get_parser_main_model
6
+ opts = get_parser_main_model().parse_args()
7
+
8
+ def init_weights(m):
9
+ for name, param in m.named_parameters():
10
+ nn.init.uniform_(param.data, -0.08, 0.08)
11
+
12
+
13
+ class ModalityFusion(nn.Module):
14
+ def __init__(self, img_size=64, ref_nshot=4, bottleneck_bits=512, ngf=32, seq_latent_dim=512, mode='train'):
15
+ super().__init__()
16
+ self.mode = mode
17
+ self.bottleneck_bits = bottleneck_bits
18
+ self.ref_nshot = ref_nshot
19
+ self.mode = mode
20
+ self.fc_merge = nn.Linear(seq_latent_dim * opts.ref_nshot, 512)
21
+ n_downsampling = int(math.log(img_size, 2))
22
+ mult_max = 2 ** (n_downsampling)
23
+ self.fc_fusion = nn.Linear(ngf * mult_max + seq_latent_dim, opts.bottleneck_bits * 2, bias=True) # the max multiplier for img feat channels is 2 ** n_downsampling
24
+
25
+ def forward(self, seq_feat, img_feat, ref_pad_mask=None):
26
+
27
+
28
+ cls_one_pad = torch.ones((1,1,1)).to(seq_feat.device).repeat(seq_feat.size(0),1,1)
29
+ ref_pad_mask = torch.cat([cls_one_pad,ref_pad_mask],dim=-1)
30
+
31
+ seq_feat = seq_feat * (ref_pad_mask.transpose(1, 2))
32
+ seq_feat_ = seq_feat.view(seq_feat.size(0) // self.ref_nshot, self.ref_nshot,seq_feat.size(-2) , seq_feat.size(-1))
33
+ seq_feat_ = seq_feat_.transpose(1, 2)
34
+ seq_feat_ = seq_feat_.contiguous().view(seq_feat_.size(0), seq_feat_.size(1), seq_feat_.size(2) * seq_feat_.size(3))
35
+ seq_feat_ = self.fc_merge(seq_feat_)
36
+ seq_feat_cls = seq_feat_[:, 0]
37
+
38
+ feat_cat = torch.cat((img_feat, seq_feat_cls),-1)
39
+ dist_param = self.fc_fusion(feat_cat)
40
+
41
+ output = {}
42
+ mu = dist_param[..., :self.bottleneck_bits]
43
+ log_sigma = dist_param[..., self.bottleneck_bits:]
44
+
45
+ if self.mode == 'train':
46
+ # calculate the kl loss and reparamerize latent code
47
+ epsilon = torch.randn(*mu.size(), device=mu.device)
48
+ z = mu + torch.exp(log_sigma / 2) * epsilon
49
+ kl = 0.5 * torch.mean(torch.exp(log_sigma) + torch.square(mu) - 1. - log_sigma)
50
+ output['latent'] = z
51
+ output['kl_loss'] = kl
52
+ seq_feat_[:, 0] = z
53
+ latent_feat_seq = seq_feat_
54
+
55
+ else:
56
+ output['latent'] = mu
57
+ output['kl_loss'] = 0.0
58
+ seq_feat_[:, 0] = mu
59
+ latent_feat_seq = seq_feat_
60
+
61
+
62
+ return output, latent_feat_seq
63
+
64
+
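Note: the train branch above is the standard VAE reparameterization trick; a self-contained sketch of the same computation:

import torch

def reparameterize(mu, log_sigma):
    eps = torch.randn_like(mu)                 # eps ~ N(0, I)
    z = mu + torch.exp(log_sigma / 2) * eps    # z = mu + sigma * eps
    # KL(q(z|x) || N(0, I)), averaged over elements as in ModalityFusion
    kl = 0.5 * torch.mean(torch.exp(log_sigma) + mu.pow(2) - 1. - log_sigma)
    return z, kl

z, kl = reparameterize(torch.zeros(4, 512), torch.zeros(4, 512))
print(z.shape, float(kl))   # torch.Size([4, 512]) 0.0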
{models → ThaiVecFont/models}/model_main.py RENAMED
@@ -1,212 +1,212 @@
1
- from models.image_encoder import ImageEncoder
2
- from models.image_decoder import ImageDecoder
3
- from models.modality_fusion import ModalityFusion
4
- from models.vgg_perceptual_loss import VGGPerceptualLoss
5
- from models.transformers import *
6
- from torch.autograd import Variable
7
-
8
- class ModelMain(nn.Module):
9
-
10
- def __init__(self, opts, mode='train'):
11
- super().__init__()
12
- self.opts = opts
13
- self.img_encoder = ImageEncoder(img_size=opts.img_size, input_nc=opts.ref_nshot, ngf=opts.ngf, norm_layer=nn.LayerNorm)
14
- self.img_decoder = ImageDecoder(img_size=opts.img_size, input_nc=opts.bottleneck_bits + opts.char_num, output_nc=1, ngf=opts.ngf, norm_layer=nn.LayerNorm)
15
- self.vggptlossfunc = VGGPerceptualLoss()
16
- self.modality_fusion = ModalityFusion(img_size=opts.img_size, ref_nshot=opts.ref_nshot, bottleneck_bits=opts.bottleneck_bits, ngf=opts.ngf, mode=opts.mode)
17
- self.transformer_main = Transformer(
18
- input_channels = 1,
19
- input_axis = 2, # number of axis for input data (2 for images, 3 for video)
20
- num_freq_bands = 6, # number of freq bands, with original value (2 * K + 1)
21
- max_freq = 10., # maximum frequency, hyperparameter depending on how fine the data is
22
- depth = 6, # depth of net. The shape of the final attention mechanism will be:
23
- # depth * (cross attention -> self_per_cross_attn * self attention)
24
- num_latents = 256, # number of latents, or induced set points, or centroids. different papers giving it different names
25
- latent_dim = opts.dim_seq_latent, # latent dimension
26
- cross_heads = 1, # number of heads for cross attention. paper said 1
27
- latent_heads = 8, # number of heads for latent self attention, 8
28
- cross_dim_head = 64, # number of dimensions per cross attention head
29
- latent_dim_head = 64, # number of dimensions per latent self attention head
30
- num_classes = 1000, # output number of classes
31
- attn_dropout = 0.,
32
- ff_dropout = 0.,
33
- weight_tie_layers = False, # whether to weight tie layers (optional, as indicated in the diagram)
34
- fourier_encode_data = True, # whether to auto-fourier encode the data, using the input_axis given. defaults to True, but can be turned off if you are fourier encoding the data yourself
35
- self_per_cross_attn = 2 # number of self attention blocks per cross attention
36
- )
37
-
38
- self.transformer_seqdec = Transformer_decoder()
39
-
40
-
41
- def forward(self, data, mode='train'):
42
-
43
- imgs, seqs, scalars = self.fetch_data(data, mode)
44
- ref_img, trg_img = imgs
45
- ref_seq, ref_seq_cat, ref_pad_mask, trg_seq, trg_seq_gt, trg_seq_shifted, trg_pts_aux = seqs
46
- trg_char_onehot, trg_cls, trg_seqlen = scalars
47
-
48
- # image encoding
49
- img_encoder_out = self.img_encoder(ref_img)
50
- img_feat = img_encoder_out['img_feat'] # bs, ngf * (2 ** 6)
51
-
52
- # seq encoding
53
- ref_img_ = ref_img.view(ref_img.size(0) * ref_img.size(1), ref_img.size(2), ref_img.size(3)).unsqueeze(-1) # [n_bs * n_ref, img_size, img_size, 1]
54
- seq_feat, _ = self.transformer_main(ref_img_, ref_seq_cat, mask=ref_pad_mask) # [n_bs * n_ref, max_seq_len + 1, 9]
55
-
56
- # modality fusion
57
- mf_output, latent_feat_seq = self.modality_fusion(seq_feat, img_feat, ref_pad_mask=ref_pad_mask)
58
- latent_feat_seq = self.transformer_main.att_residual(latent_feat_seq) # [n_bs, max_seq_len + 1, bottleneck_bits]
59
- z = mf_output['latent']
60
- kl_loss = mf_output['kl_loss']
61
-
62
- # image decoding
63
- img_decoder_out = self.img_decoder(z, trg_char_onehot, trg_img)
64
-
65
- ret_dict = {}
66
- loss_dict = {}
67
-
68
- ret_dict['img'] = {}
69
- ret_dict['img']['out'] = img_decoder_out['gen_imgs']
70
- ret_dict['img']['ref'] = ref_img
71
- ret_dict['img']['trg'] = trg_img
72
-
73
- if mode in {'train', 'val'}:
74
- # seq decoding (training or val mode)
75
- tgt_mask = Variable(subsequent_mask(self.opts.max_seq_len).type_as(ref_pad_mask.data)).unsqueeze(0).expand(z.size(0), -1, -1, -1).cuda().float()
76
- command_logits, args_logits, attn = self.transformer_seqdec(x=trg_seq_shifted, memory=latent_feat_seq, trg_char=trg_cls, tgt_mask=tgt_mask)
77
- command_logits_2, args_logits_2 = self.transformer_seqdec.parallel_decoder(command_logits, args_logits, memory=latent_feat_seq.detach(), trg_char=trg_cls)
78
-
79
- total_loss = self.transformer_main.loss(command_logits, args_logits,trg_seq, trg_seqlen, trg_pts_aux)
80
- total_loss_parallel = self.transformer_main.loss(command_logits_2, args_logits_2, trg_seq, trg_seqlen, trg_pts_aux)
81
- vggpt_loss = self.vggptlossfunc(img_decoder_out['gen_imgs'], trg_img)
82
- # loss and output
83
- loss_svg_items = ['total', 'cmd', 'args', 'smt', 'aux']
84
- # for image
85
- loss_dict['img'] = {}
86
- loss_dict['img']['l1'] = img_decoder_out['img_l1loss']
87
- loss_dict['img']['vggpt'] = vggpt_loss['pt_c_loss']
88
- # for latent
89
- loss_dict['kl'] = kl_loss
90
- # for svg
91
- loss_dict['svg'] = {}
92
- loss_dict['svg_para'] = {}
93
- for item in loss_svg_items:
94
- loss_dict['svg'][item] = total_loss[f'loss_{item}']
95
- loss_dict['svg_para'][item] = total_loss_parallel[f'loss_{item}']
96
-
97
- else: # testing (inference)
98
-
99
- trg_len = trg_seq_shifted.size(0)
100
- sampled_svg = torch.zeros(1, trg_seq.size(1), self.opts.dim_seq_short).cuda()
101
-
102
- for t in range(0, trg_len):
103
- tgt_mask = Variable(subsequent_mask(sampled_svg.size(0)).type_as(ref_seq_cat.data)).unsqueeze(0).expand(sampled_svg.size(1), -1, -1, -1).cuda().float()
104
- command_logits, args_logits, attn = self.transformer_seqdec(x=sampled_svg, memory=latent_feat_seq, trg_char=trg_cls, tgt_mask=tgt_mask)
105
- prob_comand = F.softmax(command_logits[:, -1, :], -1)
106
- prob_args = F.softmax(args_logits[:, -1, :], -1)
107
- next_command = torch.argmax(prob_comand, -1).unsqueeze(-1)
108
- next_args = torch.argmax(prob_args, -1)
109
- predict_tmp = torch.cat((next_command, next_args),-1).unsqueeze(1).transpose(0,1)
110
- sampled_svg = torch.cat((sampled_svg, predict_tmp), dim=0)
111
-
112
- sampled_svg = sampled_svg[1:]
113
- cmd2 = sampled_svg[:,:,0].unsqueeze(-1)
114
- arg2 = sampled_svg[:,:,1:]
115
- command_logits_2, args_logits_2 = self.transformer_seqdec.parallel_decoder(cmd_logits=cmd2, args_logits=arg2, memory=latent_feat_seq, trg_char=trg_cls)
116
- prob_comand = F.softmax(command_logits_2,-1)
117
- prob_args = F.softmax(args_logits_2,-1)
118
- update_command = torch.argmax(prob_comand,-1).unsqueeze(-1)
119
- update_args = torch.argmax(prob_args,-1)
120
-
121
- sampled_svg_parralel = torch.cat((update_command, update_args),-1).transpose(0,1)
122
-
123
- commands1 = F.one_hot(sampled_svg[:,:,:1].long(), 4).squeeze().transpose(0, 1)
124
- args1 = denumericalize(sampled_svg[:,:,1:]).transpose(0,1)
125
- sampled_svg_1 = torch.cat([commands1.cpu().detach(),args1[:, :, 2:].cpu().detach()],dim =-1)
126
-
127
-
128
- commands2 = F.one_hot(sampled_svg_parralel[:, :, :1].long(), 4).squeeze().transpose(0, 1)
129
- args2 = denumericalize(sampled_svg_parralel[:, :, 1:]).transpose(0,1)
130
- sampled_svg_2 = torch.cat([commands2.cpu().detach(),args2[:, :, 2:].cpu().detach()], dim =-1)
131
-
132
- ret_dict['svg'] = {}
133
- ret_dict['svg']['sampled_1'] = sampled_svg_1
134
- ret_dict['svg']['sampled_2'] = sampled_svg_2
135
- ret_dict['svg']['trg'] = trg_seq_gt
136
-
137
- return ret_dict, loss_dict
138
-
139
- def fetch_data(self, data, mode):
140
-
141
- input_image = data['rendered'] # [bs, opts.char_num, opts.img_size, opts.img_size]
142
- input_sequence = data['sequence'] # [bs, opts.char_num, opts.max_seq_len]
143
- input_seqlen = data['seq_len']
144
- input_seqlen = input_seqlen + 1
145
- input_pts_aux = data['pts_aux']
146
- arg_quant = numericalize(input_sequence[:, :, :, 4:])
147
- cmd_cls = torch.argmax(input_sequence[:, :, :, :4], dim=-1).unsqueeze(-1)
148
- input_sequence = torch.cat([cmd_cls, arg_quant], dim=-1) # 1 + 8 = 9 dimension
149
-
150
- # choose reference classes and target classes
151
-
152
-
153
- if mode == 'train':
154
- ref_cls = torch.randint(0, self.opts.char_num, (input_image.size(0), self.opts.ref_nshot)).cuda()
155
- if opts.ref_nshot == 52: # For ENG to TH
156
- ref_cls_upper = torch.randint(0, 26, (input_image.size(0), self.opts.ref_nshot // 2)).cuda()
157
- ref_cls_lower = torch.randint(26, 52, (input_image.size(0), self.opts.ref_nshot // 2)).cuda()
158
- ref_cls = torch.cat((ref_cls_upper, ref_cls_lower), -1)
159
- elif mode == 'val':
160
- ref_cls = torch.arange(0, self.opts.ref_nshot, 1).cuda().unsqueeze(0).expand(input_image.size(0), -1)
161
- else:
162
- ref_ids = self.opts.ref_char_ids.split(',')
163
- ref_ids = list(map(int, ref_ids))
164
- assert len(ref_ids) == self.opts.ref_nshot
165
- ref_cls = torch.tensor(ref_ids).cuda().unsqueeze(0).expand(self.opts.char_num, -1)
166
-
167
-
168
-
169
- if mode in {'train', 'val'}:
170
- trg_cls = torch.randint(0, self.opts.char_num, (input_image.size(0), 1)).cuda()
171
- if opts.ref_nshot == 52:
172
- trg_cls = torch.randint(52, opts.char_num, (input_image.size(0), 1)).cuda()
173
- else:
174
- trg_cls = torch.arange(0, self.opts.char_num).cuda()
175
- if opts.ref_nshot == 52:
176
- trg_cls = torch.randint(52, opts.char_num, (input_image.size(0), 1)).cuda()
177
- trg_cls = trg_cls.view(self.opts.char_num, 1)
178
- input_image = input_image.expand(self.opts.char_num, -1, -1, -1)
179
- input_sequence = input_sequence.expand(self.opts.char_num, -1, -1, -1)
180
- input_pts_aux = input_pts_aux.expand(self.opts.char_num, -1, -1, -1)
181
- input_seqlen = input_seqlen.expand(self.opts.char_num, -1, -1)
182
-
183
- ref_img = util_funcs.select_imgs(input_image, ref_cls, self.opts)
184
- # select a target glyph image
185
- trg_img = util_funcs.select_imgs(input_image, trg_cls, self.opts)
186
- # randomly select ref vector glyphs
187
- ref_seq = util_funcs.select_seqs(input_sequence, ref_cls, self.opts, self.opts.dim_seq_short) # [opts.batch_size, opts.ref_nshot, opts.max_seq_len, opts.dim_seq_nmr]
188
- # randomly select a target vector glyph
189
- trg_seq = util_funcs.select_seqs(input_sequence, trg_cls, self.opts, self.opts.dim_seq_short)
190
- trg_seq = trg_seq.squeeze(1)
191
- trg_pts_aux = util_funcs.select_seqs(input_pts_aux, trg_cls, self.opts, opts.n_aux_pts)
192
- trg_pts_aux = trg_pts_aux.squeeze(1)
193
- # the one-hot target char class
194
- trg_char_onehot = util_funcs.trgcls_to_onehot(trg_cls, self.opts)
195
- # shift target sequence
196
- trg_seq_gt = trg_seq.clone().detach()
197
- trg_seq_gt = torch.cat((trg_seq_gt[:, :, :1], trg_seq_gt[:, :, 3:]), -1)
198
- trg_seq = trg_seq.transpose(0, 1)
199
- trg_seq_shifted = util_funcs.shift_right(trg_seq)
200
-
201
- ref_seq_cat = ref_seq.view(ref_seq.size(0) * ref_seq.size(1), ref_seq.size(2), ref_seq.size(3))
202
- ref_seq_cat = ref_seq_cat.transpose(0,1)
203
- ref_seqlen = util_funcs.select_seqlens(input_seqlen, ref_cls, self.opts)
204
- ref_seqlen_cat = ref_seqlen.view(ref_seqlen.size(0) * ref_seqlen.size(1), ref_seqlen.size(2))
205
- ref_pad_mask = torch.zeros(ref_seqlen_cat.size(0), self.opts.max_seq_len) # value = 1 marks valid (kept) positions, 0 marks padding
206
- for i in range(ref_seqlen_cat.size(0)):
207
- ref_pad_mask[i,:ref_seqlen_cat[i]] = 1
208
- ref_pad_mask = ref_pad_mask.cuda().float().unsqueeze(1)
209
- trg_seqlen = util_funcs.select_seqlens(input_seqlen, trg_cls, self.opts)
210
- trg_seqlen = trg_seqlen.squeeze()
211
-
212
  return [ref_img, trg_img], [ref_seq, ref_seq_cat, ref_pad_mask, trg_seq, trg_seq_gt, trg_seq_shifted, trg_pts_aux], [trg_char_onehot, trg_cls, trg_seqlen]
 
1
+ from models.image_encoder import ImageEncoder
2
+ from models.image_decoder import ImageDecoder
3
+ from models.modality_fusion import ModalityFusion
4
+ from models.vgg_perceptual_loss import VGGPerceptualLoss
5
+ from models.transformers import *
6
+ from torch.autograd import Variable
7
+
8
+ class ModelMain(nn.Module):
9
+
10
+ def __init__(self, opts, mode='train'):
11
+ super().__init__()
12
+ self.opts = opts
13
+ self.img_encoder = ImageEncoder(img_size=opts.img_size, input_nc=opts.ref_nshot, ngf=opts.ngf, norm_layer=nn.LayerNorm)
14
+ self.img_decoder = ImageDecoder(img_size=opts.img_size, input_nc=opts.bottleneck_bits + opts.char_num, output_nc=1, ngf=opts.ngf, norm_layer=nn.LayerNorm)
15
+ self.vggptlossfunc = VGGPerceptualLoss()
16
+ self.modality_fusion = ModalityFusion(img_size=opts.img_size, ref_nshot=opts.ref_nshot, bottleneck_bits=opts.bottleneck_bits, ngf=opts.ngf, mode=opts.mode)
17
+ self.transformer_main = Transformer(
18
+ input_channels = 1,
19
+ input_axis = 2, # number of axis for input data (2 for images, 3 for video)
20
+ num_freq_bands = 6, # number of freq bands, with original value (2 * K + 1)
21
+ max_freq = 10., # maximum frequency, hyperparameter depending on how fine the data is
22
+ depth = 6, # depth of net. The shape of the final attention mechanism will be:
23
+ # depth * (cross attention -> self_per_cross_attn * self attention)
24
+ num_latents = 256, # number of latents, or induced set points, or centroids. different papers giving it different names
25
+ latent_dim = opts.dim_seq_latent, # latent dimension
26
+ cross_heads = 1, # number of heads for cross attention. paper said 1
27
+ latent_heads = 8, # number of heads for latent self attention, 8
28
+ cross_dim_head = 64, # number of dimensions per cross attention head
29
+ latent_dim_head = 64, # number of dimensions per latent self attention head
30
+ num_classes = 1000, # output number of classes
31
+ attn_dropout = 0.,
32
+ ff_dropout = 0.,
33
+ weight_tie_layers = False, # whether to weight tie layers (optional, as indicated in the diagram)
34
+ fourier_encode_data = True, # whether to auto-fourier encode the data, using the input_axis given. defaults to True, but can be turned off if you are fourier encoding the data yourself
35
+ self_per_cross_attn = 2 # number of self attention blocks per cross attention
36
+ )
37
+
38
+ self.transformer_seqdec = Transformer_decoder()
39
+
40
+
41
+ def forward(self, data, mode='train'):
42
+
43
+ imgs, seqs, scalars = self.fetch_data(data, mode)
44
+ ref_img, trg_img = imgs
45
+ ref_seq, ref_seq_cat, ref_pad_mask, trg_seq, trg_seq_gt, trg_seq_shifted, trg_pts_aux = seqs
46
+ trg_char_onehot, trg_cls, trg_seqlen = scalars
47
+
48
+ # image encoding
49
+ img_encoder_out = self.img_encoder(ref_img)
50
+ img_feat = img_encoder_out['img_feat'] # bs, ngf * (2 ** 6)
51
+
52
+ # seq encoding
53
+ ref_img_ = ref_img.view(ref_img.size(0) * ref_img.size(1), ref_img.size(2), ref_img.size(3)).unsqueeze(-1) # [n_bs * n_ref, img_size, img_size, 1]
54
+ seq_feat, _ = self.transformer_main(ref_img_, ref_seq_cat, mask=ref_pad_mask) # [n_bs * n_ref, max_seq_len + 1, 9]
55
+
56
+ # modality fusion
57
+ mf_output, latent_feat_seq = self.modality_fusion(seq_feat, img_feat, ref_pad_mask=ref_pad_mask)
58
+ latent_feat_seq = self.transformer_main.att_residual(latent_feat_seq) # [n_bs, max_seq_len + 1, bottleneck_bits]
59
+ z = mf_output['latent']
60
+ kl_loss = mf_output['kl_loss']
61
+
62
+ # image decoding
63
+ img_decoder_out = self.img_decoder(z, trg_char_onehot, trg_img)
64
+
65
+ ret_dict = {}
66
+ loss_dict = {}
67
+
68
+ ret_dict['img'] = {}
69
+ ret_dict['img']['out'] = img_decoder_out['gen_imgs']
70
+ ret_dict['img']['ref'] = ref_img
71
+ ret_dict['img']['trg'] = trg_img
72
+
73
+ if mode in {'train', 'val'}:
74
+ # seq decoding (training or val mode)
75
+ tgt_mask = Variable(subsequent_mask(self.opts.max_seq_len).type_as(ref_pad_mask.data)).unsqueeze(0).expand(z.size(0), -1, -1, -1).cuda().float()
76
+ command_logits, args_logits, attn = self.transformer_seqdec(x=trg_seq_shifted, memory=latent_feat_seq, trg_char=trg_cls, tgt_mask=tgt_mask)
77
+ command_logits_2, args_logits_2 = self.transformer_seqdec.parallel_decoder(command_logits, args_logits, memory=latent_feat_seq.detach(), trg_char=trg_cls)
78
+
79
+ total_loss = self.transformer_main.loss(command_logits, args_logits,trg_seq, trg_seqlen, trg_pts_aux)
80
+ total_loss_parallel = self.transformer_main.loss(command_logits_2, args_logits_2, trg_seq, trg_seqlen, trg_pts_aux)
81
+ vggpt_loss = self.vggptlossfunc(img_decoder_out['gen_imgs'], trg_img)
82
+ # loss and output
83
+ loss_svg_items = ['total', 'cmd', 'args', 'smt', 'aux']
84
+ # for image
85
+ loss_dict['img'] = {}
86
+ loss_dict['img']['l1'] = img_decoder_out['img_l1loss']
87
+ loss_dict['img']['vggpt'] = vggpt_loss['pt_c_loss']
88
+ # for latent
89
+ loss_dict['kl'] = kl_loss
90
+ # for svg
91
+ loss_dict['svg'] = {}
92
+ loss_dict['svg_para'] = {}
93
+ for item in loss_svg_items:
94
+ loss_dict['svg'][item] = total_loss[f'loss_{item}']
95
+ loss_dict['svg_para'][item] = total_loss_parallel[f'loss_{item}']
96
+
97
+ else: # testing (inference)
98
+
99
+ trg_len = trg_seq_shifted.size(0)
100
+ sampled_svg = torch.zeros(1, trg_seq.size(1), self.opts.dim_seq_short).cuda()
101
+
102
+ for t in range(0, trg_len):
103
+ tgt_mask = Variable(subsequent_mask(sampled_svg.size(0)).type_as(ref_seq_cat.data)).unsqueeze(0).expand(sampled_svg.size(1), -1, -1, -1).cuda().float()
104
+ command_logits, args_logits, attn = self.transformer_seqdec(x=sampled_svg, memory=latent_feat_seq, trg_char=trg_cls, tgt_mask=tgt_mask)
105
+ prob_comand = F.softmax(command_logits[:, -1, :], -1)
106
+ prob_args = F.softmax(args_logits[:, -1, :], -1)
107
+ next_command = torch.argmax(prob_comand, -1).unsqueeze(-1)
108
+ next_args = torch.argmax(prob_args, -1)
109
+ predict_tmp = torch.cat((next_command, next_args),-1).unsqueeze(1).transpose(0,1)
110
+ sampled_svg = torch.cat((sampled_svg, predict_tmp), dim=0)
111
+
112
+ sampled_svg = sampled_svg[1:]
113
+ cmd2 = sampled_svg[:,:,0].unsqueeze(-1)
114
+ arg2 = sampled_svg[:,:,1:]
115
+ command_logits_2, args_logits_2 = self.transformer_seqdec.parallel_decoder(cmd_logits=cmd2, args_logits=arg2, memory=latent_feat_seq, trg_char=trg_cls)
116
+ prob_comand = F.softmax(command_logits_2,-1)
117
+ prob_args = F.softmax(args_logits_2,-1)
118
+ update_command = torch.argmax(prob_comand,-1).unsqueeze(-1)
119
+ update_args = torch.argmax(prob_args,-1)
120
+
121
+ sampled_svg_parralel = torch.cat((update_command, update_args),-1).transpose(0,1)
122
+
123
+ commands1 = F.one_hot(sampled_svg[:,:,:1].long(), 4).squeeze().transpose(0, 1)
124
+ args1 = denumericalize(sampled_svg[:,:,1:]).transpose(0,1)
125
+ sampled_svg_1 = torch.cat([commands1.cpu().detach(),args1[:, :, 2:].cpu().detach()],dim =-1)
126
+
127
+
128
+ commands2 = F.one_hot(sampled_svg_parralel[:, :, :1].long(), 4).squeeze().transpose(0, 1)
129
+ args2 = denumericalize(sampled_svg_parralel[:, :, 1:]).transpose(0,1)
130
+ sampled_svg_2 = torch.cat([commands2.cpu().detach(),args2[:, :, 2:].cpu().detach()], dim =-1)
131
+
132
+ ret_dict['svg'] = {}
133
+ ret_dict['svg']['sampled_1'] = sampled_svg_1
134
+ ret_dict['svg']['sampled_2'] = sampled_svg_2
135
+ ret_dict['svg']['trg'] = trg_seq_gt
136
+
137
+ return ret_dict, loss_dict
138
+
139
+ def fetch_data(self, data, mode):
140
+
141
+ input_image = data['rendered'] # [bs, opts.char_num, opts.img_size, opts.img_size]
142
+ input_sequence = data['sequence'] # [bs, opts.char_num, opts.max_seq_len]
143
+ input_seqlen = data['seq_len']
144
+ input_seqlen = input_seqlen + 1
145
+ input_pts_aux = data['pts_aux']
146
+ arg_quant = numericalize(input_sequence[:, :, :, 4:])
147
+ cmd_cls = torch.argmax(input_sequence[:, :, :, :4], dim=-1).unsqueeze(-1)
148
+ input_sequence = torch.cat([cmd_cls, arg_quant], dim=-1) # 1 + 8 = 9 dimension
149
+
150
+ # choose reference classes and target classes
151
+
152
+
153
+ if mode == 'train':
154
+ ref_cls = torch.randint(0, self.opts.char_num, (input_image.size(0), self.opts.ref_nshot)).cuda()
155
+ if opts.ref_nshot == 52: # For ENG to TH
156
+ ref_cls_upper = torch.randint(0, 26, (input_image.size(0), self.opts.ref_nshot // 2)).cuda()
157
+ ref_cls_lower = torch.randint(26, 52, (input_image.size(0), self.opts.ref_nshot // 2)).cuda()
158
+ ref_cls = torch.cat((ref_cls_upper, ref_cls_lower), -1)
159
+ elif mode == 'val':
160
+ ref_cls = torch.arange(0, self.opts.ref_nshot, 1).cuda().unsqueeze(0).expand(input_image.size(0), -1)
161
+ else:
162
+ ref_ids = self.opts.ref_char_ids.split(',')
163
+ ref_ids = list(map(int, ref_ids))
164
+ assert len(ref_ids) == self.opts.ref_nshot
165
+ ref_cls = torch.tensor(ref_ids).cuda().unsqueeze(0).expand(self.opts.char_num, -1)
166
+
167
+
168
+
169
+ if mode in {'train', 'val'}:
170
+ trg_cls = torch.randint(0, self.opts.char_num, (input_image.size(0), 1)).cuda()
171
+ if opts.ref_nshot == 52:
172
+ trg_cls = torch.randint(52, opts.char_num, (input_image.size(0), 1)).cuda()
173
+ else:
174
+ trg_cls = torch.arange(0, self.opts.char_num).cuda()
175
+ if opts.ref_nshot == 52:
176
+ trg_cls = torch.randint(52, opts.char_num, (input_image.size(0), 1)).cuda()
177
+ trg_cls = trg_cls.view(self.opts.char_num, 1)
178
+ input_image = input_image.expand(self.opts.char_num, -1, -1, -1)
179
+ input_sequence = input_sequence.expand(self.opts.char_num, -1, -1, -1)
180
+ input_pts_aux = input_pts_aux.expand(self.opts.char_num, -1, -1, -1)
181
+ input_seqlen = input_seqlen.expand(self.opts.char_num, -1, -1)
182
+
183
+ ref_img = util_funcs.select_imgs(input_image, ref_cls, self.opts)
184
+ # select a target glyph image
185
+ trg_img = util_funcs.select_imgs(input_image, trg_cls, self.opts)
186
+ # randomly select ref vector glyphs
187
+ ref_seq = util_funcs.select_seqs(input_sequence, ref_cls, self.opts, self.opts.dim_seq_short) # [opts.batch_size, opts.ref_nshot, opts.max_seq_len, opts.dim_seq_nmr]
188
+ # randomly select a target vector glyph
189
+ trg_seq = util_funcs.select_seqs(input_sequence, trg_cls, self.opts, self.opts.dim_seq_short)
190
+ trg_seq = trg_seq.squeeze(1)
191
+ trg_pts_aux = util_funcs.select_seqs(input_pts_aux, trg_cls, self.opts, opts.n_aux_pts)
192
+ trg_pts_aux = trg_pts_aux.squeeze(1)
193
+ # the one-hot target char class
194
+ trg_char_onehot = util_funcs.trgcls_to_onehot(trg_cls, self.opts)
195
+ # shift target sequence
196
+ trg_seq_gt = trg_seq.clone().detach()
197
+ trg_seq_gt = torch.cat((trg_seq_gt[:, :, :1], trg_seq_gt[:, :, 3:]), -1)
198
+ trg_seq = trg_seq.transpose(0, 1)
199
+ trg_seq_shifted = util_funcs.shift_right(trg_seq)
200
+
201
+ ref_seq_cat = ref_seq.view(ref_seq.size(0) * ref_seq.size(1), ref_seq.size(2), ref_seq.size(3))
202
+ ref_seq_cat = ref_seq_cat.transpose(0,1)
203
+ ref_seqlen = util_funcs.select_seqlens(input_seqlen, ref_cls, self.opts)
204
+ ref_seqlen_cat = ref_seqlen.view(ref_seqlen.size(0) * ref_seqlen.size(1), ref_seqlen.size(2))
205
+ ref_pad_mask = torch.zeros(ref_seqlen_cat.size(0), self.opts.max_seq_len) # value = 1 marks valid (kept) positions, 0 marks padding
206
+ for i in range(ref_seqlen_cat.size(0)):
207
+ ref_pad_mask[i,:ref_seqlen_cat[i]] = 1
208
+ ref_pad_mask = ref_pad_mask.cuda().float().unsqueeze(1)
209
+ trg_seqlen = util_funcs.select_seqlens(input_seqlen, trg_cls, self.opts)
210
+ trg_seqlen = trg_seqlen.squeeze()
211
+
212
  return [ref_img, trg_img], [ref_seq, ref_seq_cat, ref_pad_mask, trg_seq, trg_seq_gt, trg_seq_shifted, trg_pts_aux], [trg_char_onehot, trg_cls, trg_seqlen]
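Note on the packing above: the four command logits collapse to one class index via argmax, and numericalize (defined in models/transformers.py below) quantizes the eight coordinate arguments into 128 integer bins, giving the 1 + 8 = 9 channels per token. A minimal sketch of the same packing, with made-up batch/glyph/sequence sizes (the [0, 30] coordinate range mirrors numericalize; .long() is used here only to keep dtypes uniform for the cat):

    import torch

    def numericalize(args, n=128):
        # quantize coordinates assumed to lie in [0, 30] into n integer bins
        return (args / 30 * n).round().clip(min=0, max=n - 1).long()

    seq = torch.rand(2, 3, 5, 12)                               # [bs, char_num, max_seq_len, 4 + 8]
    cmd_cls = torch.argmax(seq[..., :4], dim=-1, keepdim=True)  # [2, 3, 5, 1] command class index
    arg_quant = numericalize(seq[..., 4:] * 30)                 # [2, 3, 5, 8] quantized coordinates
    packed = torch.cat([cmd_cls, arg_quant], dim=-1)            # [2, 3, 5, 9]
    print(packed.shape)                                         # torch.Size([2, 3, 5, 9])

The 9-channel tokens produced this way are what select_seqs later slices per reference and target class.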
{models → ThaiVecFont/models}/pos_enc.py RENAMED
@@ -1,21 +1,21 @@
1
-
2
- class PositionalEncoding(nn.Module):
3
- def __init__(self, d_model, dropout=0.1, max_len=5000):
4
- super(PositionalEncoding, self).__init__()
5
- self.dropout = nn.Dropout(p=dropout)
6
- pe = torch.zeros(max_len, d_model) # [max_len, d_model]
7
- position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1) # [max_len, 1]
8
- div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
9
- # [d_model/2]
10
- pe[:, 0::2] = torch.sin(position * div_term) # [max_len, d_model/2]
11
- pe[:, 1::2] = torch.cos(position * div_term)
12
- pe = pe.unsqueeze(0).transpose(0, 1) # [1, max_len, d_model] --> [max_len, 1, d_model]
13
- self.register_buffer('pe', pe)
14
-
15
- def forward(self, x):
16
- """
17
- :param x: [x_len, batch_size, emb_size]
18
- :return: [x_len, batch_size, emb_size]
19
- """
20
- x = x + self.pe[:x.size(0), :] # [x_len, batch_size, d_model]
21
  return self.dropout(x)
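As committed, pos_enc.py has no import statements, so the module cannot be loaded on its own; the class needs at least math, torch, and nn. A minimal usage sketch under that assumption (the d_model, batch, and length values here are illustrative):

    import math
    import torch
    from torch import nn

    # ... PositionalEncoding exactly as defined above ...

    pos_enc = PositionalEncoding(d_model=512, dropout=0.1, max_len=64)
    x = torch.zeros(51, 8, 512)   # [x_len, batch_size, emb_size]
    out = pos_enc(x)              # same shape, sinusoidal offsets added
    print(out.shape)              # torch.Size([51, 8, 512])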
 
{models → ThaiVecFont/models}/transformers.py RENAMED
@@ -1,711 +1,711 @@
1
- from math import pi, log
2
- from functools import wraps
3
- from multiprocessing import context
4
- from textwrap import indent
5
- import models.util_funcs as util_funcs
6
- import math, copy
7
- import numpy as np
8
- import torch
9
- from torch import nn, einsum
10
- import torch.nn.functional as F
11
- from einops import rearrange, repeat
12
- from einops.layers.torch import Reduce
13
- import pdb
14
- from einops.layers.torch import Rearrange
15
- from options import get_parser_main_model
16
- opts = get_parser_main_model().parse_args()
17
-
18
- class PositionalEncoding(nn.Module):
19
- def __init__(self, d_model, dropout=0.1, max_len=5000):
20
- super(PositionalEncoding, self).__init__()
21
- self.dropout = nn.Dropout(p=dropout)
22
- pe = torch.zeros(max_len, d_model)
23
- position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
24
- div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
25
- pe[:, 0::2] = torch.sin(position * div_term)
26
- pe[:, 1::2] = torch.cos(position * div_term)
27
- pe = pe.unsqueeze(0).transpose(0, 1)
28
- self.register_buffer('pe', pe)
29
-
30
- def forward(self, x):
31
- """
32
- :param x: [x_len, batch_size, emb_size]
33
- :return: [x_len, batch_size, emb_size]
34
- """
35
- x = x + self.pe[:x.size(0), :].to(x.device)
36
- return self.dropout(x)
37
-
38
- def exists(val):
39
- return val is not None
40
-
41
- def default(val, d):
42
- return val if exists(val) else d
43
-
44
- def cache_fn(f):
45
- cache = dict()
46
- @wraps(f)
47
- def cached_fn(*args, _cache = True, key = None, **kwargs):
48
- if not _cache:
49
- return f(*args, **kwargs)
50
- nonlocal cache
51
- if key in cache:
52
- return cache[key]
53
- result = f(*args, **kwargs)
54
- cache[key] = result
55
- return result
56
- return cached_fn
57
-
58
- def fourier_encode(x, max_freq, num_bands = 4):
59
- '''
60
- x: ([64, 64, 2, 1]) is between [-1,1]
61
- max_freq is 10
62
- num_bands is 6
63
- '''
64
-
65
- x = x.unsqueeze(-1)
66
- device, dtype, orig_x = x.device, x.dtype, x
67
-
68
- scales = torch.linspace(1., max_freq / 2, num_bands, device = device, dtype = dtype) # e.g. tensor([1.0000, 1.8000, 2.6000, 3.4000, 4.2000, 5.0000])
69
- scales = scales[(*((None,) * (len(x.shape) - 1)), Ellipsis)] # reshape for broadcasting over x's leading dims
70
-
71
- x = x * scales * pi
72
- x = torch.cat([x.sin(), x.cos()], dim = -1)
73
-
74
- x = torch.cat((x, orig_x), dim = -1)
75
- return x
76
-
77
- class PreNorm(nn.Module):
78
- def __init__(self, dim, fn, context_dim = None):
79
- super().__init__()
80
- self.fn = fn
81
- self.norm = nn.LayerNorm(dim)
82
- self.norm_context = nn.LayerNorm(context_dim) if exists(context_dim) else None
83
-
84
- def forward(self, x, **kwargs):
85
- x = self.norm(x)
86
-
87
- if exists(self.norm_context):
88
- context = kwargs['context']
89
- normed_context = self.norm_context(context)
90
- kwargs.update(context = normed_context)
91
-
92
- return self.fn(x, **kwargs)
93
-
94
- class GEGLU(nn.Module):
95
- def forward(self, x):
96
- x, gates = x.chunk(2, dim = -1)
97
- return x * F.gelu(gates)
98
-
99
- class FeedForward(nn.Module):
100
- def __init__(self, dim, mult = 4, dropout = 0.):
101
- super().__init__()
102
- self.net = nn.Sequential(
103
- nn.Linear(dim, dim * mult * 2),
104
- GEGLU(),
105
- nn.Linear(dim * mult, dim),
106
- nn.Dropout(dropout)
107
- )
108
-
109
- def forward(self, x):
110
- return self.net(x)
111
-
112
- class Attention(nn.Module):
113
- def __init__(self, query_dim, context_dim = None, heads = 8, dim_head = 64, dropout = 0.,cls_conv_dim=None):
114
- super().__init__()
115
- inner_dim = dim_head * heads
116
- context_dim = default(context_dim, query_dim)
117
-
118
- self.scale = dim_head ** -0.5
119
- self.heads = heads
120
-
121
- self.to_q = nn.Linear(query_dim, inner_dim, bias = False)
122
- self.to_kv = nn.Linear(context_dim, inner_dim * 2, bias = False) # context_dim -> inner_dim * 2 (e.g. 512 * 2 = 1024)
123
-
124
- self.dropout = nn.Dropout(dropout)
125
- self.to_out = nn.Linear(inner_dim, query_dim)
126
- #self.cls_dim_adjust = nn.Linear(context_dim,cls_conv_dim)
127
-
128
- def forward(self, x, context = None, mask = None, ref_cls_onehot=None):
129
-
130
- h = self.heads
131
- q = self.to_q(x)
132
- context = default(context, x)
133
- k, v = self.to_kv(context).chunk(2, dim = -1)
134
- q, k, v = map(lambda t: rearrange(t, 'b n (h d) -> (b h) n d', h = h), (q, k, v))
135
- sim = einsum('b i d, b j d -> b i j', q, k) * self.scale
136
-
137
- if exists(mask):
138
- mask = repeat(mask, 'b j k -> (b h) k j', h = h)
139
- sim = sim.masked_fill(mask == 0, -1e9) # masked_fill is out-of-place; keep the result
140
-
141
- # attention, what we cannot get enough of
142
- attn = sim.softmax(dim = -1)
143
- attn = self.dropout(attn)
144
- out = einsum('b i j, b j d -> b i d', attn, v)
145
- out = rearrange(out, '(b h) n d -> b n (h d)', h = h)
146
- return self.to_out(out), attn
147
-
148
-
149
- class SVGEmbedding(nn.Module):
150
- def __init__(self):
151
- super().__init__()
152
- self.command_embed = nn.Embedding(4, 512)
153
- self.arg_embed = nn.Embedding(128, 128,padding_idx=0)
154
- self.embed_fcn = nn.Linear(128 * 8, 512)
155
- self.pos_encoding = PositionalEncoding(d_model=opts.hidden_size, max_len=opts.max_seq_len + 1)
156
- self._init_embeddings()
157
-
158
- def _init_embeddings(self):
159
- nn.init.kaiming_normal_(self.command_embed.weight, mode="fan_in")
160
- nn.init.kaiming_normal_(self.arg_embed.weight, mode="fan_in")
161
- nn.init.kaiming_normal_(self.embed_fcn.weight, mode="fan_in")
162
-
163
-
164
- def forward(self, commands, args, groups=None):
165
-
166
- S, GN,_ = commands.shape
167
- src = self.command_embed(commands.long()).squeeze() + \
168
- self.embed_fcn(self.arg_embed((args).long()).view(S, GN, -1)) # shift due to -1 PAD_VAL
169
-
170
- src = self.pos_encoding(src)
171
-
172
- return src
173
- class PositionwiseFeedForward(nn.Module):
174
- "Implements FFN equation."
175
-
176
- def __init__(self, d_model, d_ff, dropout):
177
- super(PositionwiseFeedForward, self).__init__()
178
- self.w_1 = nn.Linear(d_model, d_ff)
179
- self.w_2 = nn.Linear(d_ff, d_model)
180
- self.dropout = nn.Dropout(dropout)
181
-
182
- def forward(self, x):
183
- return self.w_2(F.relu(self.dropout(self.w_1(x))))
184
-
185
- class Transformer_decoder(nn.Module):
186
- def __init__(self):
187
- super().__init__()
188
- self.SVG_embedding = SVGEmbedding()
189
- self.command_fcn = nn.Linear(512, 4)
190
- self.args_fcn = nn.Linear(512, 8 * 128)
191
- c = copy.deepcopy
192
- attn = MultiHeadedAttention(h=8, d_model=512, dropout=0.0)
193
- ff = PositionwiseFeedForward(d_model=512, d_ff=1024, dropout=0.0)
194
- self.decoder_layers = clones(DecoderLayer(512, c(attn), c(attn),c(ff), dropout=0.0), 6)
195
- self.decoder_norm = nn.LayerNorm(512)
196
- self.decoder_layers_parallel = clones(DecoderLayer(512, c(attn), c(attn), c(ff), dropout=0.0), 1)
197
- self.decoder_norm_parallel = nn.LayerNorm(512)
198
- self.cls_embedding = nn.Embedding(52,512)
199
- self.cls_token = nn.Parameter(torch.zeros(1, 1, 512))
200
-
201
- def forward(self, x, memory, trg_char, src_mask=None, tgt_mask=None):
202
-
203
- memory = memory.unsqueeze(1)
204
- commands = x[:, :, :1]
205
- args = x[:, :, 1:]
206
- x = self.SVG_embedding(commands, args).transpose(0,1)
207
- trg_char = trg_char.long()
208
- trg_char = self.cls_embedding(trg_char)
209
- x[:, 0:1, :] = trg_char
210
- tgt_mask = tgt_mask.squeeze()
211
- for layer in self.decoder_layers:
212
- x,attn = layer(x, memory, src_mask, tgt_mask)
213
- out = self.decoder_norm(x)
214
- N, S, _ = out.shape
215
- cmd_logits = self.command_fcn(out)
216
- args_logits = self.args_fcn(out) # shape: [bs, max_len, 8 * 128], reshaped to [bs, max_len, 8, 128] below
217
- args_logits = args_logits.reshape(N, S, 8, 128)
218
- return cmd_logits,args_logits,attn
219
-
220
- def parallel_decoder(self, cmd_logits, args_logits, memory, trg_char):
221
-
222
- memory = memory.unsqueeze(1)
223
- cmd_args_mask = torch.Tensor([[0, 0, 0., 0., 0., 0., 0., 0.],
224
- [1, 1, 0., 0., 0., 0., 1., 1.],
225
- [1, 1, 0., 0., 0., 0., 1., 1.],
226
- [1, 1, 1., 1., 1., 1., 1., 1.]]).to(cmd_logits.device)
227
- if opts.mode == 'train':
228
- cmd2 = torch.argmax(cmd_logits, -1).unsqueeze(-1).transpose(0, 1)
229
- arg2 = torch.argmax(args_logits, -1).transpose(0, 1)
230
-
231
- cmd2paddingmask = _get_key_padding_mask(cmd2).transpose(0,1).unsqueeze(-1).to(cmd2.device)
232
- cmd2 = cmd2 * cmd2paddingmask
233
- args_mask = torch.matmul(F.one_hot(cmd2.long(),4).float(), cmd_args_mask).transpose(-1,-2).squeeze(-1)
234
- arg2 = arg2 * args_mask
235
-
236
- x = self.SVG_embedding(cmd2, arg2).transpose(0, 1)
237
- else:
238
- cmd2 = cmd_logits
239
- arg2 = args_logits
240
-
241
- cmd2paddingmask = _get_key_padding_mask(cmd2).transpose(0, 1).unsqueeze(-1).to(cmd2.device)
242
- cmd2 = cmd2 * cmd2paddingmask
243
- args_mask = torch.matmul(F.one_hot(cmd2.long(),4).float(), cmd_args_mask).transpose(-1, -2).squeeze(-1)
244
- arg2 = arg2 * args_mask
245
-
246
- x = self.SVG_embedding(cmd2, arg2).transpose(0,1)
247
-
248
- S = x.size(1)
249
- B = x.size(0)
250
- tgt_mask = torch.ones(S,S).to(x.device).unsqueeze(0).repeat(B, 1, 1)
251
- cmd2paddingmask = cmd2paddingmask.transpose(0, 1).transpose(-1, -2)
252
- tgt_mask = tgt_mask * cmd2paddingmask
253
-
254
- trg_char = trg_char.long()
255
- trg_char = self.cls_embedding(trg_char)
256
-
257
- x = torch.cat([trg_char, x],1)
258
- x[:, 0:1, :] = trg_char
259
- x = x[:,:opts.max_seq_len,:]
260
- tgt_mask = tgt_mask #*tri
261
- for layer in self.decoder_layers_parallel:
262
- x, attn = layer(x, memory, src_mask=None, tgt_mask=tgt_mask)
263
- out = self.decoder_norm_parallel(x)
264
-
265
- N, S, _ = out.shape
266
- cmd_logits = self.command_fcn(out)
267
- args_logits = self.args_fcn(out)
268
- args_logits = args_logits.reshape(N, S, 8, 128)
269
-
270
- return cmd_logits, args_logits
271
-
272
-
273
- def _get_key_padding_mask(commands, seq_dim=0):
274
- """
275
- Args:
276
- commands: Shape [S, ...]
277
- """
278
- lens =[]
279
- with torch.no_grad():
280
- key_padding_mask = (commands == 0).cumsum(dim=seq_dim) > 0
281
- commands = commands.transpose(0, 1).squeeze(-1) # [bs, opts.max_seq_len]
282
- for i in range(commands.size(0)):
283
- try:
284
- seqi = commands[i] # [opts.max_seq_len]
285
- index = torch.where(seqi==0)[0][0]
286
-
287
- except IndexError: # no EOS (command id 0) found in this sequence
288
- index = opts.max_seq_len
289
-
290
- lens.append(index)
291
- lens = torch.tensor(lens) + 1 # [b]
292
- seqlen_mask = util_funcs.sequence_mask(lens, opts.max_seq_len) # [b, opts.max_seq_len]
293
- return seqlen_mask
294
-
295
- class Transformer(nn.Module):
296
- def __init__(
297
- self,
298
- *,
299
- num_freq_bands,
300
- depth,
301
- max_freq,
302
- input_channels = 1,
303
- input_axis = 2,
304
- num_latents = 512,
305
- latent_dim = 512,
306
- cross_heads = 1,
307
- latent_heads = 8,
308
- cross_dim_head = 64,
309
- latent_dim_head = 64,
310
- num_classes = 1000,
311
- attn_dropout = 0.,
312
- ff_dropout = 0.,
313
- weight_tie_layers = False,
314
- fourier_encode_data = True,
315
- self_per_cross_attn = 2,
316
- final_classifier_head = True
317
- ):
318
- """The shape of the final attention mechanism will be:
319
- depth * (cross attention -> self_per_cross_attn * self attention)
320
-
321
- Args:
322
- num_freq_bands: Number of freq bands, with original value (2 * K + 1)
323
- depth: Depth of net.
324
- max_freq: Maximum frequency, hyperparameter depending on how
325
- fine the data is.
326
- freq_base: Base for the frequency
327
- input_channels: Number of channels for each token of the input.
328
- input_axis: Number of axes for input data (2 for images, 3 for video)
329
- num_latents: Number of latents, or induced set points, or centroids.
330
- Different papers giving it different names.
331
- latent_dim: Latent dimension.
332
- cross_heads: Number of heads for cross attention. Paper said 1.
333
- latent_heads: Number of heads for latent self attention, 8.
334
- cross_dim_head: Number of dimensions per cross attention head.
335
- latent_dim_head: Number of dimensions per latent self attention head.
336
- num_classes: Output number of classes.
337
- attn_dropout: Attention dropout
338
- ff_dropout: Feedforward dropout
339
- weight_tie_layers: Whether to weight tie layers (optional).
340
- fourier_encode_data: Whether to auto-fourier encode the data, using
341
- the input_axis given. defaults to True, but can be turned off
342
- if you are fourier encoding the data yourself.
343
- self_per_cross_attn: Number of self attention blocks per cross attn.
344
- final_classifier_head: mean pool and project embeddings to number of classes (num_classes) at the end
345
- """
346
- super().__init__()
347
- self.input_axis = input_axis
348
- self.max_freq = max_freq
349
- self.num_freq_bands = num_freq_bands
350
-
351
- self.fourier_encode_data = fourier_encode_data
352
- fourier_channels = (input_axis * ((num_freq_bands * 2) + 1)) if fourier_encode_data else 0 # 26
353
- input_dim = fourier_channels + input_channels
354
-
355
- self.latents = nn.Parameter(torch.randn(num_latents, latent_dim))
356
-
357
- get_cross_attn = lambda: PreNorm(latent_dim, Attention(latent_dim, input_dim, heads=cross_heads, dim_head=cross_dim_head, dropout=attn_dropout), context_dim=input_dim)
358
- get_cross_ff = lambda: PreNorm(latent_dim, FeedForward(latent_dim, dropout=ff_dropout))
359
- get_latent_attn = lambda: PreNorm(latent_dim, Attention(latent_dim, heads=latent_heads, dim_head=latent_dim_head, dropout=attn_dropout))
360
- get_latent_ff = lambda: PreNorm(latent_dim, FeedForward(latent_dim, dropout=ff_dropout))
361
-
362
- get_cross_attn, get_cross_ff, get_latent_attn, get_latent_ff = map(cache_fn, (get_cross_attn, get_cross_ff, get_latent_attn, get_latent_ff))
363
-
364
-
365
- #self_per_cross_attn=1
366
- self.layers = nn.ModuleList([])
367
- for i in range(depth):
368
- should_cache = i > 0 and weight_tie_layers
369
- cache_args = {'_cache': should_cache}
370
-
371
- self_attns = nn.ModuleList([])
372
-
373
- for block_ind in range(self_per_cross_attn): # BUG: was previously hard-coded to 2 instead of self_per_cross_attn
374
- self_attns.append(nn.ModuleList([
375
- get_latent_attn(**cache_args, key = block_ind),
376
- get_latent_ff(**cache_args, key = block_ind)
377
- ]))
378
-
379
- self.layers.append(nn.ModuleList([
380
- get_cross_attn(**cache_args),
381
- get_cross_ff(**cache_args),
382
- self_attns
383
- ]))
384
-
385
-
386
- get_cross_attn2 = lambda: PreNorm(latent_dim, Attention(latent_dim, input_dim, heads = cross_heads, dim_head = cross_dim_head, dropout = attn_dropout), context_dim = input_dim)
387
- get_cross_ff2 = lambda: PreNorm(latent_dim, FeedForward(latent_dim, dropout = ff_dropout))
388
- get_latent_attn2 = lambda: PreNorm(latent_dim, Attention(latent_dim, heads = latent_heads, dim_head = latent_dim_head, dropout = attn_dropout))
389
- get_latent_ff2 = lambda: PreNorm(latent_dim, FeedForward(latent_dim, dropout = ff_dropout))
390
-
391
- get_cross_attn2, get_cross_ff2, get_latent_attn2, get_latent_ff2 = map(cache_fn, (get_cross_attn2, get_cross_ff2, get_latent_attn2, get_latent_ff2))
392
-
393
- self.layers_cnnsvg = nn.ModuleList([])
394
- for i in range(1):
395
- should_cache = i > 0 and weight_tie_layers
396
- cache_args = {'_cache': should_cache}
397
-
398
- self_attns2 = nn.ModuleList([])
399
-
400
- for block_ind in range(self_per_cross_attn):
401
- self_attns2.append(nn.ModuleList([
402
- get_latent_attn2(**cache_args, key = block_ind),
403
- get_latent_ff2(**cache_args, key = block_ind)
404
- ]))
405
-
406
- self.layers_cnnsvg.append(nn.ModuleList([
407
- get_cross_attn2(**cache_args),
408
- get_cross_ff2(**cache_args),
409
- self_attns2
410
- ]))
411
-
412
- self.to_logits = nn.Sequential(
413
- Reduce('b n d -> b d', 'mean'),
414
- nn.LayerNorm(latent_dim),
415
- nn.Linear(latent_dim, num_classes)
416
- ) if final_classifier_head else nn.Identity()
417
- self.pre_lstm_fc = nn.Linear(10,opts.hidden_size)
418
- self.posr = PositionalEncoding(d_model=opts.hidden_size,max_len=opts.max_seq_len)
419
-
420
- patch_height = 2
421
- patch_width = 2
422
- patch_dim = 1 * patch_height * patch_width
423
- self.to_patch_embedding = nn.Sequential(
424
- Rearrange('b (h p1) (w p2) c -> b (h w) (p1 p2 c)', p1 = patch_height, p2 = patch_width),
425
- nn.Linear(patch_dim, 16),
426
- )
427
-
428
- self.SVG_embedding = SVGEmbedding()
429
- self.cls_token = nn.Parameter(torch.zeros(1, 1, 512))
430
-
431
- def forward(self, data, seq, ref_cls_onehot=None, mask=None, return_embeddings=True):
432
-
433
- b, *axis, _, device, dtype = *data.shape, data.device, data.dtype
434
- assert len(axis) == self.input_axis, 'input data must have the right number of axis' # img is 2
435
- x = seq
436
- commands=x[:, :, :1]
437
- args=x[:, :, 1:]
438
- x = self.SVG_embedding(commands, args).transpose(0,1)
439
- cls_tokens = repeat(self.cls_token, '() n d -> b n d', b = x.size(0))
440
- x = torch.cat([cls_tokens,x],dim = 1)
441
- cls_one_pad = torch.ones((1,1,1)).to(x.device).repeat(x.size(0),1,1)
442
- mask = torch.cat([cls_one_pad,mask],dim=-1)
443
- self_atten = []
444
- for cross_attn, cross_ff, self_attns in self.layers:
445
- for self_attn, self_ff in self_attns:
446
- x_,atten = self_attn(x,mask=mask)
447
- x = x_ + x
448
- self_atten.append(atten)
449
- x = self_ff(x) + x
450
- x = x + torch.randn_like(x) # add a perturbation
451
- return x, self_atten
452
-
453
- def att_residual(self, x, mask=None):
454
-
455
- for cross_attn, cross_ff, self_attns in self.layers_cnnsvg:
456
- for self_attn, self_ff in self_attns:
457
- x_, atten = self_attn(x)
458
- x = x_ + x
459
- x = self_ff(x) + x
460
- return x
461
-
462
-
463
-
464
- def loss(self, cmd_logits, args_logits, trg_seq, trg_seqlen, trg_pts_aux):
465
- '''
466
- Inputs:
467
- cmd_logits: [b, 51, 4]
468
- args_logits: [b, 51, 8, 128]
469
- '''
470
- cmd_args_mask = torch.Tensor([[0, 0, 0., 0., 0., 0., 0., 0.],
471
- [1, 1, 0., 0., 0., 0., 1., 1.],
472
- [1, 1, 0., 0., 0., 0., 1., 1.],
473
- [1, 1, 1., 1., 1., 1., 1., 1.]]).to(cmd_logits.device)
474
-
475
- tgt_commands = trg_seq[:,:,:1].transpose(0,1)
476
- tgt_args = trg_seq[:,:,1:].transpose(0,1)
477
-
478
- seqlen_mask = util_funcs.sequence_mask(trg_seqlen, opts.max_seq_len).unsqueeze(-1)
479
- seqlen_mask2 = seqlen_mask.repeat(1,1,4) # NOTE [b, max_seq_len, 4]
480
- seqlen_mask4 = seqlen_mask.repeat(1,1,8)
481
- seqlen_mask3 = seqlen_mask.unsqueeze(-1).repeat(1,1,8,128)
482
-
483
-
484
- tgt_commands_onehot = F.one_hot(tgt_commands, 4)
485
- tgt_args_onehot = F.one_hot(tgt_args, 128)
486
-
487
- args_mask = torch.matmul(tgt_commands_onehot.float(),cmd_args_mask).squeeze()
488
-
489
-
490
- loss_cmd = torch.sum(- tgt_commands_onehot.squeeze() * F.log_softmax(cmd_logits, -1), -1)
491
- loss_cmd = torch.mul(loss_cmd, seqlen_mask.squeeze())
492
- loss_cmd = torch.mean(torch.sum(loss_cmd/trg_seqlen.unsqueeze(-1),-1))
493
-
494
- loss_args = (torch.sum(-tgt_args_onehot*F.log_softmax(args_logits,-1),-1)*seqlen_mask4*args_mask)
495
-
496
- loss_args = torch.mean(loss_args,dim=-1,keepdim=False)
497
- loss_args = torch.mean(torch.sum(loss_args/trg_seqlen.unsqueeze(-1),-1))
498
-
499
- SE_mask = torch.Tensor([[1, 1],
500
- [0, 0],
501
- [1, 1],
502
- [1, 1]]).to(cmd_logits.device)
503
-
504
- SE_args_mask = torch.matmul(tgt_commands_onehot.float(),SE_mask).squeeze().unsqueeze(-1)
505
-
506
-
507
- args_prob = F.softmax(args_logits, -1)
508
- args_end = args_prob[:,:,6:]
509
- args_end_shifted = torch.cat((torch.zeros(args_end.size(0),1,args_end.size(2),args_end.size(3)).to(args_end.device),args_end),1)
510
- args_end_shifted = args_end_shifted[:,:opts.max_seq_len,:,:]
511
- args_end_shifted = args_end_shifted*SE_args_mask + args_end*(1-SE_args_mask)
512
-
513
- args_start = args_prob[:,:,:2]
514
-
515
- seqlen_mask5 = util_funcs.sequence_mask(trg_seqlen-1, opts.max_seq_len).unsqueeze(-1)
516
- seqlen_mask5 = seqlen_mask5.repeat(1,1,2)
517
-
518
- smooth_constrained = torch.sum(torch.pow((args_end_shifted - args_start), 2), -1) * seqlen_mask5
519
- smooth_constrained = torch.mean(smooth_constrained, dim=-1, keepdim=False)
520
- smooth_constrained = torch.mean(torch.sum(smooth_constrained / (trg_seqlen - 1).unsqueeze(-1), -1))
521
-
522
- args_prob2 = F.softmax(args_logits / 0.1, -1)
523
-
524
- c = torch.argmax(args_prob2,-1).unsqueeze(-1).float() - args_prob2.detach() # straight-through estimator: hard argmax forward, softmax gradient backward
525
- p_argmax = args_prob2 + c
526
- p_argmax = torch.mean(p_argmax,-1)
527
- control_pts = denumericalize(p_argmax)
528
-
529
- p0 = control_pts[:,:,:2]
530
- p1 = control_pts[:,:,2:4]
531
- p2 = control_pts[:,:,4:6]
532
- p3 = control_pts[:,:,6:8]
533
-
534
- line_mask = (tgt_commands==2).float() + (tgt_commands==1).float()
535
- curve_mask = (tgt_commands==3).float()
536
-
537
- t=0.25
538
- aux_pts_line = p0 + t*(p3-p0)
539
- for t in [0.5,0.75]:
540
- coord_t = p0 + t*(p3-p0)
541
- aux_pts_line = torch.cat((aux_pts_line,coord_t),-1)
542
- aux_pts_line = aux_pts_line*line_mask
543
-
544
- t=0.25
545
- aux_pts_curve = (1-t)*(1-t)*(1-t)*p0 + 3*t*(1-t)*(1-t)*p1 + 3*t*t*(1-t)*p2 + t*t*t*p3
546
- for t in [0.5, 0.75]:
547
- coord_t = (1-t)*(1-t)*(1-t)*p0 + 3*t*(1-t)*(1-t)*p1 + 3*t*t*(1-t)*p2 + t*t*t*p3
548
- aux_pts_curve = torch.cat((aux_pts_curve,coord_t),-1)
549
- aux_pts_curve = aux_pts_curve * curve_mask
550
-
551
-
552
- aux_pts_predict = aux_pts_curve + aux_pts_line
553
- seqlen_mask_aux = util_funcs.sequence_mask(trg_seqlen - 1, opts.max_seq_len).unsqueeze(-1)
554
- aux_pts_loss = torch.pow((aux_pts_predict - trg_pts_aux), 2) * seqlen_mask_aux
555
-
556
- loss_aux = torch.mean(aux_pts_loss, dim=-1, keepdim=False)
557
- loss_aux = torch.mean(torch.sum(loss_aux / trg_seqlen.unsqueeze(-1), -1))
558
-
559
-
560
- loss = opts.loss_w_cmd * loss_cmd + opts.loss_w_args * loss_args + opts.loss_w_aux * loss_aux + opts.loss_w_smt * smooth_constrained
561
-
562
- svg_losses = {}
563
- svg_losses['loss_total'] = loss
564
- svg_losses["loss_cmd"] = loss_cmd
565
- svg_losses["loss_args"] = loss_args
566
- svg_losses["loss_smt"] = smooth_constrained
567
- svg_losses["loss_aux"] = loss_aux
568
-
569
- return svg_losses
570
-
571
- class DecoderLayer(nn.Module):
572
- "Decoder is made of self-attn, src-attn, and feed forward (defined below)"
573
-
574
- def __init__(self, size, self_attn, src_attn, feed_forward, dropout):
575
- super(DecoderLayer, self).__init__()
576
- self.size = size
577
- self.self_attn = self_attn
578
- self.src_attn = src_attn
579
- self.feed_forward = feed_forward
580
- self.sublayer = clones(SublayerConnection(size, dropout), 3)
581
-
582
- def forward(self, x, memory, src_mask, tgt_mask):
583
- "Follow Figure 1 (right) for connections."
584
- m = memory
585
- x = self.sublayer[0](x, lambda x: self.self_attn(x, x, x, tgt_mask))
586
- x = self.sublayer[1](x, lambda x: self.src_attn(x, m, m, src_mask))
587
- attn = self.self_attn.attn
588
- return self.sublayer[2](x, self.feed_forward),attn
589
-
590
- def subsequent_mask(size):
591
- "Mask out subsequent positions."
592
- attn_shape = (1, size, size)
593
- subsequent_mask = np.triu(np.ones(attn_shape), k=1).astype('uint8')
594
- return torch.from_numpy(subsequent_mask) == 0
595
-
596
- def numericalize(cmd, n=128):
597
- """NOTE: shall only be called after normalization"""
598
- # assert np.max(cmd.origin) <= 1.0 and np.min(cmd.origin) >= -1.0
599
- cmd = (cmd / 30 * n).round().clip(min=0, max=n-1).int()
600
- return cmd
601
-
602
- def denumericalize(cmd, n=128):
603
- cmd = cmd / n * 30
604
- return cmd
605
-
606
- def attention(query, key, value, mask=None, trg_tri_mask=None,dropout=None, posr=None):
607
- "Compute 'Scaled Dot Product Attention'"
608
- d_k = query.size(-1)
609
- scores = torch.matmul(query, key.transpose(-2, -1)) / math.sqrt(d_k)
610
-
611
- if posr is not None:
612
- posr = posr.unsqueeze(1)
613
- scores = scores + posr
614
-
615
- if mask is not None:
616
- try:
617
- scores = scores.masked_fill(mask == 0, -1e9) # note mask: [b, 1, S, S], scores: [b, heads, S, S]
618
- except Exception as e:
619
- print("Shape: ",scores.shape)
620
- print("Error: ",e)
621
- import pdb; pdb.set_trace()
622
-
623
- if trg_tri_mask is not None:
624
- scores = scores.masked_fill(trg_tri_mask == 0, -1e9)
625
-
626
- p_attn = F.softmax(scores, dim=-1)
627
-
628
- if dropout is not None:
629
- p_attn = dropout(p_attn)
630
-
631
- return torch.matmul(p_attn, value), p_attn
632
-
633
-
634
- class MultiHeadedAttention(nn.Module):
635
- def __init__(self, h, d_model, dropout):
636
- "Take in model size and number of heads."
637
- super(MultiHeadedAttention, self).__init__()
638
- assert d_model % h == 0
639
- # We assume d_v always equals d_k
640
- self.d_k = d_model // h #32
641
- self.h = h #8
642
- self.linears = clones(nn.Linear(d_model, d_model), 4)
643
- self.attn = None
644
- self.dropout = nn.Dropout(p=dropout)
645
-
646
- def forward(self, query, key, value, mask=None,trg_tri_mask=None, posr=None):
647
- "Implements Figure 2"
648
-
649
- if mask is not None:
650
- # Same mask applied to all h heads.
651
- mask = mask.unsqueeze(1)
652
- nbatches = query.size(0) #16
653
-
654
- query, key, value = \
655
- [l(x).view(nbatches, -1, self.h, self.d_k).transpose(1, 2)
656
- for l, x in zip(self.linears, (query, key, value))]
657
-
658
- x, self.attn = attention(query, key, value, mask=mask,trg_tri_mask=trg_tri_mask,
659
- dropout=self.dropout, posr=posr)
660
-
661
- x = x.transpose(1, 2).contiguous() \
662
- .view(nbatches, -1, self.h * self.d_k)
663
-
664
- return self.linears[-1](x)
665
-
666
- def clones(module, N):
667
- "Produce N identical layers."
668
- return nn.ModuleList([copy.deepcopy(module) for _ in range(N)])
669
-
670
- class SublayerConnection(nn.Module):
671
- """
672
- A residual connection followed by a layer norm.
673
- Note for code simplicity the norm is first as opposed to last.
674
- """
675
-
676
- def __init__(self, size, dropout):
677
- super(SublayerConnection, self).__init__()
678
- self.norm = nn.LayerNorm(size)
679
- self.dropout = nn.Dropout(dropout)
680
-
681
- def forward(self, x, sublayer):
682
- "Apply residual connection to any sublayer with the same size."
683
- x_norm=self.norm(x)
684
- return x + self.dropout(sublayer(x_norm))#+ self.augs(x_norm)
685
-
686
-
687
- if __name__ == '__main__':
688
- model = Transformer(
689
- input_channels = 1, # number of channels for each token of the input
690
- input_axis = 2, # number of axis for input data (2 for images, 3 for video)
691
- num_freq_bands = 6, # number of freq bands, with original value (2 * K + 1)
692
- max_freq = 10., # maximum frequency, hyperparameter depending on how fine the data is
693
- depth = 6, # depth of net. The shape of the final attention mechanism will be:
694
- # depth * (cross attention -> self_per_cross_attn * self attention)
695
- num_latents = 256, # number of latents, or induced set points, or centroids. different papers giving it different names
696
- latent_dim = 512, # latent dimension
697
- cross_heads = 1, # number of heads for cross attention. paper said 1
698
- latent_heads = 8, # number of heads for latent self attention, 8
699
- cross_dim_head = 64, # number of dimensions per cross attention head
700
- latent_dim_head = 64, # number of dimensions per latent self attention head
701
- num_classes = 1000, # output number of classes
702
- attn_dropout = 0.,
703
- ff_dropout = 0.,
704
- weight_tie_layers = False, # whether to weight tie layers (optional, as indicated in the diagram)
705
- fourier_encode_data = True, # whether to auto-fourier encode the data, using the input_axis given. defaults to True, but can be turned off if you are fourier encoding the data yourself
706
- self_per_cross_attn = 2 # number of self attention blocks per cross attention
707
- )
708
-
709
- img = torch.randn(1, 224, 224, 3) # 1 imagenet image, pixelized
710
-
711
  model(img) # (1, 1000)
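The auxiliary-point loss in Transformer.loss above supervises three on-curve samples per command at t in {0.25, 0.5, 0.75}: the full cubic Bezier polynomial for curve commands, and its degenerate form p0 + t*(p3 - p0) for line commands. A standalone sketch of the same sampling (the control points here are arbitrary):

    import torch

    def cubic_bezier(p0, p1, p2, p3, t):
        # B(t) = (1-t)^3 p0 + 3t(1-t)^2 p1 + 3t^2(1-t) p2 + t^3 p3
        return ((1 - t) ** 3) * p0 + 3 * t * ((1 - t) ** 2) * p1 \
            + 3 * (t ** 2) * (1 - t) * p2 + (t ** 3) * p3

    p0 = torch.tensor([0.0, 0.0]);   p1 = torch.tensor([0.0, 10.0])
    p2 = torch.tensor([10.0, 10.0]); p3 = torch.tensor([10.0, 0.0])
    aux = torch.stack([cubic_bezier(p0, p1, p2, p3, t) for t in (0.25, 0.5, 0.75)])
    print(aux)  # tensor([[1.5625, 5.6250], [5.0000, 7.5000], [8.4375, 5.6250]])

Matching these interior samples against trg_pts_aux penalizes predictions whose control polygons drift even when the endpoints are correct.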
 
1
+ from math import pi, log
2
+ from functools import wraps
3
+ from multiprocessing import context
4
+ from textwrap import indent
5
+ import models.util_funcs as util_funcs
6
+ import math, copy
7
+ import numpy as np
8
+ import torch
9
+ from torch import nn, einsum
10
+ import torch.nn.functional as F
11
+ from einops import rearrange, repeat
12
+ from einops.layers.torch import Reduce
13
+ import pdb
14
+ from einops.layers.torch import Rearrange
15
+ from options import get_parser_main_model
16
+ opts = get_parser_main_model().parse_args()
17
+
18
+ class PositionalEncoding(nn.Module):
19
+ def __init__(self, d_model, dropout=0.1, max_len=5000):
20
+ super(PositionalEncoding, self).__init__()
21
+ self.dropout = nn.Dropout(p=dropout)
22
+ pe = torch.zeros(max_len, d_model)
23
+ position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
24
+ div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
25
+ pe[:, 0::2] = torch.sin(position * div_term)
26
+ pe[:, 1::2] = torch.cos(position * div_term)
27
+ pe = pe.unsqueeze(0).transpose(0, 1)
28
+ self.register_buffer('pe', pe)
29
+
30
+ def forward(self, x):
31
+ """
32
+ :param x: [x_len, batch_size, emb_size]
33
+ :return: [x_len, batch_size, emb_size]
34
+ """
35
+ x = x + self.pe[:x.size(0), :].to(x.device)
36
+ return self.dropout(x)
37
+
38
+ def exists(val):
39
+ return val is not None
40
+
41
+ def default(val, d):
42
+ return val if exists(val) else d
43
+
44
+ def cache_fn(f):
45
+ cache = dict()
46
+ @wraps(f)
47
+ def cached_fn(*args, _cache = True, key = None, **kwargs):
48
+ if not _cache:
49
+ return f(*args, **kwargs)
50
+ nonlocal cache
51
+ if key in cache:
52
+ return cache[key]
53
+ result = f(*args, **kwargs)
54
+ cache[key] = result
55
+ return result
56
+ return cached_fn
57
+
58
+ def fourier_encode(x, max_freq, num_bands = 4):
59
+ '''
60
+ x: ([64, 64, 2, 1]) is between [-1,1]
61
+ max_feq is 10
62
+ num_bands is 6
63
+ '''
64
+
65
+ x = x.unsqueeze(-1)
66
+ device, dtype, orig_x = x.device, x.dtype, x
67
+
68
+ scales = torch.linspace(1., max_freq / 2, num_bands, device = device, dtype = dtype) # tensor([1.0000, 1.8000, 2.6000, 3.4000, 4.2000, 5.0000]
69
+ scales = scales[(*((None,) * (len(x.shape) - 1)), Ellipsis)] # r([[[[1.0000, 1.8000, 2.6000, 3.4000, 4.2000, 5.0000]]]],
70
+
71
+ x = x * scales * pi
72
+ x = torch.cat([x.sin(), x.cos()], dim = -1)
73
+
74
+ x = torch.cat((x, orig_x), dim = -1)
75
+ return x
76
+
77
+ class PreNorm(nn.Module):
78
+ def __init__(self, dim, fn, context_dim = None):
79
+ super().__init__()
80
+ self.fn = fn
81
+ self.norm = nn.LayerNorm(dim)
82
+ self.norm_context = nn.LayerNorm(context_dim) if exists(context_dim) else None
83
+
84
+ def forward(self, x, **kwargs):
85
+ x = self.norm(x)
86
+
87
+ if exists(self.norm_context):
88
+ context = kwargs['context']
89
+ normed_context = self.norm_context(context)
90
+ kwargs.update(context = normed_context)
91
+
92
+ return self.fn(x, **kwargs)
93
+
94
+ class GEGLU(nn.Module):
95
+ def forward(self, x):
96
+ x, gates = x.chunk(2, dim = -1)
97
+ return x * F.gelu(gates)
98
+
99
+ class FeedForward(nn.Module):
100
+ def __init__(self, dim, mult = 4, dropout = 0.):
101
+ super().__init__()
102
+ self.net = nn.Sequential(
103
+ nn.Linear(dim, dim * mult * 2),
104
+ GEGLU(),
105
+ nn.Linear(dim * mult, dim),
106
+ nn.Dropout(dropout)
107
+ )
108
+
109
+ def forward(self, x):
110
+ return self.net(x)
111
+
112
+ class Attention(nn.Module):
113
+ def __init__(self, query_dim, context_dim = None, heads = 8, dim_head = 64, dropout = 0.,cls_conv_dim=None):
114
+ super().__init__()
115
+ inner_dim = dim_head * heads
116
+ context_dim = default(context_dim, query_dim)
117
+
118
+ self.scale = dim_head ** -0.5
119
+ self.heads = heads
120
+
121
+ self.to_q = nn.Linear(query_dim, inner_dim, bias = False)
122
+ self.to_kv = nn.Linear(context_dim, inner_dim * 2, bias = False) # 27 to 5012*2 = 1024
123
+
124
+ self.dropout = nn.Dropout(dropout)
125
+ self.to_out = nn.Linear(inner_dim, query_dim)
126
+ #self.cls_dim_adjust = nn.Linear(context_dim,cls_conv_dim)
127
+
128
+ def forward(self, x, context = None, mask = None, ref_cls_onehot=None):
129
+
130
+ h = self.heads
131
+ q = self.to_q(x)
132
+ context = default(context, x)
133
+ k, v = self.to_kv(context).chunk(2, dim = -1)
134
+ q, k, v = map(lambda t: rearrange(t, 'b n (h d) -> (b h) n d', h = h), (q, k, v))
135
+ sim = einsum('b i d, b j d -> b i j', q, k) * self.scale
136
+
137
+ if exists(mask):
138
+ mask = repeat(mask, 'b j k -> (b h) k j', h = h)
139
+ sim.masked_fill(mask == 0, -1e9)
140
+
141
+ # attention, what we cannot get enough of
142
+ attn = sim.softmax(dim = -1)
143
+ attn = self.dropout(attn)
144
+ out = einsum('b i j, b j d -> b i d', attn, v)
145
+ out = rearrange(out, '(b h) n d -> b n (h d)', h = h)
146
+ return self.to_out(out), attn
147
+
148
+
149
+ class SVGEmbedding(nn.Module):
150
+ def __init__(self):
151
+ super().__init__()
152
+ self.command_embed = nn.Embedding(4, 512)
153
+ self.arg_embed = nn.Embedding(128, 128,padding_idx=0)
154
+ self.embed_fcn = nn.Linear(128 * 8, 512)
155
+ self.pos_encoding = PositionalEncoding(d_model=opts.hidden_size, max_len=opts.max_seq_len + 1)
156
+ self._init_embeddings()
157
+
158
+ def _init_embeddings(self):
159
+ nn.init.kaiming_normal_(self.command_embed.weight, mode="fan_in")
160
+ nn.init.kaiming_normal_(self.arg_embed.weight, mode="fan_in")
161
+ nn.init.kaiming_normal_(self.embed_fcn.weight, mode="fan_in")
162
+
163
+
164
+ def forward(self, commands, args, groups=None):
165
+
166
+ S, GN,_ = commands.shape
167
+ src = self.command_embed(commands.long()).squeeze() + \
168
+ self.embed_fcn(self.arg_embed((args).long()).view(S, GN, -1)) # shift due to -1 PAD_VAL
169
+
170
+ src = self.pos_encoding(src)
171
+
172
+ return src
173
+ class PositionwiseFeedForward(nn.Module):
174
+ "Implements FFN equation."
175
+
176
+ def __init__(self, d_model, d_ff, dropout):
177
+ super(PositionwiseFeedForward, self).__init__()
178
+ self.w_1 = nn.Linear(d_model, d_ff)
179
+ self.w_2 = nn.Linear(d_ff, d_model)
180
+ self.dropout = nn.Dropout(dropout)
181
+
182
+ def forward(self, x):
183
+ return self.w_2(F.relu(self.dropout(self.w_1(x))))
184
+
185
+ class Transformer_decoder(nn.Module):
186
+ def __init__(self):
187
+ super().__init__()
188
+ self.SVG_embedding = SVGEmbedding()
189
+ self.command_fcn = nn.Linear(512, 4)
190
+ self.args_fcn = nn.Linear(512, 8 * 128)
191
+ c = copy.deepcopy
192
+ attn = MultiHeadedAttention(h=8, d_model=512, dropout=0.0)
193
+ ff = PositionwiseFeedForward(d_model=512, d_ff=1024, dropout=0.0)
194
+ self.decoder_layers = clones(DecoderLayer(512, c(attn), c(attn),c(ff), dropout=0.0), 6)
195
+ self.decoder_norm = nn.LayerNorm(512)
196
+ self.decoder_layers_parallel = clones(DecoderLayer(512, c(attn), c(attn), c(ff), dropout=0.0), 1)
197
+ self.decoder_norm_parallel = nn.LayerNorm(512)
198
+ self.cls_embedding = nn.Embedding(52,512)
199
+ self.cls_token = nn.Parameter(torch.zeros(1, 1, 512))
200
+
201
+ def forward(self, x, memory, trg_char, src_mask=None, tgt_mask=None):
202
+
203
+ memory = memory.unsqueeze(1)
204
+ commands = x[:, :, :1]
205
+ args = x[:, :, 1:]
206
+ x = self.SVG_embedding(commands, args).transpose(0,1)
207
+ trg_char = trg_char.long()
208
+ trg_char = self.cls_embedding(trg_char)
209
+ x[:, 0:1, :] = trg_char
210
+ tgt_mask = tgt_mask.squeeze()
211
+ for layer in self.decoder_layers:
212
+ x,attn = layer(x, memory, src_mask, tgt_mask)
213
+ out = self.decoder_norm(x)
214
+ N, S, _ = out.shape
215
+ cmd_logits = self.command_fcn(out)
216
+ args_logits = self.args_fcn(out) # shape: bs, max_len, 8, 256
217
+ args_logits = args_logits.reshape(N, S, 8, 128)
218
+ return cmd_logits,args_logits,attn
219
+
220
+ def parallel_decoder(self, cmd_logits, args_logits, memory, trg_char):
221
+
222
+ memory = memory.unsqueeze(1)
223
+ cmd_args_mask = torch.Tensor([[0, 0, 0., 0., 0., 0., 0., 0.],
224
+ [1, 1, 0., 0., 0., 0., 1., 1.],
225
+ [1, 1, 0., 0., 0., 0., 1., 1.],
226
+ [1, 1, 1., 1., 1., 1., 1., 1.]]).to(cmd_logits.device)
227
+ if opts.mode == 'train':
228
+ cmd2 = torch.argmax(cmd_logits, -1).unsqueeze(-1).transpose(0, 1)
229
+ arg2 = torch.argmax(args_logits, -1).transpose(0, 1)
230
+
231
+ cmd2paddingmask = _get_key_padding_mask(cmd2).transpose(0,1).unsqueeze(-1).to(cmd2.device)
232
+ cmd2 = cmd2 * cmd2paddingmask
233
+ args_mask = torch.matmul(F.one_hot(cmd2.long(),4).float(), cmd_args_mask).transpose(-1,-2).squeeze(-1)
234
+ arg2 = arg2 * args_mask
235
+
236
+ x = self.SVG_embedding(cmd2, arg2).transpose(0, 1)
237
+ else:
238
+ cmd2 = cmd_logits
239
+ arg2 = args_logits
240
+
241
+ cmd2paddingmask = _get_key_padding_mask(cmd2).transpose(0, 1).unsqueeze(-1).to(cmd2.device)
242
+ cmd2 = cmd2 * cmd2paddingmask
243
+ args_mask = torch.matmul(F.one_hot(cmd2.long(),4).float(), cmd_args_mask).transpose(-1, -2).squeeze(-1)
244
+ arg2 = arg2 * args_mask
245
+
246
+ x = self.SVG_embedding(cmd2, arg2).transpose(0,1)
247
+
248
+ S = x.size(1)
249
+ B = x.size(0)
250
+ tgt_mask = torch.ones(S,S).to(x.device).unsqueeze(0).repeat(B, 1, 1)
251
+ cmd2paddingmask = cmd2paddingmask.transpose(0, 1).transpose(-1, -2)
252
+ tgt_mask = tgt_mask * cmd2paddingmask
253
+
254
+ trg_char = trg_char.long()
255
+ trg_char = self.cls_embedding(trg_char)
256
+
257
+ x = torch.cat([trg_char, x],1)
258
+ x[:, 0:1, :] = trg_char
259
+ x = x[:,:opts.max_seq_len,:]
260
+ tgt_mask = tgt_mask #*tri
261
+ for layer in self.decoder_layers_parallel:
262
+ x, attn = layer(x, memory, src_mask=None, tgt_mask=tgt_mask)
263
+ out = self.decoder_norm_parallel(x)
264
+
265
+ N, S, _ = out.shape
266
+ cmd_logits = self.command_fcn(out)
267
+ args_logits = self.args_fcn(out)
268
+ args_logits = args_logits.reshape(N, S, 8, 128)
269
+
270
+ return cmd_logits, args_logits
271
+
272
+
273
+ def _get_key_padding_mask(commands, seq_dim=0):
274
+ """
275
+ Args:
276
+ commands: Shape [S, ...]
277
+ """
278
+ lens =[]
279
+ with torch.no_grad():
280
+ key_padding_mask = (commands == 0).cumsum(dim=seq_dim) > 0
281
+ commands=commands.transpose(0,1).squeeze(-1) #bs, opts.max_seq_len
282
+ for i in range(commands.size(0)):
283
+ try:
284
+ seqi = commands[i]#blue opts.max_seq_len
285
+ index = torch.where(seqi==0)[0][0]
286
+
287
+ except:
288
+ index=opts.max_seq_len
289
+
290
+ lens.append(index)
291
+ lens = torch.tensor(lens)+1#blue b
292
+ seqlen_mask = util_funcs.sequence_mask(lens, opts.max_seq_len)#blue b,opts.max_seq_len
293
+ return seqlen_mask
294
+
295
+ class Transformer(nn.Module):
296
+ def __init__(
297
+ self,
298
+ *,
299
+ num_freq_bands,
300
+ depth,
301
+ max_freq,
302
+ input_channels = 1,
303
+ input_axis = 2,
304
+ num_latents = 512,
305
+ latent_dim = 512,
306
+ cross_heads = 1,
307
+ latent_heads = 8,
308
+ cross_dim_head = 64,
309
+ latent_dim_head = 64,
310
+ num_classes = 1000,
311
+ attn_dropout = 0.,
312
+ ff_dropout = 0.,
313
+ weight_tie_layers = False,
314
+ fourier_encode_data = True,
315
+ self_per_cross_attn = 2,
316
+ final_classifier_head = True
317
+ ):
318
+ """The shape of the final attention mechanism will be:
319
+ depth * (cross attention -> self_per_cross_attn * self attention)
320
+
321
+ Args:
322
+ num_freq_bands: Number of freq bands, with original value (2 * K + 1)
323
+ depth: Depth of net.
324
+ max_freq: Maximum frequency, hyperparameter depending on how
325
+ fine the data is.
326
+ freq_base: Base for the frequency
327
+ input_channels: Number of channels for each token of the input.
328
+ input_axis: Number of axes for input data (2 for images, 3 for video)
329
+ num_latents: Number of latents, or induced set points, or centroids.
330
+ Different papers giving it different names.
331
+ latent_dim: Latent dimension.
332
+ cross_heads: Number of heads for cross attention. Paper said 1.
333
+ latent_heads: Number of heads for latent self attention, 8.
334
+ cross_dim_head: Number of dimensions per cross attention head.
335
+ latent_dim_head: Number of dimensions per latent self attention head.
336
+ num_classes: Output number of classes.
337
+ attn_dropout: Attention dropout
338
+ ff_dropout: Feedforward dropout
339
+ weight_tie_layers: Whether to weight tie layers (optional).
340
+ fourier_encode_data: Whether to auto-fourier encode the data, using
341
+ the input_axis given. defaults to True, but can be turned off
342
+ if you are fourier encoding the data yourself.
343
+ self_per_cross_attn: Number of self attention blocks per cross attn.
344
+ final_classifier_head: mean pool and project embeddings to number of classes (num_classes) at the end
345
+ """
346
+ super().__init__()
347
+ self.input_axis = input_axis
348
+ self.max_freq = max_freq
349
+ self.num_freq_bands = num_freq_bands
350
+
351
+ self.fourier_encode_data = fourier_encode_data
352
+ fourier_channels = (input_axis * ((num_freq_bands * 2) + 1)) if fourier_encode_data else 0 # 26
353
+ input_dim = fourier_channels + input_channels
354
+
355
+ self.latents = nn.Parameter(torch.randn(num_latents, latent_dim))
356
+
357
+ get_cross_attn = lambda: PreNorm(latent_dim, Attention(latent_dim, input_dim, heads=cross_heads, dim_head=cross_dim_head, dropout=attn_dropout), context_dim=input_dim)
358
+ get_cross_ff = lambda: PreNorm(latent_dim, FeedForward(latent_dim, dropout=ff_dropout))
359
+ get_latent_attn = lambda: PreNorm(latent_dim, Attention(latent_dim, heads=latent_heads, dim_head=latent_dim_head, dropout=attn_dropout))
360
+ get_latent_ff = lambda: PreNorm(latent_dim, FeedForward(latent_dim, dropout=ff_dropout))
361
+
362
+ get_cross_attn, get_cross_ff, get_latent_attn, get_latent_ff = map(cache_fn, (get_cross_attn, get_cross_ff, get_latent_attn, get_latent_ff))
363
+
364
+
365
+ #self_per_cross_attn=1
366
+ self.layers = nn.ModuleList([])
367
+ for i in range(depth):
368
+ should_cache = i > 0 and weight_tie_layers
369
+ cache_args = {'_cache': should_cache}
370
+
371
+ self_attns = nn.ModuleList([])
372
+
373
+ for block_ind in range(self_per_cross_attn): #BUG 之前是2 self_per_cross_attn
374
+ self_attns.append(nn.ModuleList([
375
+ get_latent_attn(**cache_args, key = block_ind),
376
+ get_latent_ff(**cache_args, key = block_ind)
377
+ ]))
378
+
379
+ self.layers.append(nn.ModuleList([
380
+ get_cross_attn(**cache_args),
381
+ get_cross_ff(**cache_args),
382
+ self_attns
383
+ ]))
384
+
385
+
386
+ get_cross_attn2 = lambda: PreNorm(latent_dim, Attention(latent_dim, input_dim, heads = cross_heads, dim_head = cross_dim_head, dropout = attn_dropout), context_dim = input_dim)
387
+ get_cross_ff2 = lambda: PreNorm(latent_dim, FeedForward(latent_dim, dropout = ff_dropout))
388
+ get_latent_attn2 = lambda: PreNorm(latent_dim, Attention(latent_dim, heads = latent_heads, dim_head = latent_dim_head, dropout = attn_dropout))
389
+ get_latent_ff2 = lambda: PreNorm(latent_dim, FeedForward(latent_dim, dropout = ff_dropout))
390
+
391
+ get_cross_attn2, get_cross_ff2, get_latent_attn2, get_latent_ff2 = map(cache_fn, (get_cross_attn2, get_cross_ff2, get_latent_attn2, get_latent_ff2))
392
+
393
+ self.layers_cnnsvg = nn.ModuleList([])
394
+ for i in range(1):
395
+ should_cache = i > 0 and weight_tie_layers
396
+ cache_args = {'_cache': should_cache}
397
+
398
+ self_attns2 = nn.ModuleList([])
399
+
400
+ for block_ind in range(self_per_cross_attn):
401
+ self_attns2.append(nn.ModuleList([
402
+ get_latent_attn2(**cache_args, key = block_ind),
403
+ get_latent_ff2(**cache_args, key = block_ind)
404
+ ]))
405
+
406
+ self.layers_cnnsvg.append(nn.ModuleList([
407
+ get_cross_attn2(**cache_args),
408
+ get_cross_ff2(**cache_args),
409
+ self_attns2
410
+ ]))
411
+
412
+ self.to_logits = nn.Sequential(
413
+ Reduce('b n d -> b d', 'mean'),
414
+ nn.LayerNorm(latent_dim),
415
+ nn.Linear(latent_dim, num_classes)
416
+ ) if final_classifier_head else nn.Identity()
417
+ self.pre_lstm_fc = nn.Linear(10,opts.hidden_size)
418
+ self.posr = PositionalEncoding(d_model=opts.hidden_size,max_len=opts.max_seq_len)
419
+
420
+ patch_height = 2
421
+ patch_width = 2
422
+ patch_dim = 1 * patch_height * patch_width
423
+ self.to_patch_embedding = nn.Sequential(
424
+ Rearrange('b (h p1) (w p2) c -> b (h w) (p1 p2 c)', p1 = patch_height, p2 = patch_width),
425
+ nn.Linear(patch_dim, 16),
426
+ )
427
+
428
+ self.SVG_embedding = SVGEmbedding()
429
+ self.cls_token = nn.Parameter(torch.zeros(1, 1, 512))
430
+
431
+ def forward(self, data, seq, ref_cls_onehot=None, mask=None, return_embeddings=True):
432
+
433
+ b, *axis, _, device, dtype = *data.shape, data.device, data.dtype
434
+ assert len(axis) == self.input_axis, 'input data must have the right number of axis' # img is 2
435
+ x = seq
436
+ commands=x[:, :, :1]
437
+ args=x[:, :, 1:]
438
+ x = self.SVG_embedding(commands, args).transpose(0,1)
439
+ cls_tokens = repeat(self.cls_token, '() n d -> b n d', b = x.size(0))
440
+ x = torch.cat([cls_tokens,x],dim = 1)
441
+ cls_one_pad = torch.ones((1,1,1)).to(x.device).repeat(x.size(0),1,1)
442
+ mask = torch.cat([cls_one_pad,mask],dim=-1)
443
+ self_atten = []
444
+ for cross_attn, cross_ff, self_attns in self.layers:
445
+ for self_attn, self_ff in self_attns:
446
+ x_,atten = self_attn(x,mask=mask)
447
+ x = x_ + x
448
+ self_atten.append(atten)
449
+ x = self_ff(x) + x
450
+ x = x + torch.randn_like(x) # add a perturbation
451
+ return x, self_atten
452
+
453
+ def att_residual(self, x, mask=None):
454
+
455
+ for cross_attn, cross_ff, self_attns in self.layers_cnnsvg:
456
+ for self_attn, self_ff in self_attns:
457
+ x_, atten = self_attn(x)
458
+ x = x_ + x
459
+ x = self_ff(x) + x
460
+ return x
461
+
462
+
463
+
464
+ def loss(self, cmd_logits, args_logits, trg_seq, trg_seqlen, trg_pts_aux):
465
+ '''
466
+ Inputs:
467
+ cmd_logits: [b, 51, 4]
468
+ args_logits: [b, 51, 6]
469
+ '''
470
+ cmd_args_mask = torch.Tensor([[0, 0, 0., 0., 0., 0., 0., 0.],
471
+ [1, 1, 0., 0., 0., 0., 1., 1.],
472
+ [1, 1, 0., 0., 0., 0., 1., 1.],
473
+ [1, 1, 1., 1., 1., 1., 1., 1.]]).to(cmd_logits.device)
474
+
475
+ tgt_commands = trg_seq[:,:,:1].transpose(0,1)
476
+ tgt_args = trg_seq[:,:,1:].transpose(0,1)
477
+
478
+ seqlen_mask = util_funcs.sequence_mask(trg_seqlen, opts.max_seq_len).unsqueeze(-1)
479
+ seqlen_mask2 = seqlen_mask.repeat(1,1,4)# NOTE b,501,4
480
+ seqlen_mask4 = seqlen_mask.repeat(1,1,8)
481
+ seqlen_mask3 = seqlen_mask.unsqueeze(-1).repeat(1,1,8,128)
482
+
483
+
484
+ tgt_commands_onehot = F.one_hot(tgt_commands, 4)
485
+ tgt_args_onehot = F.one_hot(tgt_args, 128)
486
+
487
+ args_mask = torch.matmul(tgt_commands_onehot.float(),cmd_args_mask).squeeze()
488
+
489
+
490
+ loss_cmd = torch.sum(- tgt_commands_onehot.squeeze() * F.log_softmax(cmd_logits, -1), -1)
491
+ loss_cmd = torch.mul(loss_cmd, seqlen_mask.squeeze())
492
+ loss_cmd = torch.mean(torch.sum(loss_cmd/trg_seqlen.unsqueeze(-1),-1))
493
+
494
+ loss_args = (torch.sum(-tgt_args_onehot*F.log_softmax(args_logits,-1),-1)*seqlen_mask4*args_mask)
495
+
496
+ loss_args = torch.mean(loss_args,dim=-1,keepdim=False)
497
+ loss_args = torch.mean(torch.sum(loss_args/trg_seqlen.unsqueeze(-1),-1))
498
+
499
+ SE_mask = torch.Tensor([[1, 1],
500
+ [0, 0],
501
+ [1, 1],
502
+ [1, 1]]).to(cmd_logits.device)
503
+
504
+ SE_args_mask = torch.matmul(tgt_commands_onehot.float(),SE_mask).squeeze().unsqueeze(-1)
505
+
506
+
507
+ args_prob = F.softmax(args_logits, -1)
508
+ args_end = args_prob[:,:,6:]
509
+ args_end_shifted = torch.cat((torch.zeros(args_end.size(0),1,args_end.size(2),args_end.size(3)).to(args_end.device),args_end),1)
510
+ args_end_shifted = args_end_shifted[:,:opts.max_seq_len,:,:]
511
+ args_end_shifted = args_end_shifted*SE_args_mask + args_end*(1-SE_args_mask)
512
+
513
+ args_start = args_prob[:,:,:2]
514
+
515
+ seqlen_mask5 = util_funcs.sequence_mask(trg_seqlen-1, opts.max_seq_len).unsqueeze(-1)
516
+ seqlen_mask5 = seqlen_mask5.repeat(1,1,2)
517
+
518
+ smooth_constrained = torch.sum(torch.pow((args_end_shifted - args_start), 2), -1) * seqlen_mask5
519
+ smooth_constrained = torch.mean(smooth_constrained, dim=-1, keepdim=False)
520
+ smooth_constrained = torch.mean(torch.sum(smooth_constrained / (trg_seqlen - 1).unsqueeze(-1), -1))
521
+
522
+ args_prob2 = F.softmax(args_logits / 0.1, -1)
523
+
524
+ c = torch.argmax(args_prob2,-1).unsqueeze(-1).float() - args_prob2.detach()
525
+ p_argmax = args_prob2 + c
526
+ p_argmax = torch.mean(p_argmax,-1)
527
+ control_pts = denumericalize(p_argmax)
528
+
529
+ p0 = control_pts[:,:,:2]
530
+ p1 = control_pts[:,:,2:4]
531
+ p2 = control_pts[:,:,4:6]
532
+ p3 = control_pts[:,:,6:8]
533
+
534
+ line_mask = (tgt_commands==2).float() + (tgt_commands==1).float()
535
+ curve_mask = (tgt_commands==3).float()
536
+
537
+ t=0.25
538
+ aux_pts_line = p0 + t*(p3-p0)
539
+ for t in [0.5,0.75]:
540
+ coord_t = p0 + t*(p3-p0)
541
+ aux_pts_line = torch.cat((aux_pts_line,coord_t),-1)
542
+ aux_pts_line = aux_pts_line*line_mask
543
+
544
+ t=0.25
545
+ aux_pts_curve = (1-t)*(1-t)*(1-t)*p0 + 3*t*(1-t)*(1-t)*p1 + 3*t*t*(1-t)*p2 + t*t*t*p3
546
+ for t in [0.5, 0.75]:
547
+ coord_t = (1-t)*(1-t)*(1-t)*p0 + 3*t*(1-t)*(1-t)*p1 + 3*t*t*(1-t)*p2 + t*t*t*p3
548
+ aux_pts_curve = torch.cat((aux_pts_curve,coord_t),-1)
549
+ aux_pts_curve = aux_pts_curve * curve_mask
550
+
551
+
552
+ aux_pts_predict = aux_pts_curve + aux_pts_line
553
+ seqlen_mask_aux = util_funcs.sequence_mask(trg_seqlen - 1, opts.max_seq_len).unsqueeze(-1)
554
+ aux_pts_loss = torch.pow((aux_pts_predict - trg_pts_aux), 2) * seqlen_mask_aux
555
+
556
+ loss_aux = torch.mean(aux_pts_loss, dim=-1, keepdim=False)
557
+ loss_aux = torch.mean(torch.sum(loss_aux / trg_seqlen.unsqueeze(-1), -1))
558
+
559
+
560
+ loss = opts.loss_w_cmd * loss_cmd + opts.loss_w_args * loss_args + opts.loss_w_aux * loss_aux + opts.loss_w_smt * smooth_constrained
561
+
562
+ svg_losses = {}
563
+ svg_losses['loss_total'] = loss
564
+ svg_losses["loss_cmd"] = loss_cmd
565
+ svg_losses["loss_args"] = loss_args
566
+ svg_losses["loss_smt"] = smooth_constrained
567
+ svg_losses["loss_aux"] = loss_aux
568
+
569
+ return svg_losses
570
+
571
+ class DecoderLayer(nn.Module):
572
+ "Decoder is made of self-attn, src-attn, and feed forward (defined below)"
573
+
574
+ def __init__(self, size, self_attn, src_attn, feed_forward, dropout):
575
+ super(DecoderLayer, self).__init__()
576
+ self.size = size
577
+ self.self_attn = self_attn
578
+ self.src_attn = src_attn
579
+ self.feed_forward = feed_forward
580
+ self.sublayer = clones(SublayerConnection(size, dropout), 3)
581
+
582
+ def forward(self, x, memory, src_mask, tgt_mask):
583
+ "Follow Figure 1 (right) for connections."
584
+ m = memory
585
+ x = self.sublayer[0](x, lambda x: self.self_attn(x, x, x, tgt_mask))
586
+ x = self.sublayer[1](x, lambda x: self.src_attn(x, m, m, src_mask))
587
+ attn = self.self_attn.attn
588
+ return self.sublayer[2](x, self.feed_forward),attn
589
+
590
+ def subsequent_mask(size):
591
+ "Mask out subsequent positions."
592
+ attn_shape = (1, size, size)
593
+ subsequent_mask = np.triu(np.ones(attn_shape), k=1).astype('uint8')
594
+ return torch.from_numpy(subsequent_mask) == 0
595
+
596
+ def numericalize(cmd, n=128):
597
+ """NOTE: shall only be called after normalization"""
598
+ # assert np.max(cmd.origin) <= 1.0 and np.min(cmd.origin) >= -1.0
599
+ cmd = (cmd / 30 * n).round().clip(min=0, max=n-1).int()
600
+ return cmd
601
+
602
+ def denumericalize(cmd, n=128):
603
+ cmd = cmd / n * 30
604
+ return cmd
605
+
606
+ def attention(query, key, value, mask=None, trg_tri_mask=None,dropout=None, posr=None):
607
+ "Compute 'Scaled Dot Product Attention'"
608
+ d_k = query.size(-1)
609
+ scores = torch.matmul(query, key.transpose(-2, -1)) / math.sqrt(d_k)
610
+
611
+ if posr is not None:
612
+ posr = posr.unsqueeze(1)
613
+ scores = scores + posr
614
+
615
+ if mask is not None:
616
+ try:
617
+ scores = scores.masked_fill(mask == 0, -1e9) # note mask: b,1,501,501 scores: b, head, 501,501
618
+ except Exception as e:
619
+ print("Shape: ",scores.shape)
620
+ print("Error: ",e)
621
+ import pdb; pdb.set_trace()
622
+
623
+ if trg_tri_mask is not None:
624
+ scores = scores.masked_fill(trg_tri_mask == 0, -1e9)
625
+
626
+ p_attn = F.softmax(scores, dim=-1)
627
+
628
+ if dropout is not None:
629
+ p_attn = dropout(p_attn)
630
+
631
+ return torch.matmul(p_attn, value), p_attn
632
+
633
+
634
+ class MultiHeadedAttention(nn.Module):
635
+ def __init__(self, h, d_model, dropout):
636
+ "Take in model size and number of heads."
637
+ super(MultiHeadedAttention, self).__init__()
638
+ assert d_model % h == 0
639
+ # We assume d_v always equals d_k
640
+ self.d_k = d_model // h #32
641
+ self.h = h #8
642
+ self.linears = clones(nn.Linear(d_model, d_model), 4)
643
+ self.attn = None
644
+ self.dropout = nn.Dropout(p=dropout)
645
+
646
+ def forward(self, query, key, value, mask=None,trg_tri_mask=None, posr=None):
647
+ "Implements Figure 2"
648
+
649
+ if mask is not None:
650
+ # Same mask applied to all h heads.
651
+ mask = mask.unsqueeze(1)
652
+ nbatches = query.size(0) #16
653
+
654
+ query, key, value = \
655
+ [l(x).view(nbatches, -1, self.h, self.d_k).transpose(1, 2)
656
+ for l, x in zip(self.linears, (query, key, value))]
657
+
658
+ x, self.attn = attention(query, key, value, mask=mask,trg_tri_mask=trg_tri_mask,
659
+ dropout=self.dropout, posr=posr)
660
+
661
+ x = x.transpose(1, 2).contiguous() \
662
+ .view(nbatches, -1, self.h * self.d_k)
663
+
664
+ return self.linears[-1](x)
665
+
666
+ def clones(module, N):
667
+ "Produce N identical layers."
668
+ return nn.ModuleList([copy.deepcopy(module) for _ in range(N)])
669
+
670
+ class SublayerConnection(nn.Module):
671
+ """
672
+ A residual connection followed by a layer norm.
673
+ Note for code simplicity the norm is first as opposed to last.
674
+ """
675
+
676
+ def __init__(self, size, dropout):
677
+ super(SublayerConnection, self).__init__()
678
+ self.norm = nn.LayerNorm(size)
679
+ self.dropout = nn.Dropout(dropout)
680
+
681
+ def forward(self, x, sublayer):
682
+ "Apply residual connection to any sublayer with the same size."
683
+ x_norm=self.norm(x)
684
+ return x + self.dropout(sublayer(x_norm))#+ self.augs(x_norm)
685
+
686
+
687
+ if __name__ == '__main__':
688
+ model = Transformer(
689
+ input_channels = 1, # number of channels for each token of the input
690
+ input_axis = 2, # number of axis for input data (2 for images, 3 for video)
691
+ num_freq_bands = 6, # number of freq bands, with original value (2 * K + 1)
692
+ max_freq = 10., # maximum frequency, hyperparameter depending on how fine the data is
693
+ depth = 6, # depth of net. The shape of the final attention mechanism will be:
694
+ # depth * (cross attention -> self_per_cross_attn * self attention)
695
+ num_latents = 256, # number of latents, or induced set points, or centroids. different papers giving it different names
696
+ latent_dim = 512, # latent dimension
697
+ cross_heads = 1, # number of heads for cross attention. paper said 1
698
+ latent_heads = 8, # number of heads for latent self attention, 8
699
+ cross_dim_head = 64, # number of dimensions per cross attention head
700
+ latent_dim_head = 64, # number of dimensions per latent self attention head
701
+ num_classes = 1000, # output number of classes
702
+ attn_dropout = 0.,
703
+ ff_dropout = 0.,
704
+ weight_tie_layers = False, # whether to weight tie layers (optional, as indicated in the diagram)
705
+ fourier_encode_data = True, # whether to auto-fourier encode the data, using the input_axis given. defaults to True, but can be turned off if you are fourier encoding the data yourself
706
+ self_per_cross_attn = 2 # number of self attention blocks per cross attention
707
+ )
708
+
709
+ img = torch.randn(1, 224, 224, 3) # 1 imagenet image, pixelized
710
+
711
  model(img) # (1, 1000)
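
A minimal round-trip sketch for the numericalize/denumericalize pair defined above (assuming, as the code does, arguments already normalized to the 0–30 viewbox and the default n=128 bins):

import torch

def numericalize(cmd, n=128):
    # quantize continuous args in [0, 30] into integer bins {0, ..., n-1}
    return (cmd / 30 * n).round().clip(min=0, max=n - 1).int()

def denumericalize(cmd, n=128):
    # map bin indices back to the continuous [0, 30] range
    return cmd / n * 30

args = torch.tensor([0.0, 7.5, 15.0, 29.9])
bins = numericalize(args)        # tensor([  0,  32,  64, 127], dtype=torch.int32)
approx = denumericalize(bins)    # each value within about one bin width (30/128 ~ 0.23) of the input

The round-trip error is bounded by roughly one bin width (30/n), which is why the decoder can treat argument prediction as classification over n bins.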
{models → ThaiVecFont/models}/util_funcs.py RENAMED
@@ -1,96 +1,96 @@
1
- import torch
2
- import torch.nn.functional as F
3
- import cairosvg
4
- from data_utils.common_utils import trans2_white_bg
5
- from PIL import Image
6
- import numpy as np
7
-
8
- def select_imgs(images_of_onefont, selected_cls, opts):
9
- # given selected char classes, return selected imgs
10
- # images_of_onefont: [bs, 52, opts.img_size, opts.img_size]
11
- # selected_cls: [bs, nshot]
12
- nums = selected_cls.size(1)
13
- selected_cls_ = selected_cls.unsqueeze(2)
14
- selected_cls_ = selected_cls_.unsqueeze(3)
15
- selected_cls_ = selected_cls_.expand(images_of_onefont.size(0), nums, opts.img_size, opts.img_size)
16
- selected_img = torch.gather(images_of_onefont, 1, selected_cls_)
17
- return selected_img
18
-
19
- def select_seqs(seqs_of_onefont, selected_cls, opts, seq_dim):
20
-
21
- nums = selected_cls.size(1)
22
- selected_cls_ = selected_cls.unsqueeze(2)
23
- selected_cls_ = selected_cls_.unsqueeze(3)
24
- selected_cls_ = selected_cls_.expand(seqs_of_onefont.size(0), nums, opts.max_seq_len, seq_dim)
25
- selected_seqs = torch.gather(seqs_of_onefont, 1, selected_cls_)
26
- return selected_seqs
27
-
28
- def select_seqlens(seqlens_of_onefont, selected_cls, opts):
29
-
30
- nums = selected_cls.size(1)
31
- selected_cls_ = selected_cls.unsqueeze(2)
32
- selected_cls_ = selected_cls_.expand(seqlens_of_onefont.size(0), nums, 1) # 64, nums, 1
33
- selected_seqlens = torch.gather(seqlens_of_onefont, 1, selected_cls_)
34
- return selected_seqlens
35
-
36
- def trgcls_to_onehot(trg_cls, opts):
37
- trg_char = F.one_hot(trg_cls, num_classes=opts.char_num).squeeze(dim=1)
38
- return trg_char
39
-
40
-
41
- def shift_right(x, pad_value=None):
42
- if pad_value is None:
43
- shifted = F.pad(x, (0, 0, 0, 0, 1, 0))[:-1, :, :]
44
- else:
45
- shifted = torch.cat([pad_value, x], axis=0)[:-1, :, :]
46
- return shifted
47
-
48
-
49
- def length_form_embedding(emb):
50
- """Compute the length of each sequence in the batch
51
- Args:
52
- emb: [seq_len, batch, depth]
53
- Returns:
54
- an integer tensor of sequence lengths: [batch]
55
- """
56
- absed = torch.abs(emb)
57
- sum_last = torch.sum(absed, dim=2, keepdim=True)
58
- mask = sum_last != 0
59
- sum_except_batch = torch.sum(mask, dim=(0, 2), dtype=torch.long)
60
- return sum_except_batch
61
-
62
-
63
- def lognormal(y, mean, logstd, logsqrttwopi):
64
- y_mean = y - mean # NOTE y:[b*51*6, 1] mean: [b*51*6, 50]
65
- logstd_exp = logstd.exp() # NOTE [b*51*6, 50]
66
- y_mean_divide_exp = y_mean / logstd_exp
67
- return -0.5 * (y_mean_divide_exp) ** 2 - logstd - logsqrttwopi
68
-
69
- def sequence_mask(lengths, max_len=None):
70
- batch_size=lengths.numel()
71
- max_len=max_len or lengths.max()
72
- return (torch.arange(0, max_len, device=lengths.device)
73
- .type_as(lengths)
74
- .unsqueeze(0).expand(batch_size,max_len)
75
- .lt(lengths.unsqueeze(1)))
76
-
77
- def svg2img(path_svg, path_img, img_size):
78
- cairosvg.svg2png(url=path_svg, write_to=path_img, output_width=img_size, output_height=img_size)
79
- img_arr = trans2_white_bg(path_img)
80
- return img_arr
81
-
82
- def cal_img_l1_dist(path_img1, path_img2):
83
- img1 = np.array(Image.open(path_img1))
84
- img2 = np.array(Image.open(path_img2))
85
- dist = np.mean(np.abs(img1.astype(np.float32) - img2[:, :, 0].astype(np.float32)))  # cast first to avoid uint8 wraparound
86
- return dist
87
-
88
- def cal_iou(path_img1, path_img2):
89
-
90
- img1 = np.array(Image.open(path_img1))
91
- img2 = np.array(Image.open(path_img2))[:, :, 0]
92
- mask_img1 = img1 < (255 * 3 / 4)
93
- mask_img2 = img2 < (255 * 3 / 4)
94
- iou = np.sum(mask_img1 * mask_img2) / (np.sum(mask_img1 + mask_img2))
95
- l1_dist = np.mean(np.abs(mask_img1.astype(float) - mask_img2.astype(float)))
96
  return iou, l1_dist
 
1
+ import torch
2
+ import torch.nn.functional as F
3
+ import cairosvg
4
+ from data_utils.common_utils import trans2_white_bg
5
+ from PIL import Image
6
+ import numpy as np
7
+
8
+ def select_imgs(images_of_onefont, selected_cls, opts):
9
+ # given selected char classes, return selected imgs
10
+ # images_of_onefont: [bs, 52, opts.img_size, opts.img_size]
11
+ # selected_cls: [bs, nshot]
12
+ nums = selected_cls.size(1)
13
+ selected_cls_ = selected_cls.unsqueeze(2)
14
+ selected_cls_ = selected_cls_.unsqueeze(3)
15
+ selected_cls_ = selected_cls_.expand(images_of_onefont.size(0), nums, opts.img_size, opts.img_size)
16
+ selected_img = torch.gather(images_of_onefont, 1, selected_cls_)
17
+ return selected_img
18
+
19
+ def select_seqs(seqs_of_onefont, selected_cls, opts, seq_dim):
20
+
21
+ nums = selected_cls.size(1)
22
+ selected_cls_ = selected_cls.unsqueeze(2)
23
+ selected_cls_ = selected_cls_.unsqueeze(3)
24
+ selected_cls_ = selected_cls_.expand(seqs_of_onefont.size(0), nums, opts.max_seq_len, seq_dim)
25
+ selected_seqs = torch.gather(seqs_of_onefont, 1, selected_cls_)
26
+ return selected_seqs
27
+
28
+ def select_seqlens(seqlens_of_onefont, selected_cls, opts):
29
+
30
+ nums = selected_cls.size(1)
31
+ selected_cls_ = selected_cls.unsqueeze(2)
32
+ selected_cls_ = selected_cls_.expand(seqlens_of_onefont.size(0), nums, 1) # 64, nums, 1
33
+ selected_seqlens = torch.gather(seqlens_of_onefont, 1, selected_cls_)
34
+ return selected_seqlens
35
+
36
+ def trgcls_to_onehot(trg_cls, opts):
37
+ trg_char = F.one_hot(trg_cls, num_classes=opts.char_num).squeeze(dim=1)
38
+ return trg_char
39
+
40
+
41
+ def shift_right(x, pad_value=None):
42
+ if pad_value is None:
43
+ shifted = F.pad(x, (0, 0, 0, 0, 1, 0))[:-1, :, :]
44
+ else:
45
+ shifted = torch.cat([pad_value, x], axis=0)[:-1, :, :]
46
+ return shifted
47
+
48
+
49
+ def length_form_embedding(emb):
50
+ """Compute the length of each sequence in the batch
51
+ Args:
52
+ emb: [seq_len, batch, depth]
53
+ Returns:
54
+ an integer tensor of sequence lengths: [batch]
55
+ """
56
+ absed = torch.abs(emb)
57
+ sum_last = torch.sum(absed, dim=2, keepdim=True)
58
+ mask = sum_last != 0
59
+ sum_except_batch = torch.sum(mask, dim=(0, 2), dtype=torch.long)
60
+ return sum_except_batch
61
+
62
+
63
+ def lognormal(y, mean, logstd, logsqrttwopi):
64
+ y_mean = y - mean # NOTE y:[b*51*6, 1] mean: [b*51*6, 50]
65
+ logstd_exp = logstd.exp() # NOTE [b*51*6, 50]
66
+ y_mean_divide_exp = y_mean / logstd_exp
67
+ return -0.5 * (y_mean_divide_exp) ** 2 - logstd - logsqrttwopi
68
+
69
+ def sequence_mask(lengths, max_len=None):
70
+ batch_size=lengths.numel()
71
+ max_len=max_len or lengths.max()
72
+ return (torch.arange(0, max_len, device=lengths.device)
73
+ .type_as(lengths)
74
+ .unsqueeze(0).expand(batch_size,max_len)
75
+ .lt(lengths.unsqueeze(1)))
76
+
77
+ def svg2img(path_svg, path_img, img_size):
78
+ cairosvg.svg2png(url=path_svg, write_to=path_img, output_width=img_size, output_height=img_size)
79
+ img_arr = trans2_white_bg(path_img)
80
+ return img_arr
81
+
82
+ def cal_img_l1_dist(path_img1, path_img2):
83
+ img1 = np.array(Image.open(path_img1))
84
+ img2 = np.array(Image.open(path_img2))
85
+ dist = np.mean(np.abs(img1.astype(np.float32) - img2[:, :, 0].astype(np.float32)))  # cast first to avoid uint8 wraparound
86
+ return dist
87
+
88
+ def cal_iou(path_img1, path_img2):
89
+
90
+ img1 = np.array(Image.open(path_img1))
91
+ img2 = np.array(Image.open(path_img2))[:, :, 0]
92
+ mask_img1 = img1 < (255 * 3 / 4)
93
+ mask_img2 = img2 < (255 * 3 / 4)
94
+ iou = np.sum(mask_img1 * mask_img2) / (np.sum(mask_img1 + mask_img2))
95
+ l1_dist = np.mean(np.abs(mask_img1.astype(float) - mask_img2.astype(float)))
96
  return iou, l1_dist
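
A quick behavioral check for sequence_mask above (a sketch; the lengths are illustrative and it runs on CPU):

import torch

def sequence_mask(lengths, max_len=None):
    # True where position index < sequence length, False on padding
    batch_size = lengths.numel()
    max_len = max_len or lengths.max()
    return (torch.arange(0, max_len, device=lengths.device)
            .type_as(lengths)
            .unsqueeze(0).expand(batch_size, max_len)
            .lt(lengths.unsqueeze(1)))

print(sequence_mask(torch.tensor([1, 3, 2]), max_len=4))
# tensor([[ True, False, False, False],
#         [ True,  True,  True, False],
#         [ True,  True, False, False]])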
{models → ThaiVecFont/models}/vgg_perceptual_loss.py RENAMED
@@ -1,69 +1,69 @@
1
- import torch
2
- import torchvision
3
-
4
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
5
-
6
- class VGG19Feats(torch.nn.Module):
7
- def __init__(self, requires_grad=False):
8
- super(VGG19Feats, self).__init__()
9
- vgg = torchvision.models.vgg19(pretrained=True).to(device) #.cuda()
10
- # vgg.eval()
11
- vgg_pretrained_features = vgg.features.eval()
12
- self.requires_grad = requires_grad
13
- self.slice1 = torch.nn.Sequential()
14
- self.slice2 = torch.nn.Sequential()
15
- self.slice3 = torch.nn.Sequential()
16
- self.slice4 = torch.nn.Sequential()
17
- self.slice5 = torch.nn.Sequential()
18
- for x in range(3):
19
- self.slice1.add_module(str(x), vgg_pretrained_features[x])
20
- for x in range(3, 8):
21
- self.slice2.add_module(str(x), vgg_pretrained_features[x])
22
- for x in range(8, 13):
23
- self.slice3.add_module(str(x), vgg_pretrained_features[x])
24
- for x in range(13, 22):
25
- self.slice4.add_module(str(x), vgg_pretrained_features[x])
26
- for x in range(22, 31):
27
- self.slice5.add_module(str(x), vgg_pretrained_features[x])
28
- if not self.requires_grad:
29
- for param in self.parameters():
30
- param.requires_grad = False
31
-
32
- def forward(self, img):
33
- conv1_2 = self.slice1(img)
34
- conv2_2 = self.slice2(conv1_2)
35
- conv3_2 = self.slice3(conv2_2)
36
- conv4_2 = self.slice4(conv3_2)
37
- conv5_2 = self.slice5(conv4_2)
38
- out = [conv1_2, conv2_2, conv3_2, conv4_2, conv5_2]
39
- return out
40
-
41
-
42
- class VGGPerceptualLoss(torch.nn.Module):
43
- def __init__(self):
44
- super(VGGPerceptualLoss, self).__init__()
45
- self.vgg = VGG19Feats().to(device)
46
- self.criterion = torch.nn.functional.l1_loss
47
- self.register_buffer("mean", torch.tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1))
48
- self.register_buffer("std", torch.tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1))
49
- self.weights = [1.0/2.6, 1.0/4.8, 1.0/3.7, 1.0/5.6, 1.0*10/1.5]
50
-
51
- def forward(self, input_img, target_img):
52
-
53
- if input_img.shape[1] != 3:
54
- input_img = input_img.repeat(1, 3, 1, 1)
55
- target_img = target_img.repeat(1, 3, 1, 1)
56
- input_img = (input_img - self.mean) / self.std
57
- target_img = (target_img - self.mean) / self.std
58
-
59
- x_vgg, y_vgg = self.vgg(input_img), self.vgg(target_img)
60
-
61
- loss = {}
62
- loss['pt_c_loss'] = self.weights[0] * self.criterion(x_vgg[0], y_vgg[0])+\
63
- self.weights[1] * self.criterion(x_vgg[1], y_vgg[1])+\
64
- self.weights[2] * self.criterion(x_vgg[2], y_vgg[2])+\
65
- self.weights[3] * self.criterion(x_vgg[3], y_vgg[3])+\
66
- self.weights[4] * self.criterion(x_vgg[4], y_vgg[4])
67
- loss['pt_s_loss'] = 0.0
68
-
69
  return loss
 
1
+ import torch
2
+ import torchvision
3
+
4
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
5
+
6
+ class VGG19Feats(torch.nn.Module):
7
+ def __init__(self, requires_grad=False):
8
+ super(VGG19Feats, self).__init__()
9
+ vgg = torchvision.models.vgg19(pretrained=True).to(device) #.cuda()
10
+ # vgg.eval()
11
+ vgg_pretrained_features = vgg.features.eval()
12
+ self.requires_grad = requires_grad
13
+ self.slice1 = torch.nn.Sequential()
14
+ self.slice2 = torch.nn.Sequential()
15
+ self.slice3 = torch.nn.Sequential()
16
+ self.slice4 = torch.nn.Sequential()
17
+ self.slice5 = torch.nn.Sequential()
18
+ for x in range(3):
19
+ self.slice1.add_module(str(x), vgg_pretrained_features[x])
20
+ for x in range(3, 8):
21
+ self.slice2.add_module(str(x), vgg_pretrained_features[x])
22
+ for x in range(8, 13):
23
+ self.slice3.add_module(str(x), vgg_pretrained_features[x])
24
+ for x in range(13, 22):
25
+ self.slice4.add_module(str(x), vgg_pretrained_features[x])
26
+ for x in range(22, 31):
27
+ self.slice5.add_module(str(x), vgg_pretrained_features[x])
28
+ if not self.requires_grad:
29
+ for param in self.parameters():
30
+ param.requires_grad = False
31
+
32
+ def forward(self, img):
33
+ conv1_2 = self.slice1(img)
34
+ conv2_2 = self.slice2(conv1_2)
35
+ conv3_2 = self.slice3(conv2_2)
36
+ conv4_2 = self.slice4(conv3_2)
37
+ conv5_2 = self.slice5(conv4_2)
38
+ out = [conv1_2, conv2_2, conv3_2, conv4_2, conv5_2]
39
+ return out
40
+
41
+
42
+ class VGGPerceptualLoss(torch.nn.Module):
43
+ def __init__(self):
44
+ super(VGGPerceptualLoss, self).__init__()
45
+ self.vgg = VGG19Feats().to(device)
46
+ self.criterion = torch.nn.functional.l1_loss
47
+ self.register_buffer("mean", torch.tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1))
48
+ self.register_buffer("std", torch.tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1))
49
+ self.weights = [1.0/2.6, 1.0/4.8, 1.0/3.7, 1.0/5.6, 1.0*10/1.5]
50
+
51
+ def forward(self, input_img, target_img):
52
+
53
+ if input_img.shape[1] != 3:
54
+ input_img = input_img.repeat(1, 3, 1, 1)
55
+ target_img = target_img.repeat(1, 3, 1, 1)
56
+ input_img = (input_img - self.mean) / self.std
57
+ target_img = (target_img - self.mean) / self.std
58
+
59
+ x_vgg, y_vgg = self.vgg(input_img), self.vgg(target_img)
60
+
61
+ loss = {}
62
+ loss['pt_c_loss'] = self.weights[0] * self.criterion(x_vgg[0], y_vgg[0])+\
63
+ self.weights[1] * self.criterion(x_vgg[1], y_vgg[1])+\
64
+ self.weights[2] * self.criterion(x_vgg[2], y_vgg[2])+\
65
+ self.weights[3] * self.criterion(x_vgg[3], y_vgg[3])+\
66
+ self.weights[4] * self.criterion(x_vgg[4], y_vgg[4])
67
+ loss['pt_s_loss'] = 0.0
68
+
69
  return loss
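
A usage sketch for VGGPerceptualLoss, assuming single-channel glyph rasters in [0, 1] and that it is run from inside the package directory, as the scripts below are (the shapes are illustrative; the first call downloads torchvision's VGG-19 weights):

import torch
from models.vgg_perceptual_loss import VGGPerceptualLoss

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
criterion = VGGPerceptualLoss().to(device)

pred = torch.rand(4, 1, 64, 64, device=device)    # generated glyph images
target = torch.rand(4, 1, 64, 64, device=device)  # ground-truth glyph images

loss = criterion(pred, target)
print(loss['pt_c_loss'])  # weighted sum of L1 distances between five VGG-19 feature maps

Single-channel inputs are repeated to three channels and normalized with the ImageNet mean/std inside forward, so callers only need to supply rasters in [0, 1].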
options.py → ThaiVecFont/options.py RENAMED
@@ -1,67 +1,67 @@
1
- import argparse
2
-
3
- def get_parser_main_model():
4
- parser = argparse.ArgumentParser()
5
- # basic parameters training related
6
- parser.add_argument('--model_name', type=str, default='main_model', choices=['main_model', 'neural_raster'], help='current model_name')
7
- parser.add_argument("--language", type=str, default='tha', choices=['eng', 'chn', 'tha'])
8
- parser.add_argument('--bottleneck_bits', type=int, default=512, help='latent code number of bottleneck bits')
9
- parser.add_argument('--char_num', type=int, default=44, help='number of glyphs, original is 44 (Thai)')
10
- parser.add_argument('--seed', type=int, default=3712)
11
- parser.add_argument('--ref_nshot', type=int, default=8, help='reference number')
12
- parser.add_argument('--batch_size', type=int, default=64, help='batch size')
13
- parser.add_argument('--batch_size_val', type=int, default=8, help='batch size when doing validation')
14
- parser.add_argument('--img_size', type=int, default=64, help='image size')
15
- parser.add_argument('--max_seq_len', type=int, default=121, help='maximum length of sequence')
16
- parser.add_argument('--dim_seq', type=int, default=12, help='the dim of each stroke in a sequence, 4 + 8, 4 is cmd, and 8 is args')
17
- parser.add_argument('--dim_seq_short', type=int, default=9, help='the short dim of each stroke in a sequence, 1 + 8, 1 is cmd class num, and 8 is args')
18
- parser.add_argument('--hidden_size', type=int, default=512, help='hidden_size')
19
- parser.add_argument('--dim_seq_latent', type=int, default=512, help='sequence encoder latent dim')
20
- parser.add_argument('--ngf', type=int, default=16, help='the basic num of channel in image encoder and decoder')
21
- parser.add_argument('--n_aux_pts', type=int, default=6, help='the number of aux pts in bezier curves for additional supervision')
22
- # experiment related
23
-
24
- parser.add_argument('--random_index', type=str, default='00')
25
- parser.add_argument('--name_ckpt', type=str, default='600_192921.ckpt')
26
- parser.add_argument('--model_path', type=str, default='.')
27
- parser.add_argument('--n_epochs', type=int, default=800, help='number of epochs')
28
- parser.add_argument('--n_samples', type=int, default=20, help='the number of samples for each glyph when testing')
29
- parser.add_argument('--lr', type=float, default=0.0002, help='learning rate')
30
- parser.add_argument('--ref_char_ids', type=str, default='0,1,26,27', help='default is A, B, a, b')
31
-
32
- parser.add_argument('--mode', type=str, default='test', choices=['train', 'val', 'test'])
33
- parser.add_argument('--multi_gpu', type=bool, default=False)
34
- parser.add_argument('--name_exp', type=str, default='dvf')
35
-
36
- # continue training
37
- parser.add_argument('--continue_training', type=bool, default=False, help='whether continue training from old checkpoint')
38
- parser.add_argument('--continue_ckpt', type=str, default='.', help='checkpoint model for continue training')
39
- parser.add_argument('--init_epoch', type=int, default=0, help='init epoch')
40
-
41
- # Manually Add
42
- parser.add_argument('--exp_path', type=str, default='.')
43
- parser.add_argument('--dir_res', type=str, default=None)
44
- parser.add_argument('--data_root', type=str, default='./data/vecfont_dataset/')
45
- parser.add_argument('--freq_ckpt', type=int, default=50, help='save checkpoint frequency of epoch')
46
- parser.add_argument('--threshold_ckpt', type=int, default=0, help='save checkpoint only when more than threshold epoch')
47
-
48
- parser.add_argument('--freq_sample', type=int, default=500, help='sample train output of steps')
49
- parser.add_argument('--freq_log', type=int, default=50, help='freq of showing logs')
50
- parser.add_argument('--freq_val', type=int, default=500, help='sample validate output of steps')
51
- parser.add_argument('--beta1', type=float, default=0.9, help='beta1 of Adam optimizer')
52
- parser.add_argument('--beta2', type=float, default=0.999, help='beta2 of Adam optimizer')
53
- parser.add_argument('--eps', type=float, default=1e-8, help='Adam epsilon')
54
- parser.add_argument('--weight_decay', type=float, default=0.0, help='weight decay')
55
- parser.add_argument('--wandb', type=bool, default=True, help='whether to use wandb to visualize loss')
56
- parser.add_argument('--wandb_project_name', type=str, default="DeepVecFontV2", help='wandb project name')
57
-
58
- # loss weight
59
- parser.add_argument('--kl_beta', type=float, default=0.01, help='latent code kl loss beta')
60
- parser.add_argument('--loss_w_pt_c', type=float, default=0.001 * 10, help='the weight of perceptual content loss')
61
- parser.add_argument('--loss_w_l1', type=float, default=1.0 * 10, help='the weight of image reconstruction l1 loss')
62
- parser.add_argument('--loss_w_cmd', type=float, default=1.0, help='the weight of cmd loss')
63
- parser.add_argument('--loss_w_args', type=float, default=1.0, help='the weight of args loss')
64
- parser.add_argument('--loss_w_aux', type=float, default=0.01, help='the weight of pts aux loss')
65
- parser.add_argument('--loss_w_smt', type=float, default=10., help='the weight of smooth loss')
66
-
67
- return parser
 
1
+ import argparse
2
+
3
+ def get_parser_main_model():
4
+ parser = argparse.ArgumentParser()
5
+ # basic parameters training related
6
+ parser.add_argument('--model_name', type=str, default='main_model', choices=['main_model', 'neural_raster'], help='current model_name')
7
+ parser.add_argument("--language", type=str, default='tha', choices=['eng', 'chn', 'tha'])
8
+ parser.add_argument('--bottleneck_bits', type=int, default=512, help='latent code number of bottleneck bits')
9
+ parser.add_argument('--char_num', type=int, default=44, help='number of glyphs, original is 44 (Thai)')
10
+ parser.add_argument('--seed', type=int, default=3712)
11
+ parser.add_argument('--ref_nshot', type=int, default=8, help='reference number')
12
+ parser.add_argument('--batch_size', type=int, default=64, help='batch size')
13
+ parser.add_argument('--batch_size_val', type=int, default=8, help='batch size when doing validation')
14
+ parser.add_argument('--img_size', type=int, default=64, help='image size')
15
+ parser.add_argument('--max_seq_len', type=int, default=121, help='maximum length of sequence')
16
+ parser.add_argument('--dim_seq', type=int, default=12, help='the dim of each stroke in a sequence, 4 + 8, 4 is cmd, and 8 is args')
17
+ parser.add_argument('--dim_seq_short', type=int, default=9, help='the short dim of each stroke in a sequence, 1 + 8, 1 is cmd class num, and 8 is args')
18
+ parser.add_argument('--hidden_size', type=int, default=512, help='hidden_size')
19
+ parser.add_argument('--dim_seq_latent', type=int, default=512, help='sequence encoder latent dim')
20
+ parser.add_argument('--ngf', type=int, default=16, help='the basic num of channel in image encoder and decoder')
21
+ parser.add_argument('--n_aux_pts', type=int, default=6, help='the number of aux pts in bezier curves for additional supervision')
22
+ # experiment related
23
+
24
+ parser.add_argument('--random_index', type=str, default='00')
25
+ parser.add_argument('--name_ckpt', type=str, default='600_192921.ckpt')
26
+ parser.add_argument('--model_path', type=str, default='.')
27
+ parser.add_argument('--n_epochs', type=int, default=800, help='number of epochs')
28
+ parser.add_argument('--n_samples', type=int, default=20, help='the number of samples for each glyph when testing')
29
+ parser.add_argument('--lr', type=float, default=0.0002, help='learning rate')
30
+ parser.add_argument('--ref_char_ids', type=str, default='0,1,26,27', help='default is A, B, a, b')
31
+
32
+ parser.add_argument('--mode', type=str, default='test', choices=['train', 'val', 'test'])
33
+ parser.add_argument('--multi_gpu', type=bool, default=False)
34
+ parser.add_argument('--name_exp', type=str, default='dvf')
35
+
36
+ # continue training
37
+ parser.add_argument('--continue_training', type=bool, default=False, help='whether continue training from old checkpoint')
38
+ parser.add_argument('--continue_ckpt', type=str, default='.', help='checkpoint model for continue training')
39
+ parser.add_argument('--init_epoch', type=int, default=0, help='init epoch')
40
+
41
+ # Manually Add
42
+ parser.add_argument('--exp_path', type=str, default='.')
43
+ parser.add_argument('--dir_res', type=str, default=None)
44
+ parser.add_argument('--data_root', type=str, default='./data/vecfont_dataset/')
45
+ parser.add_argument('--freq_ckpt', type=int, default=50, help='save checkpoint frequency of epoch')
46
+ parser.add_argument('--threshold_ckpt', type=int, default=0, help='save checkpoint only when more than threshold epoch')
47
+
48
+ parser.add_argument('--freq_sample', type=int, default=500, help='sample train output of steps')
49
+ parser.add_argument('--freq_log', type=int, default=50, help='freq of showing logs')
50
+ parser.add_argument('--freq_val', type=int, default=500, help='sample validate output of steps')
51
+ parser.add_argument('--beta1', type=float, default=0.9, help='beta1 of Adam optimizer')
52
+ parser.add_argument('--beta2', type=float, default=0.999, help='beta2 of Adam optimizer')
53
+ parser.add_argument('--eps', type=float, default=1e-8, help='Adam epsilon')
54
+ parser.add_argument('--weight_decay', type=float, default=0.0, help='weight decay')
55
+ parser.add_argument('--wandb', type=bool, default=True, help='whether to use wandb to visualize loss')
56
+ parser.add_argument('--wandb_project_name', type=str, default="DeepVecFontV2", help='wandb project name')
57
+
58
+ # loss weight
59
+ parser.add_argument('--kl_beta', type=float, default=0.01, help='latent code kl loss beta')
60
+ parser.add_argument('--loss_w_pt_c', type=float, default=0.001 * 10, help='the weight of perceptual content loss')
61
+ parser.add_argument('--loss_w_l1', type=float, default=1.0 * 10, help='the weight of image reconstruction l1 loss')
62
+ parser.add_argument('--loss_w_cmd', type=float, default=1.0, help='the weight of cmd loss')
63
+ parser.add_argument('--loss_w_args', type=float, default=1.0, help='the weight of args loss')
64
+ parser.add_argument('--loss_w_aux', type=float, default=0.01, help='the weight of pts aux loss')
65
+ parser.add_argument('--loss_w_smt', type=float, default=10., help='the weight of smooth loss')
66
+
67
+ return parser
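
A sketch of how this parser is consumed, mirroring the main() functions in the scripts below (the command-line overrides are illustrative):

from options import get_parser_main_model

opts = get_parser_main_model().parse_args(
    ['--mode', 'train', '--batch_size', '32', '--n_epochs', '100']
)
print(opts.language, opts.char_num)                     # 'tha' 44 (Thai defaults)
opts.name_exp = opts.name_exp + '_' + opts.model_name   # 'dvf_main_model', as train.py/test.py do

Note that --multi_gpu, --continue_training and --wandb use type=bool, so any non-empty string passed on the command line (including 'False') parses as True; only omitting the flag keeps the default.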
test.py → ThaiVecFont/test.py RENAMED
@@ -1,60 +1,60 @@
1
- import os
2
- import numpy as np
3
- import torch
4
- import torch.nn.functional as F
5
- from torchvision.utils import save_image
6
- from dataloader import get_loader
7
- from models.model_main import ModelMain
8
- from models.transformers import denumericalize
9
- from options import get_parser_main_model
10
- from data_utils.svg_utils import render
11
- from models.util_funcs import svg2img, cal_iou
12
-
13
- # Testing (reports validation losses only)
14
-
15
- def test_main_model(opts):
16
- test_loader = get_loader(opts.data_root, opts.img_size, opts.language, opts.char_num, opts.max_seq_len, opts.dim_seq, opts.batch_size, 'test')
17
-
18
- model_main = ModelMain(opts)
19
- path_ckpt = os.path.join(f"{opts.model_path}")
20
- model_main.load_state_dict(torch.load(path_ckpt)['model'])
21
- model_main.cuda()
22
- model_main.eval() # Testing mode
23
-
24
- with torch.no_grad():
25
- loss_val = {'img':{'l1':0.0, 'vggpt':0.0}, 'svg':{'total':0.0, 'cmd':0.0, 'args':0.0, 'aux':0.0},
26
- 'svg_para':{'total':0.0, 'cmd':0.0, 'args':0.0, 'aux':0.0}}
27
-
28
- for val_idx, val_data in enumerate(test_loader):
29
- for key in val_data: val_data[key] = val_data[key].cuda()
30
- ret_dict_val, loss_dict_val = model_main(val_data, mode='val')
31
- for loss_cat in ['img', 'svg']:
32
- for key, _ in loss_val[loss_cat].items():
33
- loss_val[loss_cat][key] += loss_dict_val[loss_cat][key]
34
-
35
- for loss_cat in ['img', 'svg']:
36
- for key, _ in loss_val[loss_cat].items():
37
- loss_val[loss_cat][key] /= len(test_loader)
38
-
39
- val_msg = (
40
- f"Val loss img l1: {loss_val['img']['l1']: .6f}, "
41
- f"Val loss img pt: {loss_val['img']['vggpt']: .6f}, "
42
- f"Val loss total: {loss_val['svg']['total']: .6f}, "
43
- f"Val loss cmd: {loss_val['svg']['cmd']: .6f}, "
44
- f"Val loss args: {loss_val['svg']['args']: .6f}, "
45
- )
46
-
47
- print(val_msg)
48
- print(f"l1: {loss_val['img']['l1']: .6f}, pt: {loss_val['img']['vggpt']: .6f}")
49
-
50
- def main():
51
-
52
- opts = get_parser_main_model().parse_args()
53
- opts.name_exp = opts.name_exp + '_' + opts.model_name
54
- experiment_dir = os.path.join(f"{opts.exp_path}","experiments", opts.name_exp)
55
- print(f"Testing on experiment {opts.name_exp}...")
56
- # Dump options
57
- test_main_model(opts)
58
-
59
- if __name__ == "__main__":
60
  main()
 
1
+ import os
2
+ import numpy as np
3
+ import torch
4
+ import torch.nn.functional as F
5
+ from torchvision.utils import save_image
6
+ from dataloader import get_loader
7
+ from models.model_main import ModelMain
8
+ from models.transformers import denumericalize
9
+ from options import get_parser_main_model
10
+ from data_utils.svg_utils import render
11
+ from models.util_funcs import svg2img, cal_iou
12
+
13
+ # Testing (reports validation losses only)
14
+
15
+ def test_main_model(opts):
16
+ test_loader = get_loader(opts.data_root, opts.img_size, opts.language, opts.char_num, opts.max_seq_len, opts.dim_seq, opts.batch_size, 'test')
17
+
18
+ model_main = ModelMain(opts)
19
+ path_ckpt = os.path.join(f"{opts.model_path}")
20
+ model_main.load_state_dict(torch.load(path_ckpt)['model'])
21
+ model_main.cuda()
22
+ model_main.eval() # Testing mode
23
+
24
+ with torch.no_grad():
25
+ loss_val = {'img':{'l1':0.0, 'vggpt':0.0}, 'svg':{'total':0.0, 'cmd':0.0, 'args':0.0, 'aux':0.0},
26
+ 'svg_para':{'total':0.0, 'cmd':0.0, 'args':0.0, 'aux':0.0}}
27
+
28
+ for val_idx, val_data in enumerate(test_loader):
29
+ for key in val_data: val_data[key] = val_data[key].cuda()
30
+ ret_dict_val, loss_dict_val = model_main(val_data, mode='val')
31
+ for loss_cat in ['img', 'svg']:
32
+ for key, _ in loss_val[loss_cat].items():
33
+ loss_val[loss_cat][key] += loss_dict_val[loss_cat][key]
34
+
35
+ for loss_cat in ['img', 'svg']:
36
+ for key, _ in loss_val[loss_cat].items():
37
+ loss_val[loss_cat][key] /= len(test_loader)
38
+
39
+ val_msg = (
40
+ f"Val loss img l1: {loss_val['img']['l1']: .6f}, "
41
+ f"Val loss img pt: {loss_val['img']['vggpt']: .6f}, "
42
+ f"Val loss total: {loss_val['svg']['total']: .6f}, "
43
+ f"Val loss cmd: {loss_val['svg']['cmd']: .6f}, "
44
+ f"Val loss args: {loss_val['svg']['args']: .6f}, "
45
+ )
46
+
47
+ print(val_msg)
48
+ print(f"l1: {loss_val['img']['l1']: .6f}, pt: {loss_val['img']['vggpt']: .6f}")
49
+
50
+ def main():
51
+
52
+ opts = get_parser_main_model().parse_args()
53
+ opts.name_exp = opts.name_exp + '_' + opts.model_name
54
+ experiment_dir = os.path.join(f"{opts.exp_path}","experiments", opts.name_exp)
55
+ print(f"Testing on experiment {opts.name_exp}...")
56
+ # Dump options
57
+ test_main_model(opts)
58
+
59
+ if __name__ == "__main__":
60
  main()
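
test.py expects the checkpoint layout written by train.py below, i.e. a dict with 'model', 'opt', 'n_epoch' and 'n_iter' entries; a loading sketch that also works without a GPU (the path reuses the default --name_ckpt value and is purely illustrative):

import torch

ckpt = torch.load('checkpoints/600_192921.ckpt', map_location='cpu')
print(sorted(ckpt.keys()))                    # ['model', 'n_epoch', 'n_iter', 'opt']
# model_main.load_state_dict(ckpt['model'])   # as test_main_model does before calling eval()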
test_few_shot.py → ThaiVecFont/test_few_shot.py RENAMED
@@ -1,164 +1,164 @@
1
- import os
2
- import numpy as np
3
- import torch
4
- import torch.nn.functional as F
5
- from torchvision.utils import save_image
6
- from dataloader import get_loader
7
- from models.model_main import ModelMain
8
- from models.transformers import denumericalize
9
- from options import get_parser_main_model
10
- from data_utils.svg_utils import render
11
- from models.util_funcs import svg2img, cal_iou
12
- from tqdm import tqdm
13
- from PIL import Image
14
-
15
- def test_main_model(opts):
16
- if opts.streamlit:
17
- import streamlit as st
18
-
19
- if opts.dir_res:
20
- os.mkdir(os.path.join(opts.dir_res, "results"))
21
- dir_res = os.path.join(opts.dir_res, "results")
22
- else:
23
- dir_res = os.path.join(f"{opts.exp_path}", "experiments/", opts.name_exp, "results")
24
-
25
- test_loader = get_loader(opts.data_root, opts.img_size, opts.language, opts.char_num, opts.max_seq_len, opts.dim_seq, opts.batch_size, 'test')
26
- if torch.cuda.is_available():
27
- device = torch.device("cuda")
28
- else:
29
- device = torch.device("cpu")
30
- if opts.streamlit:
31
- st.write("Loading Model Weight...")
32
- model_main = ModelMain(opts)
33
- path_ckpt = os.path.join(f"{opts.model_path}")
34
- model_main.load_state_dict(torch.load(path_ckpt)['model'])
35
- model_main.to(device)
36
- model_main.eval()
37
- with torch.no_grad():
38
-
39
- for test_idx, test_data in enumerate(test_loader):
40
- for key in test_data: test_data[key] = test_data[key].to(device)
41
-
42
- print("testing font %04d ..."%test_idx)
43
-
44
- dir_save = os.path.join(dir_res, "%04d"%test_idx)
45
- if not os.path.exists(dir_save):
46
- os.mkdir(dir_save)
47
- os.mkdir(os.path.join(dir_save, "imgs"))
48
- os.mkdir(os.path.join(dir_save, "svgs_single"))
49
- os.mkdir(os.path.join(dir_save, "svgs_merge"))
50
- svg_merge_dir = os.path.join(dir_save, "svgs_merge")
51
-
52
- iou_max = np.zeros(opts.char_num)
53
- idx_best_sample = np.zeros(opts.char_num)
54
-
55
- # syn_svg_merge_f = open(os.path.join(svg_merge_dir, f"{opts.name_ckpt}_syn_merge_{test_idx}_rand_{sample_idx}.html"), 'w')
56
- syn_svg_merge_f = open(os.path.join(svg_merge_dir, f"{opts.name_ckpt}_syn_merge_{test_idx}.html"), 'w')
57
-
58
- for sample_idx in tqdm(range(opts.n_samples)):
59
-
60
- ret_dict_test, loss_dict_test = model_main(test_data, mode='test')
61
-
62
- svg_sampled = ret_dict_test['svg']['sampled_1']
63
- sampled_svg_2 = ret_dict_test['svg']['sampled_2']
64
-
65
- img_trg = ret_dict_test['img']['trg']
66
- img_output = ret_dict_test['img']['out']
67
- trg_seq_gt = ret_dict_test['svg']['trg']
68
-
69
- img_sample_merge = torch.cat((img_trg.data, img_output.data), -2)
70
- save_file_merge = os.path.join(dir_save, "imgs", f"merge_{opts.img_size}.png")
71
- save_image(img_sample_merge, save_file_merge, nrow=8, normalize=True)
72
- if opts.streamlit:
73
- st.progress((sample_idx+1)/opts.n_samples, f"Generating Font Sample {sample_idx+1} Please wait...")
74
- im = Image.open(save_file_merge)
75
- st.image(im, caption='img_sample_merge')
76
-
77
- for char_idx in range(opts.char_num):
78
- img_gt = (1.0 - img_trg[char_idx,...]).data
79
- save_file_gt = os.path.join(dir_save,"imgs", f"{char_idx:02d}_gt.png")
80
- save_image(img_gt, save_file_gt, normalize=True)
81
-
82
- img_sample = (1.0 - img_output[char_idx,...]).data
83
- save_file = os.path.join(dir_save,"imgs", f"{char_idx:02d}_{opts.img_size}.png")
84
- save_image(img_sample, save_file, normalize=True)
85
-
86
- # write results w/o parallel refinement
87
- svg_dec_out = svg_sampled.clone().detach()
88
- for i, one_seq in enumerate(svg_dec_out):
89
- syn_svg_outfile = os.path.join(os.path.join(dir_save, "svgs_single"), f"syn_{i:02d}_{sample_idx}_wo_refine.svg")
90
-
91
- syn_svg_f_ = open(syn_svg_outfile, 'w')
92
- try:
93
- svg = render(one_seq.cpu().numpy())
94
- syn_svg_f_.write(svg)
95
- # syn_svg_merge_f.write(svg)
96
- if i > 0 and i % 13 == 12:
97
- syn_svg_f_.write('<br>')
98
- # syn_svg_merge_f.write('<br>')
99
-
100
- except Exception:
101
- continue
102
- syn_svg_f_.close()
103
-
104
- # write results w/ parallel refinement
105
- svg_dec_out = sampled_svg_2.clone().detach()
106
- for i, one_seq in enumerate(svg_dec_out):
107
- syn_svg_outfile = os.path.join(os.path.join(dir_save, "svgs_single"), f"syn_{i:02d}_{sample_idx}_refined.svg")
108
-
109
- syn_svg_f = open(syn_svg_outfile, 'w')
110
- try:
111
- svg = render(one_seq.cpu().numpy())
112
- syn_svg_f.write(svg)
113
- #syn_svg_merge_f.write(svg)
114
-
115
- #if i > 0 and i % 13 == 12:
116
- # syn_svg_merge_f.write('<br>')
117
- except Exception:
118
- continue
119
- syn_svg_f.close()
120
- syn_img_outfile = syn_svg_outfile.replace('.svg', '.png')
121
- svg2img(syn_svg_outfile, syn_img_outfile, img_size=opts.img_size)
122
- iou_tmp, l1_tmp = cal_iou(syn_img_outfile, os.path.join(dir_save, "imgs", f"{i:02d}_{opts.img_size}.png"))
123
- iou_tmp = iou_tmp
124
- if iou_tmp > iou_max[i]:
125
- iou_max[i] = iou_tmp
126
- idx_best_sample[i] = sample_idx
127
-
128
- for i in range(opts.char_num):
129
- # print(idx_best_sample[i])
130
- syn_svg_outfile_best = os.path.join(os.path.join(dir_save, "svgs_single"), f"syn_{i:02d}_{int(idx_best_sample[i])}_refined.svg")
131
- syn_svg_merge_f.write(open(syn_svg_outfile_best, 'r').read())
132
- if i > 0 and i % 13 == 12:
133
- syn_svg_merge_f.write('<br>')
134
-
135
- svg_target = trg_seq_gt.clone().detach()
136
- tgt_commands_onehot = F.one_hot(svg_target[:, :, :1].long(), 4).squeeze()
137
- tgt_args_denum = denumericalize(svg_target[:, :, 1:])
138
- svg_target = torch.cat([tgt_commands_onehot, tgt_args_denum], dim=-1)
139
-
140
- for i, one_gt_seq in enumerate(svg_target):
141
- # gt_svg_outfile = os.path.join(os.path.join(dir_save, "svgs_single"), f"gt_{i:02d}.svg")
142
- # gt_svg_f = open(gt_svg_outfile, 'w')
143
- gt_svg = render(one_gt_seq.cpu().numpy())
144
- # gt_svg_f.write(gt_svg)
145
- syn_svg_merge_f.write(gt_svg)
146
- # gt_svg_f.close()
147
- if i > 0 and i % 13 == 12:
148
- syn_svg_merge_f.write('<br>')
149
-
150
- syn_svg_merge_f.close()
151
-
152
- return im if opts.streamlit else None
153
-
154
- def main():
155
-
156
- opts = get_parser_main_model().parse_args()
157
- opts.name_exp = opts.name_exp + '_' + opts.model_name
158
- experiment_dir = os.path.join(f"{opts.exp_path}","experiments", opts.name_exp)
159
- print(f"Testing on experiment {opts.name_exp}...")
160
- # Dump options
161
- test_main_model(opts)
162
-
163
- if __name__ == "__main__":
164
  main()
 
1
+ import os
2
+ import numpy as np
3
+ import torch
4
+ import torch.nn.functional as F
5
+ from torchvision.utils import save_image
6
+ from dataloader import get_loader
7
+ from models.model_main import ModelMain
8
+ from models.transformers import denumericalize
9
+ from options import get_parser_main_model
10
+ from data_utils.svg_utils import render
11
+ from models.util_funcs import svg2img, cal_iou
12
+ from tqdm import tqdm
13
+ from PIL import Image
14
+
15
+ def test_main_model(opts):
16
+ if opts.streamlit:
17
+ import streamlit as st
18
+
19
+ if opts.dir_res:
20
+ os.mkdir(os.path.join(opts.dir_res, "results"))
21
+ dir_res = os.path.join(opts.dir_res, "results")
22
+ else:
23
+ dir_res = os.path.join(f"{opts.exp_path}", "experiments/", opts.name_exp, "results")
24
+
25
+ test_loader = get_loader(opts.data_root, opts.img_size, opts.language, opts.char_num, opts.max_seq_len, opts.dim_seq, opts.batch_size, 'test')
26
+ if torch.cuda.is_available():
27
+ device = torch.device("cuda")
28
+ else:
29
+ device = torch.device("cpu")
30
+ if opts.streamlit:
31
+ st.write("Loading Model Weight...")
32
+ model_main = ModelMain(opts)
33
+ path_ckpt = os.path.join(f"{opts.model_path}")
34
+ model_main.load_state_dict(torch.load(path_ckpt)['model'])
35
+ model_main.to(device)
36
+ model_main.eval()
37
+ with torch.no_grad():
38
+
39
+ for test_idx, test_data in enumerate(test_loader):
40
+ for key in test_data: test_data[key] = test_data[key].to(device)
41
+
42
+ print("testing font %04d ..."%test_idx)
43
+
44
+ dir_save = os.path.join(dir_res, "%04d"%test_idx)
45
+ if not os.path.exists(dir_save):
46
+ os.mkdir(dir_save)
47
+ os.mkdir(os.path.join(dir_save, "imgs"))
48
+ os.mkdir(os.path.join(dir_save, "svgs_single"))
49
+ os.mkdir(os.path.join(dir_save, "svgs_merge"))
50
+ svg_merge_dir = os.path.join(dir_save, "svgs_merge")
51
+
52
+ iou_max = np.zeros(opts.char_num)
53
+ idx_best_sample = np.zeros(opts.char_num)
54
+
55
+ # syn_svg_merge_f = open(os.path.join(svg_merge_dir, f"{opts.name_ckpt}_syn_merge_{test_idx}_rand_{sample_idx}.html"), 'w')
56
+ syn_svg_merge_f = open(os.path.join(svg_merge_dir, f"{opts.name_ckpt}_syn_merge_{test_idx}.html"), 'w')
57
+
58
+ for sample_idx in tqdm(range(opts.n_samples)):
59
+
60
+ ret_dict_test, loss_dict_test = model_main(test_data, mode='test')
61
+
62
+ svg_sampled = ret_dict_test['svg']['sampled_1']
63
+ sampled_svg_2 = ret_dict_test['svg']['sampled_2']
64
+
65
+ img_trg = ret_dict_test['img']['trg']
66
+ img_output = ret_dict_test['img']['out']
67
+ trg_seq_gt = ret_dict_test['svg']['trg']
68
+
69
+ img_sample_merge = torch.cat((img_trg.data, img_output.data), -2)
70
+ save_file_merge = os.path.join(dir_save, "imgs", f"merge_{opts.img_size}.png")
71
+ save_image(img_sample_merge, save_file_merge, nrow=8, normalize=True)
72
+ if opts.streamlit:
73
+ st.progress((sample_idx+1)/opts.n_samples, f"Generating Font Sample {sample_idx+1} Please wait...")
74
+ im = Image.open(save_file_merge)
75
+ st.image(im, caption='img_sample_merge')
76
+
77
+ for char_idx in range(opts.char_num):
78
+ img_gt = (1.0 - img_trg[char_idx,...]).data
79
+ save_file_gt = os.path.join(dir_save,"imgs", f"{char_idx:02d}_gt.png")
80
+ save_image(img_gt, save_file_gt, normalize=True)
81
+
82
+ img_sample = (1.0 - img_output[char_idx,...]).data
83
+ save_file = os.path.join(dir_save,"imgs", f"{char_idx:02d}_{opts.img_size}.png")
84
+ save_image(img_sample, save_file, normalize=True)
85
+
86
+ # write results w/o parallel refinement
87
+ svg_dec_out = svg_sampled.clone().detach()
88
+ for i, one_seq in enumerate(svg_dec_out):
89
+ syn_svg_outfile = os.path.join(os.path.join(dir_save, "svgs_single"), f"syn_{i:02d}_{sample_idx}_wo_refine.svg")
90
+
91
+ syn_svg_f_ = open(syn_svg_outfile, 'w')
92
+ try:
93
+ svg = render(one_seq.cpu().numpy())
94
+ syn_svg_f_.write(svg)
95
+ # syn_svg_merge_f.write(svg)
96
+ if i > 0 and i % 13 == 12:
97
+ syn_svg_f_.write('<br>')
98
+ # syn_svg_merge_f.write('<br>')
99
+
100
+ except Exception:
101
+ continue
102
+ syn_svg_f_.close()
103
+
104
+ # write results w/ parallel refinement
105
+ svg_dec_out = sampled_svg_2.clone().detach()
106
+ for i, one_seq in enumerate(svg_dec_out):
107
+ syn_svg_outfile = os.path.join(os.path.join(dir_save, "svgs_single"), f"syn_{i:02d}_{sample_idx}_refined.svg")
108
+
109
+ syn_svg_f = open(syn_svg_outfile, 'w')
110
+ try:
111
+ svg = render(one_seq.cpu().numpy())
112
+ syn_svg_f.write(svg)
113
+ #syn_svg_merge_f.write(svg)
114
+
115
+ #if i > 0 and i % 13 == 12:
116
+ # syn_svg_merge_f.write('<br>')
117
+ except Exception:
118
+ continue
119
+ syn_svg_f.close()
120
+ syn_img_outfile = syn_svg_outfile.replace('.svg', '.png')
121
+ svg2img(syn_svg_outfile, syn_img_outfile, img_size=opts.img_size)
122
+ iou_tmp, l1_tmp = cal_iou(syn_img_outfile, os.path.join(dir_save, "imgs", f"{i:02d}_{opts.img_size}.png"))
123
+ iou_tmp = iou_tmp
124
+ if iou_tmp > iou_max[i]:
125
+ iou_max[i] = iou_tmp
126
+ idx_best_sample[i] = sample_idx
127
+
128
+ for i in range(opts.char_num):
129
+ # print(idx_best_sample[i])
130
+ syn_svg_outfile_best = os.path.join(os.path.join(dir_save, "svgs_single"), f"syn_{i:02d}_{int(idx_best_sample[i])}_refined.svg")
131
+ syn_svg_merge_f.write(open(syn_svg_outfile_best, 'r').read())
132
+ if i > 0 and i % 13 == 12:
133
+ syn_svg_merge_f.write('<br>')
134
+
135
+ svg_target = trg_seq_gt.clone().detach()
136
+ tgt_commands_onehot = F.one_hot(svg_target[:, :, :1].long(), 4).squeeze()
137
+ tgt_args_denum = denumericalize(svg_target[:, :, 1:])
138
+ svg_target = torch.cat([tgt_commands_onehot, tgt_args_denum], dim=-1)
139
+
140
+ for i, one_gt_seq in enumerate(svg_target):
141
+ # gt_svg_outfile = os.path.join(os.path.join(dir_save, "svgs_single"), f"gt_{i:02d}.svg")
142
+ # gt_svg_f = open(gt_svg_outfile, 'w')
143
+ gt_svg = render(one_gt_seq.cpu().numpy())
144
+ # gt_svg_f.write(gt_svg)
145
+ syn_svg_merge_f.write(gt_svg)
146
+ # gt_svg_f.close()
147
+ if i > 0 and i % 13 == 12:
148
+ syn_svg_merge_f.write('<br>')
149
+
150
+ syn_svg_merge_f.close()
151
+
152
+ return im if opts.streamlit else None
153
+
154
+ def main():
155
+
156
+ opts = get_parser_main_model().parse_args()
157
+ opts.name_exp = opts.name_exp + '_' + opts.model_name
158
+ experiment_dir = os.path.join(f"{opts.exp_path}","experiments", opts.name_exp)
159
+ print(f"Testing on experiment {opts.name_exp}...")
160
+ # Dump options
161
+ test_main_model(opts)
162
+
163
+ if __name__ == "__main__":
164
  main()
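
The few-shot test above draws n_samples candidates per font and keeps, for each glyph, the sample whose rendered raster has the highest IoU against the target; a condensed sketch of that selection loop (random values stand in for cal_iou):

import numpy as np

n_chars, n_samples = 44, 20                 # opts.char_num and opts.n_samples defaults
iou_max = np.zeros(n_chars)
idx_best_sample = np.zeros(n_chars, dtype=int)

for sample_idx in range(n_samples):
    for i in range(n_chars):
        iou_tmp = float(np.random.rand())   # stand-in for cal_iou(syn_img, gt_img)[0]
        if iou_tmp > iou_max[i]:
            iou_max[i] = iou_tmp
            idx_best_sample[i] = sample_idx

# idx_best_sample[i] now names the sample whose refined SVG is merged into the HTML sheet.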
train.py → ThaiVecFont/train.py RENAMED
@@ -1,216 +1,216 @@
1
- import os
2
- import random
3
- import numpy as np
4
- import shutil
5
- import torch
6
- import torch.nn as nn
7
- import torch.nn.functional as F
8
- from torch.optim import Adam, AdamW
9
- from torchvision.utils import save_image
10
- import wandb
11
- from dataloader import get_loader
12
- from models import util_funcs
13
- from models.model_main import ModelMain
14
- from options import get_parser_main_model
15
- from data_utils.svg_utils import render
16
- from time import time
17
-
18
- def setup_seed(seed):
19
- torch.manual_seed(seed)
20
- torch.cuda.manual_seed_all(seed)
21
- np.random.seed(seed)
22
- random.seed(seed)
23
- torch.backends.cudnn.deterministic = True
24
-
25
- def train_main_model(opts):
26
- setup_seed(opts.seed)
27
- dir_exp = os.path.join(f"{opts.exp_path}", "experiments", opts.name_exp)
28
- dir_sample = os.path.join(dir_exp, "samples")
29
- dir_ckpt = os.path.join(dir_exp, "checkpoints")
30
- dir_log = os.path.join(dir_exp, "logs")
31
- logfile_train = open(os.path.join(dir_log, "train_loss_log.txt"), 'w')
32
- logfile_val = open(os.path.join(dir_log, "val_loss_log.txt"), 'w')
33
-
34
- train_loader = get_loader(opts.data_root, opts.img_size, opts.language, opts.char_num, opts.max_seq_len, opts.dim_seq, opts.batch_size, opts.mode)
35
- val_loader = get_loader(opts.data_root, opts.img_size, opts.language, opts.char_num, opts.max_seq_len, opts.dim_seq, opts.batch_size_val, 'val')
36
-
37
- run = wandb.init(project=opts.wandb_project_name, config=opts) # initialize wandb project
38
- text_table = wandb.Table(columns=["epoch", "loss", "ref"])
39
-
40
- model_main = ModelMain(opts)
41
- if torch.cuda.is_available() and opts.multi_gpu:
42
- model_main = torch.nn.DataParallel(model_main)
43
-
44
- if opts.continue_training:
45
- model_main.load_state_dict(torch.load(opts.continue_ckpt)['model'])
46
-
47
- model_main.cuda()
48
-
49
- parameters_all = [{"params": model_main.img_encoder.parameters()}, {"params": model_main.img_decoder.parameters()},
50
- {"params": model_main.modality_fusion.parameters()}, {"params": model_main.transformer_main.parameters()},
51
- {"params": model_main.transformer_seqdec.parameters()}]
52
-
53
- optimizer = AdamW(parameters_all, lr=opts.lr, betas=(opts.beta1, opts.beta2), eps=opts.eps, weight_decay=opts.weight_decay)
54
- scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.997)
55
-
56
- for epoch in range(opts.init_epoch, opts.n_epochs):
57
- t0 = time()
58
- for idx, data in enumerate(train_loader):
59
- for key in data: data[key] = data[key].cuda()
60
- ret_dict, loss_dict = model_main(data)
61
-
62
- loss = opts.loss_w_l1 * loss_dict['img']['l1'] + opts.loss_w_pt_c * loss_dict['img']['vggpt'] + opts.kl_beta * loss_dict['kl'] \
63
- + loss_dict['svg']['total'] + loss_dict['svg_para']['total']
64
-
65
- # perform optimization
66
- optimizer.zero_grad()
67
- loss.backward()
68
- optimizer.step()
69
- batches_done = epoch * len(train_loader) + idx + 1
70
- message = (
71
- f"Time: {'{} seconds'.format(time() - t0)}, "
72
- f"Epoch: {epoch}/{opts.n_epochs}, Batch: {idx}/{len(train_loader)}, "
73
- f"Loss: {loss.item():.6f}, "
74
- f"img_l1_loss: {opts.loss_w_l1 * loss_dict['img']['l1'].item():.6f}, "
75
- f"img_pt_c_loss: {opts.loss_w_pt_c * loss_dict['img']['vggpt']:.6f}, "
76
- f"svg_total_loss: {loss_dict['svg']['total'].item():.6f}, "
77
- f"svg_cmd_loss: {opts.loss_w_cmd * loss_dict['svg']['cmd'].item():.6f}, "
78
- f"svg_args_loss: {opts.loss_w_args * loss_dict['svg']['args'].item():.6f}, "
79
- f"svg_smooth_loss: {opts.loss_w_smt * loss_dict['svg']['smt'].item():.6f}, "
80
- f"svg_aux_loss: {opts.loss_w_aux * loss_dict['svg']['aux'].item():.6f}, "
81
- f"lr: {optimizer.param_groups[0]['lr']:.6f}, "
82
- f"Step: {batches_done}"
83
- )
84
- if batches_done % opts.freq_log == 0:
85
- logfile_train.write(message + '\n')
86
- print(message)
87
-
88
- if opts.wandb:
89
- # print("Running With Wandb")
90
- # Define the items for image and SVG losses
91
- loss_img_items = ['l1', 'vggpt']
92
- loss_svg_items = ['total', 'cmd', 'args', 'aux', 'smt']
93
-
94
- # Log image loss items
95
- for item in loss_img_items:
96
- wandb.log({f'Loss/img_{item}': loss_dict['img'][item].item()}, step=batches_done)
97
-
98
- # Log SVG loss items
99
- for item in loss_svg_items:
100
- wandb.log({f'Loss/svg_{item}': loss_dict['svg'][item].item()}, step=batches_done)
101
- wandb.log({f'Loss/svg_para_{item}': loss_dict['svg_para'][item].item()}, step=batches_done)
102
-
103
- # Log KL loss
104
- wandb.log({'Loss/img_kl_loss': opts.kl_beta * loss_dict['kl'].item()}, step=batches_done)
105
-
106
- wandb.log({
107
- 'Images/trg_img': wandb.Image(ret_dict['img']['trg'][0], caption="Target"),
108
- 'Images/img_output': wandb.Image(ret_dict['img']['out'][0], caption="Output")
109
- }, step=batches_done)
110
-
111
- text_table.add_data(epoch, loss, str(ret_dict['img']['ref'][0]))
112
- wandb.log({"training_samples" : text_table})
113
-
114
-
115
-
116
- if opts.freq_sample > 0 and batches_done % opts.freq_sample == 0:
117
-
118
- img_sample = torch.cat((ret_dict['img']['trg'].data, ret_dict['img']['out'].data), -2)
119
- save_file = os.path.join(dir_sample, f"train_epoch_{epoch}_batch_{batches_done}.png")
120
- save_image(img_sample, save_file, nrow=8, normalize=True)
121
-
122
- if opts.freq_val > 0 and batches_done % opts.freq_val == 0:
123
-
124
- with torch.no_grad():
125
- model_main.eval()
126
- loss_val = {'img':{'l1':0.0, 'vggpt':0.0}, 'svg':{'total':0.0, 'cmd':0.0, 'args':0.0, 'aux':0.0},
127
- 'svg_para':{'total':0.0, 'cmd':0.0, 'args':0.0, 'aux':0.0}}
128
-
129
- for val_idx, val_data in enumerate(val_loader):
130
- for key in val_data: val_data[key] = val_data[key].cuda()
131
- ret_dict_val, loss_dict_val = model_main(val_data, mode='val')
132
- for loss_cat in ['img', 'svg']:
133
- for key, _ in loss_val[loss_cat].items():
134
- loss_val[loss_cat][key] += loss_dict_val[loss_cat][key]
135
-
136
- for loss_cat in ['img', 'svg']:
137
- for key, _ in loss_val[loss_cat].items():
138
- loss_val[loss_cat][key] /= len(val_loader)
139
-
140
- if opts.wandb:
141
- for loss_cat in ['img', 'svg']:
142
- # Iterate over keys and values in the loss dictionary
143
- for key, value in loss_val[loss_cat].items():
144
- # Log loss value to WandB
145
- wandb.log({f'VAL/loss_{loss_cat}_{key}': value})
146
-
147
- val_msg = (
148
- f"Epoch: {epoch}/{opts.n_epochs}, Batch: {idx}/{len(train_loader)}, "
149
- f"Val loss img l1: {loss_val['img']['l1']: .6f}, "
150
- f"Val loss img pt: {loss_val['img']['vggpt']: .6f}, "
151
- f"Val loss total: {loss_val['svg']['total']: .6f}, "
152
- f"Val loss cmd: {loss_val['svg']['cmd']: .6f}, "
153
- f"Val loss args: {loss_val['svg']['args']: .6f}, "
154
- )
155
-
156
- logfile_val.write(val_msg + "\n")
157
- print(val_msg)
158
-
159
-
160
- scheduler.step()
161
-
162
- if epoch % opts.freq_ckpt == 0 and epoch >= opts.threshold_ckpt:
163
- if opts.multi_gpu:
164
- print(f"Saved {dir_ckpt}/{epoch}_{batches_done}.ckpt")
165
- torch.save({'model':model_main.module.state_dict(), 'opt':optimizer.state_dict(), 'n_epoch':epoch, 'n_iter':batches_done}, f'{dir_ckpt}/{epoch}_{batches_done}.ckpt')
166
- else:
167
- print(f"Saved {dir_ckpt}/{epoch}_{batches_done}.ckpt")
168
- torch.save({'model':model_main.state_dict(), 'opt':optimizer.state_dict(), 'n_epoch':epoch, 'n_iter':batches_done}, f'{dir_ckpt}/{epoch}_{batches_done}.ckpt')
169
- if opts.wandb:
170
- artifact = wandb.Artifact('model_main_checkpoints', type='model')
171
- artifact.add_file(f'{dir_ckpt}/{epoch}_{batches_done}.ckpt')
172
- run.log_artifact(artifact)
173
-
174
- logfile_train.close()
175
- logfile_val.close()
176
-
177
- def backup_code(name_exp, exp_path):
178
- os.makedirs(os.path.join(exp_path,'experiments', name_exp, 'code'), exist_ok=True)
179
- shutil.copy('models/transformers.py', os.path.join(exp_path,'experiments', name_exp, 'code', 'transformers.py') )
180
- shutil.copy('models/model_main.py', os.path.join(exp_path,'experiments', name_exp, 'code', 'model_main.py'))
181
- shutil.copy('models/image_encoder.py', os.path.join(exp_path,'experiments', name_exp, 'code', 'image_encoder.py'))
182
- shutil.copy('models/image_decoder.py', os.path.join(exp_path,'experiments', name_exp, 'code', 'image_decoder.py'))
183
- shutil.copy('./train.py', os.path.join(exp_path,'experiments', name_exp, 'code', 'train.py'))
184
- shutil.copy('./options.py', os.path.join(exp_path,'experiments', name_exp, 'code', 'options.py'))
185
-
186
- def train(opts):
187
- if opts.model_name == 'main_model':
188
- train_main_model(opts)
189
- elif opts.model_name == 'others':
190
- train_others(opts)
191
- else:
192
- raise NotImplementedError
193
-
194
- def main():
195
-
196
- opts = get_parser_main_model().parse_args()
197
- opts.name_exp = opts.name_exp + '_' + opts.model_name
198
- os.makedirs(f"{opts.exp_path}/experiments", exist_ok=True)
199
- debug = True
200
- # Create directories
201
- experiment_dir = os.path.join(f"{opts.exp_path}","experiments", opts.name_exp)
202
- backup_code(opts.name_exp, opts.exp_path)
203
- os.makedirs(experiment_dir, exist_ok=debug) # False to prevent multiple train run by mistake
204
- os.makedirs(os.path.join(experiment_dir, "samples"), exist_ok=True)
205
- os.makedirs(os.path.join(experiment_dir, "checkpoints"), exist_ok=True)
206
- os.makedirs(os.path.join(experiment_dir, "results"), exist_ok=True)
207
- os.makedirs(os.path.join(experiment_dir, "logs"), exist_ok=True)
208
- print(f"Training on experiment {opts.name_exp}...")
209
- # Dump options
210
- with open(os.path.join(experiment_dir, "opts.txt"), "w") as f:
211
- for key, value in vars(opts).items():
212
- f.write(str(key) + ": " + str(value) + "\n")
213
- train(opts)
214
-
215
- if __name__ == "__main__":
216
- main()
 
1
+ import os
2
+ import random
3
+ import numpy as np
4
+ import shutil
5
+ import torch
6
+ import torch.nn as nn
7
+ import torch.nn.functional as F
8
+ from torch.optim import Adam, AdamW
9
+ from torchvision.utils import save_image
10
+ import wandb
11
+ from dataloader import get_loader
12
+ from models import util_funcs
13
+ from models.model_main import ModelMain
14
+ from options import get_parser_main_model
15
+ from data_utils.svg_utils import render
16
+ from time import time
17
+
18
+ def setup_seed(seed):
19
+ torch.manual_seed(seed)
20
+ torch.cuda.manual_seed_all(seed)
21
+ np.random.seed(seed)
22
+ random.seed(seed)
23
+ torch.backends.cudnn.deterministic = True
24
+
25
+ def train_main_model(opts):
26
+ setup_seed(opts.seed)
27
+ dir_exp = os.path.join(f"{opts.exp_path}", "experiments", opts.name_exp)
28
+ dir_sample = os.path.join(dir_exp, "samples")
29
+ dir_ckpt = os.path.join(dir_exp, "checkpoints")
30
+ dir_log = os.path.join(dir_exp, "logs")
31
+ logfile_train = open(os.path.join(dir_log, "train_loss_log.txt"), 'w')
32
+ logfile_val = open(os.path.join(dir_log, "val_loss_log.txt"), 'w')
33
+
34
+ train_loader = get_loader(opts.data_root, opts.img_size, opts.language, opts.char_num, opts.max_seq_len, opts.dim_seq, opts.batch_size, opts.mode)
35
+ val_loader = get_loader(opts.data_root, opts.img_size, opts.language, opts.char_num, opts.max_seq_len, opts.dim_seq, opts.batch_size_val, 'val')
36
+
37
+ run = wandb.init(project=opts.wandb_project_name, config=opts) # initialize wandb project
38
+ text_table = wandb.Table(columns=["epoch", "loss", "ref"])
39
+
40
+ model_main = ModelMain(opts)
41
+ if torch.cuda.is_available() and opts.multi_gpu:
42
+ model_main = torch.nn.DataParallel(model_main)
43
+
44
+ if opts.continue_training:
45
+ model_main.load_state_dict(torch.load(opts.continue_ckpt)['model'])
46
+
47
+ model_main.cuda()
48
+
49
+ parameters_all = [{"params": model_main.img_encoder.parameters()}, {"params": model_main.img_decoder.parameters()},
50
+ {"params": model_main.modality_fusion.parameters()}, {"params": model_main.transformer_main.parameters()},
51
+ {"params": model_main.transformer_seqdec.parameters()}]
52
+
53
+ optimizer = AdamW(parameters_all, lr=opts.lr, betas=(opts.beta1, opts.beta2), eps=opts.eps, weight_decay=opts.weight_decay)
54
+ scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.997)
55
+
56
+ for epoch in range(opts.init_epoch, opts.n_epochs):
57
+ t0 = time()
58
+ for idx, data in enumerate(train_loader):
59
+ for key in data: data[key] = data[key].cuda()
60
+ ret_dict, loss_dict = model_main(data)
61
+
62
+ loss = opts.loss_w_l1 * loss_dict['img']['l1'] + opts.loss_w_pt_c * loss_dict['img']['vggpt'] + opts.kl_beta * loss_dict['kl'] \
63
+ + loss_dict['svg']['total'] + loss_dict['svg_para']['total']
64
+
65
+ # perform optimization
66
+ optimizer.zero_grad()
67
+ loss.backward()
68
+ optimizer.step()
69
+ batches_done = epoch * len(train_loader) + idx + 1
70
+ message = (
71
+ f"Time: {'{} seconds'.format(time() - t0)}, "
72
+ f"Epoch: {epoch}/{opts.n_epochs}, Batch: {idx}/{len(train_loader)}, "
73
+ f"Loss: {loss.item():.6f}, "
74
+ f"img_l1_loss: {opts.loss_w_l1 * loss_dict['img']['l1'].item():.6f}, "
75
+ f"img_pt_c_loss: {opts.loss_w_pt_c * loss_dict['img']['vggpt']:.6f}, "
76
+ f"svg_total_loss: {loss_dict['svg']['total'].item():.6f}, "
77
+ f"svg_cmd_loss: {opts.loss_w_cmd * loss_dict['svg']['cmd'].item():.6f}, "
78
+ f"svg_args_loss: {opts.loss_w_args * loss_dict['svg']['args'].item():.6f}, "
79
+ f"svg_smooth_loss: {opts.loss_w_smt * loss_dict['svg']['smt'].item():.6f}, "
80
+ f"svg_aux_loss: {opts.loss_w_aux * loss_dict['svg']['aux'].item():.6f}, "
81
+ f"lr: {optimizer.param_groups[0]['lr']:.6f}, "
82
+ f"Step: {batches_done}"
83
+ )
84
+ if batches_done % opts.freq_log == 0:
85
+ logfile_train.write(message + '\n')
86
+ print(message)
87
+
88
+ if opts.wandb:
89
+ # print("Running With Wandb")
90
+ # Define the items for image and SVG losses
91
+ loss_img_items = ['l1', 'vggpt']
92
+ loss_svg_items = ['total', 'cmd', 'args', 'aux', 'smt']
93
+
94
+ # Log image loss items
95
+ for item in loss_img_items:
96
+ wandb.log({f'Loss/img_{item}': loss_dict['img'][item].item()}, step=batches_done)
97
+
98
+ # Log SVG loss items
99
+ for item in loss_svg_items:
100
+ wandb.log({f'Loss/svg_{item}': loss_dict['svg'][item].item()}, step=batches_done)
101
+ wandb.log({f'Loss/svg_para_{item}': loss_dict['svg_para'][item].item()}, step=batches_done)
102
+
103
+ # Log KL loss
104
+ wandb.log({'Loss/img_kl_loss': opts.kl_beta * loss_dict['kl'].item()}, step=batches_done)
105
+
106
+ wandb.log({
107
+ 'Images/trg_img': wandb.Image(ret_dict['img']['trg'][0], caption="Target"),
108
+ 'Images/img_output': wandb.Image(ret_dict['img']['out'][0], caption="Output")
109
+ }, step=batches_done)
110
+
111
+ text_table.add_data(epoch, loss, str(ret_dict['img']['ref'][0]))
112
+ wandb.log({"training_samples" : text_table})
113
+
114
+
115
+
+             if opts.freq_sample > 0 and batches_done % opts.freq_sample == 0:
+                 # stack each target glyph directly above the corresponding output
+                 img_sample = torch.cat((ret_dict['img']['trg'].data, ret_dict['img']['out'].data), -2)
+                 save_file = os.path.join(dir_sample, f"train_epoch_{epoch}_batch_{batches_done}.png")
+                 save_image(img_sample, save_file, nrow=8, normalize=True)
+
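The torch.cat along dim -2 concatenates along the image height, so every cell of the saved 8-column grid is a (target, output) pair. A minimal shape check, with batch and image sizes chosen purely for illustration:

import torch

trg = torch.rand(16, 1, 64, 64)   # hypothetical batch of target glyph images
out = torch.rand(16, 1, 64, 64)   # corresponding model outputs
pair = torch.cat((trg, out), -2)  # concatenate along height
print(pair.shape)                 # torch.Size([16, 1, 128, 64]): target above output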
+             if opts.freq_val > 0 and batches_done % opts.freq_val == 0:
+
+                 with torch.no_grad():
+                     model_main.eval()
+                     loss_val = {'img': {'l1': 0.0, 'vggpt': 0.0}, 'svg': {'total': 0.0, 'cmd': 0.0, 'args': 0.0, 'aux': 0.0},
+                                 'svg_para': {'total': 0.0, 'cmd': 0.0, 'args': 0.0, 'aux': 0.0}}
+
+                     # accumulate losses over the whole validation set ...
+                     for val_idx, val_data in enumerate(val_loader):
+                         for key in val_data: val_data[key] = val_data[key].cuda()
+                         ret_dict_val, loss_dict_val = model_main(val_data, mode='val')
+                         for loss_cat in ['img', 'svg']:
+                             for key, _ in loss_val[loss_cat].items():
+                                 loss_val[loss_cat][key] += loss_dict_val[loss_cat][key]
+
+                     # ... then average them
+                     for loss_cat in ['img', 'svg']:
+                         for key, _ in loss_val[loss_cat].items():
+                             loss_val[loss_cat][key] /= len(val_loader)
+
+                     if opts.wandb:
+                         for loss_cat in ['img', 'svg']:
+                             for key, value in loss_val[loss_cat].items():
+                                 # use the same global step as the training logs
+                                 wandb.log({f'VAL/loss_{loss_cat}_{key}': value}, step=batches_done)
+
+                     val_msg = (
+                         f"Epoch: {epoch}/{opts.n_epochs}, Batch: {idx}/{len(train_loader)}, "
+                         f"Val loss img l1: {loss_val['img']['l1']:.6f}, "
+                         f"Val loss img pt: {loss_val['img']['vggpt']:.6f}, "
+                         f"Val loss total: {loss_val['svg']['total']:.6f}, "
+                         f"Val loss cmd: {loss_val['svg']['cmd']:.6f}, "
+                         f"Val loss args: {loss_val['svg']['args']:.6f}, "
+                     )
+
+                     logfile_val.write(val_msg + "\n")
+                     print(val_msg)
+
+                 # restore training mode after validation
+                 model_main.train()
+
+         scheduler.step()
+
+         if epoch % opts.freq_ckpt == 0 and epoch >= opts.threshold_ckpt:
+             ckpt_path = f'{dir_ckpt}/{epoch}_{batches_done}.ckpt'
+             # always save the unwrapped weights so the file loads identically with or without DataParallel
+             torch.save({'model': net.state_dict(), 'opt': optimizer.state_dict(), 'n_epoch': epoch, 'n_iter': batches_done}, ckpt_path)
+             print(f"Saved {ckpt_path}")
+             if opts.wandb:
+                 artifact = wandb.Artifact('model_main_checkpoints', type='model')
+                 artifact.add_file(ckpt_path)
+                 run.log_artifact(artifact)
+
+     logfile_train.close()
+     logfile_val.close()
+
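Each checkpoint written above bundles the model weights with the optimizer state and progress counters, so resuming or inference just indexes the saved dict. A minimal loading sketch (the path is a placeholder, the import path is assumed from the repo layout, and opts must match the training configuration):

import torch
from models.model_main import ModelMain  # import path assumed

ckpt = torch.load('experiments/<name_exp>/checkpoints/<epoch>_<step>.ckpt', map_location='cpu')
model_main = ModelMain(opts)                # same options as training
model_main.load_state_dict(ckpt['model'])  # weights are saved unwrapped, no 'module.' prefix handling needed
model_main.eval()
print("saved at epoch", ckpt['n_epoch'], "iteration", ckpt['n_iter'])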
+ def backup_code(name_exp, exp_path):
+     code_dir = os.path.join(exp_path, 'experiments', name_exp, 'code')
+     os.makedirs(code_dir, exist_ok=True)
+     shutil.copy('models/transformers.py', os.path.join(code_dir, 'transformers.py'))
+     shutil.copy('models/model_main.py', os.path.join(code_dir, 'model_main.py'))
+     shutil.copy('models/image_encoder.py', os.path.join(code_dir, 'image_encoder.py'))
+     shutil.copy('models/image_decoder.py', os.path.join(code_dir, 'image_decoder.py'))
+     shutil.copy('./train.py', os.path.join(code_dir, 'train.py'))
+     shutil.copy('./options.py', os.path.join(code_dir, 'options.py'))
+
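Since every copy lands in the same folder under its original basename, the same backup could be driven by a file list; a minimal equivalent sketch (the function name is hypothetical, the list mirrors the copies above):

import os
import shutil

def backup_code_from_list(name_exp, exp_path):
    # hypothetical variant of backup_code above, identical destinations
    code_dir = os.path.join(exp_path, 'experiments', name_exp, 'code')
    os.makedirs(code_dir, exist_ok=True)
    for src in ['models/transformers.py', 'models/model_main.py', 'models/image_encoder.py',
                'models/image_decoder.py', './train.py', './options.py']:
        shutil.copy(src, code_dir)  # shutil.copy keeps the basename when given a directory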
+ def train(opts):
+     if opts.model_name == 'main_model':
+         train_main_model(opts)
+     elif opts.model_name == 'others':
+         train_others(opts)
+     else:
+         raise NotImplementedError
+
+ def main():
+
+     opts = get_parser_main_model().parse_args()
+     opts.name_exp = opts.name_exp + '_' + opts.model_name
+     os.makedirs(f"{opts.exp_path}/experiments", exist_ok=True)
+     debug = True
+     # create the experiment directory tree
+     experiment_dir = os.path.join(f"{opts.exp_path}", "experiments", opts.name_exp)
+     backup_code(opts.name_exp, opts.exp_path)
+     os.makedirs(experiment_dir, exist_ok=debug)  # set debug = False to refuse re-running an existing experiment by mistake
+     os.makedirs(os.path.join(experiment_dir, "samples"), exist_ok=True)
+     os.makedirs(os.path.join(experiment_dir, "checkpoints"), exist_ok=True)
+     os.makedirs(os.path.join(experiment_dir, "results"), exist_ok=True)
+     os.makedirs(os.path.join(experiment_dir, "logs"), exist_ok=True)
+     print(f"Training on experiment {opts.name_exp}...")
+     # dump all options for reproducibility
+     with open(os.path.join(experiment_dir, "opts.txt"), "w") as f:
+         for key, value in vars(opts).items():
+             f.write(str(key) + ": " + str(value) + "\n")
+     train(opts)
+
+ if __name__ == "__main__":
+     main()
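Assuming get_parser_main_model exposes the attributes used above as identically named command-line flags (a guess; check options.py for the actual flag names and defaults), a typical launch would look something like `python train.py --name_exp thai_font --model_name main_model`.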