Spaces:

microhum
/

ThaiVecFont

Sleeping

App Files Files Community

ThaiVecFont / data_utils /convert_ttf_to_sfd.py

microhum

rm dockerfile

024aa56 8 months ago

raw

history blame

4.25 kB

	import fontforge # noqa
	import os
	import sys
	from tqdm import tqdm
	import multiprocessing as mp
	import argparse


	def _glyph_name(char):
	    """Return the name used to select ``char`` in the source font.

	    ASCII characters are passed through unchanged. Every other character is
	    turned into a Unicode-based glyph name: ``uniXXXX`` (exactly four hex
	    digits) inside the Basic Multilingual Plane and ``uXXXXX`` above it.
	    Slicing the ``unicode_escape`` form, as the previous code did, produced the
	    bare name ``uni`` for ASCII, ``uniXX`` for Latin-1 and ``uniXXXXXXXX`` for
	    astral characters.
	    """
	    code_point = ord(char)
	    if code_point < 0x80:
	        return char
	    if code_point <= 0xFFFF:
	        # Lower-case hex keeps the exact names the previous code generated for
	        # BMP characters such as Thai (e.g. 'uni0e01').
	        return "uni{:04x}".format(code_point)
	    # NOTE(review): 'u' + hex is the usual naming convention for code points
	    # above U+FFFF - confirm FontForge resolves it for your fonts.
	    return "u{:x}".format(code_point)


	def _convert_fonts(font_names, fonts_dir, sfd_dir, charset, id_width):
	    """Split each font in ``font_names`` into one-glyph .sfd files.

	    This is the worker run by every process started from ``convert_mp``. It is
	    a module-level function (not a closure) so that it can also be used as a
	    ``multiprocessing.Process`` target when the start method pickles targets.

	    Args:
	        font_names: File names (not paths) of the fonts this worker handles.
	        fonts_dir: Directory that contains the font files.
	        sfd_dir: Directory that receives one sub-directory per font.
	        charset: String whose characters are exported, in order.
	        id_width: Zero-padding width of the character index in file names.
	    """
	    # Every glyph is pasted into the 'A' slot of a fresh single-glyph font, so
	    # the first line of each description file is always ord('A') == 65,
	    # whatever the source character was.
	    target_slot = 'A'

	    for font_name in tqdm(font_names):
	        font_id = font_name.split('.')[0]
	        font_file_path = os.path.join(fonts_dir, font_name)
	        try:
	            cur_font = fontforge.open(font_file_path)
	        except Exception as e:
	            print('Cannot open ', font_name)
	            print(e)
	            continue

	        try:
	            target_dir = os.path.join(sfd_dir, "{}".format(font_id))
	            # exist_ok avoids the check-then-create race between workers.
	            os.makedirs(target_dir, exist_ok=True)

	            for char_id, char in enumerate(charset):
	                stem = '{}_{num:0{width}}'.format(font_id, num=char_id, width=id_width)
	                source_glyph = char
	                new_font_for_char = None
	                try:
	                    source_glyph = _glyph_name(char)
	                    # The description file is created before the glyph is
	                    # copied (as before), so a failed glyph still leaves an
	                    # empty or partial .txt behind; 'with' guarantees the
	                    # handle is closed on error. The default encoding is kept
	                    # so existing readers of these files are unaffected.
	                    with open(os.path.join(target_dir, stem + '.txt'), 'w') as char_description:
	                        cur_font.selection.select(source_glyph)
	                        cur_font.copy()

	                        new_font_for_char = fontforge.font()
	                        new_font_for_char.selection.select(target_slot)
	                        new_font_for_char.paste()
	                        new_font_for_char.fontname = "{}_".format(font_id) + font_name

	                        new_font_for_char.save(os.path.join(target_dir, stem + '.sfd'))

	                        # Description layout: slot code point, advance width,
	                        # vertical width, zero-padded char index, font id.
	                        char_description.write(str(ord(target_slot)) + '\n')
	                        char_description.write(str(new_font_for_char[target_slot].width) + '\n')
	                        char_description.write(str(new_font_for_char[target_slot].vwidth) + '\n')
	                        char_description.write('{num:0{width}}'.format(num=char_id, width=id_width) + '\n')
	                        char_description.write('{}'.format(font_id))
	                except Exception as e:
	                    # Best effort: report the glyph and carry on with the next.
	                    print("Found Error:", font_id, font_name, char_id, source_glyph)
	                    print(e)
	                finally:
	                    # One temporary font is created per character; release it
	                    # so memory does not grow with the size of the charset.
	                    if new_font_for_char is not None:
	                        new_font_for_char.close()
	        finally:
	            cur_font.close()


	def convert_mp(opts):
	    """Use multiprocessing to convert all fonts to sfd files.

	    Reads the character set from ``{data_path}/char_set/{language}.txt`` and,
	    when ``opts.ref_nshot`` is 52, appends the characters of
	    ``{data_path}/char_set/eng.txt``. The fonts found directly inside
	    ``{ttf_path}/{language}/{split}`` are divided between worker processes;
	    each worker writes ``{sfd_path}/{language}/{split}/{font_id}/`` with one
	    ``.sfd`` and one ``.txt`` file per character.

	    Args:
	        opts: Namespace with ``data_path``, ``language``, ``ttf_path``,
	            ``sfd_path`` and ``split``. ``ref_nshot`` is optional; a missing
	            attribute is treated as "do not append the English characters".
	    """
	    # The charset files hold non-ASCII text, so do not rely on the locale's
	    # default encoding. NOTE(review): assumes the files are UTF-8 - confirm.
	    with open(f"{opts.data_path}/char_set/{opts.language}.txt", 'r', encoding='utf-8') as charset_file:
	        charset_th = charset_file.read()
	    charset = charset_th
	    if getattr(opts, 'ref_nshot', None) == 52:
	        with open(f"{opts.data_path}/char_set/eng.txt", 'r', encoding='utf-8') as charset_file:
	            charset = charset_th + charset_file.read()
	    charset_lenw = len(str(len(charset)))

	    fonts_dir = os.path.join(opts.ttf_path, opts.language, opts.split)
	    sfd_dir = os.path.join(opts.sfd_path, opts.language, opts.split)
	    print(fonts_dir)

	    # Only the files directly inside fonts_dir are converted. Taking the first
	    # os.walk() entry (with a default) copes with a missing directory and with
	    # sub-directories, both of which broke the previous "last entry wins" loop.
	    ttf_fnames = sorted(next(os.walk(fonts_dir), (fonts_dir, [], []))[2])
	    print(ttf_fnames)

	    font_num = len(ttf_fnames)
	    if font_num == 0:
	        print('No font files found in', fonts_dir)
	        return

	    # Leave one core free, but never drop to zero workers on a 1-core host.
	    process_num = max(1, mp.cpu_count() - 1)
	    font_num_per_process = font_num // process_num + 1
	    chunks = [
	        ttf_fnames[start:start + font_num_per_process]
	        for start in range(0, font_num, font_num_per_process)
	    ]

	    processes = [
	        mp.Process(target=_convert_fonts, args=(chunk, fonts_dir, sfd_dir, charset, charset_lenw))
	        for chunk in chunks
	    ]

	    for p in processes:
	        p.start()
	    for p in processes:
	        p.join()


	def main():
	    """Parse the command-line options and run the TTF-to-SFD conversion.

	    The parsed namespace is handed unchanged to ``convert_mp``.
	    """
	    parser = argparse.ArgumentParser(description="Convert ttf fonts to sfd fonts")
	    parser.add_argument("--language", type=str, default='eng', choices=['eng', 'chn', 'tha'])
	    parser.add_argument("--data_path", type=str, default='./Font_Dataset', help="Path to Dataset")
	    parser.add_argument("--ttf_path", type=str, default='../data/font_ttfs')
	    parser.add_argument('--sfd_path', type=str, default='../data/font_sfds')
	    parser.add_argument('--split', type=str, default='train')
	    # convert_mp reads opts.ref_nshot; without this option the namespace has no
	    # such attribute and the conversion stops with an AttributeError.
	    parser.add_argument('--ref_nshot', type=int, default=None,
	                        help="Pass 52 to append the characters of char_set/eng.txt "
	                             "to the selected language's character set")
	    opts = parser.parse_args()
	    convert_mp(opts)


	# Run only when executed as a script, so that importing this module (for
	# example from a multiprocessing child process) does not start a conversion.
	if __name__ == "__main__":
	    main()