"""Add or refresh a table of contents (and "back to top" links) in Jupyter notebooks."""
import json | |
import pathlib | |
import argparse | |
import re | |
# Regex for the markdown heading that opens the table of content, e.g.
# "#### Table of contents:". It is used with ``re.match`` (anchored at the
# start of the line only), so the plural "contents" is matched as well.
TABLE_OF_CONTENT = r"#+\s+Table of content:?"
def find_tc_in_cell(cell):
    """Locate the table-of-content heading inside a notebook cell.

    Scans ``cell["source"]`` line by line and stops at the first line that
    matches ``TABLE_OF_CONTENT``.

    Returns:
        ``(cell, line_index)`` for the first matching line, or
        ``(None, None)`` when no line of the cell matches.
    """
    for position, text in enumerate(cell["source"]):
        if re.match(TABLE_OF_CONTENT, text) is not None:
            return cell, position
    return None, None
def create_title_for_tc(title):
    """Turn a markdown heading line into the text shown in the table of content.

    The leading ``#`` markers and following whitespace are dropped, square
    brackets and newlines are removed, and every ``(http...)`` link target is
    stripped so only the visible link text remains.

    Args:
        title: heading line, e.g. ``"## [Docs](https://example.com) intro\\n"``.

    Returns:
        The cleaned title text, e.g. ``"Docs intro"``.
    """
    title_for_tc = title.lstrip("#").lstrip()
    title_for_tc = re.sub(r"[\[\]\n]", "", title_for_tc)
    # Match each link target separately (allowing one level of nested
    # parentheses inside the URL). A greedy ``\(http.*\)`` would also swallow
    # all the text between the first and the last link of the title.
    title_for_tc = re.sub(r"\(http(?:[^()]|\([^()]*\))*\)", "", title_for_tc)
    return title_for_tc
def create_link_for_tc(title):
    """Build the anchor part of a table-of-content link from a cleaned title.

    Backticks, ``$`` and ``^`` are dropped and every space becomes ``-``.
    """
    # A single translation pass: None deletes the character.
    anchor_table = str.maketrans({"`": None, "$": None, "^": None, " ": "-"})
    return title.translate(anchor_table)
def remove_old_tc(cell, idx):
    """Strip a previously generated table of content from a cell, in place.

    Starting at line ``idx`` of ``cell["source"]``, every line that is either
    the table-of-content heading (``TABLE_OF_CONTENT``) or a list item of the
    form ``- [text](#anchor)`` is removed. Lines before ``idx`` are never
    touched.

    Args:
        cell: notebook cell dict, or ``None`` (then nothing happens).
        idx: index of the first line to inspect.

    Returns:
        The same ``cell`` object that was passed in.
    """
    if cell is None:
        return cell

    source = cell["source"]
    # Rebuild the tail with a slice assignment instead of ``list.remove``:
    # ``remove`` deletes the first *equal* line of the whole list, which can
    # be an identical line located before ``idx``.
    source[idx:] = [
        line
        for line in source[idx:]
        if not (re.match(r"\s*-\s*\[.*\]\(#.*\).*", line) or re.match(TABLE_OF_CONTENT, line))
    ]
    return cell
def get_tc_line(title, title_for_tc, link, tc_list, titles_list):
    """Format one list item of the table of content.

    The indentation is derived from the position of the first space in
    ``title`` (i.e. the number of leading ``#``): position 2 maps to no
    indentation and each further ``#`` adds four spaces. The value is then
    adjusted against the previous entry so the markdown list stays valid.

    Args:
        title: stripped heading line, e.g. ``"### Section"``.
        title_for_tc: text displayed for the entry.
        link: anchor the entry points to (without ``#``).
        tc_list: entries generated so far (lines produced by this function).
        titles_list: stripped heading lines that produced ``tc_list``.

    Returns:
        The entry, e.g. ``"    - [Section](#Section)\\n"``.
    """
    # calc indents for Table of content
    # ``find`` returns -1 when the heading has no space (e.g. "#Title").
    first_space = title.find(" ")
    indents_num = (first_space - 2) * 4 if first_space >= 0 else -1

    if not tc_list or indents_num < 0:
        # when first list item have more than 1 indents the alignment would be broken
        indents_num = 0
    else:
        previous_indent = tc_list[-1].index("-")
        if indents_num - previous_indent > 4:
            # when previous list item have n indents and current have n+4+1 it broke the alignment
            indents_num = previous_indent + 4
        elif (
            indents_num != previous_indent
            and titles_list
            # ``find`` instead of ``index``: the previous title may contain no
            # space at all, which must not abort the whole generation.
            and first_space == titles_list[-1].find(" ")
        ):
            # when we have several titles with same wrong alignments
            indents_num = previous_indent

    indents = " " * indents_num + "-" + " "
    return f"{indents}[{title_for_tc}](#{link})\n"
def is_ref_to_top_exists(cell, idx):
    """Tell whether a "back to top" link directly follows line ``idx``.

    ``cell`` is the list of source lines. Blank lines after ``idx`` are
    skipped; the first non-blank line decides the answer.
    """
    for raw_line in cell[idx + 1 :]:
        text = raw_line.strip()
        if "[back to top ⬆️](#Table-of-content" in text:
            return True
        if text:
            # content of block started
            return False
    return False
def is_markdown(cell):
    """Return True when the notebook cell's ``cell_type`` is ``"markdown"``."""
    cell_kind = cell["cell_type"]
    return cell_kind == "markdown"
def is_title(line):
    """Check whether a source line is a non-empty markdown heading.

    The result is meant to be used for its truthiness: ``False`` when the
    stripped line does not start with ``#``, otherwise the heading text that
    remains after the ``#`` markers (an empty string for a bare ``#`` line).
    """
    stripped = line.strip()
    if not stripped.startswith("#"):
        return False
    return stripped.lstrip("#").lstrip()
def generate_table_of_content(notebook_path: pathlib.Path):
    """Insert or refresh the table of content of one notebook, in place.

    The notebook JSON is loaded, every markdown cell after the first one is
    scanned for headings, a "back to top" link is added under each heading
    that does not have one yet, and a list of links to all headings is
    appended to the cell that already hosts a table of content (after the old
    entries are removed) or, when there is none, to the first cell. The file
    is then rewritten.

    Args:
        notebook_path: path of the ``.ipynb`` file to update.

    Returns:
        None. A notebook without cells is left untouched.
    """
    back_to_top = "[back to top ⬆️](#Table-of-contents:)\n"

    with open(notebook_path, "r", encoding="utf-8") as notebook_file:
        notebook_json = json.load(notebook_file)

    cells = notebook_json["cells"]
    if not cells:
        return

    table_of_content = []
    all_titles = []
    seen_links = set()

    table_of_content_cell, table_of_content_cell_idx = find_tc_in_cell(cells[0])

    for cell in filter(is_markdown, cells[1:]):
        if table_of_content_cell is None:
            table_of_content_cell, table_of_content_cell_idx = find_tc_in_cell(cell)
            if table_of_content_cell is not None:
                # The cell hosting the table of content is not scanned for titles.
                continue

        source = cell["source"]
        # NOTE(review): lines starting with "#" inside fenced code blocks are
        # also treated as headings here - confirm whether that is intended.
        # Walk by position (not ``source.index(title)``) so that two identical
        # headings in one cell are each handled at their own location.
        idx = 0
        while idx < len(source):
            line = source[idx]
            if not is_title(line):
                idx += 1
                continue

            if not is_ref_to_top_exists(source, idx):
                if not line.endswith("\n"):
                    # Replace the heading rather than inserting a second copy,
                    # otherwise the heading would appear twice in the cell.
                    source[idx] = line + "\n"
                source.insert(idx + 1, back_to_top)
                source.insert(idx + 2, "")

            title = line.strip()
            title_for_tc = create_title_for_tc(title)
            link_for_tc = create_link_for_tc(title_for_tc)
            new_line = get_tc_line(title, title_for_tc, link_for_tc, table_of_content, all_titles)

            # Warn as soon as an anchor is reused; it is checked before being
            # recorded, so the very first repetition is already reported.
            if link_for_tc in seen_links:
                print(
                    f'WARINING: the title "{title_for_tc}" has already used in titles.\n'
                    + "Navigation will work inccorect, the link will only point to "
                    + "the first encountered title"
                )
            seen_links.add(link_for_tc)

            table_of_content.append(new_line)
            all_titles.append(title)
            idx += 1

    table_of_content = ["\n", "#### Table of contents:\n\n"] + table_of_content + ["\n"]

    if table_of_content_cell is not None:
        table_of_content_cell = remove_old_tc(table_of_content_cell, table_of_content_cell_idx)
        table_of_content_cell["source"].extend(table_of_content)
    else:
        cells[0]["source"].extend(table_of_content)

    with open(notebook_path, "w", encoding="utf-8") as in_f:
        json.dump(notebook_json, in_f, ensure_ascii=False, indent=1)
# Command-line entry point: update one notebook, or every ``*.ipynb`` found
# recursively under a folder.
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-s",
        "--source",
        # Adjacent literals instead of a backslash continuation inside the
        # string, so the help text does not depend on source indentation.
        help="Please, specify notebook or folder with notebooks. "
        "Table of content will be added or modified in each.",
        required=True,
    )
    args = parser.parse_args()
    path_to_source = pathlib.Path(args.source)

    if not path_to_source.exists():
        print(f"Incorrect path to notebook(s) {path_to_source}")
        # Exit with a failure status; the ``exit()`` helper comes from the
        # ``site`` module and reported success (status 0) for a bad path.
        raise SystemExit(1)

    if path_to_source.is_file():
        generate_table_of_content(path_to_source)
    elif path_to_source.is_dir():
        for notebook in path_to_source.glob("**/*.ipynb"):
            generate_table_of_content(notebook)