Spaces:
Runtime error
Runtime error
File size: 5,617 Bytes
db5855f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 |
import json
import pathlib
import argparse
import re
# Regex for the markdown heading that opens a table of contents: one or more
# "#", whitespace, then "Table of content" with an optional ":". It is used
# with re.match (a prefix match), so "Table of contents:" matches as well.
TABLE_OF_CONTENT = r"#+\s+Table of content:?"
def find_tc_in_cell(cell):
    """Look for a table-of-contents heading inside a notebook cell.

    Args:
        cell: notebook cell mapping; its "source" entry is iterated line by line.

    Returns:
        ``(cell, line_number)`` for the first source line matching
        ``TABLE_OF_CONTENT``, or ``(None, None)`` when no line matches.
    """
    for line_number, text in enumerate(cell["source"]):
        if re.match(TABLE_OF_CONTENT, text):
            return cell, line_number
    return None, None
def create_title_for_tc(title):
    """Convert a markdown heading line into the text shown in the table of contents.

    Leading "#" characters and the whitespace after them are dropped, square
    brackets and newlines are removed, and every "(http...)" link target is
    stripped so that only the visible heading text remains.

    Args:
        title: heading line, e.g. "## [Docs](https://example.com)\n".

    Returns:
        The cleaned heading text.
    """
    title_for_tc = title.lstrip("#").lstrip()
    title_for_tc = re.sub(r"[\[\]\n]", "", title_for_tc)
    # Remove each URL group on its own. A greedy ".*" would swallow everything
    # between the first "(http" and the last ")" of the heading, losing text
    # that sits between or after links. One level of nested parentheses is
    # allowed so URLs such as ".../Foo_(bar)" are still removed completely.
    title_for_tc = re.sub(r"\(http(?:[^()]|\([^()]*\))*\)", "", title_for_tc)
    return title_for_tc
def create_link_for_tc(title):
    """Build the anchor fragment for a table-of-contents entry.

    Backticks, "$" and "^" are dropped and every space becomes "-".

    Args:
        title: cleaned heading text.

    Returns:
        The anchor text to place after "#" in the markdown link.
    """
    anchor_table = str.maketrans({"`": None, "$": None, "^": None, " ": "-"})
    return title.translate(anchor_table)
def remove_old_tc(cell, idx):
    """Strip a previously generated table of contents from a cell, in place.

    Starting at line ``idx`` of ``cell["source"]``, every line that is either
    the table-of-contents heading or a "- [text](#anchor)" list entry is
    removed. Lines before ``idx`` are never touched.

    Args:
        cell: notebook cell mapping holding the table of contents, or None.
        idx: index of the first source line to examine.

    Returns:
        The same ``cell`` object (or None when ``cell`` is None).
    """
    if cell is None:
        return cell

    def is_tc_line(line):
        # A generated list entry or the table-of-contents heading itself.
        return re.match(r"\s*-\s*\[.*\]\(#.*\).*", line) or re.match(TABLE_OF_CONTENT, line)

    source = cell["source"]
    # Filter the tail by position. list.remove() would delete the first equal
    # line in the whole source, which can be an identical line located before
    # idx, leaving the stale entry behind.
    source[idx:] = [line for line in source[idx:] if not is_tc_line(line)]
    return cell
def get_tc_line(title, title_for_tc, link, tc_list, titles_list):
    """Format one table-of-contents list entry with the proper indentation.

    The heading depth is taken from the position of the first space in
    ``title`` (the number of leading "#"). A depth of two maps to no indent
    and each extra level adds four spaces, subject to these corrections:

    * the first entry, or a heading shallower than depth two, is not indented;
    * a heading at the same depth as the previous heading reuses the previous
      entry's indent;
    * otherwise an entry is at most four spaces deeper than the previous one.

    Args:
        title: stripped heading line including its leading "#".
        title_for_tc: text displayed for the entry.
        link: anchor the entry points to (without "#").
        tc_list: entries generated so far.
        titles_list: stripped heading lines that produced ``tc_list``.

    Returns:
        The markdown list line, terminated by a newline.
    """
    # find() returns -1 when the heading has no space (e.g. "#Title"), so no
    # exception handling is needed here.
    hashes_end = title.find(" ")
    indents_num = (hashes_end - 2) * 4 if hashes_end >= 0 else -1

    if not tc_list or indents_num < 0:
        # An indented first item would break the markdown list alignment.
        indents_num = 0
    else:
        previous_indent = tc_list[-1].index("-")
        # Compare via find(): a previous heading without a space yields -1 and
        # simply counts as a different level instead of raising ValueError.
        same_level = bool(titles_list) and hashes_end == titles_list[-1].find(" ")
        if same_level:
            # Siblings share the indent of the previous entry, even when that
            # entry had been clamped because of a skipped heading level. This
            # is checked first so siblings do not drift deeper one by one.
            indents_num = previous_indent
        elif indents_num - previous_indent > 4:
            # Never nest more than one level below the previous entry.
            indents_num = previous_indent + 4

    indents = " " * indents_num + "-" + " "
    line = f"{indents}[{title_for_tc}](#{link})\n"
    return line
def is_ref_to_top_exists(cell, idx):
    """Tell whether a "back to top" link directly follows the line at ``idx``.

    Only blank lines may separate the line at ``idx`` from the link; the scan
    stops at the first non-blank line.

    Args:
        cell: sequence of source lines to scan.
        idx: index of the line after which the scan starts.

    Returns:
        True when the first non-blank following line contains the link,
        otherwise False.
    """
    for raw_row in cell[idx + 1 :]:
        text = raw_row.strip()
        if "[back to top ⬆️](#Table-of-content" in text:
            return True
        if text:
            # content of block started
            return False
    return False
def is_markdown(cell):
    """Return True when the cell's "cell_type" entry equals "markdown"."""
    cell_type = cell["cell_type"]
    return cell_type == "markdown"
def is_title(line):
    """Check whether a source line is a markdown heading with visible text.

    Returns:
        False when the stripped line does not start with "#"; otherwise the
        heading text left after removing the leading "#" and whitespace. That
        text is an empty (falsy) string for a line made of "#" only.
    """
    stripped = line.strip()
    if not stripped.startswith("#"):
        return False
    return stripped.lstrip("#").lstrip()
def generate_table_of_content(notebook_path: pathlib.Path):
    """Add or refresh the table of contents of a notebook file, in place.

    The notebook JSON is loaded, every heading found in the markdown cells
    after the first cell gets a "back to top" link (unless one already
    follows it) and a list entry in the table of contents. The table replaces
    a previously generated one in the cell holding the table-of-contents
    heading; when no such cell exists it is appended to the first cell. The
    file is then written back. A notebook without cells is left untouched.

    Args:
        notebook_path: path of the notebook file to rewrite.
    """
    with open(notebook_path, "r", encoding="utf-8") as notebook_file:
        notebook_json = json.load(notebook_file)

    if not notebook_json["cells"]:
        return

    table_of_content = []
    all_titles = []
    seen_links = set()

    table_of_content_cell, table_of_content_cell_idx = find_tc_in_cell(notebook_json["cells"][0])

    for cell in filter(is_markdown, notebook_json["cells"][1:]):
        if table_of_content_cell is None:
            table_of_content_cell, table_of_content_cell_idx = find_tc_in_cell(cell)
            if table_of_content_cell is not None:
                # The cell holding the table of contents is not scanned for titles.
                continue

        source = cell["source"]
        # Walk by position rather than looking titles up with list.index(),
        # which would always return the first of several identical headings.
        idx = 0
        while idx < len(source):
            title = source[idx]
            if is_title(title):
                if not is_ref_to_top_exists(source, idx):
                    if not title.endswith("\n"):
                        # Replace the line; inserting a second copy would
                        # duplicate the heading in the cell.
                        source[idx] = title + "\n"
                    source.insert(idx + 1, "[back to top ⬆️](#Table-of-contents:)\n")
                    source.insert(idx + 2, "")

                title = title.strip()
                title_for_tc = create_title_for_tc(title)
                link_for_tc = create_link_for_tc(title_for_tc)
                new_line = get_tc_line(title, title_for_tc, link_for_tc, table_of_content, all_titles)

                # Warn on the first repeated anchor, whatever its indentation.
                if link_for_tc in seen_links:
                    print(
                        f'WARINING: the title "{title_for_tc}" has already used in titles.\n'
                        + "Navigation will work inccorect, the link will only point to "
                        + "the first encountered title"
                    )
                seen_links.add(link_for_tc)

                table_of_content.append(new_line)
                all_titles.append(title)
            idx += 1

    table_of_content = ["\n", "#### Table of contents:\n\n"] + table_of_content + ["\n"]

    if table_of_content_cell is not None:
        table_of_content_cell = remove_old_tc(table_of_content_cell, table_of_content_cell_idx)

    if table_of_content_cell is not None:
        table_of_content_cell["source"].extend(table_of_content)
    else:
        notebook_json["cells"][0]["source"].extend(table_of_content)

    with open(notebook_path, "w", encoding="utf-8") as in_f:
        json.dump(notebook_json, in_f, ensure_ascii=False, indent=1)
# Command-line entry point: regenerate the table of contents of one notebook,
# or of every "*.ipynb" file found recursively under a folder.
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-s",
        "--source",
        help="Please, specify notebook or folder with notebooks.\
                Table of content will be added or modified in each.",
        required=True,
    )
    args = parser.parse_args()
    path_to_source = pathlib.Path(args.source)
    if not path_to_source.exists():
        print(f"Incorrect path to notebook(s) {path_to_source}")
        # Exit with a non-zero status so callers (e.g. CI) notice the failure;
        # the bare exit() helper comes from the site module and reports success.
        raise SystemExit(1)
    if path_to_source.is_file():
        generate_table_of_content(path_to_source)
    elif path_to_source.is_dir():
        for notebook in path_to_source.glob("**/*.ipynb"):
            generate_table_of_content(notebook)
|