me / blog.py
nbroad's picture
nbroad HF staff
add utteranc.es
281ac5e verified
from app import *
import yaml
import functools
from pathlib import Path
import re
# Directory that contains this module; the "blogs" folder is resolved relative to it.
file_path = Path(__file__).parent
# Maximum number of posts listed on the "Recent Blogs" page.
NUM_RECENT_BLOGS = 20
def full_page():
    """Build the blog index page showing the most recent posts.

    Takes the first NUM_RECENT_BLOGS ids from ``sorted_blogs``, renders a
    preview for each, and wraps them in a single "Recent Blogs" section.
    """
    recent_ids = sorted_blogs[:NUM_RECENT_BLOGS]
    previews = Div(*map(blog_preview, recent_ids))
    sections = Sections(["Recent Blogs"], [previews])
    return BstPage(2, '', *sections)
def blog_preview(blog_id):
    """Render the index-page teaser for one post.

    Args:
        blog_id: Key into ``all_blogs`` identifying the post.

    Returns:
        A Div holding the linked title, the publication date and the
        description followed by an ellipsis.
    """
    metadata = all_blogs[blog_id][0]
    title_link = A(H3(metadata["title"]), href=f"/blog/{blog_id}")
    published = P(metadata.get("date_published", ""))
    summary = P(metadata.get("desc", "") + "...")
    return Div(title_link, published, summary)
@functools.lru_cache()
def get_blogs():
    """Load every published blog post found under the ``blogs`` directory.

    Each ``*.md`` file is expected to contain YAML front matter delimited by
    two ``---`` markers, followed by the markdown body. Files are skipped
    (rather than crashing the import) when:

    * they do not contain two ``---`` delimiters,
    * the front matter does not parse to a mapping,
    * ``date_published`` is missing or empty.

    Invalid YAML still raises, so a broken post is noticed immediately.

    Returns:
        tuple: ``(blog_dict, sorted_blogs)`` where ``blog_dict`` maps the file
        stem to ``(metadata, markdown)`` and ``sorted_blogs`` lists the stems
        ordered by ``date_published``, descending.
    """
    blog_dict = {}
    for blog_path in (file_path / "blogs").rglob("*.md"):
        # Read as UTF-8 explicitly so the result does not depend on the
        # platform's default encoding.
        text = blog_path.read_text(encoding="utf-8")

        # parts == [before, front matter, body] when both delimiters exist.
        parts = text.split("---", 2)
        if len(parts) < 3:
            continue
        raw_metadata, markdown = parts[1:]

        metadata = yaml.safe_load(raw_metadata)
        if not isinstance(metadata, dict):
            # Empty front matter parses to None; scalars/lists are unusable too.
            continue

        blog_id = blog_path.stem
        metadata["id"] = blog_id
        blog_dict[blog_id] = (metadata, markdown)

    # Only posts with a publication date are considered published. A bare
    # "date_published:" parses to None and must be excluded before sorting.
    blog_dict = {
        blog_id: entry
        for blog_id, entry in blog_dict.items()
        if entry[0].get("date_published")
    }
    sorted_blogs = sorted(
        blog_dict,
        key=lambda blog_id: blog_dict[blog_id][0]["date_published"],
        reverse=True,
    )
    return blog_dict, sorted_blogs
# Load the posts once at import time: all_blogs maps blog id -> (metadata, markdown),
# sorted_blogs lists the ids ordered by date_published, descending.
all_blogs, sorted_blogs = get_blogs()
def _toggles_code_fence(line):
    """Return True when *line* opens or closes a fenced code block."""
    stripped = line.lstrip()
    for mark in ("`", "~"):
        if stripped.startswith(mark * 3):
            # "```python" and "```" are fences; "```inline```" is inline code
            # that merely happens to start the line.
            return mark not in stripped.lstrip(mark)
    return False


def parse_markdown_sections(markdown_text):
    """
    Extracts main headers (h1) and their associated content from markdown text.
    Preserves all subsection headers (##, ###, etc.) in the content.

    The text is processed paragraph by paragraph (blocks separated by a blank
    line). A paragraph is an h1 header when it consists of a single ``# Title``
    line and is not inside a fenced code block, so ``# comment`` lines in code
    samples stay part of the content.

    Text that appears before the first header is kept and becomes the start of
    the first section's content. If the text contains no h1 header at all,
    both returned lists are empty.

    Args:
        markdown_text (str): The markdown text to parse

    Returns:
        tuple: (headers, contents) where headers is a list of h1 headers
               and contents is a list of the text content under each header.
               Both lists always have the same length; a header without any
               content gets an empty string.
    """
    h1_pattern = re.compile(r'^#\s+(.+)$')
    headers = []
    contents = []
    current_content = []
    in_code_fence = False

    for chunk in markdown_text.strip().split('\n\n'):
        stripped = chunk.strip()
        h1_match = None if in_code_fence else h1_pattern.match(stripped)
        if h1_match:
            # Close the previous section even when it is empty, so headers
            # and contents stay index-aligned.
            if headers:
                contents.append('\n\n'.join(current_content).strip())
                current_content = []
            headers.append(h1_match.group(1))
        elif stripped:
            # Everything else (including subsection headers) is content.
            current_content.append(chunk)

        # Track fenced code blocks across paragraph boundaries.
        for line in chunk.split('\n'):
            if _toggles_code_fence(line):
                in_code_fence = not in_code_fence

    # Close the last section.
    if headers:
        contents.append('\n\n'.join(current_content).strip())
    return headers, contents
def FullWidthImage(src, alt=None, sz:SizeT=SizeT.Sm, caption=None, capcls='', pad=2, left=True, cls='', retina=True, **kw):
    """Build a captioned figure with a centred caption.

    Args:
        src: Image URL.
        alt: Alternative text for the image.
        sz: Size token interpolated into the caption and margin class names.
        caption: Caption content placed in the Figcaption.
        capcls: Extra CSS classes for the caption.
        pad: Spacing step; used for the vertical margin, and ``pad + 1`` for
            the horizontal margin.
        left: Accepted but not used by this function
            (NOTE(review): presumably kept so the signature mirrors ``Image`` - confirm).
        cls: Extra CSS classes for the ``img`` element.
        retina: When true, sets ``srcset`` to ``"<src> 2x"``, replacing any
            ``srcset`` passed through ``kw``.
        **kw: Additional attributes forwarded to ``Img``.

    Returns:
        A Figure containing the image and its caption.
    """
    if retina:
        kw['srcset'] = f'{src} 2x'
    image = Img(src=src, alt=alt, cls=f'figure-img img-fluid {cls}', **kw)
    figcaption = Figcaption(caption, cls=f'caption-{sz} {capcls} text-center')
    return Figure(image, figcaption, cls=f'd-sm-table mx-{sz}-{pad+1} my-{sz}-{pad}')
def _build_figure(spec):
    """Create the figure element described by one entry of a post's ``figs`` mapping."""
    full_width = spec.get("full_width", False)
    image_class = FullWidthImage if full_width else Image
    # Regular images are centred via CSS classes; full-width ones via a flex wrapper.
    cls_bonus = "" if full_width else "mx-auto d-block"
    fig = image_class(
        src=spec["src"],
        alt=spec.get("alt", ""),
        caption=spec.get("caption", ""),
        left=spec.get("left", False),
        sz=spec.get("sz", "sm"),
        cls=spec.get("cls", "") + " " + cls_bonus,
        pad=2,
    )
    if full_width:
        fig = Div(fig, cls="d-flex justify-content-center my-4")
    return fig


def split_blog(text, figs=None):
    """Split a post into h1 sections and render each section.

    Placeholders of the form ``<|fig_name|>`` inside a section are replaced by
    the figure configured under ``figs[fig_name]``; the text around them is
    rendered as separate Markdown fragments. A placeholder whose name is not
    in ``figs`` is left in the text as-is instead of raising ``KeyError``.

    Args:
        text (str): Full markdown of the post, including h1 headers.
        figs: Mapping of figure name -> figure settings (``src`` required;
            ``alt``, ``caption``, ``left``, ``sz``, ``cls``, ``full_width``
            optional), or None when the post has no figures.

    Returns:
        tuple: (headers, sections) where ``sections[i]`` is the rendered
        content belonging to ``headers[i]``.
    """
    headers, contents = parse_markdown_sections(text)
    if figs is None:
        return headers, [Markdown(c) for c in contents]

    # Matches figure placeholders such as <|img|>.
    placeholder = re.compile(r"<\|([a-z_]+)\|>")
    sections = []
    for content in contents:
        parts = []
        cursor = 0  # end of the last placeholder that was replaced
        for match in placeholder.finditer(content):
            fig_name = match.group(1)
            if fig_name not in figs:
                # Unknown figure: keep the placeholder as literal text.
                continue
            parts.append(Markdown(content[cursor:match.start()]))
            parts.append(_build_figure(figs[fig_name]))
            cursor = match.end()

        if parts:
            sections.append(Div(*parts, Markdown(content[cursor:])))
        else:
            sections.append(Markdown(content))
    return headers, sections
# Script tag that loads the utteranc.es client; appended to every single-post page.
# The attributes go through a mapping because "async" is a Python keyword and
# cannot be written as a plain keyword argument.
utterances = Script(
    **{
        "src": "https://utteranc.es/client.js",
        "repo": "nbroad1881/site",
        "issue_term": "pathname",
        "theme": "github-light",
        "crossorigin": "anonymous",
        "async": True,
    }
)
def single_blog(blog_id):
    """
    Return a single blog post.

    The post title is prepended as an h1 header so it becomes the first
    section, the body is split into sections (with figures substituted when
    the metadata defines ``figs``), and the comments script is appended.

    Args:
        blog_id: Key into ``all_blogs``; an unknown id raises ``KeyError``.
    """
    metadata, md_text = all_blogs[blog_id]
    # A blank line after the title is required: sections are detected per
    # paragraph, so a title glued to the first body line would not be
    # recognised as a header.
    page_markdown = f"# {metadata['title']}\n\n{md_text}"
    headers, sections = split_blog(page_markdown, metadata.get("figs"))
    secs = Sections(headers, sections)
    return BstPage(2, "", *secs, utterances)