2025-07-09
This is Blarg, version 3 (probably). It used to be an extremely complicated contraption involving auto-generated Makefiles. But I think even if I write several entries a day for the rest of my life, I’ll never have more than 100,000 files to deal with, and it just doesn’t seem worth the complexity. So now, I think a better way to Blarg is to just have a Jupyter notebook that builds the whole thing in a forward pass. Focus on writing, not perfect efficiency!
Why not Quarto? Well, because I want to learn. I want the output to be as simple, minimal, and understandable as possible, and usually the best way to do that is by removing tools from the stack instead of adding more.
Quarto’s popularity for technical publishing looks like it’s really a growing popularity of Pandoc — or more fairly, Quarto is making Pandoc easy enough to use that more people are using it. Why not just use Pandoc directly?
Here is some stuff we’ll need:
import json
import os
from contextlib import chdir
from mimetypes import add_type, guess_type
from pathlib import Path
from shutil import copyfile
from subprocess import PIPE, run
from typing import Iterator
from urllib.parse import quote
from urllib.request import urlopen
import bibtexparser
import pandas as pd
import toolz.curried as tz
from pandas import DataFrame, Series, Timestamp
from slugify import slugify
from tqdm.auto import tqdm
Root URL of the site. In general, the build process attempts to use relative URIs everywhere. This is currently only used for the feed generation.
= "https://danielgrady.net" SITEURL
Support files for Pandoc: filters and templates.
PANDOCDATA = str(Path.cwd() / "pandoc-data")
PANDOCDATA
The path to the hierarchy of source files.
ROOT = Path.cwd().parent.parent
ROOT
BibLaTeX bibliography.
= ROOT / "ref/references.bib" REF
Directory to write out the built site.
= ROOT / ".build"
OUT =True)
OUT.mkdir(exist_ok OUT
Directory for cache files.
= ROOT / ".cache"
CACHE =True)
CACHE.mkdir(exist_ok CACHE
Path to use for automatically generated BibLaTeX entries.
= CACHE / "references-auto.bib" REFAUTO
Blarg ignores files and directories with leading dots, and it also ignores the following top-level directories:
IGNOREDIRS = [str(ROOT / p) for p in ["ref", "template"]]
The first step is to get a comprehensive index of all the source files. Create a table of every file, and then split the table into separate indexes for documents and other files, called “assets.”
Identify documents using IANA media types. This puts all the logic around file extensions and such into one place.
"text/markdown", ".md")
add_type("text/markdown", ".mdown")
add_type("text/markdown", ".markdown")
add_type("text/x-org", ".org")
add_type("application/ipynb+json", ".ipynb") add_type(
This dictionary maps the IANA mediatypes that Blarg considers to be "documents" to the Pandoc reader format string to use for parsing the document.
DOCUMENT_MEDIATYPES = {
    "text/markdown": "markdown+wikilinks_title_after_pipe",
    "application/ipynb+json": "ipynb+wikilinks_title_after_pipe",
    "text/x-org": "org",
}
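As a quick illustration (not part of the build), these registrations are what let the indexing step below route a file to a Pandoc reader string; the path here is hypothetical:

# Hypothetical path, just to show the extension -> mediatype -> Pandoc reader chain.
mediatype, _ = guess_type("notes/Egg Recipe/With Spam.md")
assert mediatype == "text/markdown"
DOCUMENT_MEDIATYPES[mediatype]  # 'markdown+wikilinks_title_after_pipe'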
“Indexing” an individual file just means looking up file-level metadata from the filesystem, and guessing what media type the file is using the standard library’s tool.
def load_file_metadata(p: Path) -> dict:
    """
    General metadata for a file

    Fields are named as in `stat`:

    - st_birthtime: when the file was created
    - st_atime: last access time
    - st_mtime: file contents modified
    - st_ctime: on macOS, file metadata modified
    """
    stat = p.stat()
    mediatype, compression = guess_type(p)
    result = {
        "path": p,
        "mediatype": mediatype,
        "compression": compression,
        "size": stat.st_size,
        "st_birthtime": stat.st_birthtime,
        "st_atime": stat.st_atime,
        "st_mtime": stat.st_mtime,
        "st_ctime": stat.st_ctime,
    }
    return result
Index all files under the root, ignoring directories and files with leading dots.
def files_under(root: Path) -> Iterator[Path]:
    """
    Yield paths to files in the hierarchy at `root`

    Yield only files, not directories

    Ignore files and directories with a leading dot
    """
    # This relies on a weird but documented and recommended behavior - modify the list of subdirs
    # inside the loop to tell `os.walk` to avoid certain subdirectories.
    for directory, subdirs, files in os.walk(root):
        if directory in IGNOREDIRS:
            # Skip this directory and don't descend into anything nested beneath it
            subdirs[:] = []
            continue
        hidden_subdirs = [p for p in subdirs if p.startswith(".")]
        for p in hidden_subdirs:
            subdirs.remove(p)
        hidden_files = [p for p in files if p.startswith(".")]
        for p in hidden_files:
            files.remove(p)
        dp = Path(directory)
        for file in files:
            yield dp.joinpath(file)
def index_tree(root: Path) -> DataFrame:
    """
    Create an index of files under ``root``

    Get filesystem metadata for each file, as well as inferred mimetypes and compression
    """
    idx = list()
    for p in files_under(root):
        idx.append(load_file_metadata(p))
    idx = DataFrame(idx)
    idx.insert(0, "relpath", idx["path"].apply(lambda p: p.relative_to(root)))
    return idx


idx = index_tree(ROOT)
Ignore certain kinds of files.
= idx["path"].apply(lambda p: p.suffix in (".canvas", ".pxm"))
mask = idx[~mask].set_index("path", drop=False).sort_index().copy() idx
5) idx.head(
Files are either assets or documents.

- Assets will just be copied to the site directory, with some slight modification to their parent path.
- A document has additional, arbitrary metadata from the file's front matter, and Blarg will additionally infer or adjust some metadata.
- Documents can contain hyperlinks (pointing internally or externally), wiki links (pointing internally, resolved by fuzzy search), and citations.
- Citations are identified with cite keys. A cite key is a URI, and might be listed in the bibliography.
= idx["mediatype"].isin(DOCUMENT_MEDIATYPES) is_doc
Yes. This is a good name.
assidx = idx[~is_doc].copy()
docidx = idx[is_doc].copy()
Documents have all the same indexing information as assets, and get other stuff in addition.
def load_document_metadata(p: Path, mediatype: str) -> dict:
    """
    Get metadata for a document

    This loads the information the document records about itself. The filesystem has other things to
    say about the file containing the document, not handled here.

    This function uses Pandoc to extract YAML front matter, and also a mapping that includes all
    cite keys, URL link targets, and eventually other things.

    The trick to making this work is using a Pandoc template that contains nothing except the
    `meta-json` template variable.
    """
    # fmt: off
    args = [
        "pandoc",
        "--from", DOCUMENT_MEDIATYPES[mediatype],
        "--to", "commonmark", "--standalone",
        "--data-dir", PANDOCDATA,
        "--template", "metadata.pandoctemplate",
        "--lua-filter", "analyze-document.lua",
        str(p),
    ]
    # fmt: on
    proc = run(args, check=True, stdout=PIPE)
    frontmatter = json.loads(proc.stdout)
    docmap = frontmatter["docmap"]
    del frontmatter["docmap"]
    for _, stuff in docmap.items():
        stuff["order"] = int(stuff["order"])
        stuff["level"] = int(stuff["level"])
    result = {"fm": frontmatter, "docmap": docmap}
    return result
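For reference, the metadata-only template trick mentioned in the docstring needs almost nothing in it. The exact contents of pandoc-data/templates/metadata.pandoctemplate aren't reproduced here; the minimal form that would make this work is a template containing only Pandoc's meta-json variable:

$meta-json$

With --standalone, Pandoc renders that template, so the only output is the document's metadata (including whatever analyze-document.lua added, such as the docmap) as a single JSON object.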
docidx.iloc[0]

entry = docidx.iloc[0]
load_document_metadata(entry["path"], entry["mediatype"])
tmp = {p: None for p in docidx["path"]}
for _, entry in tqdm(docidx.iterrows(), total=len(docidx)):
    p = entry["path"]
    mt = entry["mediatype"]
    tmp[p] = load_document_metadata(p, mt)
docmeta = Series(tmp)

docidx["frontmatter"] = docmeta.apply(lambda d: d["fm"])
docidx["docmap"] = docmeta.apply(lambda d: d["docmap"])
At this point, docidx includes filesystem metadata, each document's front matter (if any), and a document map.

docidx.head(5)
Next, calculate several pieces of derived metadata:

published
: required; the initial publication date of the document. If it's not declared in the document metadata, use the filesystem creation time.

updated
: optional; the last date when the document was significantly revised; only assigned if it's present in the document's front matter.

shorttitle
: required; always the file name stem. Used for breadcrumb display.

title
: required. Use the value declared in the front matter if present, otherwise it's the same as shorttitle.

(NB The Atom specification works the other way around with respect to timestamps: updated is required, published is optional.)

Note that, in an earlier iteration of this notebook, log entries and notes were more clearly distinguished. Now they are (should be) exactly the same, just different places to put things. The dates for log entries come from the front matter and filesystem, not from the path to the entry.

There may be other fields present in the document front matter that will be rendered in the final output based on the template, for example subtitle.
pd.set_option('future.no_silent_downcasting', True)
# Convert, or assume, all timestamps to Pacific time
fmdates = docidx["frontmatter"].apply(
    lambda d: Timestamp(d["published"], tz="US/Pacific") if "published" in d else None
)

# This is a very annoying feature of Pandas. In `Timestamp.fromtimestamp(x)`, x is always an
# absolute POSIX timestamp. Calling the function like that returns a timezone-*naive* Timestamp, but
# where `x` has been converted to display in the running system's *local* time. Calling
# `Timestamp.fromtimestamp(x, tz=TZ)` returns a timezone-*aware* Timestamp, with x converted to that
# timezone.
fsdates = docidx["st_birthtime"].apply(
    lambda x: Timestamp.fromtimestamp(x, tz="US/Pacific")
)

tmp = fmdates.combine_first(fsdates)
tmp = pd.to_datetime(tmp)

docidx["published"] = tmp
= docidx["path"].apply(lambda p: p.stem)
shorttitles = docidx["frontmatter"].apply(lambda d: d.get("title"))
fmtitles = fmtitles.combine_first(shorttitles)
titles "title"] = titles
docidx["shorttitle"] = shorttitles docidx[
Generate a "site path" for every asset and document. The site path is the absolute path to the resource, as accessed via HTTP. The actual output file will be at site path + "index.html".
For assets, the site path and the output file path are the same.
For documents, the normal case is:
Egg Recipe/With Spam.md -> egg-recipe/with-spam (output file: egg-recipe/with-spam/index.html)

There are two special cases for documents:

- Egg Recipe/index.* -> egg-recipe (output file: egg-recipe/index.html)
- Egg Recipe/Egg Recipe.* -> egg-recipe (output file: egg-recipe/index.html)

This accommodates the "directory-based notes should repeat the directory name" convention, as well as the older convention of using index files.

Relative path components are processed with slugify to get clean URL slugs.
Document metadata may override the generated slug, which will replace the final component of the site path.
For regular (non-document) files, all path components except the filename are slugified. I think this will handle the common case of support files that are stored as siblings of the document.
def relpath2sitepath(p: Path, is_document=True):
    if is_document:
        p = p.with_suffix("")
        parts = p.parts
        if (parts[-1] == "index") or (len(parts) > 1 and parts[-2] == parts[-1]):
            parts = parts[:-1]
        parts = tuple(slugify(pt) for pt in parts)
    else:
        parts = p.parts
        parts = tuple(slugify(pt) for pt in parts[:-1]) + (parts[-1],)
    sitepath = Path().joinpath(*parts)
    return sitepath
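A quick sanity check of the cases described above, using the hypothetical "Egg Recipe" paths (purely illustrative, not part of the build):

assert relpath2sitepath(Path("Egg Recipe/With Spam.md")) == Path("egg-recipe/with-spam")
assert relpath2sitepath(Path("Egg Recipe/index.md")) == Path("egg-recipe")
assert relpath2sitepath(Path("Egg Recipe/Egg Recipe.md")) == Path("egg-recipe")
# Non-document assets keep their filename; only the parent directories are slugified
assert relpath2sitepath(Path("Egg Recipe/photo 1.jpeg"), is_document=False) == Path("egg-recipe/photo 1.jpeg")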
"sitepath"] = docidx["relpath"].apply(relpath2sitepath)
docidx["outpath"] = docidx["sitepath"].apply(lambda p: p.joinpath("index.html"))
docidx[
"sitepath"] = assidx["relpath"].apply(lambda p: relpath2sitepath(p, is_document=False))
assidx["outpath"] = assidx["sitepath"].copy() assidx[
5) docidx.head(
5) assidx.head(
I use iA Writer and Obsidian to author Blarg. Both of those programs have great support for very similar syntax for wiki-style links, and the published version of Blarg should support wiki-link resolution that’s similar enough to those programs’ rules. Pandoc already provides wiki-link parsing, but figuring out the link targets and rewriting them is something Blarg needs to handle.
Obsidian and iA both use wiki-link resolution relative to the location of the containing file, for example [[Spam]]
might point to different files depending on where the wiki-link appears. As a first pass, I’m going to ignore this — any text in the wiki-link target gets a single constant rewrite.
The wiki-link targets might be generated by Obsidian, which allows # within the target to refer to subsections. For now, remove those.
# fmt: off
wikilink_targets = tz.pipe(
    docidx["docmap"],                   # Start with a list of all document maps; each maps header ID -> metadata
    tz.map(lambda dm: dm.values()),     # Extract just the metadata
    tz.concat,                          # Flatten the list of lists
    tz.map(tz.get("wikilinks")),        # Extract the wikilinks used under every heading
    tz.filter(None),                    # Remove empty sets
    tz.concat,                          # Flatten again
    tz.map(lambda s: s.split("#")[0]),  # Remove Obsidian-style heading references
    set,                                # Deduplicate
    list,
    Series,
)
# fmt: on
TODO: Should extend this to allow for wikilinks with absolute paths, for example:
wikilink_targets[lambda df: df.str.startswith("log/")]
TODO: And handle the case of references to static assets:
wikilink_targets[lambda df: df.str.contains("pdf")]
Create a mapping that goes from all the possible targets of wiki-style links to the corresponding sitepath
in the output. The possible wiki-style link targets are the filename stems of all documents.
tmp_mapping = (
    docidx.join(docidx["relpath"].apply(lambda p: p.stem).rename("wikilink_target"))
    .drop_duplicates(subset=["wikilink_target", "sitepath"])
    .set_index("wikilink_target")["sitepath"]
    .sort_index()
)
tmp_mapping
I'm assuming that I've uniquely named all files.
assert tmp_mapping.index.is_unique
wikilink_map = wikilink_targets.map(tmp_mapping)
wikilink_map.index = wikilink_targets.values
wikilink_map = wikilink_map.dropna().sort_index().apply(str)
with open(CACHE / "wikilink-map.json", "w", encoding="UTF-8") as f:
    json.dump(wikilink_map.to_dict(), f)
wikilink_map
Every mentioned cite key or URL needs associated metadata.
First, find all the cite keys with manually prepared entries.
library = bibtexparser.parse_file(REF)
known_citekeys = [e.key for e in library.entries]
for e in library.entries:
    if "ids" in e:
        known_citekeys.append(e.get("ids").value)
known_citekeys = frozenset(known_citekeys)
len(known_citekeys)
Second, create a table that maps every mention of a cite key or URL to the sitepath + fragment where it's mentioned.
def docmap2mentions(d):
    result = []
    for fragment, data in d.items():
        for citekey in data["cites"]:
            result.append((fragment, citekey, "cite"))
        for link in data["links"]:
            result.append((fragment, link, "link"))
    return result
refmap = tz.pipe(
    docidx.iterrows(),
    tz.map(tz.get(1)),
    tz.map(lambda row: [(row["sitepath"],) + t for t in docmap2mentions(row["docmap"])]),
    tz.concat,
    list,
    lambda lst: pd.DataFrame(lst, columns=["sitepath", "fragment", "uri", "type"]),
)

refmap.sample(5, random_state=42)
Find all the mentioned cite keys that don't have a manually written entry.
mentioned_citekeys = frozenset(refmap[lambda df: df["type"].eq("cite")]["uri"])
len(mentioned_citekeys)

missing_keys = mentioned_citekeys - known_citekeys
missing_keys
Get bibliographic info for every missing cite key using Wikipedia's instance of Citoid, or the arXiv API directly. (Citoid does not seem to support arXiv article IDs.)
= "https://en.wikipedia.org/api/rest_v1/data/citation/bibtex/{query}"
CITOID = "https://arxiv.org/bibtex/{query}"
ARXIV
def get_bibentry(query: str):
= ARXIV if query.startswith("arxiv:") else CITOID
url = url.format(query=quote(query, safe=""))
url try:
with urlopen(url) as f:
= f.read()
data = data.decode("UTF-8")
result = result.strip()
result except Exception:
= None
result return result
tmp = {k: get_bibentry(k) for k in tqdm(missing_keys)}
tmp
tmp2 = []
for k, v in tmp.items():
    if not v:
        # Skip keys where the lookup failed
        continue
    library = bibtexparser.parse_string(v)
    for e in library.entries:
        e.key = k
        if k.startswith("arxiv:"):
            # arXiv-only publications are just puffed-up blog posts; don't dignify them.
            e.entry_type = "online"
        tmp2.append(e)

newlib = bibtexparser.Library(tmp2)
bibtexparser.write_file(str(REFAUTO), newlib)
With the bibliography sorted out, build a master calendar for the log: a grid of every date from 2010 through 2025, where dates that have a published entry become links.

data = pd.date_range("2010-01-01", "2025-12-31")
data = pd.DataFrame({"date": data}, index=data).assign(
    year=data.year,
    month=data.month,
    day=data.day,
)
data = data.join(data["date"].dt.isocalendar().rename(columns=lambda s: f"week_{s}"))

# Dates near a year boundary can fall in an ISO week that belongs to the neighboring year;
# clamp those to pseudo-weeks 0 and 54 so every date sorts within its own calendar year.
data["Week"] = data["week_week"].copy()
data.loc[data["year"] > data["week_year"], "Week"] = 0
data.loc[data["year"] < data["week_year"], "Week"] = 54

data
= {1: "M", 2: "T", 3: "W", 4: "R", 5: "F", 6: "S", 7: "U"} WEEKDAYS
= frozenset(docidx["published"].dt.date)
KNOWN_DATES
def format_date(dt):
if dt.date() in KNOWN_DATES:
return f'<a href="/log/{dt.year}/{dt.date()}">{dt.day}</a>'
else:
return str(dt.day)
disp = (
    data.set_index(["year", "week_day", "Week"])["date"]
    .unstack()
    .sort_index(ascending=[False, True])
)

classes = pd.DataFrame(data="", index=disp.index.copy(), columns=disp.columns.copy())

# Anywhere the month to the left is not the same as the current month, add a class
m = disp.map(lambda dt: dt.month).ffill(axis=1).bfill(axis=1)
mask = m != m.shift(1, axis=1)
mask.loc[:, 0] = False  # Ignore the first column
classes[mask] = classes[mask] + "month-change-left "

# Anywhere the month above is not the same as the current month, except for Mondays, add a class
mask = m != m.shift(1)
mask.loc[(slice(None), 1), :] = False
classes[mask] = classes[mask] + "month-change-above "

sty = disp.style
sty.index.names = ["", ""]
sty.columns.name = ""
sty.format(format_date, na_rep="")
sty.format_index(lambda x: WEEKDAYS[x], axis=0, level=1)
sty.set_td_classes(classes)
sty.set_table_attributes('class="masterlog"')
None
def write_calendar(root: Path, calhtml: str):
    outpath = root / "log" / "index.html"
    outpath.parent.mkdir(exist_ok=True, parents=True)
    # fmt: off
    cmd = [
        "pandoc",
        "--from", "html", "--to", "html5", "--standalone", "--wrap", "none",
        "--data-dir", PANDOCDATA,
        "--mathjax",
        "--metadata", "title=Log",
        "--metadata", "date=" + Timestamp.now().date().isoformat(),
        "--output", str(outpath),
        "-",
    ]
    # fmt: on
    proc = run(cmd, input=calhtml.encode("UTF-8"), check=True)
    return proc
write_calendar(OUT, sty.to_html())
“Incremental updates:” Most of the content managed by Blarg is one-to-one — one source file goes to one site path. In the barest and cheapest of nods to efficiency, Blarg checks for existence of the target output file and compares modification times; if the output exists and is newer than the input, then skip. I think this does actually save real-world time because rendering an entry involves a Pandoc subprocess, which is more time consuming than a stat call.
(NB In an earlier version the check used the output file's st_birthtime, but birth time is not updated if a file is overwritten in place, leading to a situation where the check should have skipped a file but did not.)
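A small sketch of that pitfall, using a hypothetical scratch file (this assumes a filesystem that reports st_birthtime, as macOS does):

import time

demo = Path("/tmp/blarg-birthtime-demo.txt")  # hypothetical scratch file
demo.write_text("first version")
before = demo.stat()
time.sleep(1)
demo.write_text("second version")  # overwrite in place
after = demo.stat()
assert after.st_mtime > before.st_mtime  # the modification time moves forward...
assert after.st_birthtime == before.st_birthtime  # ...but the birth time does not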
# Copy the site-wide CSS from the Pandoc templates directory to the site root. It lives in the
# Pandoc templates directory to prevent Pandoc from using its default CSS when generating HTML.
/ "templates" / "styles.css", OUT / "styles.css")
copyfile(Path(PANDOCDATA)
for _, entry in assidx.iterrows():
    outpath: Path = OUT / entry["outpath"]
    if outpath.exists() and entry["st_mtime"] <= outpath.stat().st_mtime:
        continue
    else:
        outpath.parent.mkdir(exist_ok=True, parents=True)
        copyfile(entry["path"], outpath)
def write_document_under(root: Path, doc: dict):
    outpath = root / doc["outpath"]
    outpath.parent.mkdir(exist_ok=True, parents=True)
    # fmt: off
    cmd = [
        "pandoc",
        "--from", DOCUMENT_MEDIATYPES[doc["mediatype"]],
        "--to", "html5", "--standalone", "--wrap", "none",
        "--data-dir", PANDOCDATA, "--mathjax",
        "--citeproc", "--bibliography", str(REFAUTO), "--bibliography", str(REF),
        "--csl", "chicago-fullnote-bibliography-short-title-subsequent.csl",
        "--filter", "blargify.py",
        "--lua-filter", "diagram.lua",
        "--extract-media=.",
        "--metadata", f"title={doc['title']}",
        "--metadata", f"date={str(doc['published'].date())}",
        # "--metadata", f"editlink={doc['editlink']}",
        "--output", str(outpath.name),
        str(doc["path"]),  # Use the document's own source path, not a leftover loop variable
    ]
    # fmt: on
    with chdir(outpath.parent):
        proc = run(cmd, check=True)
    return proc
for _, entry in tqdm(docidx.iterrows(), total=len(docidx)):
    outpath: Path = OUT / entry["outpath"]
    if outpath.exists() and (entry["st_mtime"] < outpath.stat().st_mtime):
        continue
    else:
        write_document_under(OUT, entry)
= f"""\
FEED_HEADER <?xml version="1.0" encoding="UTF-8"?>
<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en">
<id>https://danielgrady.net</id>
<title>Daniel Grady’s web log</title>
<subtitle>∇⋅∇𝒴</subtitle>
<author>
<name>Daniel Grady</name>
<uri>https://danielgrady.net</uri>
</author>
<link href="https://danielgrady.net/atom.xml" rel="self"/>
<link href="https://danielgrady.net" rel="alternate"/>
<logo>https://danielgrady.net/favicon.ico</logo>
<updated>{Timestamp.now(tz='US/Pacific').isoformat(timespec='seconds')}</updated>
"""
TODO Add the actual content of the entries to the feed.
= """
ENTRY_TEMPLATE <entry>
<id>{uri}</id>
<title>{title}</title>
<link rel="alternate" href="{uri}"/>
<published>{published}</published>
<updated>{updated}</updated>
</entry>
"""
feeditems = docidx[lambda df: ~df["sitepath"].eq(Path("."))]
feeditems = feeditems.sort_values("published", ascending=False)

feed = FEED_HEADER

for _, entry in feeditems.iterrows():
    tmp = ENTRY_TEMPLATE.format(
        uri=f"{SITEURL}/{entry['sitepath']}",
        title=entry["title"],
        published=entry["published"].isoformat(timespec="seconds"),
        updated=entry["published"].isoformat(timespec="seconds"),
    )
    feed += tmp

feed += "</feed>"
with open(OUT / "atom.xml", "w", encoding="UTF-8") as f:
f.write(feed)
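Optionally, sanity-check the generated feed. This is not part of the build, and it assumes the third-party feedparser package is installed:

import feedparser

parsed = feedparser.parse(str(OUT / "atom.xml"))
assert not parsed.bozo, parsed.get("bozo_exception")
len(parsed.entries)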