#!/usr/bin/env python3
import requests
import datetime
import html
import xml.etree.ElementTree as ET
from pathlib import Path
FEED_URL = "https://www.theatlantic.com/feed/author/charlie-warzel/"
CATALOG_PATH = Path("catalog.html")
MAX_ITEMS = 50 # tweak if you want more or fewer
def fetch_feed(url: str) -> str:
resp = requests.get(url, timeout=15)
resp.raise_for_status()
return resp.text
def parse_items(feed_xml: str):
"""
Return a list of dicts: {title, link, date}
Date is YYYY-MM-DD (or "" if missing).
"""
root = ET.fromstring(feed_xml)
# RSS feeds usually: - …
channel = root.find("channel")
if channel is None:
return []
items = []
for item in channel.findall("item"):
title_el = item.find("title")
link_el = item.find("link")
pub_el = item.find("pubDate")
title = title_el.text.strip() if title_el is not None and title_el.text else ""
link = link_el.text.strip() if link_el is not None and link_el.text else ""
pub_raw = pub_el.text.strip() if pub_el is not None and pub_el.text else ""
# Try to parse pubDate -> YYYY-MM-DD
date_str = ""
if pub_raw:
try:
# Example format: "Wed, 20 Nov 2024 10:00:00 -0400"
dt = datetime.datetime.strptime(pub_raw, "%a, %d %b %Y %H:%M:%S %z")
date_str = dt.strftime("%Y-%m-%d")
except Exception:
# If parsing fails, just leave date_str empty
date_str = ""
if title and link:
items.append(
{
"title": title,
"link": link,
"date": date_str,
}
)
return items
def build_list_html(items):
"""
Build the HTML for the list of posts.
We'll produce something like:
From The Atlantic
"""
parts = []
parts.append(' From The Atlantic
')
parts.append(' ')
for item in items[:MAX_ITEMS]:
title = html.escape(item["title"])
link = html.escape(item["link"])
date = item["date"] or ""
if date:
date_html = f'— {date}'
else:
date_html = ""
li = f' - \n {title} {date_html}\n
'
parts.append(li)
parts.append("
")
return "\n".join(parts)
def update_catalog():
if not CATALOG_PATH.exists():
raise SystemExit(f"catalog.html not found at {CATALOG_PATH.resolve()}")
print("Fetching feed…")
xml_text = fetch_feed(FEED_URL)
items = parse_items(xml_text)
if not items:
raise SystemExit("No items found in feed; not touching catalog.html.")
list_html = build_list_html(items)
print("Reading catalog.html…")
original = CATALOG_PATH.read_text(encoding="utf-8")
# We’ll look for the block starting with and ,
# and replace it entirely.
marker_start = "From The Atlantic
"
marker_ul = ''
if marker_start in original:
# Replace from marker_start to end of the
that follows
before, _, rest = original.partition(marker_start)
_, ul_start, rest2 = rest.partition(marker_ul)
# rest2 now starts just after the opening
# we need to find the closing
that matches.
ul_close = "
"
ul_close_idx = rest2.find(ul_close)
if ul_close_idx == -1:
raise SystemExit("Could not find closing after From The Atlantic block.")
after = rest2[ul_close_idx + len(ul_close):]
new_content = before + list_html + after
else:
# If marker not found, try to find the first Catalog
and insert after it.
marker_catalog = "Catalog
"
if marker_catalog not in original:
raise SystemExit("Could not find Catalog
in catalog.html")
before, _, after = original.partition(marker_catalog)
# keep the catalog heading and meta text as-is, and inject list_html after that block's metadata
# e.g., we expect something like:
# Catalog
#
…
# so we place our list_html after that.
meta_marker = ""
if meta_marker in after:
meta_before, meta_close, meta_after = after.partition(meta_marker)
# include the closing then insert our list_html
new_after = meta_before + meta_close + "\n\n" + list_html + meta_after
new_content = before + marker_catalog + new_after
else:
# Fallback: just stick the list_html right after Catalog
new_content = before + marker_catalog + "\n" + list_html + after
print("Writing updated catalog.html…")
CATALOG_PATH.write_text(new_content, encoding="utf-8")
print("Done. Now commit and push the changes to GitHub.")
if __name__ == "__main__":
update_catalog()