← AI開発 資料アーカイブ
ビルド/生成スクリプト

変換スクリプト: SRSサンプルMarkdownをPDF化(reportlab)

元ファイル: システム要件定義の分析と汎用化方法/gen_srs_pdf.py

要約

SubstackサンプルのSRS Markdown(SRS_sample_substack_v1.md)をreportlabでPDFに変換するスクリプト。見出し階層・テーブル・コードブロック・引用に対応したスタイルを定義し、**bold**や`code`などのインライン記法をHTMLタグへ変換して整形出力する。

要点

Python / reportlab / PDF生成 / SRS / Markdown変換

#!/usr/bin/env python3
"""Convert SRS_sample_substack_v1.md to a PDF with reportlab."""
import logging
import re

from reportlab.lib import colors
from reportlab.lib.enums import TA_LEFT, TA_CENTER
from reportlab.lib.pagesizes import A4
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.units import mm
from reportlab.platypus import (
    SimpleDocTemplate, Paragraph, Spacer, HRFlowable, Table, TableStyle,
    XPreformatted,
)

# Path of the Markdown file that main() reads and of the PDF it writes.
# NOTE(review): both are absolute paths under /home/ubuntu, so the script only
# runs unmodified on a machine with that layout.
INPUT  = "/home/ubuntu/SRS_sample_substack_v1.md"
OUTPUT = "/home/ubuntu/SRS_sample_substack_v1.pdf"

# ---- スタイル定義 ----
def make_styles():
    """Build the paragraph styles used to render the document.

    Every style is derived from an entry of reportlab's sample stylesheet and
    overrides size, spacing, colour and font as needed.

    Returns:
        A dict mapping the keys "h1", "h2", "h3", "h4", "body", "bullet",
        "code", "quote", "table_h" and "table_b" to ParagraphStyle objects.
    """
    base = getSampleStyleSheet()

    def ps(name, parent_name="Normal", **kw):
        """Create a ParagraphStyle named *name* derived from a sample style."""
        return ParagraphStyle(name, parent=base[parent_name], **kw)

    # NOTE(review): Helvetica and Courier are built-in Type 1 fonts; they
    # presumably lack CJK glyphs, so Japanese input may need a registered
    # CID/TrueType font instead -- confirm against the real input file.
    return {
        "h1":     ps("H1", "Heading1", fontSize=18, spaceAfter=8, spaceBefore=4,
                     textColor=colors.HexColor("#0d3b66"), fontName="Helvetica-Bold"),
        "h2":     ps("H2", "Heading2", fontSize=14, spaceAfter=6, spaceBefore=14,
                     textColor=colors.HexColor("#1b4f72"), fontName="Helvetica-Bold"),
        "h3":     ps("H3", "Heading3", fontSize=11, spaceAfter=4, spaceBefore=10,
                     textColor=colors.HexColor("#2e86c1"), fontName="Helvetica-Bold"),
        "h4":     ps("H4", "Normal",   fontSize=10, spaceAfter=3, spaceBefore=8,
                     textColor=colors.HexColor("#1a5276"), fontName="Helvetica-Bold"),
        "body":   ps("Body", "Normal", fontSize=9,  spaceAfter=3, leading=14),
        "bullet": ps("Bullet", "Normal", fontSize=9, spaceAfter=2, leading=13,
                     leftIndent=14, bulletIndent=4),
        "code":   ps("Code", "Code",   fontSize=7.5, spaceAfter=3, spaceBefore=3,
                     leading=11, fontName="Courier",
                     backColor=colors.HexColor("#f4f6f8")),
        # HexColor parses the digits as one integer and does not expand the
        # 3-digit CSS shorthand, so the grey is spelled out in full.
        "quote":  ps("Quote", "Normal", fontSize=9, spaceAfter=3, leading=13,
                     leftIndent=12, textColor=colors.HexColor("#555555")),
        "table_h": ps("TH", "Normal",  fontSize=8.5, fontName="Helvetica-Bold",
                      textColor=colors.white),
        "table_b": ps("TB", "Normal",  fontSize=8.5, leading=12),
    }

def safe(text):
    """Escape the HTML special characters ``&``, ``<`` and ``>``.

    The ampersand is replaced first so that the ``&`` introduced by the
    ``&lt;`` / ``&gt;`` entities is not escaped a second time.

    Args:
        text: Raw text to embed in paragraph markup.

    Returns:
        The text with ``&``, ``<`` and ``>`` replaced by ``&amp;``, ``&lt;``
        and ``&gt;`` respectively.
    """
    return (text.replace("&", "&amp;")
                .replace("<", "&lt;")
                .replace(">", "&gt;"))

def inline_fmt(text):
    """Convert inline Markdown (**bold**, `code`, *italic*) to markup tags.

    The text is escaped with safe() first. Backtick code spans are then set
    aside before the emphasis rules run, so asterisks inside a code span
    (for example `a*b*c`) are kept literally instead of becoming <b>/<i>.

    Args:
        text: One line (or table cell) of raw Markdown text.

    Returns:
        The escaped text with <b>, <i> and <font name="Courier"> tags inserted.
    """
    # NUL delimits the placeholders below, so drop any NUL already present.
    text = safe(text).replace("\x00", "")

    code_spans = []

    def _stash(match):
        """Store the rendered code span and return its numbered placeholder."""
        code_spans.append(f'<font name="Courier">{match.group(1)}</font>')
        return f"\x00{len(code_spans) - 1}\x00"

    text = re.sub(r'`(.+?)`', _stash, text)
    text = re.sub(r'\*\*(.+?)\*\*', r'<b>\1</b>', text)
    text = re.sub(r'\*(.+?)\*', r'<i>\1</i>', text)
    # Put the untouched code spans back in place of their placeholders.
    return re.sub(r'\x00(\d+)\x00',
                  lambda m: code_spans[int(m.group(1))], text)

def parse_table(lines, idx, styles):
    """Convert a Markdown table starting at ``lines[idx]`` to a reportlab Table.

    Consecutive lines whose stripped form starts with ``|`` are consumed. The
    first remaining row is rendered as the header; all others as body rows.

    Args:
        lines: All lines of the Markdown document.
        idx: Index of the first line of the table.
        styles: Style dict; the "table_h" and "table_b" entries are used.

    Returns:
        A ``(table, next_idx)`` tuple. ``table`` is None when no usable row was
        found; ``next_idx`` is the index of the first line after the table.
    """
    rows = []
    while idx < len(lines) and lines[idx].strip().startswith("|"):
        cells = lines[idx].strip().strip("|").split("|")
        rows.append([c.strip() for c in cells])
        idx += 1
    if not rows:
        return None, idx

    def is_delimiter(row):
        """Return True when every cell looks like ``---``, ``:--`` or ``:-:``."""
        return all(re.match(r'^:?-+:?$', c) for c in row)

    # Remove the delimiter row. It is only looked for among the first two
    # rows, so a body row made solely of dashes (e.g. "| - | - |") is kept.
    for pos, row in enumerate(rows[:2]):
        if is_delimiter(row):
            del rows[pos]
            break
    if not rows:
        return None, idx

    col_n = max(len(r) for r in rows)
    data = []
    for i, row in enumerate(rows):
        # Pad short rows so that every row has the same number of columns.
        padded = row + [""] * (col_n - len(row))
        if i == 0:
            data.append([Paragraph(f"<b>{safe(c)}</b>", styles["table_h"])
                         for c in padded])
        else:
            data.append([Paragraph(inline_fmt(c), styles["table_b"])
                         for c in padded])

    # Split the A4 page width minus 40 mm evenly across the columns.
    col_w = (A4[0] - 40*mm) / col_n
    t = Table(data, colWidths=[col_w]*col_n, repeatRows=1)
    t.setStyle(TableStyle([
        ("BACKGROUND",  (0,0), (-1,0),  colors.HexColor("#1b4f72")),
        ("ROWBACKGROUNDS",(0,1),(-1,-1),[colors.white, colors.HexColor("#eaf4fb")]),
        ("GRID",        (0,0), (-1,-1), 0.4, colors.HexColor("#cccccc")),
        ("VALIGN",      (0,0), (-1,-1), "MIDDLE"),
        ("TOPPADDING",  (0,0), (-1,-1), 4),
        ("BOTTOMPADDING",(0,0),(-1,-1), 4),
        ("LEFTPADDING", (0,0), (-1,-1), 5),
    ]))
    return t, idx

def _append_paragraph(story, markup, style, fallback_text):
    """Append a Paragraph to *story*, retrying with plain escaped text."""
    log = logging.getLogger(__name__)
    for candidate in (markup, safe(fallback_text)):
        try:
            story.append(Paragraph(candidate, style))
            return
        except Exception as exc:
            # Broad catch kept on purpose: rendering is best-effort, but the
            # failure is reported instead of silently dropping the text.
            log.warning("Paragraph rejected (%s): %r", exc, candidate)


def _append_code_block(story, code_lines, style):
    """Append a fenced code block to *story*, keeping its line breaks."""
    if not code_lines:
        return
    # Lines longer than 90 characters are cut to 88 plus an ellipsis.
    clipped = "\n".join(
        (ln[:88] + "…" if len(ln) > 90 else ln) for ln in code_lines
    )
    markup = f'<font name="Courier" size="7.5">{safe(clipped)}</font>'
    try:
        # XPreformatted keeps newlines and indentation; a plain Paragraph
        # would reflow the whole block into one run of text.
        story.append(XPreformatted(markup, style))
    except Exception as exc:
        logging.getLogger(__name__).warning(
            "Code block rejected (%s); skipped", exc)


def build_story(content, styles):
    """Convert Markdown text into a list of reportlab flowables.

    Handles fenced code blocks, blank lines, horizontal rules, pipe tables,
    ATX headings (levels above 4 use the "h4" style), bullet lists, numbered
    lists, block quotes and plain paragraphs, one source line at a time.

    Args:
        content: The whole Markdown document as a single string.
        styles: Style dict as returned by make_styles().

    Returns:
        The list of flowables in document order.
    """
    story = []
    lines = content.split("\n")
    i = 0
    in_code = False
    code_buf = []

    while i < len(lines):
        line = lines[i]
        stripped = line.strip()

        # Code fence: toggles code mode and flushes the buffer on the closer.
        if stripped.startswith("```"):
            if in_code:
                _append_code_block(story, code_buf, styles["code"])
                code_buf = []
            in_code = not in_code
            i += 1
            continue

        if in_code:
            code_buf.append(line)
            i += 1
            continue

        # Blank line
        if not stripped:
            story.append(Spacer(1, 2*mm))
            i += 1
            continue

        # Horizontal rule
        if re.match(r'^---+\s*$', stripped):
            story.append(HRFlowable(width="100%", thickness=0.5,
                                    color=colors.HexColor("#cccccc"),
                                    spaceAfter=3, spaceBefore=3))
            i += 1
            continue

        # Table: parse_table consumes every table line and returns the next index.
        if stripped.startswith("|"):
            tbl, i = parse_table(lines, i, styles)
            if tbl is not None:
                story.append(Spacer(1, 2*mm))
                story.append(tbl)
                story.append(Spacer(1, 3*mm))
            continue

        # Heading: levels 5 and 6 are accepted and rendered with the h4 style.
        m = re.match(r'^(#{1,6})\s+(.*)', line)
        if m:
            lvl = len(m.group(1))
            st = ["h1", "h2", "h3", "h4"][min(lvl - 1, 3)]
            _append_paragraph(story, inline_fmt(m.group(2)), styles[st],
                              m.group(2))
            i += 1
            continue

        # Bullet list: each leading whitespace character adds 4pt of indent.
        m = re.match(r'^(\s*)[-*]\s+(.*)', line)
        if m:
            indent = len(m.group(1))
            bs = ParagraphStyle("BL", parent=styles["bullet"],
                                leftIndent=14+indent*4)
            _append_paragraph(story, f"• {inline_fmt(m.group(2))}", bs,
                              f"• {m.group(2)}")
            i += 1
            continue

        # Numbered list: keep the item number in the rendered text.
        m = re.match(r'^\s*(\d+)\.\s+(.*)', line)
        if m:
            num, body = m.group(1), m.group(2)
            _append_paragraph(story, f"{num}. {inline_fmt(body)}",
                              styles["bullet"], f"{num}. {body}")
            i += 1
            continue

        # Block quote
        m = re.match(r'^>\s*(.*)', stripped)
        if m:
            _append_paragraph(story, f"<i>{inline_fmt(m.group(1))}</i>",
                              styles["quote"], m.group(1))
            i += 1
            continue

        # Plain text
        _append_paragraph(story, inline_fmt(stripped), styles["body"], stripped)
        i += 1

    # An unterminated fence would otherwise lose everything buffered so far.
    if in_code:
        _append_code_block(story, code_buf, styles["code"])

    return story

def main(input_path=INPUT, output_path=OUTPUT):
    """Read a Markdown file and write it out as an A4 PDF.

    Args:
        input_path: Path of the UTF-8 Markdown file to read. Defaults to INPUT.
        output_path: Path of the PDF to create. Defaults to OUTPUT.
    """
    print(f"Reading {input_path}")
    with open(input_path, encoding="utf-8") as f:
        content = f.read()

    styles = make_styles()
    story  = build_story(content, styles)

    # A4 page with 20 mm margins on every side.
    doc = SimpleDocTemplate(
        output_path, pagesize=A4,
        rightMargin=20*mm, leftMargin=20*mm,
        topMargin=20*mm,   bottomMargin=20*mm,
        title="SRS-2026-001 Substack記事自動配信システム",
        author="山田 太郎",
    )
    doc.build(story)
    print(f"PDF generated: {output_path}")

# Run the conversion only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()

↑ トップへ戻る