Skip to content
个人作品推荐
栾媛爱动物
播放动物叫声趣味微信小程序
栾媛爱动物微信小程序
微信扫码体验

文字拼音

下面是 _gen_zodiac_docx.py 的完整内容:

python
# -*- coding: utf-8 -*-
"""Generate Word doc: story with per-character pinyin alignment."""
from __future__ import annotations

from pathlib import Path

from docx import Document
from docx.enum.table import WD_TABLE_ALIGNMENT
from docx.enum.text import WD_ALIGN_PARAGRAPH, WD_LINE_SPACING
from docx.oxml import OxmlElement
from docx.oxml.ns import qn
from docx.shared import Cm, Pt, RGBColor, Twips
from pypinyin import Style, pinyin

# --- Textbook-style annotated reading text (compact layout) ---
PY_FONT_PT = 7
HAN_FONT_PT = 11
PY_COLOR = RGBColor(0x5C, 0x5C, 0x5C)  # slightly lighter than body text; a common pinyin colour
HAN_FONT_EAST_ASIA = "KaiTi"  # KaiTi (regular script); Word falls back to another font if it is missing
HAN_FONT_LATIN = "Times New Roman"
PY_FONT_LATIN = "Times New Roman"
# First-line indent of about two characters (roughly matched to the font size). The chunker must subtract it from the first row's usable width, otherwise Word narrows the columns to pull the table back inside the page.
TEXTBOOK_BODY_INDENT_TWIPS = 380

# Mirrors the left/right tcMar applied by _style_pinyin_cell / _style_han_cell, plus a small allowance against column squeezing
_CELL_LR_PAD_TWIPS = int(2 * Pt(0.15).twips) + 8

# Characters after which the chunker prefers to end a row when it is close to
# the row's width limit (each cell still holds exactly one character / one
# pinyin syllable).
#
# Only closing and terminal punctuation belongs here: ending a row right after
# an opening bracket or opening quote would strand the opener at the end of the
# line, away from the text it introduces. The straight ASCII quotes are kept
# because they are ambiguous (they may be closing quotes).
_BREAK_PREFER_AFTER = set(
    # CJK closing / terminal punctuation
    "。、”’】》…—·"
    # Full-width closers (, ! ? ; : )), written as escapes so they survive
    # any text normalisation of this source file
    "\uff0c\uff01\uff1f\uff1b\uff1a\uff09"
    # ASCII closing / terminal punctuation
    ",.!?:;\"')]"
)


def usable_body_twips(doc: Document) -> int:
    """Return the usable text-block width of the first section, in twips.

    The result is the page width minus the left and right margins minus a
    160-twip allowance, and is never smaller than 4800. It is the budget
    used when accumulating column widths into rows.
    """
    first = doc.sections[0]
    page, left, right = (
        int(length.twips)
        for length in (first.page_width, first.left_margin, first.right_margin)
    )
    # Keep some slack so a table whose total width reaches the text block is
    # not proportionally squeezed by Word.
    available = page - left - right - 160
    return available if available > 4800 else 4800


def column_width_twips(ch: str, py: str) -> int:
    """Return the minimum width of one column in twips, side padding included.

    The column must hold both the character ``ch`` (bottom row) and its
    pinyin ``py`` (top row), so the wider of the two requirements wins.
    An empty ``py`` means the column carries no pinyin.
    """
    # Characters in U+4E00..U+9FFF get the wide glyph allowance.
    glyph_w = 255 if "\u4e00" <= ch <= "\u9fff" else 120
    # Pinyin width grows with syllable length, with a floor of 365 twips.
    reading_w = max(365, 175 + 80 * len(py)) if py else 145
    return _CELL_LR_PAD_TWIPS + max(glyph_w, reading_w)


def set_table_fixed_layout(table) -> None:
    """Switch the table to fixed layout so Word keeps the column widths given.

    Autofit is turned off and ``w:tblLayout/@w:type`` is set to ``fixed``
    (the element is created when absent). When the table has no ``tblPr``
    only the autofit flag is changed.
    """
    table.autofit = False
    props = table._tbl.tblPr
    if props is None:
        return
    existing = props.find(qn("w:tblLayout"))
    layout = OxmlElement("w:tblLayout") if existing is None else existing
    if existing is None:
        props.append(layout)
    layout.set(qn("w:type"), "fixed")


def set_table_preferred_width_dxa(table, total_twips: int) -> None:
    """Set the table's preferred width (``w:tblW``) to ``total_twips`` in dxa.

    Callers pass the sum of the column widths, so the table is exactly as
    wide as its content. Any existing ``w:tblW`` is replaced; the written
    value is at least 1. Does nothing when the table has no ``tblPr``.
    """
    props = table._tbl.tblPr
    if props is None:
        return
    for stale in props.findall(qn("w:tblW")):
        props.remove(stale)
    width_el = OxmlElement("w:tblW")
    for attr, value in (("w:w", str(max(1, int(total_twips)))), ("w:type", "dxa")):
        width_el.set(qn(attr), value)
    props.append(width_el)


def set_cell_width_dxa(cell, twips: int) -> None:
    """Set the cell's ``w:tcW`` to ``twips`` (dxa), creating it if missing.

    The written width is clamped to a minimum of 120 twips.
    """
    props = cell._tc.get_or_add_tcPr()
    width_el = props.find(qn("w:tcW"))
    if width_el is None:
        width_el = OxmlElement("w:tcW")
        props.append(width_el)
    clamped = max(120, int(twips))
    for attr, value in (("w:w", str(clamped)), ("w:type", "dxa")):
        width_el.set(qn(attr), value)


def sync_tbl_grid_col_widths(table, col_widths_twips: list[int]) -> None:
    """Copy the per-column widths into the table's ``w:tblGrid``.

    Per the original author's note, a freshly created python-docx table has
    a grid that splits the full text-block width evenly between columns, so
    a table with few columns is stretched to the right margin. Setting each
    ``w:gridCol`` to the matching cell width makes the table's total width
    equal the sum of its columns.

    Nothing is changed when the number of grid columns differs from the
    number of widths supplied.
    """
    grid = table._tbl.tblGrid.gridCol_lst
    if len(grid) == len(col_widths_twips):
        for grid_col, width in zip(grid, col_widths_twips):
            grid_col.w = Twips(int(width))


def set_table_no_border(table) -> None:
    """Remove all table-level borders, including the inner grid lines.

    Any existing ``w:tblBorders`` and ``w:tblStyle`` are dropped (the style
    is removed so a built-in grid style cannot re-apply lines), then a
    ``w:tblBorders`` with every edge set to ``none`` is appended. A missing
    ``tblPr`` is created first.
    """
    tbl = table._tbl
    props = tbl.tblPr
    if props is None:
        props = OxmlElement("w:tblPr")
        tbl.insert(0, props)

    stale_tags = (qn("w:tblBorders"), qn("w:tblStyle"))
    for child in [el for el in props if el.tag in stale_tags]:
        props.remove(child)

    blank_edge = {"w:val": "none", "w:sz": "0", "w:space": "0", "w:color": "auto"}
    borders = OxmlElement("w:tblBorders")
    for side in ("top", "left", "bottom", "right", "insideH", "insideV"):
        edge = OxmlElement(f"w:{side}")
        for attr, value in blank_edge.items():
            edge.set(qn(attr), value)
        borders.append(edge)
    props.append(borders)


def set_cell_no_border(cell) -> None:
    """Remove the borders of a single cell.

    Any existing ``w:tcBorders`` is replaced by one whose four edges
    (top, left, bottom, right) are written with ``w:val="none"``.
    """
    props = cell._tc.get_or_add_tcPr()
    for stale in props.findall(qn("w:tcBorders")):
        props.remove(stale)

    blank_edge = {"w:val": "none", "w:sz": "0", "w:space": "0", "w:color": "auto"}
    borders = OxmlElement("w:tcBorders")
    for side in ("top", "left", "bottom", "right"):
        edge = OxmlElement(f"w:{side}")
        for attr, value in blank_edge.items():
            edge.set(qn(attr), value)
        borders.append(edge)
    props.append(borders)


def clear_paragraph_border(paragraph) -> None:
    """Drop any ``w:pBdr`` paragraph border from ``paragraph``.

    Per the original author's note, a few templates add borders to
    paragraphs inside tables. Does nothing when the paragraph has no ``pPr``.
    """
    props = paragraph._element.pPr
    if props is None:
        return
    for border in props.findall(qn("w:pBdr")):
        props.remove(border)


def finalize_table_no_borders(table) -> None:
    """Final sweep that strips table, cell and paragraph borders.

    Meant to run after styles and widths have been applied, so that no
    border of any kind is left on the table.
    """
    set_table_no_border(table)
    every_cell = (cell for row in table.rows for cell in row.cells)
    for cell in every_cell:
        set_cell_no_border(cell)
        for paragraph in cell.paragraphs:
            clear_paragraph_border(paragraph)


def set_cell_margins(
    cell,
    top: int = 0,
    bottom: int = 0,
    left: int = 0,
    right: int = 0,
) -> None:
    """Set the cell's inner margins (``w:tcMar``), all values in twips.

    Used to control the vertical rhythm between the pinyin row and the
    character row. Any existing ``w:tcMar`` is replaced; negative values
    are written as 0.
    """
    props = cell._tc.get_or_add_tcPr()
    for stale in props.findall(qn("w:tcMar")):
        props.remove(stale)

    margins = OxmlElement("w:tcMar")
    # Child order is kept as top, left, bottom, right.
    requested = {"top": top, "left": left, "bottom": bottom, "right": right}
    for side, amount in requested.items():
        side_el = OxmlElement(f"w:{side}")
        side_el.set(qn("w:w"), str(max(0, int(amount))))
        side_el.set(qn("w:type"), "dxa")
        margins.append(side_el)
    props.append(margins)


def set_table_indent(table, twips: int) -> None:
    """Indent the whole table from the left by ``twips`` (``w:tblInd``, dxa).

    This is how the two-character indent at the start of a paragraph is
    produced. Any existing ``w:tblInd`` is replaced. Does nothing when the
    table has no ``tblPr``.
    """
    props = table._tbl.tblPr
    if props is None:
        return
    for stale in props.findall(qn("w:tblInd")):
        props.remove(stale)
    indent = OxmlElement("w:tblInd")
    for attr, value in (("w:w", str(int(twips))), ("w:type", "dxa")):
        indent.set(qn(attr), value)
    props.append(indent)


def strip_run_horizontal_fit(run) -> None:
    """Remove ``w:fitText`` from the run's properties.

    The intent is to keep the pinyin from being compressed horizontally to
    fit the cell. Does nothing when the run has no ``rPr``.
    """
    props = run._element.rPr
    if props is None:
        return
    for fit in props.findall(qn("w:fitText")):
        props.remove(fit)


def set_cell_no_wrap(cell) -> None:
    """Mark the cell ``w:noWrap`` so its text stays on a single line.

    The element is appended only when the cell does not already have one.
    """
    props = cell._tc.get_or_add_tcPr()
    if props.find(qn("w:noWrap")) is None:
        props.append(OxmlElement("w:noWrap"))


def char_pinyin_list(s: str) -> tuple[list[str], list[str]]:
    """Split ``s`` into characters and the pinyin aligned with each of them.

    Consecutive characters in U+4E00..U+9FFF are converted together in one
    ``pinyin`` call (tone-mark style, no heteronyms). Every other character
    gets an empty string as its pinyin.

    Returns:
        ``(chars, pys)``: two lists of equal length, one entry per
        character of ``s`` in the original order.
    """
    chars: list[str] = []
    readings: list[str] = []
    pending: list[str] = []  # the current run of consecutive Han characters

    def flush_pending() -> None:
        # Convert the buffered run as a single segment, then emit it
        # character by character.
        if not pending:
            return
        segment = "".join(pending)
        converted = pinyin(segment, style=Style.TONE, heteronym=False)
        for index, han in enumerate(segment):
            chars.append(han)
            candidates = converted[index]
            readings.append(candidates[0] if candidates else "")
        pending.clear()

    for ch in s:
        if "\u4e00" <= ch <= "\u9fff":
            pending.append(ch)
            continue
        flush_pending()
        chars.append(ch)
        readings.append("")
    flush_pending()
    return chars, readings


def split_into_width_chunks(
    chars: list[str],
    pys: list[str],
    budget_full_twips: int,
    indent_reserve_twips: int,
) -> list[tuple[list[str], list[str]]]:
    """Cut one paragraph into rows that each fit the available width.

    Column widths are accumulated from left to right; when the next column
    would exceed the row's budget, a new row (a new table) is started.
    Columns with short syllables are narrow, so a row may hold more
    characters.

    Args:
        chars: the paragraph's characters.
        pys: the pinyin for each character ("" when it has none).
        budget_full_twips: width available to a full row.
        indent_reserve_twips: width withheld from the first row only.

    Returns:
        A list of ``(chars, pys)`` pairs, one per row, in order.
    """
    total = len(chars)
    rows: list[tuple[list[str], list[str]]] = []
    start = 0
    while start < total:
        # Every row consumes at least one character, so only the first row
        # of the paragraph starts at index 0 and pays for the indent.
        reserve = indent_reserve_twips if start == 0 else 0
        limit = max(2200, budget_full_twips - reserve)

        # Greedy fill: take columns while they fit. The first column is
        # always taken, even if it is wider than the limit.
        used = 0
        stop = start
        for pos in range(start, total):
            col_w = column_width_twips(chars[pos], pys[pos])
            if pos > start and used + col_w > limit:
                break
            used += col_w
            stop = pos + 1

        if stop == start:
            stop = start + 1

        # Not the paragraph's last row: look back over at most 12 characters
        # of this row and end it right after the closest preferred break
        # character.
        if stop < total:
            for pos in reversed(range(max(start, stop - 12), stop)):
                if chars[pos] in _BREAK_PREFER_AFTER:
                    stop = pos + 1
                    break

        # Safety trim: drop trailing columns while the row measures wider
        # than the limit, always keeping at least one.
        row_chars = chars[start:stop]
        row_pys = pys[start:stop]
        while len(row_chars) > 1:
            measured = sum(
                column_width_twips(c, p) for c, p in zip(row_chars, row_pys)
            )
            if measured <= limit:
                break
            row_chars = row_chars[:-1]
            row_pys = row_pys[:-1]
            stop -= 1

        if row_chars:
            rows.append((list(row_chars), list(row_pys)))
        start = stop
    return rows


def _style_pinyin_cell(cell) -> None:
    """Apply the pinyin-row look to ``cell``.

    Sets the cell margins (no top margin, 1 pt bottom, 0.15 pt on each
    side), centres every paragraph with exact line spacing of
    ``PY_FONT_PT + 1`` points and no spacing before/after, and gives every
    run the pinyin size, font and colour.
    """
    side = int(Pt(0.15).twips)
    set_cell_margins(cell, top=0, bottom=int(Pt(1).twips), left=side, right=side)
    for paragraph in cell.paragraphs:
        paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
        fmt = paragraph.paragraph_format
        fmt.space_before = Pt(0)
        fmt.space_after = Pt(0)
        fmt.line_spacing_rule = WD_LINE_SPACING.EXACTLY
        fmt.line_spacing = Pt(PY_FONT_PT + 1)
        for run in paragraph.runs:
            font = run.font
            font.size = Pt(PY_FONT_PT)
            font.name = PY_FONT_LATIN
            font.color.rgb = PY_COLOR
            # Use the same Latin font for the East Asian font slot as well.
            run._element.rPr.rFonts.set(qn("w:eastAsia"), PY_FONT_LATIN)
            strip_run_horizontal_fit(run)


def _style_han_cell(cell) -> None:
    """Apply the character-row look to ``cell``.

    Sets the cell margins (no top margin, 2.5 pt bottom, 0.15 pt on each
    side), centres every paragraph with exact line spacing of
    ``HAN_FONT_PT + 2`` points and no spacing before/after, and gives every
    run the body size in black, non-bold, with separate Latin and East
    Asian fonts.
    """
    side = int(Pt(0.15).twips)
    set_cell_margins(cell, top=0, bottom=int(Pt(2.5).twips), left=side, right=side)
    for paragraph in cell.paragraphs:
        paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
        fmt = paragraph.paragraph_format
        fmt.space_before = Pt(0)
        fmt.space_after = Pt(0)
        fmt.line_spacing_rule = WD_LINE_SPACING.EXACTLY
        fmt.line_spacing = Pt(HAN_FONT_PT + 2)
        for run in paragraph.runs:
            font = run.font
            font.size = Pt(HAN_FONT_PT)
            font.bold = False
            font.name = HAN_FONT_LATIN
            font.color.rgb = RGBColor(0x00, 0x00, 0x00)
            # The East Asian font slot is what Han glyphs are rendered with.
            run._element.rPr.rFonts.set(qn("w:eastAsia"), HAN_FONT_EAST_ASIA)
            strip_run_horizontal_fit(run)


def add_two_row_table(
    chars: list[str],
    pys: list[str],
    add_trailing_space: bool,
    doc: Document,
    apply_paragraph_indent: bool,
) -> None:
    """Append one row of text to ``doc`` as a borderless two-row table.

    The top row holds the pinyin, the bottom row the characters, one column
    per character. The table is left-aligned, uses fixed layout, and is
    exactly as wide as the sum of its columns, so a short row is not
    stretched across the page.

    Args:
        chars: characters of this row.
        pys: pinyin for each character ("" when it has none).
        add_trailing_space: when true, append a small spacer paragraph
            after the table.
        doc: the document to append to.
        apply_paragraph_indent: when true, indent the table by
            ``TEXTBOOK_BODY_INDENT_TWIPS`` (first row of a paragraph).
    """
    if not chars:
        # A table with zero columns would have rows without cells; there is
        # nothing to render, so add nothing.
        return

    table = doc.add_table(rows=2, cols=len(chars))
    try:
        table.style = None
    except (KeyError, ValueError, AttributeError):
        # Best effort only: borders and the style reference are stripped
        # explicitly below.
        pass
    set_table_no_border(table)
    set_table_fixed_layout(table)
    # Table width = sum of this row's columns: left-aligned and ending where
    # the content ends, rather than centred or stretched to the page width.
    table.alignment = WD_TABLE_ALIGNMENT.LEFT
    if apply_paragraph_indent:
        set_table_indent(table, TEXTBOOK_BODY_INDENT_TWIPS)

    # Fetch each row's cells once: the cell sequence is rebuilt on every
    # ``row.cells`` access, so indexing it inside the loop is quadratic in
    # the number of columns.
    pinyin_cells = table.rows[0].cells
    han_cells = table.rows[1].cells

    col_widths: list[int] = []
    for j, (c, p) in enumerate(zip(chars, pys)):
        c0 = pinyin_cells[j]
        c1 = han_cells[j]
        set_cell_no_wrap(c0)
        set_cell_no_wrap(c1)
        c0.text = p
        c1.text = c
        col_tw = column_width_twips(c, p)
        col_widths.append(col_tw)
        set_cell_width_dxa(c0, col_tw)
        set_cell_width_dxa(c1, col_tw)
        _style_pinyin_cell(c0)
        _style_han_cell(c1)

    sync_tbl_grid_col_widths(table, col_widths)
    set_table_preferred_width_dxa(table, sum(col_widths))
    # Sweep once more after styling so no border is left behind.
    finalize_table_no_borders(table)
    if add_trailing_space:
        tail = doc.add_paragraph()
        tail.paragraph_format.space_after = Pt(2)
        tail.paragraph_format.space_before = Pt(0)


def add_paragraph_table(doc: Document, text: str) -> None:
    """Append one paragraph of annotated text to ``doc``.

    Blank text (after removing trailing newlines) produces a thin spacer
    paragraph. Otherwise the text is converted to characters plus pinyin,
    cut into rows that fit the usable page width, and each row is added as
    its own two-row table. Only the first row is indented and only the last
    row is followed by trailing space.
    """
    body = text.rstrip("\n")
    if not body.strip():
        spacer_format = doc.add_paragraph().paragraph_format
        spacer_format.space_before = Pt(0)
        spacer_format.space_after = Pt(1)
        spacer_format.line_spacing = Pt(1)
        return

    chars, readings = char_pinyin_list(body)
    rows = split_into_width_chunks(
        chars,
        readings,
        usable_body_twips(doc),
        TEXTBOOK_BODY_INDENT_TWIPS,
    )
    last_index = len(rows) - 1
    for index, (row_chars, row_pys) in enumerate(rows):
        add_two_row_table(
            row_chars,
            row_pys,
            index == last_index,
            doc,
            apply_paragraph_indent=(index == 0),
        )


def main() -> None:
    """Build the zodiac story document and save it next to this script.

    The document has a centred title, a centred subtitle, and the story
    paragraphs rendered as pinyin-over-character tables. If the target file
    cannot be written because of a ``PermissionError``, the document is
    saved under an alternative name instead.
    """
    story = [
        "很久很久以前,人们总记不住自己的生日。",
        "玉皇大帝想了个好办法,举办一场渡河比赛,选出十二只动物来代表年份。",
        "",
        "比赛那天,小老鼠故意骗猫说比赛还没开始,猫就安心睡觉了。",
        "老鼠悄悄爬到牛背上,老牛力气大,很快就游到了对岸。",
        "快到终点时,老鼠“嗖”地一跳,得了第一名。",
        "",
        "接下来,牛、老虎、兔子、龙、蛇、马、羊、猴、鸡、狗、猪,都顺利到达终点。",
        "玉皇大帝笑着说:“你们就是十二生肖啦!”",
        "",
        "等猫睡醒匆匆赶来,名额已经满了。",
        "猫又生气又难过,从此一见到老鼠就追。老鼠也就一直害怕猫啦。",
    ]

    doc = Document()
    side_margin = Cm(1.75)
    edge_margin = Cm(1.45)
    for section in doc.sections:
        section.left_margin = side_margin
        section.right_margin = side_margin
        section.top_margin = edge_margin
        section.bottom_margin = edge_margin

    # Title.
    heading = doc.add_paragraph()
    heading.alignment = WD_ALIGN_PARAGRAPH.CENTER
    heading.paragraph_format.space_after = Pt(4)
    heading.paragraph_format.space_before = Pt(0)
    heading_run = heading.add_run("十二生肖的故事")
    heading_run.font.size = Pt(16)
    heading_run.font.bold = True
    heading_run.font.name = "SimHei"
    heading_run._element.rPr.rFonts.set(qn("w:eastAsia"), "SimHei")

    # Subtitle.
    subtitle = doc.add_paragraph()
    subtitle.alignment = WD_ALIGN_PARAGRAPH.CENTER
    subtitle.paragraph_format.space_after = Pt(3)
    subtitle_run = subtitle.add_run("(注音朗读)")
    subtitle_run.font.size = Pt(8)
    subtitle_run.font.color.rgb = RGBColor(0x66, 0x66, 0x66)
    subtitle_run.font.name = PY_FONT_LATIN
    subtitle_run._element.rPr.rFonts.set(qn("w:eastAsia"), HAN_FONT_EAST_ASIA)

    # add_paragraph_table turns a blank entry into the same thin spacer
    # paragraph, so blank and non-blank entries share one code path.
    for block in story:
        add_paragraph_table(doc, block)

    target = Path(__file__).resolve().parent / "十二生肖故事-汉字拼音对照表.docx"
    try:
        doc.save(target)
    except PermissionError:
        # The main file could not be written; save under another name.
        fallback = target.with_name(f"{target.stem}-未占用时覆盖主文件.docx")
        doc.save(fallback)
        print("saved (Word 占用主文件):", fallback)
    else:
        print("saved:", target)


if __name__ == "__main__":
    main()