325 lines
13 KiB
Python
325 lines
13 KiB
Python
from __future__ import annotations
|
|
|
|
import base64
|
|
import io
|
|
from typing import Optional
|
|
|
|
import structlog
|
|
from docx import Document
|
|
from docx.enum.section import WD_SECTION
|
|
from docx.oxml import OxmlElement
|
|
from docx.oxml.ns import qn
|
|
from docx.shared import Pt
|
|
from lxml import etree
|
|
|
|
from export.models import (
|
|
IDExportDocument,
|
|
IDExportImageFrame,
|
|
IDExportPage,
|
|
IDExportShape,
|
|
IDExportTable,
|
|
IDExportTextFrame,
|
|
)
|
|
from word.factories import FooterFactory, StyleFactory, TableFactory
|
|
|
|
# Module-level structlog logger; used below for warnings about images that
# cannot be decoded or registered.
log = structlog.get_logger()

# DrawingML / OOXML namespaces
# wp: wordprocessingDrawing — namespace of the wp:anchor element and its children.
_WP = 'http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing'
# a: DrawingML main — a:graphic, a:graphicData, geometry, fills and strokes.
_A = 'http://schemas.openxmlformats.org/drawingml/2006/main'
# pic: DrawingML picture — pic:pic and its children for embedded images.
_PIC = 'http://schemas.openxmlformats.org/drawingml/2006/picture'
# r: relationships — namespace of the r:embed attribute on a:blip.
_R = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships'
# w: WordprocessingML main — w:r, w:drawing and w:txbxContent.
_W = 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'
# wps: Word 2010 wordprocessingShape — wps:wsp shapes and text boxes.
_WPS = 'http://schemas.microsoft.com/office/word/2010/wordprocessingShape'
|
|
|
|
|
|
def _pt_to_emu(pt: float) -> int:
|
|
"""Convert points to English Metric Units (EMU)."""
|
|
return int(pt * 914400 / 72)
|
|
|
|
|
|
class DocxBuilder:
    """Builds a DOCX from a parsed IDExportDocument using absolute page anchoring.

    Every element is placed as an absolutely-positioned wp:anchor at its exact
    InDesign coordinates (relativeFrom="page"). Shapes are placed behind text
    (behindDoc=1); text boxes and images are placed in front (behindDoc=0).
    Tables are added inline in Y-position order.
    """

    # Margin applied to all four sides of every section, in points.
    _MARGIN_PT = 56.69

    def __init__(self) -> None:
        # Created here so the anchor helpers can be used on a fresh instance;
        # build() resets it at the start of every document.
        self._shape_counter = 0

    def build(self, document: IDExportDocument) -> bytes:
        """Render ``document`` into a DOCX package and return it as bytes.

        One section is used per page: the first page reuses the section a new
        ``Document()`` already has, every later page adds a NEW_PAGE section.
        Each section takes the page's own size, falling back to the
        document-level size when the page value is falsy.

        Args:
            document: Parsed export to render.

        Returns:
            The bytes written by ``doc.save``.
        """
        doc = Document()
        # Drawing ids restart for each document built by this instance.
        self._shape_counter = 0

        # Register all paragraph styles once before any content is added
        StyleFactory.register_all(doc, document)

        for page_idx, page in enumerate(document.pages):
            if page_idx == 0:
                section = doc.sections[0]
            else:
                section = doc.add_section(WD_SECTION.NEW_PAGE)

            section.page_width = Pt(page.width_pt or document.page_width_pt)
            section.page_height = Pt(page.height_pt or document.page_height_pt)
            section.left_margin = Pt(self._MARGIN_PT)
            section.right_margin = Pt(self._MARGIN_PT)
            section.top_margin = Pt(self._MARGIN_PT)
            section.bottom_margin = Pt(self._MARGIN_PT)

            # 1. Shapes first — placed behind (behindDoc=1)
            for item in page.items:
                if isinstance(item, IDExportShape):
                    self._add_shape(doc, item)

            # 2. Text frames and images — placed in front (behindDoc=0)
            for item in page.items:
                if isinstance(item, IDExportTextFrame):
                    self._add_text_box(doc, item)
                elif isinstance(item, IDExportImageFrame):
                    self._add_image_anchor(doc, item)

            # 3. Tables — inline, sorted by Y position
            tables = sorted(
                (i for i in page.items if isinstance(i, IDExportTable)),
                key=lambda t: t.y_pt,
            )
            for table in tables:
                TableFactory.add_to_doc(doc, table)

        if document.page_number_style:
            FooterFactory.apply(doc, document.page_number_style)

        buf = io.BytesIO()
        doc.save(buf)
        return buf.getvalue()

    # ── Anchor helpers ─────────────────────────────────────────────────────────

    def _next_id(self) -> int:
        """Increment the per-document drawing counter and return the new value."""
        self._shape_counter += 1
        return self._shape_counter

    def _anchor_para(self, doc: Document) -> etree._Element:
        """Append an empty host paragraph for an anchor; return its raw w:p element.

        The paragraph gets a w:pPr with zero before/after spacing and
        ``w:line="240"``.

        NOTE(review): the paragraph is not literally zero-height — w:line=240
        with no lineRule is presumably ordinary single line spacing. Confirm
        whether a page with many anchored items can overflow and push later
        anchors onto the following page.
        """
        para = doc.add_paragraph()
        pPr = OxmlElement('w:pPr')
        spacing = OxmlElement('w:spacing')
        spacing.set(qn('w:before'), '0')
        spacing.set(qn('w:after'), '0')
        spacing.set(qn('w:line'), '240')
        pPr.append(spacing)
        # The paragraph was just created empty, so index 0 makes pPr its first child.
        para._p.insert(0, pPr)
        return para._p

    def _make_anchor(self, sid: int, x_pt: float, y_pt: float,
                     w_pt: float, h_pt: float, behind: bool = False) -> etree._Element:
        """Build a wp:anchor element (without graphic content) at absolute page coords.

        Args:
            sid: Drawing id written to wp:docPr (``id`` and ``name="El<sid>"``).
            x_pt: Horizontal offset from the page edge, in points.
            y_pt: Vertical offset from the page edge, in points.
            w_pt: Extent width, in points.
            h_pt: Extent height, in points.
            behind: When true the anchor gets behindDoc="1" and
                relativeHeight="0"; otherwise behindDoc="0" and
                relativeHeight="251658240".

        Returns:
            The parsed wp:anchor element; the caller appends the a:graphic.
        """
        x = _pt_to_emu(x_pt)
        y = _pt_to_emu(y_pt)
        w = _pt_to_emu(w_pt)
        h = _pt_to_emu(h_pt)
        z = '0' if behind else '251658240'
        bd = '1' if behind else '0'

        return etree.fromstring(
            f'<wp:anchor xmlns:wp="{_WP}" '
            f'distT="0" distB="0" distL="0" distR="0" '
            f'simplePos="0" relativeHeight="{z}" behindDoc="{bd}" '
            f'locked="0" layoutInCell="1" allowOverlap="1">'
            f'<wp:simplePos x="0" y="0"/>'
            f'<wp:positionH relativeFrom="page"><wp:posOffset>{x}</wp:posOffset></wp:positionH>'
            f'<wp:positionV relativeFrom="page"><wp:posOffset>{y}</wp:posOffset></wp:positionV>'
            f'<wp:extent cx="{w}" cy="{h}"/>'
            f'<wp:effectExtent l="0" t="0" r="0" b="0"/>'
            f'<wp:wrapNone/>'
            f'<wp:docPr id="{sid}" name="El{sid}"/>'
            f'<wp:cNvGraphicFramePr/>'
            f'</wp:anchor>'
        )

    def _attach_anchor(self, p_el: etree._Element, anchor: etree._Element) -> None:
        """Wrap an anchor in w:r > w:drawing and append it to a paragraph element."""
        r_el = etree.SubElement(p_el, f'{{{_W}}}r')
        drawing = etree.SubElement(r_el, f'{{{_W}}}drawing')
        drawing.append(anchor)

    def _place_graphic(self, doc: Document, anchor: etree._Element,
                       graphic_xml: str) -> None:
        """Parse ``graphic_xml``, append it to ``anchor`` and host the anchor in ``doc``.

        Shared final step of shape, text-box and image placement: the graphic
        becomes the last child of the anchor, a new host paragraph is appended
        to the document, and the anchor is attached to that paragraph.
        """
        anchor.append(etree.fromstring(graphic_xml))
        self._attach_anchor(self._anchor_para(doc), anchor)

    # ── Shape placement ────────────────────────────────────────────────────────

    def _add_shape(self, doc: Document, item: IDExportShape) -> None:
        """Place a DrawingML shape (rect/ellipse/line) behind page content.

        Width and height are clamped to at least 0.5 pt. A ``shape_type``
        other than rect/ellipse/line falls back to ``rect``. The fill is solid
        when ``fill_hex`` is set, otherwise none; the outline is drawn only
        when both ``stroke_hex`` and ``stroke_weight_pt`` are truthy.
        """
        w_pt = max(item.width_pt, 0.5)
        h_pt = max(item.height_pt, 0.5)
        sid = self._next_id()
        w_emu = _pt_to_emu(w_pt)
        h_emu = _pt_to_emu(h_pt)

        prst = {'rect': 'rect', 'ellipse': 'ellipse', 'line': 'line'}.get(
            item.shape_type, 'rect'
        )

        if item.fill_hex:
            fill_xml = f'<a:solidFill><a:srgbClr val="{item.fill_hex.upper()}"/></a:solidFill>'
        else:
            fill_xml = '<a:noFill/>'

        if item.stroke_hex and item.stroke_weight_pt:
            # Strokes thinner than 1 pt are widened to 12700 EMU (= 1 pt).
            sw = max(_pt_to_emu(item.stroke_weight_pt), 12700)
            stroke_xml = (
                f'<a:ln w="{sw}">'
                f'<a:solidFill><a:srgbClr val="{item.stroke_hex.upper()}"/></a:solidFill>'
                f'</a:ln>'
            )
        else:
            stroke_xml = '<a:ln><a:noFill/></a:ln>'

        graphic_xml = (
            f'<a:graphic xmlns:a="{_A}">'
            f'<a:graphicData uri="{_WPS}">'
            f'<wps:wsp xmlns:wps="{_WPS}">'
            f'<wps:cNvSpPr><a:spLocks noChangeArrowheads="1"/></wps:cNvSpPr>'
            f'<wps:spPr>'
            f'<a:xfrm><a:off x="0" y="0"/><a:ext cx="{w_emu}" cy="{h_emu}"/></a:xfrm>'
            f'<a:prstGeom prst="{prst}"><a:avLst/></a:prstGeom>'
            f'{fill_xml}{stroke_xml}'
            f'</wps:spPr>'
            f'<wps:bodyPr/>'
            f'</wps:wsp>'
            f'</a:graphicData>'
            f'</a:graphic>'
        )

        anc = self._make_anchor(sid, item.x_pt, item.y_pt, w_pt, h_pt, behind=True)
        self._place_graphic(doc, anc, graphic_xml)

    # ── Text box placement ─────────────────────────────────────────────────────

    def _add_text_box(self, doc: Document, frame: IDExportTextFrame) -> None:
        """Place a text frame as an anchored WPS text box at exact page coordinates.

        Frames without paragraphs are skipped. Width and height are clamped to
        at least 1 pt. The box has no fill, no outline and zero internal insets.
        """
        if not frame.paragraphs:
            return

        w_pt = max(frame.width_pt, 1.0)
        h_pt = max(frame.height_pt, 1.0)
        sid = self._next_id()
        w_emu = _pt_to_emu(w_pt)
        h_emu = _pt_to_emu(h_pt)

        # Build paragraph elements via python-docx API, then transplant into txbxContent
        para_elements = self._build_txbx_paragraphs(frame, doc)
        paras_xml = ''.join(
            etree.tostring(p, encoding='unicode') for p in para_elements
        )

        graphic_xml = (
            f'<a:graphic xmlns:a="{_A}">'
            f'<a:graphicData uri="{_WPS}">'
            f'<wps:wsp xmlns:wps="{_WPS}">'
            f'<wps:cNvSpPr txBx="1"><a:spLocks noChangeArrowheads="1"/></wps:cNvSpPr>'
            f'<wps:spPr>'
            f'<a:xfrm><a:off x="0" y="0"/><a:ext cx="{w_emu}" cy="{h_emu}"/></a:xfrm>'
            f'<a:prstGeom prst="rect"><a:avLst/></a:prstGeom>'
            f'<a:noFill/><a:ln><a:noFill/></a:ln>'
            f'</wps:spPr>'
            f'<wps:txbx>'
            f'<w:txbxContent xmlns:w="{_W}">{paras_xml}</w:txbxContent>'
            f'</wps:txbx>'
            f'<wps:bodyPr rot="0" vert="horz" wrap="square" '
            f'lIns="0" rIns="0" tIns="0" bIns="0" anchor="t" anchorCtr="0"/>'
            f'</wps:wsp>'
            f'</a:graphicData>'
            f'</a:graphic>'
        )

        anc = self._make_anchor(sid, frame.x_pt, frame.y_pt, w_pt, h_pt, behind=False)
        self._place_graphic(doc, anc, graphic_xml)

    def _build_txbx_paragraphs(self, frame: IDExportTextFrame,
                               doc: Document) -> list:
        """Build w:p XML elements for a text frame using python-docx's style API.

        Paragraphs are created normally via python-docx (so styles are applied
        correctly), then their XML elements are detached from the document
        body and returned so the caller can place them in w:txbxContent.

        A paragraph with only whitespace text and no runs becomes a bare,
        unstyled w:p. When a paragraph has runs, runs with empty text are
        skipped; otherwise the paragraph text is added as a single run.

        Returns:
            The detached w:p elements, in the order of ``frame.paragraphs``.
        """
        result = []
        for para_data in frame.paragraphs:
            if not para_data.text.strip() and not para_data.runs:
                result.append(etree.Element(f'{{{_W}}}p'))
                continue

            temp = doc.add_paragraph()
            StyleFactory.apply_paragraph_style(temp, para_data.style, doc)

            if para_data.runs:
                for run_data in para_data.runs:
                    if not run_data.text:
                        continue
                    run = temp.add_run(run_data.text)
                    StyleFactory.apply_run_formatting(run, run_data)
            else:
                temp.add_run(para_data.text)

            # Extract element from doc body and hand it to the caller
            p_el = temp._p
            p_el.getparent().remove(p_el)
            result.append(p_el)

        return result

    # ── Image placement ────────────────────────────────────────────────────────

    def _add_image_anchor(self, doc: Document, frame: IDExportImageFrame) -> None:
        """Place an image as an anchored DrawingML picture at exact page coordinates.

        Frames without image data are skipped. If the base64 payload cannot be
        decoded, or the image cannot be registered with the document part, a
        warning is logged and the frame is skipped rather than aborting the
        build. Width and height are clamped to at least 1 pt.
        """
        if not frame.image_data_b64:
            return
        try:
            img_bytes = base64.b64decode(frame.image_data_b64)
        except (ValueError, TypeError):
            # binascii.Error (bad padding) is a ValueError subclass; TypeError
            # covers a payload that is neither str nor bytes-like.
            log.warning('image_decode_failed', frame_id=frame.id)
            return

        w_pt = max(frame.width_pt, 1.0)
        h_pt = max(frame.height_pt, 1.0)
        sid = self._next_id()
        w_emu = _pt_to_emu(w_pt)
        h_emu = _pt_to_emu(h_pt)

        try:
            r_id, _image = doc.part.get_or_add_image(io.BytesIO(img_bytes))
        except Exception as e:
            # Deliberately broad: best-effort export, and the image library's
            # own exception types are not imported in this module.
            log.warning('image_register_failed', frame_id=frame.id, error=str(e))
            return

        graphic_xml = (
            f'<a:graphic xmlns:a="{_A}">'
            f'<a:graphicData uri="{_PIC}">'
            f'<pic:pic xmlns:pic="{_PIC}">'
            f'<pic:nvPicPr>'
            f'<pic:cNvPr id="{sid}" name="Img{sid}"/>'
            f'<pic:cNvPicPr/>'
            f'</pic:nvPicPr>'
            f'<pic:blipFill>'
            f'<a:blip r:embed="{r_id}" xmlns:r="{_R}"/>'
            f'<a:stretch><a:fillRect/></a:stretch>'
            f'</pic:blipFill>'
            f'<pic:spPr>'
            f'<a:xfrm><a:off x="0" y="0"/><a:ext cx="{w_emu}" cy="{h_emu}"/></a:xfrm>'
            f'<a:prstGeom prst="rect"><a:avLst/></a:prstGeom>'
            f'</pic:spPr>'
            f'</pic:pic>'
            f'</a:graphicData>'
            f'</a:graphic>'
        )

        anc = self._make_anchor(sid, frame.x_pt, frame.y_pt, w_pt, h_pt, behind=False)
        self._place_graphic(doc, anc, graphic_xml)
|