from __future__ import annotations

import base64
import io
from typing import Optional

import structlog
from docx import Document
from docx.enum.section import WD_SECTION
from docx.oxml import OxmlElement
from docx.oxml.ns import qn
from docx.shared import Pt
from lxml import etree

from export.models import (
    IDExportDocument,
    IDExportImageFrame,
    IDExportPage,
    IDExportShape,
    IDExportTable,
    IDExportTextFrame,
)
from word.factories import FooterFactory, StyleFactory, TableFactory

log = structlog.get_logger()

# DrawingML / OOXML namespaces
# NOTE(review): in this view of the file only _W is referenced by visible
# code; the others are presumably interpolated into the XML templates further
# down, whose markup is not visible here — confirm before removing any of them.
_WP = 'http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing'
_A = 'http://schemas.openxmlformats.org/drawingml/2006/main'
_PIC = 'http://schemas.openxmlformats.org/drawingml/2006/picture'
_R = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships'
_W = 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'
_WPS = 'http://schemas.microsoft.com/office/word/2010/wordprocessingShape'


def _pt_to_emu(pt: float) -> int:
    """Convert points to English Metric Units (EMU).

    Uses 914400 EMU per inch and 72 points per inch. The result is passed
    through ``int()``, so any fractional EMU is truncated toward zero rather
    than rounded.
    """
    return int(pt * 914400 / 72)


class DocxBuilder:
    """Builds a DOCX from a parsed IDExportDocument using absolute page anchoring.

    Every element is placed as an absolutely-positioned wp:anchor at its exact
    InDesign coordinates (relativeFrom="page"). Shapes are placed behind text
    (behindDoc=1); text boxes and images are placed in front (behindDoc=0).
    Tables are added inline in Y-position order.

    NOTE(review): the XML template literals used by the helper methods appear
    as empty f-string fragments in this view of the file (the markup seems to
    have been stripped), so the anchoring attributes described above cannot be
    verified here — confirm against the repository copy.
    """

    def build(self, document: IDExportDocument) -> bytes:
        """Render ``document`` into a DOCX file and return its raw bytes.

        One Word section is used per page: the first page reuses the section
        that a new ``Document()`` already has, every later page adds a
        NEW_PAGE section. Within a page, items are emitted in three passes
        (shapes, then text frames and images, then tables sorted by ``y_pt``).
        A footer is applied once at the end when ``document.page_number_style``
        is truthy.

        Also resets ``self._shape_counter`` to 0, so ids handed out by
        ``_next_id`` restart at 1 for every call.
        """
        doc = Document()
        self._shape_counter = 0

        # Register all paragraph styles once before any content is added
        StyleFactory.register_all(doc, document)

        for page_idx, page in enumerate(document.pages):
            if page_idx == 0:
                section = doc.sections[0]
            else:
                section = doc.add_section(WD_SECTION.NEW_PAGE)

            # A falsy per-page size (None or 0) falls back to the document size.
            w = page.width_pt or document.page_width_pt
            h = page.height_pt or document.page_height_pt
            section.page_width = Pt(w)
            section.page_height = Pt(h)
            # Fixed margins on all four sides; 56.69 pt is roughly 20 mm.
            section.left_margin = Pt(56.69)
            section.right_margin = Pt(56.69)
            section.top_margin = Pt(56.69)
            section.bottom_margin = Pt(56.69)

            # 1. Shapes first — placed behind (behindDoc=1)
            for item in page.items:
                if isinstance(item, IDExportShape):
                    self._add_shape(doc, item)

            # 2. Text frames and images — placed in front (behindDoc=0)
            for item in page.items:
                if isinstance(item, IDExportTextFrame):
                    self._add_text_box(doc, item)
                elif isinstance(item, IDExportImageFrame):
                    self._add_image_anchor(doc, item)

            # 3. Tables — inline, sorted by Y position
            tables = sorted(
                [i for i in page.items if isinstance(i, IDExportTable)],
                key=lambda t: t.y_pt,
            )
            for table in tables:
                TableFactory.add_to_doc(doc, table)

        if document.page_number_style:
            FooterFactory.apply(doc, document.page_number_style)

        # Serialise to memory so the caller receives bytes, not a file path.
        buf = io.BytesIO()
        doc.save(buf)
        return buf.getvalue()

    # ── Anchor helpers ─────────────────────────────────────────────────────────

    def _next_id(self) -> int:
        """Increment and return the per-build drawing id counter.

        Reads ``self._shape_counter``, which is only assigned inside
        ``build``; calling this before ``build`` has run raises
        AttributeError.
        """
        self._shape_counter += 1
        return self._shape_counter

    def _anchor_para(self, doc: Document) -> etree._Element:
        """Add a compact paragraph to host an anchor; return the raw p element.

        The paragraph gets a ``w:pPr`` with ``w:spacing`` before=0, after=0 and
        line=240, inserted as its first child.

        NOTE(review): w:line="240" normally denotes single line spacing, so
        the host paragraph probably still occupies one text line rather than
        zero height — confirm whether many anchors on one page can push
        content onto the next page.
        """
        para = doc.add_paragraph()
        pPr = OxmlElement('w:pPr')
        spacing = OxmlElement('w:spacing')
        spacing.set(qn('w:before'), '0')
        spacing.set(qn('w:after'), '0')
        spacing.set(qn('w:line'), '240')
        pPr.append(spacing)
        para._p.insert(0, pPr)
        return para._p

    def _make_anchor(self, sid: int, x_pt: float, y_pt: float, w_pt: float, h_pt: float, behind: bool = False) -> etree._Element:
        """Build a wp:anchor element (without graphic content) at absolute page coords.

        Args:
            sid: Drawing id for the anchor (callers obtain it from ``_next_id``).
            x_pt, y_pt: Position in points; converted to EMU.
            w_pt, h_pt: Size in points; converted to EMU.
            behind: When True the z-order value is '0' and the behind-document
                flag is '1'; otherwise '251658240' and '0'.

        Returns:
            The element parsed from the assembled XML template.
        """
        x = _pt_to_emu(x_pt)
        y = _pt_to_emu(y_pt)
        w = _pt_to_emu(w_pt)
        h = _pt_to_emu(h_pt)
        z = '0' if behind else '251658240'
        bd = '1' if behind else '0'
        # NOTE(review): the template fragments below are empty in this view of
        # the file — only the {x} and {y} interpolations survive. Presumably
        # the wp:anchor markup (using sid, w, h, z and bd) was stripped; as
        # shown, the string is not well-formed XML. Restore from the
        # repository copy before editing.
        return etree.fromstring(
            f''
            f''
            f'{x}'
            f'{y}'
            f''
            f''
            f''
            f''
            f''
            f''
        )

    def _attach_anchor(self, p_el: etree._Element, anchor: etree._Element) -> None:
        """Wrap an anchor in w:r > w:drawing and append it to a paragraph element."""
        r_el = etree.SubElement(p_el, f'{{{_W}}}r')
        drawing = etree.SubElement(r_el, f'{{{_W}}}drawing')
        drawing.append(anchor)

    # ── Shape placement ────────────────────────────────────────────────────────

    def _add_shape(self, doc: Document, item: IDExportShape) -> None:
        """Place a DrawingML shape (rect/ellipse/line) behind page content.

        Width and height are clamped to at least 0.5 pt. An unrecognised
        ``item.shape_type`` falls back to 'rect'. Fill markup is emitted only
        when ``item.fill_hex`` is truthy; stroke markup only when both
        ``item.stroke_hex`` and ``item.stroke_weight_pt`` are truthy.
        """
        w_pt = max(item.width_pt, 0.5)
        h_pt = max(item.height_pt, 0.5)
        sid = self._next_id()
        w_emu = _pt_to_emu(w_pt)
        h_emu = _pt_to_emu(h_pt)

        # Whitelist of preset geometry names; anything else becomes 'rect'.
        prst = {'rect': 'rect', 'ellipse': 'ellipse', 'line': 'line'}.get(
            item.shape_type, 'rect'
        )

        # NOTE(review): the fill, stroke and graphic templates below are empty
        # in this view of the file; w_emu, h_emu, prst, sw and the colour
        # values are presumably interpolated into markup that was stripped.
        if item.fill_hex:
            fill_xml = f''
        else:
            fill_xml = ''

        if item.stroke_hex and item.stroke_weight_pt:
            sw = max(_pt_to_emu(item.stroke_weight_pt), 12700)  # min 1pt EMU
            stroke_xml = (
                f''
                f''
                f''
            )
        else:
            stroke_xml = ''

        graphic_xml = (
            f''
            f''
            f''
            f''
            f''
            f''
            f''
            f'{fill_xml}{stroke_xml}'
            f''
            f''
            f''
            f''
            f''
        )

        anc = self._make_anchor(sid, item.x_pt, item.y_pt, w_pt, h_pt, behind=True)
        anc.append(etree.fromstring(graphic_xml))
        p_el = self._anchor_para(doc)
        self._attach_anchor(p_el, anc)

    # ── Text box placement ─────────────────────────────────────────────────────

    def _add_text_box(self, doc: Document, frame: IDExportTextFrame) -> None:
        """Place a text frame as an anchored WPS text box at exact page coordinates.

        Frames without paragraphs are skipped entirely (no id is consumed).
        Width and height are clamped to at least 1.0 pt.
        """
        if not frame.paragraphs:
            return

        w_pt = max(frame.width_pt, 1.0)
        h_pt = max(frame.height_pt, 1.0)
        sid = self._next_id()
        w_emu = _pt_to_emu(w_pt)
        h_emu = _pt_to_emu(h_pt)

        # Build paragraph elements via python-docx API, then transplant into txbxContent
        para_elements = self._build_txbx_paragraphs(frame, doc)
        # The elements are serialised to text so they can be spliced into the
        # template string, which is parsed again below.
        paras_xml = ''.join(
            etree.tostring(p, encoding='unicode') for p in para_elements
        )

        # NOTE(review): the template fragments below are empty in this view of
        # the file — only the {paras_xml} interpolation survives. w_emu and
        # h_emu are presumably used by the stripped markup.
        graphic_xml = (
            f''
            f''
            f''
            f''
            f''
            f''
            f''
            f''
            f''
            f''
            f'{paras_xml}'
            f''
            f''
            f''
            f''
            f''
        )

        anc = self._make_anchor(sid, frame.x_pt, frame.y_pt, w_pt, h_pt, behind=False)
        anc.append(etree.fromstring(graphic_xml))
        p_el = self._anchor_para(doc)
        self._attach_anchor(p_el, anc)

    def _build_txbx_paragraphs(self, frame: IDExportTextFrame, doc: Document) -> list:
        """Build w:p XML elements for a text frame using python-docx's style API.

        Paragraphs are created normally via python-docx (so styles are applied
        correctly), then their XML elements are extracted and moved into the
        wp:txbxContent node.

        A paragraph whose text is blank and which has no runs becomes a bare,
        unstyled ``w:p`` element. When a paragraph has runs, runs with empty
        text are skipped; when it has none, its plain ``text`` is added as a
        single run.

        Returns:
            A list of detached ``w:p`` elements, one per entry in
            ``frame.paragraphs`` and in the same order.
        """
        result = []
        for para_data in frame.paragraphs:
            if not para_data.text.strip() and not para_data.runs:
                result.append(etree.Element(f'{{{_W}}}p'))
                continue

            # Temporarily appended to the document body; removed again below.
            temp = doc.add_paragraph()
            StyleFactory.apply_paragraph_style(temp, para_data.style, doc)

            if para_data.runs:
                for run_data in para_data.runs:
                    if not run_data.text:
                        continue
                    run = temp.add_run(run_data.text)
                    StyleFactory.apply_run_formatting(run, run_data)
            else:
                temp.add_run(para_data.text)

            # Extract element from doc body and hand it to the caller
            p_el = temp._p
            p_el.getparent().remove(p_el)
            result.append(p_el)

        return result

    # ── Image placement ────────────────────────────────────────────────────────

    def _add_image_anchor(self, doc: Document, frame: IDExportImageFrame) -> None:
        """Place an image as an anchored DrawingML picture at exact page coordinates.

        Best-effort: a frame with no image data is skipped silently, and a
        failure to decode the base64 payload or to register the image with
        the document part is logged as a warning and the frame is skipped.
        Width and height are clamped to at least 1.0 pt.
        """
        if not frame.image_data_b64:
            return

        try:
            img_bytes = base64.b64decode(frame.image_data_b64)
        except Exception:
            log.warning('image_decode_failed', frame_id=frame.id)
            return

        w_pt = max(frame.width_pt, 1.0)
        h_pt = max(frame.height_pt, 1.0)
        # The id is taken before registration, so a registration failure
        # below still consumes one id.
        sid = self._next_id()
        w_emu = _pt_to_emu(w_pt)
        h_emu = _pt_to_emu(h_pt)

        try:
            r_id, _image = doc.part.get_or_add_image(io.BytesIO(img_bytes))
        except Exception as e:
            log.warning('image_register_failed', frame_id=frame.id, error=str(e))
            return

        # NOTE(review): the template fragments below are empty in this view of
        # the file; r_id, sid, w_emu and h_emu are presumably interpolated
        # into picture markup that was stripped.
        graphic_xml = (
            f''
            f''
            f''
            f''
            f''
            f''
            f''
            f''
            f''
            f''
            f''
            f''
            f''
            f''
            f''
            f''
            f''
            f''
        )

        anc = self._make_anchor(sid, frame.x_pt, frame.y_pt, w_pt, h_pt, behind=False)
        anc.append(etree.fromstring(graphic_xml))
        p_el = self._anchor_para(doc)
        self._attach_anchor(p_el, anc)