from __future__ import annotations

from typing import List

from export.fonts import classify_font, get_substitute
from models.scan import FontEntry, ScanReport, ScanWarning


class IDExportScanner:
    """Lightweight scan of idconvert_export.json — builds the pre-conversion report."""

    def scan(self, data: dict) -> ScanReport:
        """Scan a validated export dict and return a ScanReport.

        Args:
            data: Export dict. The keys read here are ``pages`` (list of page
                dicts with an ``items`` list), ``fonts_used`` (list of dicts
                with a ``name``) and ``styles`` (dict with a ``paragraph``
                list). Missing keys are treated as empty.

        Returns:
            A ScanReport carrying the page count, the per-type item counts,
            the classified fonts and the collected warnings. Note that the
            report's ``stories`` field is filled with the number of
            ``text_frame`` items.
        """
        pages = data.get('pages', [])
        fonts_used = data.get('fonts_used', [])

        frame_count, image_count, table_count = self._count_items(pages)
        fonts = self._classify_fonts(fonts_used, data.get('styles', {}))
        warnings = self._collect_warnings(data)

        return ScanReport(
            pages=len(pages),
            stories=frame_count,
            images=image_count,
            tables=table_count,
            fonts=fonts,
            warnings=warnings,
        )

    # ── private helpers ──────────────────────────────────────────────────────

    def _count_items(self, pages: list) -> tuple[int, int, int]:
        """Count text frames, image frames and tables across all pages.

        Items whose ``type`` is anything else (or missing) are ignored.

        Returns:
            ``(frame_count, image_count, table_count)``.
        """
        frame_count = image_count = table_count = 0
        for page in pages:
            for item in page.get('items', []):
                item_type = item.get('type', '')
                if item_type == 'text_frame':
                    frame_count += 1
                elif item_type == 'image_frame':
                    image_count += 1
                elif item_type == 'table':
                    table_count += 1
        return frame_count, image_count, table_count

    def _classify_fonts(self, fonts_used: list, styles: dict) -> List[FontEntry]:
        """Build one FontEntry per distinct font, labelled with a usage context.

        Fonts come from two places: the ``fonts_used`` declarations (labelled
        'General') and the ``font`` of each paragraph style, whose style name
        determines the context ('Headings', 'Captions', 'Body text', or
        'General' when the name matches none of the keywords).

        When several styles share a font, the last style with a specific
        context wins. A 'General' result never replaces a specific context
        that was already recorded, so an unrecognised style name cannot erase
        what an earlier style established.

        Args:
            fonts_used: List of dicts; entries without a non-empty ``name``
                are skipped.
            styles: Dict whose ``paragraph`` key holds a list of style dicts
                with optional ``font`` and ``name`` keys.

        Returns:
            FontEntry objects in first-seen order of the font names.
        """
        # Maps font name -> usage context; dict order preserves first sighting.
        contexts: dict = {}

        # Fonts declared in fonts_used
        for declared in fonts_used:
            name = declared.get('name', '')
            if name:
                contexts.setdefault(name, 'General')

        # Cross-reference paragraph styles for "used for" context
        for style in styles.get('paragraph', []):
            font = style.get('font')
            if not font:
                continue

            # `or ''` tolerates an explicit null style name.
            style_name = (style.get('name') or '').lower()
            if 'head' in style_name:
                ctx = 'Headings'
            elif 'caption' in style_name:
                ctx = 'Captions'
            elif 'body' in style_name or 'text' in style_name:
                ctx = 'Body text'
            else:
                ctx = 'General'

            # Only let 'General' fill a gap; never downgrade a specific context.
            if ctx != 'General' or font not in contexts:
                contexts[font] = ctx

        result = []
        for name, ctx in contexts.items():
            status = classify_font(name)
            substitute, quality = get_substitute(name)
            result.append(FontEntry(
                name=name,
                status=status,
                substitute=substitute,
                substitute_quality=quality,
                used_for=ctx,
            ))
        return result

    def _collect_warnings(self, data: dict) -> List[ScanWarning]:
        """Collect informational warnings about content that converts lossily.

        Emits, in this order:
          * one 'multi_column' warning per text frame with more than one
            column, tagged with that page's ``page_number``;
          * at most one 'shapes_excluded' warning if any shape item exists;
          * at most one 'page_number' warning if any page-number item exists.

        Args:
            data: Export dict; only ``pages`` and each page's ``items`` are read.

        Returns:
            The list of ScanWarning objects (possibly empty).
        """
        warnings: List[ScanWarning] = []
        has_shapes = False
        has_page_number = False

        # Single pass: per-frame warnings are appended as encountered, while
        # the document-wide conditions are only flagged and reported once below.
        for page in data.get('pages', []):
            for item in page.get('items', []):
                item_type = item.get('type')
                if item_type == 'text_frame':
                    # `or 1` treats a missing or null column_count as one column.
                    if (item.get('column_count') or 1) > 1:
                        warnings.append(ScanWarning(
                            type='multi_column',
                            severity='info',
                            page=page.get('page_number'),
                            message='Multi-column text frame will flow as single column in Word',
                        ))
                elif item_type == 'shape':
                    has_shapes = True
                elif item_type == 'page_number':
                    has_page_number = True

        # Shapes are not converted; one document-level notice is enough.
        if has_shapes:
            warnings.append(ScanWarning(
                type='shapes_excluded',
                severity='info',
                page=None,
                message='Decorative shapes and rules are not converted to Word',
            ))

        # Only one page-number warning is needed regardless of how many exist.
        if has_page_number:
            warnings.append(ScanWarning(
                type='page_number',
                severity='info',
                page=None,
                message='Page numbers converted to native Word footer fields',
            ))

        return warnings