125 lines
4.5 KiB
Python
125 lines
4.5 KiB
Python
from __future__ import annotations
|
|
|
|
from typing import List
|
|
|
|
from export.fonts import classify_font, get_substitute
|
|
from models.scan import FontEntry, ScanReport, ScanWarning
|
|
|
|
|
|
class IDExportScanner:
    """Lightweight scan of idconvert_export.json — builds the pre-conversion report."""

    def scan(self, data: dict) -> ScanReport:
        """Scan a validated export dict and return a ScanReport.

        Args:
            data: Parsed export. The keys read here are ``pages``,
                ``fonts_used`` and ``styles``; a missing or ``None`` value
                is treated as empty.

        Returns:
            A ScanReport carrying the page count, the text-frame / image /
            table counts, the classified fonts and the collected warnings.
        """
        pages = data.get('pages') or []
        fonts_used = data.get('fonts_used') or []
        styles = data.get('styles') or {}

        frame_count, image_count, table_count = self._count_items(pages)

        return ScanReport(
            pages=len(pages),
            stories=frame_count,  # text frames are reported as "stories"
            images=image_count,
            tables=table_count,
            fonts=self._classify_fonts(fonts_used, styles),
            warnings=self._collect_warnings(data),
        )

    # ── private helpers ──────────────────────────────────────────────────────

    def _count_items(self, pages: list) -> tuple[int, int, int]:
        """Count page items by type.

        Args:
            pages: Page dicts, each with an optional ``items`` list of
                item dicts carrying a ``type`` key.

        Returns:
            ``(text_frame_count, image_frame_count, table_count)``. Items of
            any other type are ignored.
        """
        frame_count = image_count = table_count = 0
        for page in pages or []:
            # ``items`` may be absent or explicitly null on an empty page.
            for item in page.get('items') or []:
                kind = item.get('type', '')
                if kind == 'text_frame':
                    frame_count += 1
                elif kind == 'image_frame':
                    image_count += 1
                elif kind == 'table':
                    table_count += 1
        return frame_count, image_count, table_count

    @staticmethod
    def _context_for_style(style_name: str) -> str:
        """Map a paragraph style name to a coarse "used for" label."""
        lowered = style_name.lower()
        if 'head' in lowered:
            return 'Headings'
        if 'caption' in lowered:
            return 'Captions'
        if 'body' in lowered or 'text' in lowered:
            return 'Body text'
        return 'General'

    def _classify_fonts(self, fonts_used: list, styles: dict) -> List[FontEntry]:
        """Build one FontEntry per distinct font name.

        Fonts come from two places: the ``name`` of each ``fonts_used`` entry
        (labelled ``'General'``) and the ``font`` of each paragraph style in
        ``styles['paragraph']`` (labelled from the style's name).

        Args:
            fonts_used: Font dicts, each with a ``name`` key.
            styles: Style dict; only its ``paragraph`` list is read.

        Returns:
            FontEntry objects in first-seen order, each populated from
            ``classify_font`` and ``get_substitute`` for that font name.
        """
        contexts: dict = {}

        # Fonts declared in fonts_used
        for declared in fonts_used or []:
            name = declared.get('name', '')
            if name:
                contexts.setdefault(name, 'General')

        # Cross-reference paragraph styles for "used for" context
        for style in (styles or {}).get('paragraph') or []:
            font_name = style.get('font')
            if not font_name:
                continue
            ctx = self._context_for_style(style.get('name') or '')
            # The catch-all label must not erase a specific context that an
            # earlier style already recorded for the same font.
            if ctx != 'General' or font_name not in contexts:
                contexts[font_name] = ctx

        result = []
        for name, ctx in contexts.items():
            substitute, quality = get_substitute(name)
            result.append(FontEntry(
                name=name,
                status=classify_font(name),
                substitute=substitute,
                substitute_quality=quality,
                used_for=ctx,
            ))
        return result

    def _collect_warnings(self, data: dict) -> List[ScanWarning]:
        """Collect informational warnings about the export.

        Emits, in this order:
          * one ``multi_column`` warning per text frame whose
            ``column_count`` is greater than 1 (tagged with the page's
            ``page_number``);
          * a single ``shapes_excluded`` warning if any ``shape`` item exists;
          * a single ``page_number`` warning if any ``page_number`` item
            exists.

        Args:
            data: Export dict; only ``pages`` and each page's ``items`` and
                ``page_number`` are read.

        Returns:
            The list of ScanWarning objects (empty when nothing applies).
        """
        warnings = []
        has_shapes = False
        has_page_numbers = False

        # Single pass over the document; the once-only warnings are appended
        # afterwards so they still follow the per-frame warnings.
        for page in data.get('pages') or []:
            for item in page.get('items') or []:
                kind = item.get('type')
                if kind == 'text_frame':
                    columns = item.get('column_count', 1)
                    # A null or non-numeric column_count is treated as
                    # single-column instead of raising on the comparison.
                    if isinstance(columns, (int, float)) and columns > 1:
                        warnings.append(ScanWarning(
                            type='multi_column',
                            severity='info',
                            page=page.get('page_number'),
                            message='Multi-column text frame will flow as single column in Word',
                        ))
                elif kind == 'shape':
                    has_shapes = True
                elif kind == 'page_number':
                    has_page_numbers = True

        # Shapes are not converted
        if has_shapes:
            warnings.append(ScanWarning(
                type='shapes_excluded',
                severity='info',
                page=None,
                message='Decorative shapes and rules are not converted to Word',
            ))

        # Only one page-number warning is needed, however many items exist
        if has_page_numbers:
            warnings.append(ScanWarning(
                type='page_number',
                severity='info',
                page=None,
                message='Page numbers converted to native Word footer fields',
            ))

        return warnings
|