diff --git a/app.py b/app.py
index 351ee2f..0a2b03e 100644
--- a/app.py
+++ b/app.py
@@ -16,6 +16,7 @@ from flask import (
     Response,
 )
 from io import BytesIO
+import re
 from xhtml2pdf import pisa  # type: ignore
 from docx import Document  # type: ignore
 from htmldocx import HtmlToDocx  # type: ignore
@@ -268,6 +269,17 @@ def create_app():
             abort(404)
         return row
 
+    def _sanitize_html_for_pdf(html: str) -> str:
+        """Strip styles and scripts; xhtml2pdf copes poorly with modern CSS.
+
+        Drops whole <style> and <script> elements and quoted inline
+        ``style`` attributes; all other markup is returned unchanged.
+        """
+        html = re.sub(r"<style\b[^>]*>.*?</style\s*>", "", html, flags=re.I | re.S)
+        html = re.sub(r"\sstyle=(\"|\')(.*?)\1", "", html, flags=re.I | re.S)
+        html = re.sub(r"<script\b[^>]*>.*?</script\s*>", "", html, flags=re.I | re.S)
+        return html
+
     def _wrap_html_for_export(title: str, html: str) -> str:
         head_title = f"{title}" if title else ""
         return (
@@ -280,7 +292,8 @@ def create_app():
     def export_pdf(uid: str):
         row = _fetch_page(uid)
         title = row["title"] or f"page-{uid[:8]}"
-        html_doc = _wrap_html_for_export(title, row["html"])
+        cleaned = _sanitize_html_for_pdf(row["html"])
+        html_doc = _wrap_html_for_export(title, cleaned)
         out = BytesIO()
         pisa.CreatePDF(src=html_doc, dest=out)
         out.seek(0)