PDF export: sanitize HTML (remove <style>, style attributes, <script>) to avoid xhtml2pdf font-size error
This commit is contained in:
11
app.py
11
app.py
@@ -16,6 +16,7 @@ from flask import (
|
|||||||
Response,
|
Response,
|
||||||
)
|
)
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
|
import re
|
||||||
from xhtml2pdf import pisa # type: ignore
|
from xhtml2pdf import pisa # type: ignore
|
||||||
from docx import Document # type: ignore
|
from docx import Document # type: ignore
|
||||||
from htmldocx import HtmlToDocx # type: ignore
|
from htmldocx import HtmlToDocx # type: ignore
|
||||||
@@ -268,6 +269,13 @@ def create_app():
|
|||||||
abort(404)
|
abort(404)
|
||||||
return row
|
return row
|
||||||
|
|
||||||
|
def _sanitize_html_for_pdf(html: str) -> str:
|
||||||
|
# xhtml2pdf плохо переносит современный CSS; вычищаем стили/скрипты
|
||||||
|
html = re.sub(r"<style[^>]*>.*?</style>", "", html, flags=re.I | re.S)
|
||||||
|
html = re.sub(r"\sstyle=(\"|\')(.*?)\1", "", html, flags=re.I | re.S)
|
||||||
|
html = re.sub(r"<script[^>]*>.*?</script>", "", html, flags=re.I | re.S)
|
||||||
|
return html
|
||||||
|
|
||||||
def _wrap_html_for_export(title: str, html: str) -> str:
|
def _wrap_html_for_export(title: str, html: str) -> str:
|
||||||
head_title = f"<title>{title}</title>" if title else ""
|
head_title = f"<title>{title}</title>" if title else ""
|
||||||
return (
|
return (
|
||||||
@@ -280,7 +288,8 @@ def create_app():
|
|||||||
def export_pdf(uid: str):
|
def export_pdf(uid: str):
|
||||||
row = _fetch_page(uid)
|
row = _fetch_page(uid)
|
||||||
title = row["title"] or f"page-{uid[:8]}"
|
title = row["title"] or f"page-{uid[:8]}"
|
||||||
html_doc = _wrap_html_for_export(title, row["html"])
|
cleaned = _sanitize_html_for_pdf(row["html"])
|
||||||
|
html_doc = _wrap_html_for_export(title, cleaned)
|
||||||
out = BytesIO()
|
out = BytesIO()
|
||||||
pisa.CreatePDF(src=html_doc, dest=out)
|
pisa.CreatePDF(src=html_doc, dest=out)
|
||||||
out.seek(0)
|
out.seek(0)
|
||||||
|
|||||||
Reference in New Issue
Block a user