PDF export: sanitize HTML (remove <style>, style attributes, <script>) to avoid xhtml2pdf font-size error
This commit is contained in:
11
app.py
11
app.py
@@ -16,6 +16,7 @@ from flask import (
|
||||
Response,
|
||||
)
|
||||
from io import BytesIO
|
||||
import re
|
||||
from xhtml2pdf import pisa # type: ignore
|
||||
from docx import Document # type: ignore
|
||||
from htmldocx import HtmlToDocx # type: ignore
|
||||
@@ -268,6 +269,13 @@ def create_app():
|
||||
abort(404)
|
||||
return row
|
||||
|
||||
def _sanitize_html_for_pdf(html: str) -> str:
|
||||
# xhtml2pdf плохо переносит современный CSS; вычищаем стили/скрипты
|
||||
html = re.sub(r"<style[^>]*>.*?</style>", "", html, flags=re.I | re.S)
|
||||
html = re.sub(r"\sstyle=(\"|\')(.*?)\1", "", html, flags=re.I | re.S)
|
||||
html = re.sub(r"<script[^>]*>.*?</script>", "", html, flags=re.I | re.S)
|
||||
return html
|
||||
|
||||
def _wrap_html_for_export(title: str, html: str) -> str:
|
||||
head_title = f"<title>{title}</title>" if title else ""
|
||||
return (
|
||||
@@ -280,7 +288,8 @@ def create_app():
|
||||
def export_pdf(uid: str):
|
||||
row = _fetch_page(uid)
|
||||
title = row["title"] or f"page-{uid[:8]}"
|
||||
html_doc = _wrap_html_for_export(title, row["html"])
|
||||
cleaned = _sanitize_html_for_pdf(row["html"])
|
||||
html_doc = _wrap_html_for_export(title, cleaned)
|
||||
out = BytesIO()
|
||||
pisa.CreatePDF(src=html_doc, dest=out)
|
||||
out.seek(0)
|
||||
|
||||
Reference in New Issue
Block a user