206 lines
6.1 KiB
Python
206 lines
6.1 KiB
Python
#!/usr/bin/env python3
|
|
|
|
import os
|
|
from typing import Dict, Set, Tuple
|
|
from copy import deepcopy
|
|
|
|
from sphinx.builders.singlehtml import SingleFileHTMLBuilder
|
|
from sphinx.util import progress_message
|
|
from sphinx.util.osutil import os_path
|
|
from sphinx.locale import __
|
|
from sphinx.util import logging
|
|
|
|
from bs4 import BeautifulSoup
|
|
|
|
import weasyprint
|
|
from .loghandler import init_wpsphinx_log
|
|
|
|
|
|
# Module-level logger. `logging` here is `sphinx.util.logging` (see the
# imports above), not the standard-library module.
logger = logging.getLogger('weasyprint')
|
|
|
|
|
|
# Executed once at import time. NOTE(review): presumably routes WeasyPrint's
# own log output into Sphinx's logging -- confirm in `.loghandler`.
init_wpsphinx_log()
|
|
|
|
|
|
def extract(soup, selector: str) -> None:
    """Detach, in place, every node of *soup* that matches *selector*.

    :param soup: object exposing ``select()`` (a BeautifulSoup tree in this
        module); it is mutated.
    :param selector: selector string handed to ``soup.select``.
    """
    for matched in soup.select(selector):
        matched.extract()
|
|
|
|
|
|
class PdfGenerator:
    """
    Render one HTML file to PDF, optionally repeating a header and/or a
    footer on every page.

    From WeasyPrint 47 documentation Tips & Tricks.

    The header and footer are cut out of the source document with CSS
    selectors, laid out on their own, and their boxes are then appended to
    the ``<body>`` box of every page of the main document.

    NOTE(review): the overlay code relies on WeasyPrint's private
    ``_page_box`` attribute, so it may break with other WeasyPrint
    versions -- confirm against the version in use.
    """

    def __init__(
        self, base_url: str,
        main_selector: str,
        footer_selector: str,
        header_selector: str
    ) -> None:
        """
        Read the HTML file and split it into main / header / footer parts.

        :param base_url: path of the HTML file to convert; also passed to
            WeasyPrint as the base URL.
        :param main_selector: selector of the main content. Required as
            soon as a header or footer selector is given.
        :param footer_selector: selector of the footer (falsy: no footer).
        :param header_selector: selector of the header (falsy: no header).

        If a header/footer selector is given without ``main_selector``, an
        error is logged and the document is left unsplit.
        """
        self.base_url = base_url
        with open(self.base_url, encoding='utf8') as htmlfile:
            self.main_html = htmlfile.read()

        self.footer_html = None
        self.header_html = None

        if not (footer_selector or header_selector):
            return

        if not main_selector:
            logger.error(
                __('You must define a selector for content if you set selector for footer or header.'))
            return

        main_soup = BeautifulSoup(self.main_html, 'html.parser')
        # Bind both names up front: each one is only filled in when its
        # selector is set, yet both are inspected at the end.
        footer_soup = None
        header_soup = None

        if footer_selector:
            # Footer document: a full copy minus the header and the content.
            footer_soup = deepcopy(main_soup)
            if header_selector:
                extract(footer_soup, header_selector)
            extract(footer_soup, main_selector)

        if header_selector:
            # Header document: a full copy minus the footer and the content.
            header_soup = deepcopy(main_soup)
            if footer_selector:
                extract(header_soup, footer_selector)
            extract(header_soup, main_selector)
            extract(main_soup, header_selector)

        if footer_selector:
            extract(main_soup, footer_selector)

        self.main_html = str(main_soup)
        if footer_soup is not None:
            self.footer_html = str(footer_soup)
        if header_soup is not None:
            self.header_html = str(header_soup)

    def _compute_overlay_element(self, element: str):
        """
        Lay out the header or footer document and return its body box.

        :param element: ``'header'`` or ``'footer'``; selects the
            ``<element>_html`` attribute to render.
        :returns: a copy of the ``body`` box found on the first rendered
            page (not a ``weasyprint.HTML`` object).
        """
        html = weasyprint.HTML(
            string=getattr(self, f'{element}_html'),
            base_url=self.base_url,
        )
        element_doc = html.render()
        # Only the first page of the overlay document is used.
        element_page = element_doc.pages[0]
        element_body = PdfGenerator.get_element(
            element_page._page_box.all_children(), 'body'
        )
        element_body = element_body.copy_with_children(
            element_body.all_children()
        )

        return element_body

    def _apply_overlay_on_main(
        self, main_doc, header_body=None, footer_body=None
    ) -> None:
        """
        Append the header/footer boxes to the body box of every page.

        :param main_doc: rendered main document; its pages are mutated.
        :param header_body: body box of the header, or ``None``.
        :param footer_body: body box of the footer, or ``None``.
        """
        for page in main_doc.pages:
            page_body = PdfGenerator.get_element(
                page._page_box.all_children(), 'body'
            )

            if header_body:
                page_body.children += header_body.all_children()
            if footer_body:
                page_body.children += footer_body.all_children()

    def write_pdf(self, target: str) -> None:
        """
        Render the document, overlay header/footer if any, and write it.

        :param target: destination passed to the rendered document's
            ``write_pdf``.
        """
        header_body = (
            self._compute_overlay_element('header')
            if self.header_html else None
        )
        footer_body = (
            self._compute_overlay_element('footer')
            if self.footer_html else None
        )

        html = weasyprint.HTML(
            string=self.main_html,
            base_url=self.base_url,
        )
        main_doc = html.render()

        if self.header_html or self.footer_html:
            self._apply_overlay_on_main(main_doc, header_body, footer_body)
        main_doc.write_pdf(target)

    @staticmethod
    def get_element(boxes, element):
        """
        Depth-first search for the first box whose tag is *element*.

        :param boxes: iterable of boxes, each exposing ``element_tag`` and
            ``all_children()``.
        :param element: tag name to look for (e.g. ``'body'``).
        :returns: the first matching box in pre-order, or ``None`` when no
            box matches (including when *boxes* is empty).
        """
        for box in boxes:
            if box.element_tag == element:
                return box
            # Descend, but keep scanning the siblings when this subtree has
            # no match instead of giving up after the first box.
            found = PdfGenerator.get_element(box.all_children(), element)
            if found is not None:
                return found
        return None
|
|
|
|
|
|
class WeasyPrintPDFBuilder(SingleFileHTMLBuilder):
    """
    Sphinx builder that writes the project as a single HTML file and then
    converts that file to PDF with WeasyPrint.

    The JavaScript, favicon and search hooks of the HTML builder are
    overridden with no-ops (presumably because they are of no use in a
    PDF). The ``weasyprint_*`` configuration values read here are not
    registered in this class -- NOTE(review): confirm where they are added.
    """

    name = 'weasyprint'
    epilog = __('The PDF file has been saved in %(outdir)s.')
    embedded = True
    search = False

    def _get_translations_js(self) -> str:
        """No-op override; returns ``None``."""
        return

    def copy_translation_js(self) -> None:
        """No-op override."""
        return

    def copy_stemmer_js(self) -> None:
        """No-op override."""
        return

    def copy_html_favicon(self) -> None:
        """No-op override."""
        return

    def get_theme_config(self) -> Tuple[str, Dict]:
        """Return the theme name and options from the ``weasyprint_*`` config."""
        return (
            self.config.weasyprint_theme,
            self.config.weasyprint_theme_options
        )

    def init_js_files(self) -> None:
        """No-op override."""
        return

    def add_js_file(self, filename: str, **kwargs: str) -> None:
        """No-op override; the arguments are ignored."""
        return

    def prepare_writing(self, docnames: Set[str]) -> None:
        """
        Run the parent preparation, then override template context values
        with the ``weasyprint_*`` configuration.

        The stylesheet is ``weasyprint_style`` when set, otherwise the
        theme's ``stylesheet`` setting, otherwise ``default.css``.
        """
        super().prepare_writing(docnames)
        if self.config.weasyprint_style is not None:
            stylename = self.config.weasyprint_style
        elif self.theme:
            stylename = self.theme.get_config('theme', 'stylesheet')
        else:
            stylename = 'default.css'

        self.globalcontext['use_opensearch'] = False
        self.globalcontext['docstitle'] = self.config.weasyprint_title
        self.globalcontext['shorttitle'] = self.config.weasyprint_short_title
        self.globalcontext['show_copyright'] = \
            self.config.weasyprint_show_copyright
        self.globalcontext['show_sphinx'] = self.config.weasyprint_show_sphinx
        self.globalcontext['style'] = stylename
        self.globalcontext['favicon'] = None

    def finish(self) -> None:
        """
        Finish the HTML build, then convert the resulting file to PDF.

        The input is ``<outdir>/<master_doc><out_suffix>`` and the output
        is ``<outdir>/<weasyprint_basename>.pdf``.
        """
        super().finish()
        infile = os.path.join(
            self.outdir,
            os_path(self.config.master_doc) + self.out_suffix
        )
        outfile = os.path.join(
            self.outdir,
            self.config.weasyprint_basename + '.pdf'
        )
        # progress_message is a context manager: it only logs when entered
        # and exited, so the conversion has to run inside the ``with``.
        with progress_message('Starting conversion to PDF with WeasyPrint'):
            generator = PdfGenerator(
                infile,
                self.config.weasyprint_main_selector,
                self.config.weasyprint_footer_selector,
                self.config.weasyprint_header_selector)
            generator.write_pdf(outfile)
|