sphinx_weasyprint_builder/sphinx_weasyprint_builder/weasyprint_builder.py

206 lines
6.1 KiB
Python

#!/usr/bin/env python3
import os
from typing import Dict, Set, Tuple
from copy import deepcopy
from sphinx.builders.singlehtml import SingleFileHTMLBuilder
from sphinx.util import progress_message
from sphinx.util.osutil import os_path
from sphinx.locale import __
from sphinx.util import logging
from bs4 import BeautifulSoup
import weasyprint
from .loghandler import init_wpsphinx_log
# Module-level logger, obtained through Sphinx's logging helper
# (``sphinx.util.logging``) under the name 'weasyprint'.
logger = logging.getLogger('weasyprint')
# Runs once, at import time.
# NOTE(review): presumably hooks WeasyPrint's own log output into Sphinx's
# logging -- confirm against ``.loghandler``.
init_wpsphinx_log()
def extract(soup, selector: str) -> None:
    """Detach from ``soup`` every element matched by ``selector``.

    The tree is modified in place: each element returned by
    ``soup.select(selector)`` has its ``extract()`` method called.

    :param soup: parsed document (or any object offering ``select``).
    :param selector: selector string handed to ``soup.select``.
    """
    for matched in soup.select(selector):
        matched.extract()
class PdfGenerator:
    """Render an HTML file to PDF with an optional repeated header/footer.

    Adapted from the "Tips & Tricks" section of the WeasyPrint 47
    documentation. The header and the footer are rendered as separate
    documents; the children of their ``body`` boxes are then appended to the
    ``body`` box of every page of the main document.
    """

    def __init__(
        self, base_url: str,
        main_selector: str,
        footer_selector: str,
        header_selector: str
    ) -> None:
        """Read the HTML file and split it into main, header and footer HTML.

        :param base_url: path of the HTML file to convert; it is also handed
            to WeasyPrint as ``base_url``.
        :param main_selector: selector of the main content. Required as soon
            as a header or footer selector is given; otherwise an error is
            logged and the document is left unsplit.
        :param footer_selector: selector of the footer element(s), or a falsy
            value when there is no footer.
        :param header_selector: selector of the header element(s), or a falsy
            value when there is no header.
        """
        self.base_url = base_url
        with open(self.base_url, encoding='utf8') as htmlfile:
            self.main_html = htmlfile.read()
        self.footer_html = None
        self.header_html = None
        if not (footer_selector or header_selector):
            return
        if not main_selector:
            logger.error(
                __('You must define a selector for content if you set selector for footer or header.'))
            return

        main_soup = BeautifulSoup(self.main_html, 'html.parser')
        # Bind both names up front: only one of the two selectors may be
        # configured, and the checks at the end read both of them.
        footer_soup = None
        header_soup = None

        if footer_selector:
            # Footer document = full page minus header and main content.
            footer_soup = deepcopy(main_soup)
            if header_selector:
                extract(footer_soup, header_selector)
            extract(footer_soup, main_selector)

        if header_selector:
            # Header document = full page minus footer and main content.
            header_soup = deepcopy(main_soup)
            if footer_selector:
                extract(header_soup, footer_selector)
            extract(header_soup, main_selector)
            extract(main_soup, header_selector)

        if footer_selector:
            extract(main_soup, footer_selector)

        # Main document = full page minus header and footer.
        self.main_html = str(main_soup)
        if footer_soup is not None:
            self.footer_html = str(footer_soup)
        if header_soup is not None:
            self.header_html = str(header_soup)

    def _compute_overlay_element(self, element: str):
        """Lay out the header or footer and return its ``body`` box.

        :param element: ``'header'`` or ``'footer'``; selects the
            ``<element>_html`` attribute to render.
        :returns: a copy (made with ``copy_with_children``) of the ``body``
            box found on the first page of the rendered element.
        """
        html = weasyprint.HTML(
            string=getattr(self, f'{element}_html'),
            base_url=self.base_url,
        )
        element_doc = html.render()
        element_page = element_doc.pages[0]
        # ``_page_box`` is a private WeasyPrint attribute.
        element_body = PdfGenerator.get_element(
            element_page._page_box.all_children(), 'body'
        )
        element_body = element_body.copy_with_children(
            element_body.all_children()
        )
        return element_body

    def _apply_overlay_on_main(
        self, main_doc, header_body=None, footer_body=None
    ) -> None:
        """Append the header/footer boxes to the ``body`` of every page.

        :param main_doc: rendered main document; its pages are modified in
            place.
        :param header_body: ``body`` box of the rendered header, if any.
        :param footer_body: ``body`` box of the rendered footer, if any.
        """
        for page in main_doc.pages:
            page_body = PdfGenerator.get_element(
                page._page_box.all_children(), 'body'
            )
            if header_body:
                page_body.children += header_body.all_children()
            if footer_body:
                page_body.children += footer_body.all_children()

    def write_pdf(self, target: str) -> None:
        """Render the main document, overlay header/footer and write the PDF.

        :param target: forwarded to the rendered document's ``write_pdf``.
        """
        header_body = (
            self._compute_overlay_element('header')
            if self.header_html else None
        )
        footer_body = (
            self._compute_overlay_element('footer')
            if self.footer_html else None
        )
        html = weasyprint.HTML(
            string=self.main_html,
            base_url=self.base_url,
        )
        main_doc = html.render()
        if self.header_html or self.footer_html:
            self._apply_overlay_on_main(main_doc, header_body, footer_body)
        main_doc.write_pdf(target)

    @staticmethod
    def get_element(boxes, element):
        """Return the first box tagged ``element``, searching depth-first.

        Each box is tested before its descendants, and siblings are visited
        in order, so a match located after a non-matching sibling (for
        instance ``body`` after ``head``) is still found.

        :param boxes: iterable of boxes exposing ``element_tag`` and
            ``all_children()``.
        :param element: tag name to look for, e.g. ``'body'``.
        :returns: the matching box, or ``None`` when there is none.
        """
        for box in boxes:
            if box.element_tag == element:
                return box
            found = PdfGenerator.get_element(box.all_children(), element)
            if found is not None:
                return found
        return None
class WeasyPrintPDFBuilder(SingleFileHTMLBuilder):
    """Single-file HTML builder whose output is converted to PDF.

    The HTML page is produced by the parent builder; ``finish`` then feeds it
    to :class:`PdfGenerator`. The JavaScript- and favicon-related hooks are
    overridden with no-ops.
    """

    # Builder name, i.e. the value given to ``sphinx-build -b``.
    name = 'weasyprint'
    # Message displayed once the build is over.
    epilog = __('The PDF file has been saved in %(outdir)s.')
    embedded = True
    search = False

    def _get_translations_js(self) -> str:
        """Do nothing and return ``None``."""
        return

    def copy_translation_js(self) -> None:
        """Do nothing: no translation script is copied."""
        return

    def copy_stemmer_js(self) -> None:
        """Do nothing: no stemmer script is copied."""
        return

    def copy_html_favicon(self) -> None:
        """Do nothing: no favicon is copied."""
        return

    def get_theme_config(self) -> Tuple[str, Dict]:
        """Return the theme name and options from the ``weasyprint_*`` config.

        :returns: ``(weasyprint_theme, weasyprint_theme_options)``.
        """
        return (
            self.config.weasyprint_theme,
            self.config.weasyprint_theme_options
        )

    def init_js_files(self) -> None:
        """Do nothing: no JavaScript file is registered."""
        return

    def add_js_file(self, filename: str, **kwargs: str) -> None:
        """Ignore the request: JavaScript files are never added."""
        return

    def prepare_writing(self, docnames: Set[str]) -> None:
        """Run the parent preparation, then override template context values.

        The stylesheet is ``weasyprint_style`` when configured, else the
        theme's stylesheet, else ``'default.css'``.

        :param docnames: forwarded unchanged to the parent implementation.
        """
        super().prepare_writing(docnames)
        if self.config.weasyprint_style is not None:
            stylename = self.config.weasyprint_style
        elif self.theme:
            stylename = self.theme.get_config('theme', 'stylesheet')
        else:
            stylename = 'default.css'

        self.globalcontext['use_opensearch'] = False
        self.globalcontext['docstitle'] = self.config.weasyprint_title
        self.globalcontext['shorttitle'] = self.config.weasyprint_short_title
        self.globalcontext['show_copyright'] = \
            self.config.weasyprint_show_copyright
        self.globalcontext['show_sphinx'] = self.config.weasyprint_show_sphinx
        self.globalcontext['style'] = stylename
        self.globalcontext['favicon'] = None

    def finish(self) -> None:
        """Finish the HTML build, then convert the resulting page to PDF.

        The input is ``<outdir>/<master_doc><out_suffix>`` and the output is
        ``<outdir>/<weasyprint_basename>.pdf``.
        """
        super().finish()

        infile = os.path.join(
            self.outdir,
            os_path(self.config.master_doc) + self.out_suffix
        )
        outfile = os.path.join(
            self.outdir,
            self.config.weasyprint_basename + '.pdf'
        )
        # ``progress_message`` is a context manager: it reports nothing unless
        # it is entered, so the conversion has to run inside the ``with``.
        with progress_message('Starting conversion to PDF with WeasyPrint'):
            generator = PdfGenerator(
                infile,
                self.config.weasyprint_main_selector,
                self.config.weasyprint_footer_selector,
                self.config.weasyprint_header_selector)
            generator.write_pdf(outfile)