Skip to content

Commit

Permalink
Move to pandoc for rendering sponsorship contracts
Browse files Browse the repository at this point in the history
  • Loading branch information
ewdurbin committed Dec 18, 2023
1 parent 7ba7d36 commit cca7b3a
Show file tree
Hide file tree
Showing 15 changed files with 439 additions and 434 deletions.
5 changes: 5 additions & 0 deletions Aptfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pandoc
texlive-latex-base
texlive-latex-recommended
texlive-fonts-recommended
lmodern
32 changes: 31 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,9 +1,39 @@
FROM python:3.9-bullseye
ENV PYTHONUNBUFFERED=1
ENV PYTHONDONTWRITEBYTECODE=1

# By default, Docker has special steps to avoid keeping APT caches in the layers, which
# is good, but in our case, we're going to mount a special cache volume (kept between
# builds), so we WANT the cache to persist.
RUN set -eux; \
rm -f /etc/apt/apt.conf.d/docker-clean; \
echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache;

# Install System level build requirements, this is done before
# everything else because these are rarely ever going to change.
# The package list (pandoc plus the TeX Live / lmodern packages) is the same
# list that appears in the Aptfile.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
--mount=type=cache,target=/var/lib/apt,sharing=locked \
set -x \
&& apt-get update \
&& apt-get install --no-install-recommends -y \
pandoc \
texlive-latex-base \
texlive-latex-recommended \
texlive-fonts-recommended \
lmodern

# Application code lives in /code.
RUN mkdir /code
WORKDIR /code

# Copy only the requirements files first, so the dependency install steps
# below run before the rest of the source tree is copied in.
COPY dev-requirements.txt /code/
COPY base-requirements.txt /code/
# NOTE(review): dev-requirements.txt is installed again further down (after pip
# is upgraded, with a pip cache mount). This earlier install looks redundant and
# may be the line the commit removed -- confirm against the real Dockerfile.
RUN pip install -r dev-requirements.txt

# Upgrade the packaging toolchain before installing project dependencies.
RUN pip --no-cache-dir --disable-pip-version-check install --upgrade pip setuptools wheel

# Install the development requirements, reusing a pip download cache mounted
# at /root/.cache/pip.
RUN --mount=type=cache,target=/root/.cache/pip \
set -x \
&& pip --disable-pip-version-check \
install \
-r dev-requirements.txt

# Finally copy the full source tree into the image.
COPY . /code/
7 changes: 3 additions & 4 deletions base-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -44,12 +44,11 @@ django-filter==2.4.0
django-ordered-model==3.4.3
django-widget-tweaks==1.4.8
django-countries==7.2.1
xhtml2pdf==0.2.5
django-easy-pdf3==0.1.2
num2words==0.5.10
django-polymorphic==3.0.0
sorl-thumbnail==12.7.0
docxtpl==0.12.0
reportlab==3.6.6
django-extensions==3.1.4
django-import-export==2.7.1

pypandoc==1.12
panflute==1.12
1 change: 0 additions & 1 deletion pydotorg/settings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,6 @@
'ordered_model',
'widget_tweaks',
'django_countries',
'easy_pdf',
'sorl.thumbnail',

'banners',
Expand Down
79 changes: 79 additions & 0 deletions sponsors/contracts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
import os
import tempfile

from django.http import HttpResponse
from django.template.loader import render_to_string
from django.utils.dateformat import format

import pypandoc

# Directory containing this module; used to locate files shipped next to it.
dirname = os.path.dirname(__file__)
# Path of the pandoc filter script that is passed to pypandoc (``filters=``)
# when a contract is rendered to docx.
DOCXPAGEBREAK_FILTER = os.path.join(dirname, "pandoc_filters/pagebreak.py")


def _clean_split(text, separator="\n"):
return [
t.replace("-", "").strip()
for t in text.split("\n")
if t.replace("-", "").strip()
]


def _contract_context(contract, **context):
    """Return *context* extended with the values used to render *contract*.

    The extra keyword arguments form the base context. The keys ``contract``,
    ``start_date``, ``start_day_english_suffix``, ``sponsor``, ``sponsorship``,
    ``benefits`` and ``legal_clauses`` are then set on it, overriding any
    caller-supplied values with the same names.
    """
    sponsorship = contract.sponsorship
    start_date = sponsorship.start_date

    context["contract"] = contract
    context["start_date"] = start_date
    # "S" is the format character for the English ordinal suffix of the day.
    context["start_day_english_suffix"] = format(start_date, "S")
    context["sponsor"] = sponsorship.sponsor
    context["sponsorship"] = sponsorship
    # Benefits and legal clauses are stored as raw text; turn each into a
    # list of cleaned, non-empty lines.
    context["benefits"] = _clean_split(contract.benefits_list.raw)
    context["legal_clauses"] = _clean_split(contract.legal_clauses.raw)
    return context


def render_contract_to_pdf_response(request, contract, **context):
    """Render *contract* to PDF and return it as an ``application/pdf`` response.

    *request* is accepted but not used by this function. Extra keyword
    arguments are forwarded to ``render_contract_to_pdf_file``.
    """
    pdf_bytes = render_contract_to_pdf_file(contract, **context)
    return HttpResponse(pdf_bytes, content_type="application/pdf")


def render_contract_to_pdf_file(contract, **context):
    """Render *contract* to PDF and return the contents of the PDF file.

    The contract is first rendered to docx by ``render_contract_to_docx_file``
    and written to a temporary file. pandoc (via ``pypandoc.convert_file``)
    then converts that file to a second temporary ``.pdf`` file, whose
    contents are read back and returned. Both temporary files are removed when
    their ``with`` blocks exit.

    Extra keyword arguments are forwarded to ``render_contract_to_docx_file``.
    """
    with tempfile.NamedTemporaryFile() as docx_file:
        with tempfile.NamedTemporaryFile(suffix=".pdf") as pdf_file:
            docx_file.write(render_contract_to_docx_file(contract, **context))
            # pandoc reopens the file by name, so anything still sitting in
            # this handle's write buffer must be pushed to disk first;
            # otherwise pandoc may see a truncated document.
            docx_file.flush()
            pypandoc.convert_file(
                docx_file.name, "pdf", outputfile=pdf_file.name, format="docx"
            )
            return pdf_file.read()


def render_contract_to_docx_response(request, contract, **context):
    """Render *contract* to docx and return it as a downloadable attachment.

    The response carries the Word (OpenXML) content type and a
    ``Content-Disposition`` header whose filename is built from the sponsor's
    name with spaces replaced by hyphens. *request* is accepted but not used.
    Extra keyword arguments are forwarded to ``render_contract_to_docx_file``.
    """
    docx_bytes = render_contract_to_docx_file(contract, **context)
    response = HttpResponse(
        docx_bytes,
        content_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    )
    response["Content-Disposition"] = (
        f"attachment; filename=sponsorship-contract-{contract.sponsorship.sponsor.name.replace(' ', '-')}.docx"
    )
    return response


def render_contract_to_docx_file(contract, **context):
    """Render *contract* to docx and return the contents of the docx file.

    The sponsorship agreement markdown template is rendered with the context
    from ``_contract_context``. pandoc (via ``pypandoc.convert_text``) then
    converts it to a temporary docx file using the page-break filter, and the
    file's contents are read back and returned. The temporary file is removed
    when the ``with`` block exits.

    Extra keyword arguments are forwarded to ``_contract_context``.
    """
    template = "sponsors/admin/contracts/sponsorship-agreement.md"
    # NOTE(review): a "sponsors/admin/contracts/reference.docx" path used to be
    # assigned here but was never handed to pandoc (e.g. as a reference doc
    # for styling). Confirm whether it should be wired in.
    context = _contract_context(contract, **context)
    markdown = render_to_string(template, context)
    with tempfile.NamedTemporaryFile() as docx_file:
        # With ``outputfile`` set, pandoc writes the result to that path, so
        # the return value of convert_text is not needed.
        pypandoc.convert_text(
            markdown,
            "docx",
            outputfile=docx_file.name,
            format="md",
            filters=[DOCXPAGEBREAK_FILTER],
        )
        return docx_file.read()
Empty file.
90 changes: 90 additions & 0 deletions sponsors/pandoc_filters/pagebreak.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# ------------------------------------------------------------------------------
# Source: https://github.com/pandocker/pandoc-docx-pagebreak-py/
# Revision: c8cddccebb78af75168da000a3d6ac09349bef73
# ------------------------------------------------------------------------------
# MIT License
#
# Copyright (c) 2018 pandocker
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# ------------------------------------------------------------------------------

""" pandoc-docx-pagebreakpy
Pandoc filter that inserts page breaks as openxml RawBlocks.
Only applies to docx output.
An attempted port of pandoc-docx-pagebreak:
- https://github.com/alexstoick/pandoc-docx-pagebreak
"""

import panflute as pf


class DocxPagebreak(object):
    r"""Filter action that replaces ``\newpage`` and ``\toc`` raw blocks.

    The replacements are raw ``openxml`` blocks and are only substituted when
    the document's output format is ``docx``.
    """

    # Paragraph containing a single page break.
    pagebreak = pf.RawBlock("<w:p><w:r><w:br w:type=\"page\" /></w:r></w:p>", format="openxml")
    # Paragraph carrying a "nextPage" section break. Not referenced by
    # action(): the branch that used it is commented out.
    sectionbreak = pf.RawBlock("<w:p><w:pPr><w:sectPr><w:type w:val=\"nextPage\" /></w:sectPr></w:pPr></w:p>",
                               format="openxml")
    # Structured document tag wrapping a TOC field (heading levels 1-3). The
    # field is flagged dirty -- presumably so the word processor regenerates
    # it when the document is opened; confirm.
    toc = pf.RawBlock(r"""
<w:sdt>
<w:sdtContent xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
<w:p>
<w:r>
<w:fldChar w:fldCharType="begin" w:dirty="true" />
<w:instrText xml:space="preserve">TOC \o "1-3" \h \z \u</w:instrText>
<w:fldChar w:fldCharType="separate" />
<w:fldChar w:fldCharType="end" />
</w:r>
</w:p>
</w:sdtContent>
</w:sdt>
""", format="openxml")

    def action(self, elem, doc):
        r"""Return the replacement for *elem*, or *elem* itself if unchanged.

        Only ``RawBlock`` elements are inspected:

        * ``\newpage`` becomes the page-break block for docx output and is
          left untouched for other formats.
        * ``\toc`` becomes a "Table of Contents" heading followed by the TOC
          field for docx output, and an empty list for other formats.

        NOTE(review): the nesting of the final ``else`` was reconstructed; it
        is taken to belong to the docx check of the ``\toc`` branch, mirroring
        the commented-out section-break branch. Confirm against upstream.
        """
        if isinstance(elem, pf.RawBlock):
            if elem.text == r"\newpage":
                if (doc.format == "docx"):
                    pf.debug("Page Break")
                    elem = self.pagebreak
            # elif elem.text == r"\newsection":
            # if (doc.format == "docx"):
            # pf.debug("Section Break")
            # elem = self.sectionbreak
            # else:
            # elem = []
            elif elem.text == r"\toc":
                if (doc.format == "docx"):
                    pf.debug("Table of Contents")
                    # Heading paragraph styled with the "TOC Heading" custom style.
                    para = [pf.Para(pf.Str("Table"), pf.Space(), pf.Str("of"), pf.Space(), pf.Str("Contents"))]
                    div = pf.Div(*para, attributes={"custom-style": "TOC Heading"})
                    elem = [div, self.toc]
                else:
                    # Non-docx output: replace the marker with nothing.
                    elem = []
        return elem


def main(doc=None):
    """Run the ``DocxPagebreak`` action as a panflute filter and return its result.

    *doc* is passed straight through to ``pf.run_filter``.
    """
    return pf.run_filter(DocxPagebreak().action, doc=doc)


# Run the filter only when this file is executed as a script (e.g. when it is
# invoked as a pandoc filter), not when it is imported.
if __name__ == "__main__":
    main()
70 changes: 0 additions & 70 deletions sponsors/pdf.py

This file was deleted.

34 changes: 34 additions & 0 deletions sponsors/tests/test_contracts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
from datetime import date
from model_bakery import baker
from unittest.mock import patch, Mock

from django.http import HttpRequest
from django.test import TestCase
from django.utils.dateformat import format

from sponsors.contracts import render_contract_to_docx_response


class TestRenderContract(TestCase):
    """Tests for the contract rendering helpers in ``sponsors.contracts``."""

    def setUp(self):
        """Create an empty contract whose sponsorship starts today."""
        self.contract = baker.make_recipe(
            "sponsors.tests.empty_contract",
            sponsorship__start_date=date.today(),
        )
        sponsorship = self.contract.sponsorship
        # The context expected for an empty contract (no benefits or clauses).
        self.context = {
            "contract": self.contract,
            "start_date": sponsorship.start_date,
            "start_day_english_suffix": format(sponsorship.start_date, "S"),
            "sponsor": sponsorship.sponsor,
            "sponsorship": sponsorship,
            "benefits": [],
            "legal_clauses": [],
        }

    # DOCX unit test
    def test_render_response_with_docx_attachment(self):
        """The docx response is an attachment with the Word content type."""
        fake_request = Mock(HttpRequest)

        result = render_contract_to_docx_response(fake_request, self.contract)

        expected_disposition = "attachment; filename=sponsorship-contract-Sponsor.docx"
        expected_content_type = (
            "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
        )
        self.assertEqual(result.get("Content-Disposition"), expected_disposition)
        self.assertEqual(result.get("Content-Type"), expected_content_type)
Loading

0 comments on commit cca7b3a

Please sign in to comment.