-
Notifications
You must be signed in to change notification settings - Fork 50
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #1245 from rdmorganiser/refactor_pandoc
Refactor pandoc
- Loading branch information
Showing
6 changed files
with
310 additions
and
147 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,143 @@ | ||
import json | ||
import logging | ||
import os | ||
import re | ||
from pathlib import Path | ||
from tempfile import mkstemp | ||
|
||
from django.apps import apps | ||
from django.conf import settings | ||
|
||
import pypandoc | ||
from packaging.version import Version | ||
from packaging.version import parse as parse_version | ||
|
||
log = logging.getLogger(__name__) | ||
|
||
|
||
def get_pandoc_version():
    """Return the pandoc version reported by pypandoc, parsed into a comparable version object."""
    raw_version = pypandoc.get_pandoc_version()
    return parse_version(raw_version)
|
||
|
||
def get_pandoc_content(html, metadata, export_format, context):
    """Convert an HTML string with pandoc and return the converted document as bytes.

    Args:
        html: the HTML source which is handed to pandoc.
        metadata: JSON serializable metadata, if truthy it is written to a temporary file
            and passed to pandoc using ``--metadata-file``.
        export_format: the pandoc output format, also used as suffix of the temporary output file.
        context: the context which is forwarded to ``get_pandoc_args``.

    Returns:
        The content of the file created by pandoc, read in binary mode.
    """
    pandoc_args = get_pandoc_args(export_format, context)

    # both names stay None until the files exist, so that the cleanup only touches created files
    metadata_tmp_file_name = None
    tmp_file_name = None

    try:
        if metadata:
            # create a temporary file for the metadata
            (metadata_tmp_fd, metadata_tmp_file_name) = mkstemp(suffix='.json')

            # save metadata, writing through the descriptor returned by mkstemp ensures it is closed again
            log.info('Save metadata file %s %s', metadata_tmp_file_name, str(metadata))
            with os.fdopen(metadata_tmp_fd, 'w') as fp:
                json.dump(metadata, fp)

            # add metadata file to pandoc args
            pandoc_args.append('--metadata-file=' + metadata_tmp_file_name)

        # create a temporary file, pandoc writes to it by name, so the open descriptor is closed right away
        (tmp_fd, tmp_file_name) = mkstemp(f'.{export_format}')
        os.close(tmp_fd)

        # convert the file using pandoc
        log.info('Export %s document using args %s.', export_format, pandoc_args)

        # replace the STATIC_URL prefix of image sources by the STATIC_ROOT path (presumably so that
        # pandoc can read the images from disk); STATIC_URL is escaped to be matched literally and a
        # function is used as replacement, so that backslashes in STATIC_ROOT are not read as escapes
        static_root = str(Path(settings.STATIC_ROOT))
        html = re.sub(
            r'(<img.+src=["\'])' + re.escape(settings.STATIC_URL) + r'([\w\-\@?^=%&/~\+#]+)',
            lambda match: f'{match.group(1)}{static_root}/{match.group(2)}',
            html
        )
        pypandoc.convert_text(html, export_format, format='html', outputfile=tmp_file_name, extra_args=pandoc_args)

        # read the created temporary file
        with open(tmp_file_name, 'rb') as fp:
            pandoc_content = fp.read()
    finally:
        # delete temporary files, also if the conversion failed
        if metadata_tmp_file_name is not None:
            os.remove(metadata_tmp_file_name)
        if tmp_file_name is not None:
            os.remove(tmp_file_name)

    return pandoc_content
|
||
|
||
def get_pandoc_content_disposition(export_format, title):
    """Build the content disposition value for an exported document.

    For ``pdf`` only the ``filename`` part is returned (the original comment states that the
    pdf is displayed in the browser), every other format is marked as ``attachment``.
    """
    file_name = f'filename="{title}.{export_format}"'

    # display pdf in browser, everything else is offered as download
    return file_name if export_format == 'pdf' else f'attachment; {file_name}'
|
||
|
||
def get_pandoc_args(export_format, context):
    """Assemble the extra command line arguments for pandoc.

    The arguments configured in ``settings.EXPORT_PANDOC_ARGS`` for the export format are
    adjusted depending on the pandoc version and extended by a reference document
    (``docx`` and ``odt`` only) and by resource paths (pandoc 2 and later only).

    Args:
        export_format: the export format, used as key for ``settings.EXPORT_PANDOC_ARGS``.
        context: a mapping, an optional ``resource_path`` entry is resolved below ``MEDIA_ROOT``.

    Returns:
        A new list of arguments.
    """
    version = get_pandoc_version()
    is_pandoc_2_or_later = version >= Version('2')

    # copy the configured arguments, without the copy the settings would be changed
    args = [*settings.EXPORT_PANDOC_ARGS.get(export_format, [])]

    if export_format == 'pdf':
        if version < Version('3'):
            # we used xelatex before pandoc 3
            args = [arg.replace('--pdf-engine=lualatex', '--pdf-engine=xelatex') for arg in args]

    elif export_format in ('docx', 'odt'):
        # find and add a possible reference document
        reference_document = get_pandoc_reference_document(export_format, context)
        if reference_document:
            # pandoc 2 and later use --reference-doc, older versions a format specific option
            option = '--reference-doc' if is_pandoc_2_or_later else f'--reference-{export_format}'
            args.append(f'{option}={reference_document}')

    # add STATIC_ROOT and possible additional resource paths
    if is_pandoc_2_or_later:
        args.append(f'--resource-path={settings.STATIC_ROOT}')
        if 'resource_path' in context:
            media_path = Path(settings.MEDIA_ROOT) / context['resource_path']
            args.append(f'--resource-path={media_path}')

    return args
|
||
|
||
def get_pandoc_reference_document(export_format, context):
    """Return the first configured reference document which exists, or ``None``.

    Args:
        export_format: the export format, forwarded to ``get_pandoc_reference_documents``.
        context: the context, forwarded to ``get_pandoc_reference_documents``.

    Returns:
        A ``Path`` to the first existing reference document, ``None`` if no candidate exists.
    """
    # collect all configured reference documents
    reference_documents = get_pandoc_reference_documents(export_format, context)

    # return the first reference document that actually exists
    for reference_document in reference_documents:
        if not reference_document:
            continue

        # candidates taken from the settings may be plain strings, so they are converted
        # to a Path before (and not after) Path methods are called on them
        reference_path = Path(reference_document)
        if reference_path.exists():
            return reference_path

    return None
|
||
|
||
def get_pandoc_reference_documents(export_format, context):
    """Collect the candidate reference documents for an export format, most specific first.

    For ``odt`` and ``docx`` the list contains, in this order, the reference document configured
    for the uri of the view in the context (if any), the generic reference document from the
    settings (if set) and the default reference document in the ``share`` directory of the
    rdmo app. For every other format the list is empty.
    """
    # try to get the view and its uri from the context, if it is not set, the current url should be project_answers
    try:
        view_uri = context['view'].uri
    except (KeyError, AttributeError):
        view_uri = None

    if export_format not in ('odt', 'docx'):
        return []

    # the settings for both formats only differ by the upper case format in their names
    setting_name = f'EXPORT_REFERENCE_{export_format.upper()}'
    candidates = []

    # append view specific custom reference document
    if view_uri:
        view_documents = getattr(settings, f'{setting_name}_VIEWS')
        if view_uri in view_documents:
            candidates.append(view_documents[view_uri])

    # append generic custom reference document
    generic_document = getattr(settings, setting_name)
    if generic_document:
        candidates.append(generic_document)

    # append the default reference document
    candidates.append(Path(apps.get_app_config('rdmo').path) / 'share' / f'reference.{export_format}')

    return candidates
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,156 @@ | ||
from pathlib import Path | ||
|
||
import pytest | ||
|
||
from django.apps import apps | ||
|
||
from packaging.version import Version | ||
|
||
from ..pandoc import get_pandoc_args, get_pandoc_reference_document, get_pandoc_reference_documents, get_pandoc_version | ||
|
||
# locations used to build the expected pandoc arguments
rdmo_path = Path(apps.get_app_config('rdmo').path)
testing_path = rdmo_path.parent / 'testing'

# versions below and above the version checks for pandoc 2 and pandoc 3
pandoc_versions = ['1.9.0', '2.0.0', '3.0.0', '3.5.0']

export_formats = ['rtf', 'odt', 'docx', 'html', 'markdown', 'tex', 'pdf']

# expected arguments per pandoc version and export format, 'other' is used for every format without own entry
pandoc_args_map = {
    '1.9.0': {
        'pdf': ['-V', 'geometry:a4paper, margin=1in', '--pdf-engine=xelatex'],
        'rtf': ['--standalone'],
        'docx': [f'--reference-docx={rdmo_path}/share/reference.docx'],
        'odt': [f'--reference-odt={rdmo_path}/share/reference.odt'],
        'other': [],
    },
    '2.0.0': {
        'pdf': [
            '-V', 'geometry:a4paper, margin=1in', '--pdf-engine=xelatex',
            f'--resource-path={testing_path}/static_root',
        ],
        'rtf': ['--standalone', f'--resource-path={testing_path}/static_root'],
        'docx': [
            f'--reference-doc={rdmo_path}/share/reference.docx',
            f'--resource-path={testing_path}/static_root',
        ],
        'odt': [
            f'--reference-doc={rdmo_path}/share/reference.odt',
            f'--resource-path={testing_path}/static_root',
        ],
        'other': [f'--resource-path={testing_path}/static_root'],
    },
    '3.0.0': {
        'pdf': [
            '-V', 'geometry:a4paper, margin=1in', '--pdf-engine=lualatex',
            f'--resource-path={testing_path}/static_root',
        ],
        'rtf': ['--standalone', f'--resource-path={testing_path}/static_root'],
        'docx': [
            f'--reference-doc={rdmo_path}/share/reference.docx',
            f'--resource-path={testing_path}/static_root',
        ],
        'odt': [
            f'--reference-doc={rdmo_path}/share/reference.odt',
            f'--resource-path={testing_path}/static_root',
        ],
        'other': [f'--resource-path={testing_path}/static_root'],
    },
    '3.5.0': {
        'pdf': [
            '-V', 'geometry:a4paper, margin=1in', '--pdf-engine=lualatex',
            f'--resource-path={testing_path}/static_root',
        ],
        'rtf': ['--standalone', f'--resource-path={testing_path}/static_root'],
        'docx': [
            f'--reference-doc={rdmo_path}/share/reference.docx',
            f'--resource-path={testing_path}/static_root',
        ],
        'odt': [
            f'--reference-doc={rdmo_path}/share/reference.odt',
            f'--resource-path={testing_path}/static_root',
        ],
        'other': [f'--resource-path={testing_path}/static_root'],
    },
}
|
||
class MockedView:
    """Minimal stand-in for a view, it only provides the ``uri`` attribute read from the context."""

    uri = 'http://example.com/terms/views/view'
|
||
@pytest.mark.parametrize('pandoc_version', pandoc_versions)
def test_get_pandoc_version(mocker, pandoc_version):
    """The version string reported by pypandoc is returned as a ``Version``."""
    mocker.patch('pypandoc.get_pandoc_version', return_value=pandoc_version)

    expected_version = Version(pandoc_version)
    assert get_pandoc_version() == expected_version
|
||
|
||
@pytest.mark.parametrize('pandoc_version', pandoc_versions)
@pytest.mark.parametrize('export_format', export_formats)
def test_get_pandoc_args(settings, mocker, pandoc_version, export_format):
    """The arguments match ``pandoc_args_map`` for every combination of version and format."""
    mocker.patch('pypandoc.get_pandoc_version', return_value=pandoc_version)

    # formats without an own entry share the 'other' entry of the version
    expected_by_format = pandoc_args_map[pandoc_version]
    expected_args = expected_by_format.get(export_format, expected_by_format['other'])

    assert get_pandoc_args(export_format, {}) == expected_args
|
||
|
||
def test_get_pandoc_reference_document(mocker):
    """A missing candidate is skipped and the first existing candidate is returned."""
    share_path = rdmo_path / 'share'
    candidates = [
        share_path / 'missing.docx',
        share_path / 'reference.docx',
        share_path / 'reference.odt'
    ]
    mocker.patch('rdmo.core.pandoc.get_pandoc_reference_documents', return_value=candidates)

    # return the first existing file
    assert get_pandoc_reference_document('other', {}) == share_path / 'reference.docx'
|
||
|
||
def test_get_pandoc_reference_document_missing(mocker):
    """``None`` is returned if none of the candidates exists."""
    share_path = rdmo_path / 'share'
    candidates = [share_path / 'missing.docx', share_path / 'missing.odt']
    mocker.patch('rdmo.core.pandoc.get_pandoc_reference_documents', return_value=candidates)

    result = get_pandoc_reference_document('other', {})
    assert result is None
|
||
|
||
@pytest.mark.parametrize('export_format', export_formats)
def test_get_pandoc_reference_documents(export_format):
    """Without view and custom settings only docx and odt get the default reference document."""
    share_path = Path(apps.get_app_config('rdmo').path) / 'share'

    if export_format in ['docx', 'odt']:
        expected_documents = [share_path / f'reference.{export_format}']
    else:
        expected_documents = []

    assert get_pandoc_reference_documents(export_format, {}) == expected_documents
|
||
|
||
@pytest.mark.parametrize('export_format', export_formats)
def test_get_pandoc_reference_documents_view(export_format):
    """A view in the context gives the same result as no view when no custom settings are set."""
    context = {'view': MockedView()}

    if export_format in ['docx', 'odt']:
        expected_documents = [rdmo_path / 'share' / f'reference.{export_format}']
    else:
        expected_documents = []

    assert get_pandoc_reference_documents(export_format, context) == expected_documents
|
||
|
||
@pytest.mark.parametrize('export_format', export_formats)
def test_get_pandoc_reference_documents_view_settings(settings, export_format):
    """A reference document configured for the uri of the view is listed before the default."""
    share_path = rdmo_path / 'share'
    mock_file = share_path / f'mock.{export_format}'
    view_documents = {'http://example.com/terms/views/view': mock_file}

    if export_format == 'docx':
        settings.EXPORT_REFERENCE_DOCX_VIEWS = view_documents
    elif export_format == 'odt':
        settings.EXPORT_REFERENCE_ODT_VIEWS = view_documents

    if export_format in ['docx', 'odt']:
        expected_documents = [mock_file, share_path / f'reference.{export_format}']
    else:
        expected_documents = []

    assert get_pandoc_reference_documents(export_format, {'view': MockedView()}) == expected_documents
|
||
|
||
@pytest.mark.parametrize('export_format', export_formats)
def test_get_pandoc_reference_documents_settings(settings, export_format):
    """A generic reference document from the settings is listed before the default."""
    share_path = rdmo_path / 'share'
    mock_file = share_path / f'mock.{export_format}'

    if export_format == 'docx':
        settings.EXPORT_REFERENCE_DOCX = mock_file
    elif export_format == 'odt':
        settings.EXPORT_REFERENCE_ODT = mock_file

    if export_format in ['docx', 'odt']:
        expected_documents = [mock_file, share_path / f'reference.{export_format}']
    else:
        expected_documents = []

    assert get_pandoc_reference_documents(export_format, {}) == expected_documents
Oops, something went wrong.