From 71ac319e3b6567b894d2114e7b5dc12da7614134 Mon Sep 17 00:00:00 2001 From: Nigel Megitt Date: Tue, 26 Nov 2019 09:32:01 +0000 Subject: [PATCH] TT1 to TT3/MSPA 704 sequence identifier (#42) * Validate bindings for EBU-TT-1 documents by dynamically setting superseding class on document creation * MSPA-702: Check existence of Styling and Layout elements for EBU-TT-1 * MSPA-702: EBU-TT-1 document must contain a tt:region element * MSPA-702: body/dur attribute is not allowed in EBU-TT-1 * MSPA-702: Check that smpte timeBase is acceptable in EBU-TT-1 * tt3 to ttd conversion (#41) MSPA-728 ebu-tt-3 to ebu-tt-d conversion * Validate bindings for EBU-TT-1 documents by dynamically setting superseding class on document creation * MSPA-702: EBU-TT-1 document must contain a tt:region element * rebased and fixed ebuttd test files * extracting ebutt1object base into generic ebuttdocumentbase class * Add EBU-TT 1 to EBU-TT 3 conversion Does not handle time conversion. Sets `ebuttp:sequenceIdentifier` to the value of `tt/head/metadata/documentMetadata/documentIdentifier` if present, otherwise uses "TestConverter". Resets the `conformsToStandard` to say it is EBU-TT-3 conformant. Sets the `timeBase` to `media` whether you like it or not, but doesn't do any other conversions. * Tidy out debug prints and unneeded commented code * Address review comments Also allows for a setting that specifies whether or not to use `ebuttm:documentIdentifier` element value in the input as the `ebuttp:sequenceIdentifier` attribute value in the output. Adds a test for this. * Fix some documentation Address some warnings and add a page for EBU-TT 1 to EBU-TT 3 conversion. WIP. * Rename conversion docs Should have been committed with previous commit. * Unit test EBUTT1 to EBUTT3 conversion * Fix validation error messages for unexpected attributes so it doesn't say they are missing. * Fix cloning of unknown element, and conversion of metadata * Make EBUTT1Document instantiatable by including required attributes and elements in the constructor * Add unit test cases for programmatical construction with smpte (skipped) and media timebase * Add unit test cases for from-document construction with smpte (skipped) and media timebase * Address review comments Pass pep8 wherever we can. * Remove no-longer-used error string * Rename test file Helps distinguish test EBU-TT 1 files from test EBU-TT 3 files. * Address review comment --- docs/source/conversion_from_ebutt.rst | 30 ++ ...onversion.rst => conversion_to_ebuttd.rst} | 0 docs/source/deduplication.rst | 2 +- docs/source/ebu_tt_live.node.rst | 2 +- docs/source/inode.puml | 1 + docs/source/overview.rst | 3 +- docs/source/scripts_and_their_functions.rst | 4 +- ebu_tt_live/bindings/__init__.py | 15 +- .../bindings/converters/ebutt1_ebutt3.py | 267 +++++++++++++++++- .../bindings/validation/presentation.py | 4 +- ebu_tt_live/bindings/validation/timing.py | 2 - ebu_tt_live/documents/converters.py | 10 +- ebu_tt_live/documents/ebutt1.py | 38 ++- .../test/converter_ericsson1_media.xml | 69 +++++ .../test/converter_ericsson1_smpte.xml | 69 +++++ ..._ericsson1.xml => converter_ericsson3.xml} | 0 ebu_tt_live/documents/test/test_converters.py | 137 ++++++++- ebu_tt_live/node/ebutt1_ebutt3_producer.py | 10 +- ebu_tt_live/strings.py | 2 +- .../features/ebutt1/ebutt1_conversion.feature | 57 ++++ testing/bdd/templates/ebutt1_template.xml | 9 +- testing/bdd/test_ebutt1_conversion.py | 66 +++++ 22 files changed, 760 insertions(+), 37 deletions(-) create mode 100644 docs/source/conversion_from_ebutt.rst rename docs/source/{conversion.rst => conversion_to_ebuttd.rst} (100%) create mode 100644 ebu_tt_live/documents/test/converter_ericsson1_media.xml create mode 100644 ebu_tt_live/documents/test/converter_ericsson1_smpte.xml rename ebu_tt_live/documents/test/{converter_ericsson1.xml => converter_ericsson3.xml} (100%) create mode 100644 testing/bdd/features/ebutt1/ebutt1_conversion.feature create mode 100644 testing/bdd/test_ebutt1_conversion.py diff --git a/docs/source/conversion_from_ebutt.rst b/docs/source/conversion_from_ebutt.rst new file mode 100644 index 000000000..c650bfd5d --- /dev/null +++ b/docs/source/conversion_from_ebutt.rst @@ -0,0 +1,30 @@ +Conversion of EBU-TT Part 1 documents to EBU-TT Live documents +============================================================== + +The :py:func:`ebu_tt_live.documents.converters.ebutt1_to_ebutt3` function +creates an EBUTT3Document from an EBUTT1Document using the helper class +:py:class:`ebu_tt_live.bindings.converters.ebutt1_ebutt3.EBUTT1EBUTT3Converter`. + +This class manages various possible complications, including mapping SMPTE +timecodes into media time, and setting a sequence identifier. + +Here's some documentation from the coding process that captures some of our +internal conversation about how to map font sizes, to give an idea of the +complexity. + +The problem +----------- + +Convert an EBU-TT part 1 document to an EBU-TT part 3 document + +EBU-TT part 1 can have smpte timebase; EBU-TT part 3 cannot. +EBU-TT part 1 must not have a sequence identifier and must not +have a sequence number. EBU-TT part 3 documents must have both. + +In order to set the sequence identifier the converter can be +configured with the desired value, or it can be set to extract the +document identifier from the `ebuttm:documentIdentifier` element +and use it, if it exists. + +TODO: how to convert smpte timebase time expressions into clock or media +timebase time expressions. \ No newline at end of file diff --git a/docs/source/conversion.rst b/docs/source/conversion_to_ebuttd.rst similarity index 100% rename from docs/source/conversion.rst rename to docs/source/conversion_to_ebuttd.rst diff --git a/docs/source/deduplication.rst b/docs/source/deduplication.rst index f3b0371a8..50e912cc3 100644 --- a/docs/source/deduplication.rst +++ b/docs/source/deduplication.rst @@ -13,7 +13,7 @@ After copying ``styling`` and ``layout`` into a ``list()`` and setting them up f Because ``style`` and ``region`` elements can have ``style`` attributes, these are deduplicated first. At this stage, it's possible that where two identical elements that differed only in their style references, these may end up looking the same. - Each element is then passed through the +Each element is then passed through the :py:class:`ebu_tt_live.node.deduplicator.ComparableElement` class, which processes each attribute, omitting the ``xml:id`` and using the :py:func:`ebu_tt_live.node.deduplicator.ReplaceNone` function to replace empty diff --git a/docs/source/ebu_tt_live.node.rst b/docs/source/ebu_tt_live.node.rst index 9d4527776..2fac55c6d 100644 --- a/docs/source/ebu_tt_live.node.rst +++ b/docs/source/ebu_tt_live.node.rst @@ -74,7 +74,7 @@ node Package :show-inheritance: :mod:`deduplicator` Module ----------------------- +-------------------------- .. automodule:: ebu_tt_live.node.deduplicator :members: diff --git a/docs/source/inode.puml b/docs/source/inode.puml index e1d039eab..eee3d00af 100644 --- a/docs/source/inode.puml +++ b/docs/source/inode.puml @@ -113,6 +113,7 @@ AbstractProducerNode <|-- AbstractCombinedNode AbstractProducerNode <|-- SimpleProducer AbstractProducerNode <|-- ReSequencer AbstractConsumerNode <|-- SimpleConsumer +AbstractCombinedNode <|-- Denester AbstractCombinedNode <|-- EBUTTDEncoder AbstractCombinedNode <|-- BufferDelayNode AbstractCombinedNode <|-- RetimingDelayNode diff --git a/docs/source/overview.rst b/docs/source/overview.rst index 408143760..eeea8c099 100644 --- a/docs/source/overview.rst +++ b/docs/source/overview.rst @@ -19,4 +19,5 @@ The components mimic the nodes and carriage mechanisms defined in the specificat segmentation deduplication denesting - conversion + conversion_from_ebutt + conversion_to_ebuttd diff --git a/docs/source/scripts_and_their_functions.rst b/docs/source/scripts_and_their_functions.rst index c0b0a4e0b..e0ec41064 100644 --- a/docs/source/scripts_and_their_functions.rst +++ b/docs/source/scripts_and_their_functions.rst @@ -23,7 +23,7 @@ into a sequence of EBU-TT Live documents using natural language processing. Use The default carriage mechanism is WebSocket, so you will need to listen to ``ws://127.0.0.1:9000``. Conveniently, we've created an HTML page that does just that. After you launch the Simple Producer, open `docs/build/ui/test/index.html <../ui/test/index.html>`_ -or the `current release pre-built page `_ in your +or the `current release pre-built test page `_ in your browser. The 'Broadcast message' field should be populated with the correct address (``ws://localhost:9000``). Click 'Connect' and then 'Subscribe'. You can also change the identifier for the sequence. The documents should appear in the @@ -58,7 +58,7 @@ activated and the code built, start one, with a command line such as ``ebu-run --admin.conf ebu_tt_live/examples/config/user_input_producer_consumer.json`` - this one runs a simple consumer. Then, in your browser, open `docs/build/ui/user_input_producer/index.html <../ui/user_input_producer/index.html>`_ or the -`current release pre-built page `_ and click +`current release pre-built UIP page `_ and click 'Connect'. Select the sending mode (manual, scheduled or asynchronous). You should see the documents arriving in the command line window where the simple consumer is listening. See detailed instructions here: diff --git a/ebu_tt_live/bindings/__init__.py b/ebu_tt_live/bindings/__init__.py index 15c387e74..095da79a3 100644 --- a/ebu_tt_live/bindings/__init__.py +++ b/ebu_tt_live/bindings/__init__.py @@ -19,7 +19,8 @@ from .validation.validator import SemanticValidator from ebu_tt_live.errors import SemanticValidationError, OutsideSegmentError, RegionExtendingOutsideDocumentError, InvalidRegionOriginType, InvalidRegionExtentType from ebu_tt_live.strings import ERR_SEMANTIC_VALIDATION_MISSING_ATTRIBUTES, \ - ERR_SEMANTIC_VALIDATION_INVALID_ATTRIBUTES, ERR_SEMANTIC_STYLE_CIRCLE, ERR_SEMANTIC_STYLE_MISSING, \ + ERR_SEMANTIC_VALIDATION_UNEXPECTED_ATTRIBUTES, \ + ERR_SEMANTIC_STYLE_CIRCLE, ERR_SEMANTIC_STYLE_MISSING, \ ERR_SEMANTIC_ELEMENT_BY_ID_MISSING, ERR_SEMANTIC_VALIDATION_EXPECTED from pyxb.exceptions_ import IncompleteElementContentError, MissingAttributeError, SimpleTypeValueError, \ UnrecognizedAttributeError @@ -581,7 +582,7 @@ def __semantic_test_smpte_attrs_absent(self): extra_attrs = self._semantic_attributes_present(smpte_attrs) if extra_attrs: raise SemanticValidationError( - ERR_SEMANTIC_VALIDATION_INVALID_ATTRIBUTES.format( + ERR_SEMANTIC_VALIDATION_UNEXPECTED_ATTRIBUTES.format( elem_name='tt:tt', attr_names=extra_attrs ) @@ -607,7 +608,7 @@ def __semantic_test_time_base_clock_attrs_absent(self): extra_attrs = self._semantic_attributes_present(clock_attrs) if extra_attrs: raise SemanticValidationError( - ERR_SEMANTIC_VALIDATION_MISSING_ATTRIBUTES.format( + ERR_SEMANTIC_VALIDATION_UNEXPECTED_ATTRIBUTES.format( elem_name='tt:tt', attr_names=extra_attrs ) @@ -1320,7 +1321,6 @@ def create_default_value(cls): def _semantic_after_subtree_copy(self, copied_instance, dataset, element_content=None): pass - raw.d_styling_type._SetSupersedingClass(d_styling_type) @@ -1434,7 +1434,10 @@ def _validateBinding_vx(self): if self.sequenceNumber: raise UnrecognizedAttributeError(type(self), 'sequenceNumber') - super(tt_type, self)._validateBinding_vx() + # bypass the tt_type parent's _validateBinding_vx() + # because it checks for constraints that are mutually + # incompatible with the constraints checked here. + super(raw.tt_type, self)._validateBinding_vx() class tt1_head_type(SemanticValidationMixin, raw.head_type): @@ -1449,7 +1452,7 @@ def _validateBinding_vx(self): class tt1_layout_type(layout): - + def _validateBinding_vx(self): if len(self.region) == 0: raise IncompleteElementContentError(self, None, None, None) diff --git a/ebu_tt_live/bindings/converters/ebutt1_ebutt3.py b/ebu_tt_live/bindings/converters/ebutt1_ebutt3.py index bd380b6c7..6bb9a0482 100644 --- a/ebu_tt_live/bindings/converters/ebutt1_ebutt3.py +++ b/ebu_tt_live/bindings/converters/ebutt1_ebutt3.py @@ -1,3 +1,12 @@ +from ebu_tt_live.bindings import tt, tt1_tt_type, tt1_body_type, \ + body_type, div_type, tt1_head_type, tt1_layout_type, p_type, span_type, \ + br_type, head_type, style_type, styling, layout, \ + region_type +from ebu_tt_live.bindings._ebuttm import headMetadata_type, documentMetadata, \ + metadataBase_type, divMetadata_type +from ebu_tt_live.documents import EBUTT3Document +from pyxb.binding.basis import NonElementContent, ElementContent +import copy import logging log = logging.getLogger(__name__) @@ -5,14 +14,261 @@ class EBUTT1EBUTT3Converter(object): - def __init__(self): + _semantic_dataset = None + _sequenceIdentifier = None + _use_doc_id_as_seq_id = False + + def __init__(self, sequence_id, use_doc_id_as_seq_id=False): + self._sequenceIdentifier = sequence_id + self._use_doc_id_as_seq_id = use_doc_id_as_seq_id pass def map_type(self, in_element): - return self.convert_unknown + if isinstance(in_element, tt1_tt_type): + return self.convert_tt + elif isinstance(in_element, tt1_head_type): + return self.convert_head + elif isinstance(in_element, tt1_body_type): + return self.convert_body + elif isinstance(in_element, div_type): + return self.convert_div + elif isinstance(in_element, p_type): + return self.convert_p + elif isinstance(in_element, span_type): + return self.convert_span + elif isinstance(in_element, br_type): + return self.convert_br + elif isinstance(in_element, tt1_layout_type): + return self.convert_layout + elif isinstance(in_element, region_type): + return self.convert_region + elif isinstance(in_element, styling): + return self.convert_styling + elif isinstance(in_element, style_type): + return self.convert_style + elif isinstance(in_element, headMetadata_type): + return self.convert_headMetadata + elif isinstance(in_element, divMetadata_type): + return self.convert_divMetadata + elif isinstance(in_element, metadataBase_type): + return self.convert_metadata + else: + return self.convert_unknown + + def convert_tt(self, tt_in, dataset): + dataset['timeBase'] = tt_in.timeBase + new_elem = tt( + head=self.convert_element(tt_in.head, dataset), + body=self.convert_element(tt_in.body, dataset), + timeBase='media', + lang=tt_in.lang, + space=tt_in.space, + cellResolution=tt_in.cellResolution, + sequenceIdentifier=self._sequenceIdentifier, + sequenceNumber='1', + _strict_keywords=False + ) + + if self._use_doc_id_as_seq_id and 'documentIdentifier' in dataset: + new_elem.sequenceIdentifier = dataset['documentIdentifier'] + + return new_elem + + def convert_head(self, head_in, dataset): + new_elem = head_type() + head_children = self.convert_children(head_in, dataset) + for item in head_children: + new_elem.append(item) + + return new_elem + + def convert_headMetadata(self, headMetadata_in, dataset): + new_elem = headMetadata_type( + *self.convert_children(headMetadata_in, dataset) + ) + + # Special handling for conformsToStandard. Throw out the old, add a new. + # TODO: When XSD updated to allow ebuttm document metadata directly in + # head metadata, check for this by uncommenting the following lines: + # if new_elem.conformsToStandard is not None: + # new_elem.conformsToStandard=[ + # 'urn:ebu:tt:live:2017-05'] + if new_elem.documentMetadata is None: + new_elem.documentMetadata = documentMetadata(conformsToStandard=[ + 'urn:ebu:tt:live:2017-05' + ]) + else: + new_elem.documentMetadata.conformsToStandard = [ + 'urn:ebu:tt:live:2017-05'] - def convert_unknown(self, element, dataset): - return None + # We want to remember the documentIdentifier and use it later for the + # sequence identifier + # TODO: When XSD updated to allow ebuttm document metadata directly in + # head metadata, check for this by uncommenting the following lines: + # if new_elem.documentIdentifier is not None: + # _rememberDocumentIdentifier(new_elem.documentIdentifier, dataset) + + if new_elem.documentMetadata and new_elem.documentMetadata.documentIdentifier is not None: + self._rememberDocumentIdentifier(new_elem.documentMetadata.documentIdentifier, dataset) + + return new_elem + + def _rememberDocumentIdentifier(self, documentIdentifier_in, dataset): + if 'documentIdentifier' in dataset: + raise Exception('Already got a documentIdentifier') + dataset['documentIdentifier'] = documentIdentifier_in + + def convert_divMetadata(self, divmetadata_in, dataset): + new_elem = divMetadata_type( + *self.convert_children(divmetadata_in, dataset) + ) + return new_elem + + def convert_metadata(self, metadata_in, dataset): + new_elem = metadataBase_type( + *self.convert_children(metadata_in, dataset) + ) + return new_elem + + def convert_styling(self, styling_in, dataset): + new_elem = styling( + *self.convert_children(styling_in, dataset) + ) + return new_elem + + def convert_style(self, style_in, dataset): + new_elem = style_type( + *self.convert_children(style_in, dataset), + id=style_in.id, + style=style_in.style, + direction=style_in.direction, + fontFamily=style_in.fontFamily, + fontSize=style_in.fontSize, + lineHeight=style_in.lineHeight, + textAlign=style_in.textAlign, + color=style_in.color, + backgroundColor=style_in.backgroundColor, + fontStyle=style_in.fontStyle, + fontWeight=style_in.fontWeight, + textDecoration=style_in.textDecoration, + unicodeBidi=style_in.unicodeBidi, + wrapOption=style_in.wrapOption, + padding=style_in.padding, + multiRowAlign=style_in.multiRowAlign, + linePadding=style_in.linePadding, + _strict_keywords=False + ) + return new_elem + + def convert_layout(self, layout_in, dataset): + new_elem = layout( + *self.convert_children(layout_in, dataset) + ) + + return new_elem + + def convert_region(self, region_in, dataset): + new_elem = region_type( + *self.convert_children(region_in, dataset), + id=region_in.id, + origin=region_in.origin, + extent=region_in.extent, + style=region_in.style, + displayAlign=region_in.displayAlign, + padding=region_in.padding, + writingMode=region_in.writingMode, + showBackground=region_in.showBackground, + overflow=region_in.overflow, + _strict_keywords=False + ) + return new_elem + + def convert_body(self, body_in, dataset): + if len(body_in.div) == 0: + return None + new_elem = body_type( + *self.convert_children(body_in, dataset), + agent=body_in.agent, + role=body_in.role, + style=body_in.style, + begin=body_in.begin, + end=body_in.end + ) + return new_elem + + def convert_div(self, div_in, dataset): + if len(div_in.orderedContent()) == 0: + return None + new_elem = div_type( + *self.convert_children(div_in, dataset), + id=div_in.id, + region=div_in.region, + style=div_in.style, + lang=div_in.lang, + agent=div_in.agent, + begin=div_in.begin, + end=div_in.end + ) + return new_elem + + def convert_p(self, p_in, dataset): + new_elem = p_type( + *self.convert_children(p_in, dataset), + id=p_in.id, + space=p_in.space, + lang=p_in.lang, + region=p_in.region, + style=p_in.style, + begin=p_in.begin, + end=p_in.end, + agent=p_in.agent, + role=p_in.role + ) + return new_elem + + def convert_span(self, span_in, dataset): + new_elem = span_type( + *self.convert_children(span_in, dataset), + id=span_in.id, + space=span_in.space, + lang=span_in.lang, + style=span_in.style, + begin=span_in.begin, + end=span_in.end, + agent=span_in.agent, + role=span_in.role + ) + return new_elem + + def convert_br(self, br_in, dataset): + return br_type() + + def convert_unknown(self, element_in, dataset): + new_elem = copy.deepcopy(element_in) + return new_elem + + def convert_children(self, element, dataset): + """ + Recursive step + :param element: + :param dataset: + :return: + """ + output = [] + + children = element.orderedContent() + + for item in children: + if isinstance(item, NonElementContent): + output.append(copy.deepcopy(item.value)) + elif isinstance(item, ElementContent): + conv_elem = self.convert_element(item.value, dataset) + if conv_elem is not None: + output.append(conv_elem) + else: + raise Exception('Can this even happen!??!?!?!') + + return output def convert_element(self, element, dataset): converter = self.map_type(element) @@ -23,6 +279,9 @@ def convert_document(self, root_element, dataset=None): self._semantic_dataset = {} else: self._semantic_dataset = dataset + + # Make sure that any new elements we correct get the right bindings + EBUTT3Document.load_types_for_document() converted_bindings = self.convert_element( root_element, self._semantic_dataset diff --git a/ebu_tt_live/bindings/validation/presentation.py b/ebu_tt_live/bindings/validation/presentation.py index ac4ddc15d..4dee88c76 100644 --- a/ebu_tt_live/bindings/validation/presentation.py +++ b/ebu_tt_live/bindings/validation/presentation.py @@ -46,11 +46,11 @@ def _semantic_collect_applicable_styles(self, dataset, style_type, parent_bindin This function identifies the styling dependdncy chain for the styled element in question. :param dataset: Semantic dataset - :param style_type: the style_type to be used in the process (there are different style types for EBU-TT D and - live). + :param style_type: the style_type to be used in the process (there are different style types for EBU-TT D and live). :param parent_binding: The immediate parent of the styled element in the document structure :param defer_font_size: If True then fontsize can stay percentage in case it could not be calculated :param extra_referenced_styles: Used by region to inject its extra style attributes + :return: """ diff --git a/ebu_tt_live/bindings/validation/timing.py b/ebu_tt_live/bindings/validation/timing.py index 36a219761..0074ce264 100644 --- a/ebu_tt_live/bindings/validation/timing.py +++ b/ebu_tt_live/bindings/validation/timing.py @@ -400,8 +400,6 @@ def _post_pop_end(self): return end_timedelta - - def _semantic_timebase_validation(self, dataset, element_content): super(BodyTimingValidationMixin, self)._semantic_timebase_validation(dataset, element_content) diff --git a/ebu_tt_live/documents/converters.py b/ebu_tt_live/documents/converters.py index 2d87c1e7b..c6413ff22 100644 --- a/ebu_tt_live/documents/converters.py +++ b/ebu_tt_live/documents/converters.py @@ -3,6 +3,7 @@ from ebu_tt_live.bindings.converters.ebutt1_ebutt3 import EBUTT1EBUTT3Converter from ebu_tt_live.documents.ebuttd import EBUTTDDocument +from ebu_tt_live.documents.ebutt1 import EBUTT1Document from ebu_tt_live.documents.ebutt3 import EBUTT3Document from subprocess import Popen, PIPE import tempfile @@ -34,14 +35,17 @@ def ebutt3_to_ebuttd(ebutt3_in, media_clock): return ebuttd_document -def ebutt1_to_ebutt3(ebutt1_in): +def ebutt1_to_ebutt3(ebutt1_in, sequence_id, use_doc_id_as_seq_id): """ This function takes an EBUTT1Document instance and returns the same document as an EBUTT3Document instance. :param ebutt1_in: :return: """ - converter = EBUTT1EBUTT3Converter() - ebutt3_bindings = converter.convert_document(ebutt1_in.binding) + converter = EBUTT1EBUTT3Converter(sequence_id=sequence_id, + use_doc_id_as_seq_id=use_doc_id_as_seq_id) + doc_xml = ebutt1_in.get_xml() + ebutt1_doc = EBUTT1Document.create_from_xml(doc_xml) + ebutt3_bindings = converter.convert_document(ebutt1_doc.binding) ebutt3_document = EBUTT3Document.create_from_raw_binding(ebutt3_bindings) ebutt3_document.validate() return ebutt3_document diff --git a/ebu_tt_live/documents/ebutt1.py b/ebu_tt_live/documents/ebutt1.py index 2e21d2885..036e519a0 100644 --- a/ebu_tt_live/documents/ebutt1.py +++ b/ebu_tt_live/documents/ebutt1.py @@ -1,8 +1,10 @@ from ebu_tt_live import bindings -from ebu_tt_live.bindings import _ebuttlm as ebuttlm from ebu_tt_live.documents import SubtitleDocument from ebu_tt_live.documents.base import EBUTTDocumentBase from ebu_tt_live.documents.time_utils import TimelineUtilMixin +from .base import TimeBase +from pyxb import BIND + class EBUTT1Document(TimelineUtilMixin, SubtitleDocument, EBUTTDocumentBase): """ @@ -16,9 +18,33 @@ class EBUTT1Document(TimelineUtilMixin, SubtitleDocument, EBUTTDocumentBase): def _cmp_key(self): raise NotImplementedError() - def __init__(self): + def __init__(self, time_base, lang, head, clock_mode=None, + frame_rate=None, frame_rate_multiplier=None, + drop_mode=None, marker_mode=None): self.load_types_for_document() - self._ebutt1_content = bindings.tt() + if not clock_mode and time_base is TimeBase.CLOCK: + clock_mode = 'local' + if time_base is TimeBase.SMPTE: + if not frame_rate: + frame_rate = '30' + if not frame_rate_multiplier: + frame_rate_multiplier = '1 1' + if not drop_mode: + drop_mode = 'nonDrop' + if not marker_mode: + marker_mode = 'discontinuous' + + self._ebutt1_content = bindings.tt( + timeBase=time_base, + clockMode=clock_mode, + frameRate=frame_rate, + frameRateMultiplier=frame_rate_multiplier, + dropMode=drop_mode, + markerMode=marker_mode, + lang=lang, + head=head, + body=BIND() + ) self.validate() def validate(self): @@ -40,7 +66,8 @@ def create_from_raw_binding(cls, binding): @classmethod def create_from_xml(cls, xml): cls.load_types_for_document() - instance = cls.create_from_raw_binding(binding=bindings.CreateFromDocument(xml_text=xml)) + instance = cls.create_from_raw_binding( + binding=bindings.CreateFromDocument(xml_text=xml)) return instance @classmethod @@ -76,4 +103,5 @@ def get_dom(self): return self._ebutt1_content.toDOM() def get_element_by_id(self, elem_id, elem_type=None): - return self.binding.get_element_by_id(elem_id=elem_id, elem_type=elem_type) + return self.binding.get_element_by_id( + elem_id=elem_id, elem_type=elem_type) diff --git a/ebu_tt_live/documents/test/converter_ericsson1_media.xml b/ebu_tt_live/documents/test/converter_ericsson1_media.xml new file mode 100644 index 000000000..fef50158e --- /dev/null +++ b/ebu_tt_live/documents/test/converter_ericsson1_media.xml @@ -0,0 +1,69 @@ + + + + + + v1.0 + 3.12.0 + BBC + 300 + 4:3 + WSTTeletextSubtitles + tgv + tgv + 2016-09-06 + + 37 + UK + Ericsson and Redbee + prernaB + + + + + + + v1.0 + timeOfDay + TestSequence1#localhost_EbuTT3_TestSeq#647#20160906#12-11-53#EbuTT3#source + tgv#2016-09-5T23:00:00Z#TestSequence1 + TestSequence1 + + + + + Subito VX + + + + + + + + + + + + + + + + + + + + + + + + + Subtitle_Source_Facet is of type VOICE + + + This is a position and text color + + test. + + + + diff --git a/ebu_tt_live/documents/test/converter_ericsson1_smpte.xml b/ebu_tt_live/documents/test/converter_ericsson1_smpte.xml new file mode 100644 index 000000000..951cbb1b2 --- /dev/null +++ b/ebu_tt_live/documents/test/converter_ericsson1_smpte.xml @@ -0,0 +1,69 @@ + + + + + + v1.0 + 3.12.0 + BBC + 300 + 4:3 + WSTTeletextSubtitles + tgv + tgv + 2016-09-06 + + 37 + UK + Ericsson and Redbee + prernaB + + + + + + + v1.0 + timeOfDay + TestSequence1#localhost_EbuTT3_TestSeq#647#20160906#12-11-53#EbuTT3#source + tgv#2016-09-5T23:00:00Z#TestSequence1 + TestSequence1 + + + + + Subito VX + + + + + + + + + + + + + + + + + + + + + + + + + Subtitle_Source_Facet is of type VOICE + + + This is a position and text color + + test. + + + + diff --git a/ebu_tt_live/documents/test/converter_ericsson1.xml b/ebu_tt_live/documents/test/converter_ericsson3.xml similarity index 100% rename from ebu_tt_live/documents/test/converter_ericsson1.xml rename to ebu_tt_live/documents/test/converter_ericsson3.xml diff --git a/ebu_tt_live/documents/test/test_converters.py b/ebu_tt_live/documents/test/test_converters.py index 2fc9a1d4a..3e2eadc5b 100644 --- a/ebu_tt_live/documents/test/test_converters.py +++ b/ebu_tt_live/documents/test/test_converters.py @@ -2,11 +2,14 @@ from unittest import TestCase from datetime import timedelta import os -from ebu_tt_live.documents.converters import ebutt3_to_ebuttd +from ebu_tt_live.documents.converters import ebutt3_to_ebuttd, ebutt1_to_ebutt3 from ebu_tt_live.documents.ebutt3 import EBUTT3Document -from ebu_tt_live.clocks.local import LocalMachineClock +from ebu_tt_live.documents.ebutt1 import EBUTT1Document from ebu_tt_live.clocks.media import MediaClock -from ebu_tt_live.bindings import div_type, p_type, span_type, br_type, ebuttdt +from ebu_tt_live.bindings import tt1_head_type, styling, \ + style_type, tt1_layout_type, region_type, div_type, p_type, \ + span_type, br_type, ebuttdt +from pyxb.exceptions_ import PyXBException class TestEBUTT3ToEBUTTDConverter(TestCase): @@ -45,11 +48,133 @@ def test_simple(self): ebutt3_to_ebuttd(document, self._media_clock) - def test_ericsson_1(self): + def test_ericsson_3(self): - xml_file = self._load_asset('converter_ericsson1.xml') + xml_file = self._load_asset('converter_ericsson3.xml') - self._media_clock.adjust_time(timedelta(), ebuttdt.LimitedClockTimingType('12:11:50.000').timedelta) + self._media_clock.adjust_time( + timedelta(), + ebuttdt.LimitedClockTimingType('12:11:50.000').timedelta) document = EBUTT3Document.create_from_xml(xml_file) cdoc = ebutt3_to_ebuttd(document, self._media_clock) + + +class TestEBUTT1ToEBUTT3Converter(TestCase): + + def setUp(self): + self._seqId = 'testConverter' + + def _load_asset(self, file_name): + dirpath = os.path.dirname(os.path.abspath(__file__)) + with open(os.path.join(dirpath, file_name), 'r') as ifile: + contents = ifile.read() + return contents + + def test_simple_smpte(self): + + self.skipTest('SMPTE time conversion not yet supported') + + div = div_type( + p_type( + span_type( + 'Here we are', + br_type(), + 'in 2 lines.' + ), + id='ID001', + begin=ebuttdt.SMPTETimingType('00:00:01:00'), + end=ebuttdt.SMPTETimingType('00:00:03:00') + ) + ) + + EBUTT1Document.load_types_for_document() + try: + document = EBUTT1Document( + time_base='smpte', + lang='en-GB', + head=tt1_head_type( + styling( + style_type(id='s0') + ), + tt1_layout_type( + region_type( + id='r0', + origin='0% 0%', + extent='100% 100%') + ) + ) + ) + except PyXBException as e: + print(e.details()) + raise e + document.add_div(div) + document.validate() + + ebutt1_to_ebutt3( + document, + sequence_id=self._seqId, + use_doc_id_as_seq_id=True) + + def test_simple_media(self): + + div = div_type( + p_type( + span_type( + 'Here we are', + br_type(), + 'in 2 lines.' + ), + id='ID001', + begin=ebuttdt.FullClockTimingType(timedelta(seconds=1)), + end=ebuttdt.FullClockTimingType(timedelta(seconds=3)) + ) + ) + + EBUTT1Document.load_types_for_document() + try: + document = EBUTT1Document( + time_base='media', + lang='en-GB', + head=tt1_head_type( + styling( + style_type(id='s0') + ), + tt1_layout_type( + region_type(id='r0', origin='0% 0%', extent='100% 100%') + ) + ) + ) + except PyXBException as e: + print(e.details()) + raise e + document.add_div(div) + document.validate() + + ebutt1_to_ebutt3( + document, + sequence_id=self._seqId, + use_doc_id_as_seq_id=True) + + def test_ericsson_smpte(self): + + self.skipTest('SMPTE time conversion not supported yet') + + xml_file = self._load_asset('converter_ericsson1_smpte.xml') + + document = EBUTT1Document.create_from_xml(xml_file) + cdoc = ebutt1_to_ebutt3( + document, + sequence_id=self._seqId, + use_doc_id_as_seq_id=True) + + def test_ericsson_media(self): + + xml_file = self._load_asset('converter_ericsson1_media.xml') + + document = EBUTT1Document.create_from_xml(xml_file) + cdoc = ebutt1_to_ebutt3( + document, + sequence_id=self._seqId, + use_doc_id_as_seq_id=True) + diff --git a/ebu_tt_live/node/ebutt1_ebutt3_producer.py b/ebu_tt_live/node/ebutt1_ebutt3_producer.py index d13321a9b..dfbfa7943 100644 --- a/ebu_tt_live/node/ebutt1_ebutt3_producer.py +++ b/ebu_tt_live/node/ebutt1_ebutt3_producer.py @@ -10,14 +10,20 @@ class EBUTT1EBUTT3ProducerNode(AbstractCombinedNode): _expects = EBUTT1Document _provides = EBUTT3Document - def __init__(self, node_id, consumer_carriage=None, producer_carriage=None, **kwargs): + def __init__(self, node_id, consumer_carriage=None, + producer_carriage=None, + sequence_identifier=None, + use_document_identifier_as_sequence_identifier=True, + **kwargs): super(EBUTT1EBUTT3ProducerNode, self).__init__( node_id=node_id, consumer_carriage=consumer_carriage, producer_carriage=producer_carriage, **kwargs ) - self._ebutt3_converter = EBUTT1EBUTT3Converter() + self._ebutt3_converter = EBUTT1EBUTT3Converter( + sequence_id=sequence_identifier, + use_doc_id_as_seq_id=use_document_identifier_as_sequence_identifier) def process_document(self, document, **kwargs): # Convert each receiver document into EBU-TT-3 diff --git a/ebu_tt_live/strings.py b/ebu_tt_live/strings.py index e53fbb6fd..ac95d34cc 100644 --- a/ebu_tt_live/strings.py +++ b/ebu_tt_live/strings.py @@ -16,7 +16,7 @@ ERR_DECODING_XML_FAILED = gettext('XML document parsing failed') ERR_SEMANTIC_VALIDATION_TIMING_TYPE = gettext('{attr_type}({attr_value}) is not a valid type for {attr_name} in timeBase={time_base}') ERR_SEMANTIC_VALIDATION_MISSING_ATTRIBUTES = gettext('{elem_name} is missing attributes: {attr_names}') -ERR_SEMANTIC_VALIDATION_INVALID_ATTRIBUTES = gettext('{elem_name} has invalid attributes: {attr_names}') +ERR_SEMANTIC_VALIDATION_UNEXPECTED_ATTRIBUTES = gettext('{elem_name} has unexpected attributes: {attr_names}') ERR_SEMANTIC_STYLE_MISSING = gettext('Style: {style} is not found.') ERR_SEMANTIC_STYLE_CIRCLE = gettext('Style: {style} is in a circular reference.') ERR_SEMANTIC_VALIDATION_EXPECTED = gettext('Please run semantic validation before calling this function') diff --git a/testing/bdd/features/ebutt1/ebutt1_conversion.feature b/testing/bdd/features/ebutt1/ebutt1_conversion.feature new file mode 100644 index 000000000..68ee55915 --- /dev/null +++ b/testing/bdd/features/ebutt1/ebutt1_conversion.feature @@ -0,0 +1,57 @@ +Feature: Converting EBU-TT Part 1 files + Examples: + | xml_file | + | ebutt1_template.xml | + +@skip +# skip until documentIdentifier in head metadata is supported + Scenario: Pass conversion check with documentIdentifier in head metadata + Given an xml file + When the document head metadata contains a documentIdentifier element + And the document contains a "styling" element + And the document contains a "style" element + And the document contains a "layout" element + And the document contains a "region" element + And the XML is parsed as a valid EBU-TT-1 document + And the EBU-TT-1 document is converted to EBU-TT-3 + Then the EBU-TT-3 document is valid + And the sequenceIdentifier is "headDocId" + + Scenario: Pass conversion check with documentIdentifier in document metadata and converter set to use documentIdentifier as a sequenceIdentifier + Given an xml file + When the documentMetadata contains a documentIdentifier element + And the document contains a "styling" element + And the document contains a "style" element + And the document contains a "layout" element + And the document contains a "region" element + And the XML is parsed as a valid EBU-TT-1 document + And the EBU-TT-1 converter is set to use the documentIdentifier as a sequenceIdentifier + And the EBU-TT-1 document is converted to EBU-TT-3 + Then the EBU-TT-3 document is valid + And the sequenceIdentifier is "docMetaDocId" + + + Scenario: Pass conversion check with documentIdentifier in document metadata and converter set not to use documentIdentifier as a sequenceIdentifier + Given an xml file + When the documentMetadata contains a documentIdentifier element + And the document contains a "styling" element + And the document contains a "style" element + And the document contains a "layout" element + And the document contains a "region" element + And the XML is parsed as a valid EBU-TT-1 document + And the EBU-TT-1 converter is set not to use the documentIdentifier as a sequenceIdentifier + And the EBU-TT-1 converter sequenceIdentifier is "BDDSEQID" + And the EBU-TT-1 document is converted to EBU-TT-3 + Then the EBU-TT-3 document is valid + And the sequenceIdentifier is "BDDSEQID" + + Scenario: Pass conversion check with no documentIdentifier + Given an xml file + When the document contains a "styling" element + And the document contains a "style" element + And the document contains a "layout" element + And the document contains a "region" element + And the XML is parsed as a valid EBU-TT-1 document + And the EBU-TT-1 document is converted to EBU-TT-3 + Then the EBU-TT-3 document is valid + And the sequenceIdentifier is "TestConverter" diff --git a/testing/bdd/templates/ebutt1_template.xml b/testing/bdd/templates/ebutt1_template.xml index c30520433..80c065597 100644 --- a/testing/bdd/templates/ebutt1_template.xml +++ b/testing/bdd/templates/ebutt1_template.xml @@ -26,7 +26,14 @@ xmlns:xml="http://www.w3.org/XML/1998/namespace"> - + {% if head_metadata_documentIdentifier %} + headDocId + {% endif %} + + {% if doc_metadata_documentIdentifier %} + docMetaDocId + {% endif %} + {% if styling %} diff --git a/testing/bdd/test_ebutt1_conversion.py b/testing/bdd/test_ebutt1_conversion.py new file mode 100644 index 000000000..b3f9b3cc9 --- /dev/null +++ b/testing/bdd/test_ebutt1_conversion.py @@ -0,0 +1,66 @@ +import pytest +from pytest_bdd import parsers, scenarios, then, when +from pyxb.exceptions_ import (IncompleteElementContentError, + UnrecognizedAttributeError) + +from ebu_tt_live.documents import EBUTT1Document, EBUTT3Document +from ebu_tt_live.bindings.converters.ebutt1_ebutt3 import EBUTT1EBUTT3Converter + +scenarios('features/ebutt1/ebutt1_conversion.feature') + +@when(parsers.parse('the document contains a "{element}" element')) +def when_document_contains_element(template_dict, element): + template_dict[element] = True + +@when(parsers.parse('the document head metadata contains a documentIdentifier element')) +def when_document_head_metadata_contains_documentIdentifier(template_dict): + template_dict['head_metadata_documentIdentifier'] = True + +@when(parsers.parse('the documentMetadata contains a documentIdentifier element')) +def when_documentMetadata_contains_documentIdentifier(template_dict): + template_dict['doc_metadata_documentIdentifier'] = True + +@when('the XML is parsed as a valid EBU-TT-1 document') +def when_document_parsed_ebutt1(test_context, template_file, template_dict): + xml_text = template_file.render(template_dict) + ebutt1_document = EBUTT1Document.create_from_xml(xml_text) + ebutt1_document.validate() + test_context['ebutt1_document'] = ebutt1_document + +@when('the EBU-TT-1 converter is set to use the documentIdentifier as a sequenceIdentifier') +def when_converter_uses_docId_as_seqId(test_context): + test_context['use_doc_id_as_seq_id'] = True + +@when('the EBU-TT-1 converter is set not to use the documentIdentifier as a sequenceIdentifier') +def when_converter_uses_docId_as_seqId(test_context): + test_context['use_doc_id_as_seq_id'] = False + +@when(parsers.parse('the EBU-TT-1 converter sequenceIdentifier is "{seq_id}"')) +def when_converter_seq_id(test_context, seq_id): + test_context['converter_seq_id'] = seq_id + +@when('the EBU-TT-1 document is converted to EBU-TT-3') +def when_ebutt1_converted_to_ebutt3(test_context, template_file, template_dict): + use_doc_id_as_seq_id = False + if 'use_doc_id_as_seq_id' in test_context: + use_doc_id_as_seq_id = test_context['use_doc_id_as_seq_id'] + seq_id = 'TestConverter' + if 'converter_seq_id' in test_context: + seq_id = test_context['converter_seq_id'] + ebutt1_converter = EBUTT1EBUTT3Converter(sequence_id = seq_id, + use_doc_id_as_seq_id = use_doc_id_as_seq_id) + doc_xml = test_context["ebutt1_document"].get_xml() + ebutt1_doc = EBUTT1Document.create_from_xml(doc_xml) + converted_bindings = ebutt1_converter.convert_document(ebutt1_doc.binding) + ebutt3_document = EBUTT3Document.create_from_raw_binding(converted_bindings) + test_context['ebutt3_document'] = ebutt3_document + +@then('the EBU-TT-3 document is valid') +def then_ebutt3_doc_valid(test_context): + test_context['ebutt3_document'].validate() + assert isinstance(test_context['ebutt3_document'], EBUTT3Document) + +@then(parsers.parse('the sequenceIdentifier is "{value}"')) +def then_sequence_identifier_is_value(test_context, value): + assert test_context['ebutt3_document'].sequence_identifier == value +