diff --git a/plugins/org.obeonetwork.m2doc.html/META-INF/MANIFEST.MF b/plugins/org.obeonetwork.m2doc.html/META-INF/MANIFEST.MF index 089b42082..9f9d90721 100644 --- a/plugins/org.obeonetwork.m2doc.html/META-INF/MANIFEST.MF +++ b/plugins/org.obeonetwork.m2doc.html/META-INF/MANIFEST.MF @@ -11,7 +11,7 @@ Require-Bundle: org.eclipse.acceleo.annotations;bundle-version="[5.0.0,8.0.0)", org.eclipse.emf.ecore, org.obeonetwork.m2doc;bundle-version="[3.0.0,4.0.0)", org.apache.poi;bundle-version="[5.2.3,5.2.4)", - org.jsoup;bundle-version="[1.8.3,1.8.4)", + org.jsoup;bundle-version="[1.14.3,2.0.0)", org.eclipse.acceleo.query Export-Package: org.obeonetwork.m2doc.html.services Automatic-Module-Name: org.obeonetwork.m2doc.html diff --git a/plugins/org.obeonetwork.m2doc.html/src/org/obeonetwork/m2doc/html/services/M2DocHTMLParser.java b/plugins/org.obeonetwork.m2doc.html/src/org/obeonetwork/m2doc/html/services/M2DocHTMLParser.java index 103edb1e8..f1269aa8b 100644 --- a/plugins/org.obeonetwork.m2doc.html/src/org/obeonetwork/m2doc/html/services/M2DocHTMLParser.java +++ b/plugins/org.obeonetwork.m2doc.html/src/org/obeonetwork/m2doc/html/services/M2DocHTMLParser.java @@ -13,6 +13,7 @@ import java.awt.Color; import java.math.BigInteger; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.HashMap; import java.util.List; @@ -426,7 +427,7 @@ public List parse(URI baseURI, String htmlString) { final Document document = Jsoup.parse(htmlString, baseURI.toString()); document.outputSettings().syntax(org.jsoup.nodes.Document.OutputSettings.Syntax.xml); - document.outputSettings().charset("UTF-8"); + document.outputSettings().charset(StandardCharsets.UTF_8); final MStyle defaultStyle = new MStyleImpl(null, -1, null, null, -1); if (document.body().hasAttr("bgcolor")) { @@ -756,7 +757,7 @@ private void endElement(MList parent, MElement element) { * tells if a new paragraph is needed */ private void insertText(MList parent, final Context context, TextNode 
node, boolean needNewParagraph) { - final String text = node.text(); + final String text = text(node); if (!text.trim().isEmpty()) { final String textToInsert; if (needNewParagraph) { @@ -778,11 +779,11 @@ private void insertText(MList parent, final Context context, TextNode node, bool } /** - * Trims the begining of the given {@link String}. + * Trims the beginning of the given {@link String}. * * @param text * the {@link String} - * @return the trimed {@link String} + * @return the trimmed {@link String} */ private String trimFirst(String text) { final String res; @@ -801,6 +802,66 @@ private String trimFirst(String text) { return res; } + /** + * Gets the text of the given {@link TextNode}: its whole text with each run of HTML whitespace collapsed into a single space (a leading run is kept as one space rather than stripped). + * Taken from JSoup see https://github.com/jhy/jsoup/issues/1063 + * + * @param textNode + * the {@link TextNode} + * @return the whitespace-normalized text of the given {@link TextNode} + */ + private String text(TextNode textNode) { + final String string = textNode.getWholeText(); + StringBuilder sb = new StringBuilder(string.length()); + appendNormalisedWhitespace(sb, string, false); + return sb.toString(); + } + + /** + * Tests if a code point is "whitespace" as defined in the HTML spec: space, tab, line feed, form feed or carriage return. + * Taken from JSoup see https://github.com/jhy/jsoup/issues/1063 + * + * @param c + * code point to test + * @return true if code point is whitespace, false otherwise + */ + private boolean isWhitespace(int c) { + return c == ' ' || c == '\t' || c == '\n' || c == '\f' || c == '\r'; + } + + /** + * After normalizing the whitespace within a string, appends it to a string builder. 
+ * Taken from JSoup see https://github.com/jhy/jsoup/issues/1063 (each run of whitespace code points is appended as a single space; every other code point is appended unchanged) + * + * @param accum + * builder to append to + * @param string + * string to normalize whitespace within + * @param stripLeading + * set to true if you wish to remove any leading whitespace; when false a leading whitespace run is appended as one space + */ + public void appendNormalisedWhitespace(StringBuilder accum, String string, boolean stripLeading) { + boolean lastWasWhite = false; + boolean reachedNonWhite = false; + + int len = string.length(); + int c; + for (int i = 0; i < len; i += Character.charCount(c)) { + c = string.codePointAt(i); + if (isWhitespace(c)) { + if ((stripLeading && !reachedNonWhite) || lastWasWhite) { + continue; + } + accum.append(' '); + lastWasWhite = true; + } else { + accum.appendCodePoint(c); + lastWasWhite = false; + reachedNonWhite = true; + } + } + } + /** * Starts the given {@link Element}. * @@ -822,7 +883,7 @@ private MList startElement(MList parent, Context context, Element element) { } else if (BLOCKQUOTE_TAG.equals(nodeName)) { if (element.childNodeSize() > 0 && element.childNode(0) instanceof TextNode) { TextNode textNode = (TextNode) element.childNode(0); - String newText = trimFirst(textNode.text()); + String newText = trimFirst(text(textNode)); textNode.text(newText); if (!newText.isEmpty()) { res = createMParagraph(context, parent, element, null, null); diff --git a/releng/org.obeonetwork.m2doc.targetplatforms/2022-12/m2doc-2022-12.target b/releng/org.obeonetwork.m2doc.targetplatforms/2022-12/m2doc-2022-12.target index f2349b907..3845d5637 100644 --- a/releng/org.obeonetwork.m2doc.targetplatforms/2022-12/m2doc-2022-12.target +++ b/releng/org.obeonetwork.m2doc.targetplatforms/2022-12/m2doc-2022-12.target @@ -1,7 +1,7 @@ - + @@ -71,7 +71,6 @@ - diff --git a/releng/org.obeonetwork.m2doc.targetplatforms/2022-12/m2doc-2022-12.tpd b/releng/org.obeonetwork.m2doc.targetplatforms/2022-12/m2doc-2022-12.tpd index cf9fadb9a..c1087133e 100644 --- a/releng/org.obeonetwork.m2doc.targetplatforms/2022-12/m2doc-2022-12.tpd +++ 
b/releng/org.obeonetwork.m2doc.targetplatforms/2022-12/m2doc-2022-12.tpd @@ -80,7 +80,6 @@ location Orbit-202212 "https://download.eclipse.org/tools/orbit/downloads/drops/ location "https://download.eclipse.org/tools/orbit/downloads/drops/R20190827152740/repository/" { org.kohsuke.args4j [2.0.21,2.1.0) - org.jsoup [1.8.3,1.8.4) javax.servlet [3.1.0,3.2.0) } diff --git a/releng/org.obeonetwork.m2doc.targetplatforms/capella-6.1.0/m2doc-capella-6.1.0.target b/releng/org.obeonetwork.m2doc.targetplatforms/capella-6.1.0/m2doc-capella-6.1.0.target index 8960843a3..cb4686681 100644 --- a/releng/org.obeonetwork.m2doc.targetplatforms/capella-6.1.0/m2doc-capella-6.1.0.target +++ b/releng/org.obeonetwork.m2doc.targetplatforms/capella-6.1.0/m2doc-capella-6.1.0.target @@ -1,7 +1,7 @@ - + @@ -171,7 +171,6 @@ - diff --git a/releng/org.obeonetwork.m2doc.targetplatforms/capella-6.1.0/m2doc-capella-6.1.0.tpd b/releng/org.obeonetwork.m2doc.targetplatforms/capella-6.1.0/m2doc-capella-6.1.0.tpd index cc54d6218..334697d87 100644 --- a/releng/org.obeonetwork.m2doc.targetplatforms/capella-6.1.0/m2doc-capella-6.1.0.tpd +++ b/releng/org.obeonetwork.m2doc.targetplatforms/capella-6.1.0/m2doc-capella-6.1.0.tpd @@ -181,7 +181,6 @@ location site_Capella "https://download.eclipse.org/capella/core/updates/release location "https://download.eclipse.org/tools/orbit/downloads/drops/R20190827152740/repository/" { org.kohsuke.args4j [2.0.21,2.1.0) - org.jsoup [1.8.3,1.8.4) javax.servlet [3.1.0,3.2.0) } diff --git a/releng/org.obeonetwork.m2doc.targetplatforms/sirius-7.1.0/m2doc-sirius-7.1.0.target b/releng/org.obeonetwork.m2doc.targetplatforms/sirius-7.1.0/m2doc-sirius-7.1.0.target index c83ab87d7..1c6bd5cce 100644 --- a/releng/org.obeonetwork.m2doc.targetplatforms/sirius-7.1.0/m2doc-sirius-7.1.0.target +++ b/releng/org.obeonetwork.m2doc.targetplatforms/sirius-7.1.0/m2doc-sirius-7.1.0.target @@ -1,7 +1,7 @@ - + @@ -74,7 +74,6 @@ - diff --git 
a/releng/org.obeonetwork.m2doc.targetplatforms/sirius-7.1.0/m2doc-sirius-7.1.0.tpd b/releng/org.obeonetwork.m2doc.targetplatforms/sirius-7.1.0/m2doc-sirius-7.1.0.tpd index a4b053195..9701364c9 100644 --- a/releng/org.obeonetwork.m2doc.targetplatforms/sirius-7.1.0/m2doc-sirius-7.1.0.tpd +++ b/releng/org.obeonetwork.m2doc.targetplatforms/sirius-7.1.0/m2doc-sirius-7.1.0.tpd @@ -84,7 +84,6 @@ location Orbit-202212 "https://download.eclipse.org/tools/orbit/downloads/drops/ location "https://download.eclipse.org/tools/orbit/downloads/drops/R20190827152740/repository/" { org.kohsuke.args4j [2.0.21,2.1.0) - org.jsoup [1.8.3,1.8.4) javax.servlet [3.1.0,3.2.0) }