diff --git a/plugins/org.obeonetwork.m2doc.html/META-INF/MANIFEST.MF b/plugins/org.obeonetwork.m2doc.html/META-INF/MANIFEST.MF
index 089b42082..9f9d90721 100644
--- a/plugins/org.obeonetwork.m2doc.html/META-INF/MANIFEST.MF
+++ b/plugins/org.obeonetwork.m2doc.html/META-INF/MANIFEST.MF
@@ -11,7 +11,7 @@ Require-Bundle: org.eclipse.acceleo.annotations;bundle-version="[5.0.0,8.0.0)",
org.eclipse.emf.ecore,
org.obeonetwork.m2doc;bundle-version="[3.0.0,4.0.0)",
org.apache.poi;bundle-version="[5.2.3,5.2.4)",
- org.jsoup;bundle-version="[1.8.3,1.8.4)",
+ org.jsoup;bundle-version="[1.14.3,2.0.0)",
org.eclipse.acceleo.query
Export-Package: org.obeonetwork.m2doc.html.services
Automatic-Module-Name: org.obeonetwork.m2doc.html
diff --git a/plugins/org.obeonetwork.m2doc.html/src/org/obeonetwork/m2doc/html/services/M2DocHTMLParser.java b/plugins/org.obeonetwork.m2doc.html/src/org/obeonetwork/m2doc/html/services/M2DocHTMLParser.java
index 103edb1e8..f1269aa8b 100644
--- a/plugins/org.obeonetwork.m2doc.html/src/org/obeonetwork/m2doc/html/services/M2DocHTMLParser.java
+++ b/plugins/org.obeonetwork.m2doc.html/src/org/obeonetwork/m2doc/html/services/M2DocHTMLParser.java
@@ -13,6 +13,7 @@
import java.awt.Color;
import java.math.BigInteger;
+import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
@@ -426,7 +427,7 @@ public List parse(URI baseURI, String htmlString) {
final Document document = Jsoup.parse(htmlString, baseURI.toString());
document.outputSettings().syntax(org.jsoup.nodes.Document.OutputSettings.Syntax.xml);
- document.outputSettings().charset("UTF-8");
+ document.outputSettings().charset(StandardCharsets.UTF_8);
final MStyle defaultStyle = new MStyleImpl(null, -1, null, null, -1);
if (document.body().hasAttr("bgcolor")) {
@@ -756,7 +757,7 @@ private void endElement(MList parent, MElement element) {
* tells if a new paragraph is needed
*/
private void insertText(MList parent, final Context context, TextNode node, boolean needNewParagraph) {
- final String text = node.text();
+ final String text = text(node);
if (!text.trim().isEmpty()) {
final String textToInsert;
if (needNewParagraph) {
@@ -778,11 +779,11 @@ private void insertText(MList parent, final Context context, TextNode node, bool
}
/**
- * Trims the begining of the given {@link String}.
+ * Trims the beginning of the given {@link String}.
*
* @param text
* the {@link String}
- * @return the trimed {@link String}
+ * @return the trimmed {@link String}
*/
private String trimFirst(String text) {
final String res;
@@ -801,6 +802,66 @@ private String trimFirst(String text) {
return res;
}
+ /**
+  * Returns the whole text of the given {@link TextNode} with every run of whitespace collapsed into a
+  * single space; leading whitespace is kept. Adapted from jsoup, see https://github.com/jhy/jsoup/issues/1063
+  *
+  * @param textNode
+  *            the {@link TextNode} to read
+  * @return the whitespace-normalized text of the given {@link TextNode}
+  */
+ private String text(TextNode textNode) {
+     final String wholeText = textNode.getWholeText();
+     final StringBuilder normalized = new StringBuilder(wholeText.length());
+     appendNormalisedWhitespace(normalized, wholeText, false);
+     return normalized.toString();
+ }
+
+ /**
+  * Tells if the given code point is one of the HTML whitespace characters: space, tab, line feed,
+  * form feed or carriage return. Adapted from jsoup, see https://github.com/jhy/jsoup/issues/1063
+  *
+  * @param c
+  *            code point to test
+  * @return <code>true</code> if the code point is whitespace, <code>false</code> otherwise
+  */
+ private boolean isWhitespace(int c) {
+     return " \t\n\f\r".indexOf(c) >= 0;
+ }
+
+ /**
+  * Appends the given string to the given builder, replacing every run of consecutive whitespace
+  * (see {@link #isWhitespace(int)}) with a single space. Other code points are copied unchanged.
+  * Adapted from jsoup, see https://github.com/jhy/jsoup/issues/1063
+  *
+  * @param accum
+  *            builder to append to
+  * @param string
+  *            string to normalize whitespace within
+  * @param stripLeading
+  *            <code>true</code> to drop whitespace found before the first non-whitespace code point,
+  *            <code>false</code> to collapse it into a single space like any other run
+  */
+ public void appendNormalisedWhitespace(StringBuilder accum, String string, boolean stripLeading) {
+     // true while whitespace must be dropped: before the first non-whitespace code point when
+     // stripLeading is set, and right after a space has been appended for the current run
+     boolean skipWhitespace = stripLeading;
+
+     int index = 0;
+     while (index < string.length()) {
+         final int codePoint = string.codePointAt(index);
+         // advance by whole code points so a surrogate pair is handled as one unit
+         index += Character.charCount(codePoint);
+         if (!isWhitespace(codePoint)) {
+             accum.appendCodePoint(codePoint);
+             skipWhitespace = false;
+         } else if (!skipWhitespace) {
+             accum.append(' ');
+             skipWhitespace = true;
+         }
+     }
+ }
+
/**
* Starts the given {@link Element}.
*
@@ -822,7 +883,7 @@ private MList startElement(MList parent, Context context, Element element) {
} else if (BLOCKQUOTE_TAG.equals(nodeName)) {
if (element.childNodeSize() > 0 && element.childNode(0) instanceof TextNode) {
TextNode textNode = (TextNode) element.childNode(0);
- String newText = trimFirst(textNode.text());
+ String newText = trimFirst(text(textNode));
textNode.text(newText);
if (!newText.isEmpty()) {
res = createMParagraph(context, parent, element, null, null);
diff --git a/releng/org.obeonetwork.m2doc.targetplatforms/2022-12/m2doc-2022-12.target b/releng/org.obeonetwork.m2doc.targetplatforms/2022-12/m2doc-2022-12.target
index f2349b907..3845d5637 100644
--- a/releng/org.obeonetwork.m2doc.targetplatforms/2022-12/m2doc-2022-12.target
+++ b/releng/org.obeonetwork.m2doc.targetplatforms/2022-12/m2doc-2022-12.target
@@ -1,7 +1,7 @@
-
+
@@ -71,7 +71,6 @@
-
diff --git a/releng/org.obeonetwork.m2doc.targetplatforms/2022-12/m2doc-2022-12.tpd b/releng/org.obeonetwork.m2doc.targetplatforms/2022-12/m2doc-2022-12.tpd
index cf9fadb9a..c1087133e 100644
--- a/releng/org.obeonetwork.m2doc.targetplatforms/2022-12/m2doc-2022-12.tpd
+++ b/releng/org.obeonetwork.m2doc.targetplatforms/2022-12/m2doc-2022-12.tpd
@@ -80,7 +80,6 @@ location Orbit-202212 "https://download.eclipse.org/tools/orbit/downloads/drops/
location "https://download.eclipse.org/tools/orbit/downloads/drops/R20190827152740/repository/" {
org.kohsuke.args4j [2.0.21,2.1.0)
- org.jsoup [1.8.3,1.8.4)
javax.servlet [3.1.0,3.2.0)
}
diff --git a/releng/org.obeonetwork.m2doc.targetplatforms/capella-6.1.0/m2doc-capella-6.1.0.target b/releng/org.obeonetwork.m2doc.targetplatforms/capella-6.1.0/m2doc-capella-6.1.0.target
index 8960843a3..cb4686681 100644
--- a/releng/org.obeonetwork.m2doc.targetplatforms/capella-6.1.0/m2doc-capella-6.1.0.target
+++ b/releng/org.obeonetwork.m2doc.targetplatforms/capella-6.1.0/m2doc-capella-6.1.0.target
@@ -1,7 +1,7 @@
-
+
@@ -171,7 +171,6 @@
-
diff --git a/releng/org.obeonetwork.m2doc.targetplatforms/capella-6.1.0/m2doc-capella-6.1.0.tpd b/releng/org.obeonetwork.m2doc.targetplatforms/capella-6.1.0/m2doc-capella-6.1.0.tpd
index cc54d6218..334697d87 100644
--- a/releng/org.obeonetwork.m2doc.targetplatforms/capella-6.1.0/m2doc-capella-6.1.0.tpd
+++ b/releng/org.obeonetwork.m2doc.targetplatforms/capella-6.1.0/m2doc-capella-6.1.0.tpd
@@ -181,7 +181,6 @@ location site_Capella "https://download.eclipse.org/capella/core/updates/release
location "https://download.eclipse.org/tools/orbit/downloads/drops/R20190827152740/repository/" {
org.kohsuke.args4j [2.0.21,2.1.0)
- org.jsoup [1.8.3,1.8.4)
javax.servlet [3.1.0,3.2.0)
}
diff --git a/releng/org.obeonetwork.m2doc.targetplatforms/sirius-7.1.0/m2doc-sirius-7.1.0.target b/releng/org.obeonetwork.m2doc.targetplatforms/sirius-7.1.0/m2doc-sirius-7.1.0.target
index c83ab87d7..1c6bd5cce 100644
--- a/releng/org.obeonetwork.m2doc.targetplatforms/sirius-7.1.0/m2doc-sirius-7.1.0.target
+++ b/releng/org.obeonetwork.m2doc.targetplatforms/sirius-7.1.0/m2doc-sirius-7.1.0.target
@@ -1,7 +1,7 @@
-
+
@@ -74,7 +74,6 @@
-
diff --git a/releng/org.obeonetwork.m2doc.targetplatforms/sirius-7.1.0/m2doc-sirius-7.1.0.tpd b/releng/org.obeonetwork.m2doc.targetplatforms/sirius-7.1.0/m2doc-sirius-7.1.0.tpd
index a4b053195..9701364c9 100644
--- a/releng/org.obeonetwork.m2doc.targetplatforms/sirius-7.1.0/m2doc-sirius-7.1.0.tpd
+++ b/releng/org.obeonetwork.m2doc.targetplatforms/sirius-7.1.0/m2doc-sirius-7.1.0.tpd
@@ -84,7 +84,6 @@ location Orbit-202212 "https://download.eclipse.org/tools/orbit/downloads/drops/
location "https://download.eclipse.org/tools/orbit/downloads/drops/R20190827152740/repository/" {
org.kohsuke.args4j [2.0.21,2.1.0)
- org.jsoup [1.8.3,1.8.4)
javax.servlet [3.1.0,3.2.0)
}