Skip to content

Commit

Permalink
Widened JSoup range for HTML services.
Browse files Browse the repository at this point in the history
  • Loading branch information
ylussaud committed Apr 21, 2023
1 parent 5aceb77 commit 27c3dec
Show file tree
Hide file tree
Showing 8 changed files with 70 additions and 15 deletions.
2 changes: 1 addition & 1 deletion plugins/org.obeonetwork.m2doc.html/META-INF/MANIFEST.MF
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ Require-Bundle: org.eclipse.acceleo.annotations;bundle-version="[5.0.0,8.0.0)",
org.eclipse.emf.ecore,
org.obeonetwork.m2doc;bundle-version="[3.0.0,4.0.0)",
org.apache.poi;bundle-version="[5.2.3,5.2.4)",
org.jsoup;bundle-version="[1.8.3,1.8.4)",
org.jsoup;bundle-version="[1.14.3,2.0.0)",
org.eclipse.acceleo.query
Export-Package: org.obeonetwork.m2doc.html.services
Automatic-Module-Name: org.obeonetwork.m2doc.html
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

import java.awt.Color;
import java.math.BigInteger;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
Expand Down Expand Up @@ -426,7 +427,7 @@ public List<MElement> parse(URI baseURI, String htmlString) {

final Document document = Jsoup.parse(htmlString, baseURI.toString());
document.outputSettings().syntax(org.jsoup.nodes.Document.OutputSettings.Syntax.xml);
document.outputSettings().charset("UTF-8");
document.outputSettings().charset(StandardCharsets.UTF_8);

final MStyle defaultStyle = new MStyleImpl(null, -1, null, null, -1);
if (document.body().hasAttr("bgcolor")) {
Expand Down Expand Up @@ -756,7 +757,7 @@ private void endElement(MList parent, MElement element) {
* tells if a new paragraph is needed
*/
private void insertText(MList parent, final Context context, TextNode node, boolean needNewParagraph) {
final String text = node.text();
final String text = text(node);
if (!text.trim().isEmpty()) {
final String textToInsert;
if (needNewParagraph) {
Expand All @@ -778,11 +779,11 @@ private void insertText(MList parent, final Context context, TextNode node, bool
}

/**
* Trims the begining of the given {@link String}.
* Trims the beginning of the given {@link String}.
*
* @param text
* the {@link String}
* @return the trimed {@link String}
* @return the trimmed {@link String}
*/
private String trimFirst(String text) {
final String res;
Expand All @@ -801,6 +802,66 @@ private String trimFirst(String text) {
return res;
}

/**
* Gets the text of the given {@link TextNode} with runs of HTML whitespace collapsed to a single space.
* Taken from JSoup, see https://github.com/jhy/jsoup/issues/1063
*
* @param textNode
* the {@link TextNode}
* @return the text of the given {@link TextNode}
*/
private String text(TextNode textNode) {
    // Start from the whole text of the node as returned by getWholeText().
    final String wholeText = textNode.getWholeText();
    final StringBuilder normalised = new StringBuilder(wholeText.length());
    // stripLeading is false: leading whitespace is collapsed to one space, not removed.
    appendNormalisedWhitespace(normalised, wholeText, false);
    final String result = normalised.toString();
    return result;
}

/**
* Tests if a code point is "whitespace" as defined in the HTML spec.
* Taken from JSoup, see https://github.com/jhy/jsoup/issues/1063
*
* @param c
* code point to test
* @return true if code point is whitespace, false otherwise
*/
private boolean isWhitespace(int c) {
    // Only these five code points count as whitespace here: space, tab,
    // line feed, form feed and carriage return.
    switch (c) {
        case ' ':
        case '\t':
        case '\n':
        case '\f':
        case '\r':
            return true;
        default:
            return false;
    }
}

/**
* After normalizing the whitespace within a string, appends it to a string builder.
* Taken from JSoup, see https://github.com/jhy/jsoup/issues/1063
*
* @param accum
* builder to append to
* @param string
* string to normalize whitespace within
* @param stripLeading
* set to true if you wish to remove any leading whitespace
*/
public void appendNormalisedWhitespace(StringBuilder accum, String string, boolean stripLeading) {
    final int length = string.length();
    boolean previousWasWhitespace = false;
    boolean seenNonWhitespace = false;

    // Walk the string code point by code point so surrogate pairs are kept intact.
    int index = 0;
    while (index < length) {
        final int codePoint = string.codePointAt(index);
        index += Character.charCount(codePoint);

        if (!isWhitespace(codePoint)) {
            // Non-whitespace is copied through unchanged.
            accum.appendCodePoint(codePoint);
            previousWasWhitespace = false;
            seenNonWhitespace = true;
        } else if (!previousWasWhitespace && (seenNonWhitespace || !stripLeading)) {
            // First whitespace of a run becomes a single space; the rest of the run,
            // and leading whitespace when stripLeading is set, is dropped.
            accum.append(' ');
            previousWasWhitespace = true;
        }
    }
}

/**
* Starts the given {@link Element}.
*
Expand All @@ -822,7 +883,7 @@ private MList startElement(MList parent, Context context, Element element) {
} else if (BLOCKQUOTE_TAG.equals(nodeName)) {
if (element.childNodeSize() > 0 && element.childNode(0) instanceof TextNode) {
TextNode textNode = (TextNode) element.childNode(0);
String newText = trimFirst(textNode.text());
String newText = trimFirst(text(textNode));
textNode.text(newText);
if (!newText.isEmpty()) {
res = createMParagraph(context, parent, element, null, null);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?pde?>
<!-- generated with https://github.com/eclipse-cbi/targetplatform-dsl -->
<target name="M2Doc 2022-12 Target Platform" sequenceNumber="1680171126">
<target name="M2Doc 2022-12 Target Platform" sequenceNumber="1682072983">
<locations>
<location includeMode="planner" includeAllPlatforms="false" includeSource="true" includeConfigurePhase="false" type="InstallableUnit">
<unit id="org.eclipse.license.feature.group" version="0.0.0"/>
Expand Down Expand Up @@ -71,7 +71,6 @@
</location>
<location includeMode="planner" includeAllPlatforms="false" includeSource="true" includeConfigurePhase="false" type="InstallableUnit">
<unit id="org.kohsuke.args4j" version="2.0.21.v201301150030"/>
<unit id="org.jsoup" version="1.8.3.v20181012-1713"/>
<unit id="javax.servlet" version="3.1.0.v201410161800"/>
<repository location="https://download.eclipse.org/tools/orbit/downloads/drops/R20190827152740/repository/"/>
</location>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,6 @@ location Orbit-202212 "https://download.eclipse.org/tools/orbit/downloads/drops/

location "https://download.eclipse.org/tools/orbit/downloads/drops/R20190827152740/repository/" {
org.kohsuke.args4j [2.0.21,2.1.0)
org.jsoup [1.8.3,1.8.4)
javax.servlet [3.1.0,3.2.0)
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?pde?>
<!-- generated with https://github.com/eclipse-cbi/targetplatform-dsl -->
<target name="M2Doc Capella 6.1.0 Target Platform" sequenceNumber="1681983032">
<target name="M2Doc Capella 6.1.0 Target Platform" sequenceNumber="1682072995">
<locations>
<location includeMode="planner" includeAllPlatforms="false" includeSource="true" includeConfigurePhase="false" type="InstallableUnit">
<unit id="org.eclipse.gmf.runtime.notation.sdk.feature.group" version="0.0.0"/>
Expand Down Expand Up @@ -171,7 +171,6 @@
</location>
<location includeMode="planner" includeAllPlatforms="false" includeSource="true" includeConfigurePhase="false" type="InstallableUnit">
<unit id="org.kohsuke.args4j" version="2.0.21.v201301150030"/>
<unit id="org.jsoup" version="1.8.3.v20181012-1713"/>
<unit id="javax.servlet" version="3.1.0.v201410161800"/>
<repository location="https://download.eclipse.org/tools/orbit/downloads/drops/R20190827152740/repository/"/>
</location>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,6 @@ location site_Capella "https://download.eclipse.org/capella/core/updates/release

location "https://download.eclipse.org/tools/orbit/downloads/drops/R20190827152740/repository/" {
org.kohsuke.args4j [2.0.21,2.1.0)
org.jsoup [1.8.3,1.8.4)
javax.servlet [3.1.0,3.2.0)
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?pde?>
<!-- generated with https://github.com/eclipse-cbi/targetplatform-dsl -->
<target name="M2Doc Sirius 7.1.0 Target Platform" sequenceNumber="1680173290">
<target name="M2Doc Sirius 7.1.0 Target Platform" sequenceNumber="1682072991">
<locations>
<location includeMode="planner" includeAllPlatforms="false" includeSource="true" includeConfigurePhase="false" type="InstallableUnit">
<unit id="org.eclipse.license.feature.group" version="0.0.0"/>
Expand Down Expand Up @@ -74,7 +74,6 @@
</location>
<location includeMode="planner" includeAllPlatforms="false" includeSource="true" includeConfigurePhase="false" type="InstallableUnit">
<unit id="org.kohsuke.args4j" version="2.0.21.v201301150030"/>
<unit id="org.jsoup" version="1.8.3.v20181012-1713"/>
<unit id="javax.servlet" version="3.1.0.v201410161800"/>
<repository location="https://download.eclipse.org/tools/orbit/downloads/drops/R20190827152740/repository/"/>
</location>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,6 @@ location Orbit-202212 "https://download.eclipse.org/tools/orbit/downloads/drops/

location "https://download.eclipse.org/tools/orbit/downloads/drops/R20190827152740/repository/" {
org.kohsuke.args4j [2.0.21,2.1.0)
org.jsoup [1.8.3,1.8.4)
javax.servlet [3.1.0,3.2.0)
}

Expand Down

0 comments on commit 27c3dec

Please sign in to comment.