Skip to content

Commit

Permalink
Fix issue #121 Space between attributes not ignored with xml:space="p…
Browse files Browse the repository at this point in the history
…reserve"

+ fix parsing issue: the parser context described the next token rather than the current one, so the context is now attached to each token
+ update version number to 3.1.1.8
  • Loading branch information
morbac committed Oct 2, 2021
1 parent 660e92c commit ee7681f
Show file tree
Hide file tree
Showing 5 changed files with 88 additions and 55 deletions.
2 changes: 1 addition & 1 deletion QuickXmlLib/QuickXml/src/XmlFormater.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ namespace QuickXml {
lastAppliedTokenType = XmlTokenType::Whitespace;
this->out.write(token.chars, token.size);
}
else if (this->parser->getXmlContext().inOpeningTag) {
else if (token.context.inOpeningTag) {
lastAppliedTokenType = XmlTokenType::Whitespace;
this->out << " ";
}
Expand Down
116 changes: 74 additions & 42 deletions QuickXmlLib/QuickXml/src/XmlParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,16 +26,18 @@ namespace QuickXml {
}

// Puts the parser back into its initial state: clears the parsing context
// and the pending-attribute-name flag, sets the read position back to 0 and
// re-initialises the previous/current/next tokens as Undefined tokens.
void XmlParser::reset() {
this->currcontext = { false, false, 0 };
this->hasAttrName = false;
this->currpos = 0;

// tokens reset with four initialisers only (no explicit context given)
this->prevtoken = { XmlTokenType::Undefined, NULL, 0, 0 };
this->currtoken = { XmlTokenType::Undefined, NULL, 0, 0 };
this->nexttoken = { XmlTokenType::Undefined, NULL, 0, 0 };
this->currcontext = { false, false, 0 };

// tokens reset again, each one now carrying a copy of the cleared context;
// these assignments overwrite the three above
this->prevtoken = { XmlTokenType::Undefined, NULL, 0, 0, this->currcontext };
this->currtoken = { XmlTokenType::Undefined, NULL, 0, 0, this->currcontext };
this->nexttoken = { XmlTokenType::Undefined, NULL, 0, 0, this->currcontext };
}

bool XmlParser::isSpacePreserve() {
if (this->currtoken.context.inOpeningTag || this->currtoken.context.inClosingTag) return false;
if (this->preserveSpace.empty()) return false;
return this->preserveSpace.top();
}
Expand Down Expand Up @@ -64,7 +66,7 @@ namespace QuickXml {
}
} while (res.type != XmlTokenType::EndOfFile);

return { XmlTokenType::Undefined, NULL, 0, this->currpos };
return { XmlTokenType::Undefined, NULL, 0, this->currpos, this->currcontext };
}
}

Expand Down Expand Up @@ -106,7 +108,8 @@ namespace QuickXml {
return { XmlTokenType::EndOfFile,
this->srcLength,
this->srcText + this->srcLength,
0 };
0,
this->currcontext };
}

while (this->currpos < this->srcLength) {
Expand All @@ -122,7 +125,8 @@ namespace QuickXml {
return { XmlTokenType::Instruction,
this->currpos,
startpos,
this->readUntil("?>", 0, true) };
this->readUntil("?>", 0, true),
this->currcontext };
}
else if (cursor[1] == '%') {
// not really xml, but for jsp compatibility
Expand All @@ -132,7 +136,8 @@ namespace QuickXml {
return { XmlTokenType::Instruction,
this->currpos,
startpos,
this->readUntil("%>", 0, true) };
this->readUntil("%>", 0, true),
this->currcontext };
}
else if (cursor[1] == '!' && cursor[2] == '-' && cursor[3] == '-') {
// <!--
Expand All @@ -142,7 +147,8 @@ namespace QuickXml {
return { XmlTokenType::Comment,
this->currpos,
startpos,
this->readUntil("-->", 0, true) };
this->readUntil("-->", 0, true),
this->currcontext };
}
else if (cursor[1] == '!' && cursor[2] == '[' && cursor[3] == 'C' && cursor[4] == 'D' &&
cursor[5] == 'A' && cursor[6] == 'T' && cursor[7] == 'A' && cursor[8] == '[') {
Expand All @@ -153,7 +159,8 @@ namespace QuickXml {
return { XmlTokenType::CDATA,
this->currpos,
startpos,
this->readUntil("]]>", 0, true) };
this->readUntil("]]>", 0, true),
this->currcontext };
}
else if (cursor[1] == '!') {
// <! for instance "<![INCLUDE or <!DOCTYPE
Expand Down Expand Up @@ -182,7 +189,8 @@ namespace QuickXml {
XmlToken token = { tokentype,
currpos_bak,
startpos,
ncharsread };
ncharsread,
this->currcontext };
/*if (cursor[0] == '[') {
this->readChars(1);
token.size++;
Expand All @@ -199,7 +207,8 @@ namespace QuickXml {
return { XmlTokenType::TagClosing,
this->currpos,
startpos,
this->readUntilFirstOf("> \r\n") };
this->readUntilFirstOf("> \r\n"),
this->currcontext };
}
else {
// parsing tag name like "<sample" or "<ns:sample"
Expand All @@ -214,7 +223,8 @@ namespace QuickXml {
return { XmlTokenType::TagOpening,
this->currpos,
startpos,
this->readUntilFirstOf(" />\t\r\n") };
this->readUntilFirstOf(" />\t\r\n"),
this->currcontext };
}
break;
}
Expand All @@ -226,7 +236,8 @@ namespace QuickXml {
return { XmlTokenType::DeclarationEnd,
this->currpos,
startpos,
this->readChars(2) };
this->readChars(2),
this->currcontext };
}
else if (currentchar == '>') {
if (this->currcontext.declarationObjects > 0) {
Expand All @@ -235,25 +246,29 @@ namespace QuickXml {
return { XmlTokenType::DeclarationEnd,
this->currpos,
startpos,
this->readChars(1) };
this->readChars(1),
this->currcontext };
}
else if (currentchar == ' ' || currentchar == '\t') {
return { XmlTokenType::Whitespace,
this->currpos,
startpos,
this->readUntilFirstNotOf(" \t") };
this->readUntilFirstNotOf(" \t"),
this->currcontext };
}
else if (currentchar == '\r' || currentchar == '\n') {
return { XmlTokenType::LineBreak,
this->currpos,
startpos,
this->readUntilFirstNotOf("\r\n") };
this->readUntilFirstNotOf("\r\n"),
this->currcontext };
}
else {
return { XmlTokenType::Undefined,
this->currpos,
startpos,
this->readChars(1) };
this->readChars(1),
this->currcontext };
}
}
else if (this->currcontext.inClosingTag) {
Expand All @@ -264,25 +279,29 @@ namespace QuickXml {
return { XmlTokenType::TagClosingEnd,
this->currpos,
startpos,
this->readChars(1) };
this->readChars(1),
this->currcontext };
}
else if (currentchar == ' ' || currentchar == '\t') {
return { XmlTokenType::Whitespace,
this->currpos,
startpos,
this->readUntilFirstNotOf(" \t") };
this->readUntilFirstNotOf(" \t"),
this->currcontext };
}
else if (currentchar == '\r' || currentchar == '\n') {
return { XmlTokenType::LineBreak,
this->currpos,
startpos,
this->readUntilFirstNotOf("\r\n") };
this->readUntilFirstNotOf("\r\n"),
this->currcontext };
}
else {
return { XmlTokenType::Undefined,
this->currpos,
startpos,
this->readChars(1) };
this->readChars(1),
this->currcontext };
}
}
else if (this->currcontext.inOpeningTag) {
Expand All @@ -293,19 +312,22 @@ namespace QuickXml {
return { XmlTokenType::TagOpeningEnd,
this->currpos,
startpos,
this->readChars(1) };
this->readChars(1),
this->currcontext };
}
else if (currentchar == ' ' || currentchar == '\t') {
return { XmlTokenType::Whitespace,
this->currpos,
startpos,
this->readUntilFirstNotOf(" \t") };
this->readUntilFirstNotOf(" \t"),
this->currcontext };
}
else if (currentchar == '\r' || currentchar == '\n') {
return { XmlTokenType::LineBreak,
this->currpos,
startpos,
this->readUntilFirstNotOf("\r\n") };
this->readUntilFirstNotOf("\r\n"),
this->currcontext };
}
else if (currentchar == '/') {
if (cursor[1] == '>') {
Expand All @@ -314,21 +336,24 @@ namespace QuickXml {
return { XmlTokenType::TagSelfClosingEnd,
this->currpos,
startpos,
this->readChars(2) };
this->readChars(2),
this->currcontext };
}
else {
return { XmlTokenType::Undefined,
this->currpos,
startpos,
this->readChars(1) };
this->readChars(1),
this->currcontext };
}
}
else if (currentchar == '=') {
this->expectAttrValue = true;
return { XmlTokenType::Equal,
this->currpos,
startpos,
this->readChars(1) };
this->readChars(1),
this->currcontext };
}
else if (this->hasAttrName) {
this->hasAttrName = false;
Expand All @@ -340,17 +365,19 @@ namespace QuickXml {
// normal case, let's skip the quoted/apostrophed attribute value
char valDelimiter[2] = { currentchar, '\0' };
tmp = { XmlTokenType::AttrValue,
this->currpos,
startpos,
this->readUntilFirstOf(valDelimiter, 1, true) }; // skip actual delimiter + parse content
this->currpos,
startpos,
this->readUntilFirstOf(valDelimiter, 1, true),
this->currcontext }; // skip actual delimiter + parse content
}
else {
// we have some unexpected chars between the = and the attribute value
// let's read next word of string
tmp = { XmlTokenType::AttrValue,
this->currpos,
startpos,
this->readNextWord(true) };
this->currpos,
startpos,
this->readNextWord(true),
this->currcontext };
}

if (!this->preserveSpace.empty() && !strncmp(this->attrnametoken.chars, "xml:space", this->attrnametoken.size)) {
Expand All @@ -369,19 +396,21 @@ namespace QuickXml {
// attribute with no value
this->hasAttrName = true;
XmlToken tmp = { XmlTokenType::AttrName,
this->currpos,
startpos,
this->readUntilFirstOf("= /\t\r\n") };
this->currpos,
startpos,
this->readUntilFirstOf("= /\t\r\n"),
this->currcontext };
this->attrnametoken = tmp;
return tmp;
}
}
else {
this->hasAttrName = true;
XmlToken tmp = { XmlTokenType::AttrName,
this->currpos,
startpos,
this->readUntilFirstOf("= /\t\r\n") };
this->currpos,
startpos,
this->readUntilFirstOf("= /\t\r\n"),
this->currcontext };
this->attrnametoken = tmp;
return tmp;
}
Expand All @@ -391,14 +420,16 @@ namespace QuickXml {
return { XmlTokenType::Text,
this->currpos,
startpos,
this->readUntilFirstOf("<") };
this->readUntilFirstOf("<"),
this->currcontext };
}
}

return { XmlTokenType::Undefined,
this->currpos,
startpos,
this->readChars(1) };
this->readChars(1),
this->currcontext };
}

size_t XmlParser::readChars(size_t nchars) {
Expand Down Expand Up @@ -569,6 +600,7 @@ namespace QuickXml {
case XmlTokenType::Instruction: return "_INSTRUCTION_";
case XmlTokenType::DeclarationBeg: return "_DECLARATION_";
case XmlTokenType::DeclarationEnd: return "_DECLARATION_END_";
case XmlTokenType::DeclarationSelfClosing: return "_DECLARATION_SELFCLOSING_";
case XmlTokenType::Comment: return "_COMMENT_";
case XmlTokenType::CDATA: return "_CDATA_";
case XmlTokenType::LineBreak: return "_LINEBREAK_";
Expand Down
15 changes: 8 additions & 7 deletions QuickXmlLib/QuickXml/src/XmlParser.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,12 @@
#include <list>

namespace QuickXml {
// Parsing state tracked by XmlParser; XmlToken stores one by value in its
// `context` member.
struct XmlContext {
// true while the tokenizer is inside an opening tag; in that state it emits
// attribute, Equal and TagOpeningEnd / TagSelfClosingEnd tokens
bool inOpeningTag;
// true while the tokenizer is inside a closing tag; in that state it emits
// TagClosingEnd tokens
bool inClosingTag;
// counter tested for > 0 when '>' is met in declaration handling
// NOTE(review): presumably the number of currently open "<!" declarations - confirm
size_t declarationObjects;
};

enum XmlTokenType {
Undefined = 1 << 0,

Expand Down Expand Up @@ -36,6 +42,7 @@ namespace QuickXml {
size_t pos; // the token position in stream
const char* chars; // a pointer to token chars
size_t size; // the token chars length
XmlContext context; // the token parsing context
};

const XmlToken undefinedToken = {
Expand All @@ -45,12 +52,6 @@ namespace QuickXml {
0
};

// Parsing state tracked by XmlParser (kept in its `currcontext` member).
struct XmlContext {
// true while the tokenizer is inside an opening tag; in that state it emits
// attribute, Equal and TagOpeningEnd / TagSelfClosingEnd tokens
bool inOpeningTag;
// true while the tokenizer is inside a closing tag; in that state it emits
// TagClosingEnd tokens
bool inClosingTag;
// counter tested for > 0 when '>' is met in declaration handling
// NOTE(review): presumably the number of currently open "<!" declarations - confirm
size_t declarationObjects;
};

class XmlParser {
// constant elements (they do not vary after having been set)
const char* srcText; // pointer to original source text
Expand Down Expand Up @@ -95,13 +96,13 @@ namespace QuickXml {
/*
* Getters
*/
XmlContext getXmlContext() { return this->currcontext; }
XmlToken getPrevToken() { return this->prevtoken; }
XmlToken getCurrToken() { return this->currtoken; }
XmlToken getNextToken() { return this->nexttoken; }

/*
* Indicates if the current node is in xml:space="preserve" context
* The current token's parsing context is considered: the result is always false inside an opening or closing tag.
* @return True when xml:space is in preserve mode.
*/
bool isSpacePreserve();
Expand Down
2 changes: 1 addition & 1 deletion XMLTools.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

//---------------------------------------------------------------------------

#define XMLTOOLS_VERSION_NUMBER L"3.1.1.7"
#define XMLTOOLS_VERSION_NUMBER L"3.1.1.8"
#define XMLTOOLS_HOMEPAGE_URL L"https://github.com/morbac/xmltools"
#ifdef V64BIT
#define XMLTOOLS_VERSION_STATUS L"unicode 64bit"
Expand Down
Loading

0 comments on commit ee7681f

Please sign in to comment.