From ee7681fe2f12936fe6476d82b3671499cfa6e880 Mon Sep 17 00:00:00 2001 From: nc Date: Sat, 2 Oct 2021 13:03:22 +0200 Subject: [PATCH] Fix issue #121 Space between attributes not ignored with xml:space="preserve" + fix parsing issue : the parser context was pointing on next token, not the actual one, therefore context has been linked to token + update version number to 3.1.1.8 --- QuickXmlLib/QuickXml/src/XmlFormater.cpp | 2 +- QuickXmlLib/QuickXml/src/XmlParser.cpp | 116 +++++++++++++++-------- QuickXmlLib/QuickXml/src/XmlParser.h | 15 +-- XMLTools.h | 2 +- XMLTools.rc | 8 +- 5 files changed, 88 insertions(+), 55 deletions(-) diff --git a/QuickXmlLib/QuickXml/src/XmlFormater.cpp b/QuickXmlLib/QuickXml/src/XmlFormater.cpp index 88ca080..219645f 100644 --- a/QuickXmlLib/QuickXml/src/XmlFormater.cpp +++ b/QuickXmlLib/QuickXml/src/XmlFormater.cpp @@ -119,7 +119,7 @@ namespace QuickXml { lastAppliedTokenType = XmlTokenType::Whitespace; this->out.write(token.chars, token.size); } - else if (this->parser->getXmlContext().inOpeningTag) { + else if (token.context.inOpeningTag) { lastAppliedTokenType = XmlTokenType::Whitespace; this->out << " "; } diff --git a/QuickXmlLib/QuickXml/src/XmlParser.cpp b/QuickXmlLib/QuickXml/src/XmlParser.cpp index 62bdab6..7a874b7 100644 --- a/QuickXmlLib/QuickXml/src/XmlParser.cpp +++ b/QuickXmlLib/QuickXml/src/XmlParser.cpp @@ -26,16 +26,18 @@ namespace QuickXml { } void XmlParser::reset() { - this->currcontext = { false, false, 0 }; this->hasAttrName = false; this->currpos = 0; - this->prevtoken = { XmlTokenType::Undefined, NULL, 0, 0 }; - this->currtoken = { XmlTokenType::Undefined, NULL, 0, 0 }; - this->nexttoken = { XmlTokenType::Undefined, NULL, 0, 0 }; + this->currcontext = { false, false, 0 }; + + this->prevtoken = { XmlTokenType::Undefined, NULL, 0, 0, this->currcontext }; + this->currtoken = { XmlTokenType::Undefined, NULL, 0, 0, this->currcontext }; + this->nexttoken = { XmlTokenType::Undefined, NULL, 0, 0, this->currcontext }; } bool XmlParser::isSpacePreserve() { + if (this->currtoken.context.inOpeningTag || this->currtoken.context.inClosingTag) return false; if (this->preserveSpace.empty()) return false; return this->preserveSpace.top(); } @@ -64,7 +66,7 @@ namespace QuickXml { } } while (res.type != XmlTokenType::EndOfFile); - return { XmlTokenType::Undefined, NULL, 0, this->currpos }; + return { XmlTokenType::Undefined, NULL, 0, this->currpos, this->currcontext }; } } @@ -106,7 +108,8 @@ namespace QuickXml { return { XmlTokenType::EndOfFile, this->srcLength, this->srcText + this->srcLength, - 0 }; + 0, + this->currcontext }; } while (this->currpos < this->srcLength) { @@ -122,7 +125,8 @@ namespace QuickXml { return { XmlTokenType::Instruction, this->currpos, startpos, - this->readUntil("?>", 0, true) }; + this->readUntil("?>", 0, true), + this->currcontext }; } else if (cursor[1] == '%') { // not really xml, but for jsp compatibility @@ -132,7 +136,8 @@ namespace QuickXml { return { XmlTokenType::Instruction, this->currpos, startpos, - this->readUntil("%>", 0, true) }; + this->readUntil("%>", 0, true), + this->currcontext }; } else if (cursor[1] == '!' && cursor[2] == '-' && cursor[3] == '-') { // ", 0, true) }; + this->readUntil("-->", 0, true), + this->currcontext }; } else if (cursor[1] == '!' && cursor[2] == '[' && cursor[3] == 'C' && cursor[4] == 'D' && cursor[5] == 'A' && cursor[6] == 'T' && cursor[7] == 'A' && cursor[8] == '[') { @@ -153,7 +159,8 @@ namespace QuickXml { return { XmlTokenType::CDATA, this->currpos, startpos, - this->readUntil("]]>", 0, true) }; + this->readUntil("]]>", 0, true), + this->currcontext }; } else if (cursor[1] == '!') { // currcontext }; /*if (cursor[0] == '[') { this->readChars(1); token.size++; @@ -199,7 +207,8 @@ namespace QuickXml { return { XmlTokenType::TagClosing, this->currpos, startpos, - this->readUntilFirstOf("> \r\n") }; + this->readUntilFirstOf("> \r\n"), + this->currcontext }; } else { // parsing tag name like "currpos, startpos, - this->readUntilFirstOf(" />\t\r\n") }; + this->readUntilFirstOf(" />\t\r\n"), + this->currcontext }; } break; } @@ -226,7 +236,8 @@ namespace QuickXml { return { XmlTokenType::DeclarationEnd, this->currpos, startpos, - this->readChars(2) }; + this->readChars(2), + this->currcontext }; } else if (currentchar == '>') { if (this->currcontext.declarationObjects > 0) { @@ -235,25 +246,29 @@ namespace QuickXml { return { XmlTokenType::DeclarationEnd, this->currpos, startpos, - this->readChars(1) }; + this->readChars(1), + this->currcontext }; } else if (currentchar == ' ' || currentchar == '\t') { return { XmlTokenType::Whitespace, this->currpos, startpos, - this->readUntilFirstNotOf(" \t") }; + this->readUntilFirstNotOf(" \t"), + this->currcontext }; } else if (currentchar == '\r' || currentchar == '\n') { return { XmlTokenType::LineBreak, this->currpos, startpos, - this->readUntilFirstNotOf("\r\n") }; + this->readUntilFirstNotOf("\r\n"), + this->currcontext }; } else { return { XmlTokenType::Undefined, this->currpos, startpos, - this->readChars(1) }; + this->readChars(1), + this->currcontext }; } } else if (this->currcontext.inClosingTag) { @@ -264,25 +279,29 @@ namespace QuickXml { return { XmlTokenType::TagClosingEnd, this->currpos, startpos, - this->readChars(1) }; + this->readChars(1), + this->currcontext }; } else if (currentchar == ' ' || currentchar == '\t') { return { XmlTokenType::Whitespace, this->currpos, startpos, - this->readUntilFirstNotOf(" \t") }; + this->readUntilFirstNotOf(" \t"), + this->currcontext }; } else if (currentchar == '\r' || currentchar == '\n') { return { XmlTokenType::LineBreak, this->currpos, startpos, - this->readUntilFirstNotOf("\r\n") }; + this->readUntilFirstNotOf("\r\n"), + this->currcontext }; } else { return { XmlTokenType::Undefined, this->currpos, startpos, - this->readChars(1) }; + this->readChars(1), + this->currcontext }; } } else if (this->currcontext.inOpeningTag) { @@ -293,19 +312,22 @@ namespace QuickXml { return { XmlTokenType::TagOpeningEnd, this->currpos, startpos, - this->readChars(1) }; + this->readChars(1), + this->currcontext }; } else if (currentchar == ' ' || currentchar == '\t') { return { XmlTokenType::Whitespace, this->currpos, startpos, - this->readUntilFirstNotOf(" \t") }; + this->readUntilFirstNotOf(" \t"), + this->currcontext }; } else if (currentchar == '\r' || currentchar == '\n') { return { XmlTokenType::LineBreak, this->currpos, startpos, - this->readUntilFirstNotOf("\r\n") }; + this->readUntilFirstNotOf("\r\n"), + this->currcontext }; } else if (currentchar == '/') { if (cursor[1] == '>') { @@ -314,13 +336,15 @@ namespace QuickXml { return { XmlTokenType::TagSelfClosingEnd, this->currpos, startpos, - this->readChars(2) }; + this->readChars(2), + this->currcontext }; } else { return { XmlTokenType::Undefined, this->currpos, startpos, - this->readChars(1) }; + this->readChars(1), + this->currcontext }; } } else if (currentchar == '=') { @@ -328,7 +352,8 @@ namespace QuickXml { return { XmlTokenType::Equal, this->currpos, startpos, - this->readChars(1) }; + this->readChars(1), + this->currcontext }; } else if (this->hasAttrName) { this->hasAttrName = false; @@ -340,17 +365,19 @@ namespace QuickXml { // normal case, let's skip the quoted/apostrophed attribute value char valDelimiter[2] = { currentchar, '\0' }; tmp = { XmlTokenType::AttrValue, - this->currpos, - startpos, - this->readUntilFirstOf(valDelimiter, 1, true) }; // skip actual delimiter + parse content + this->currpos, + startpos, + this->readUntilFirstOf(valDelimiter, 1, true), + this->currcontext }; // skip actual delimiter + parse content } else { // we have some unexpected chars between the = and the attribute value // let's read next word of string tmp = { XmlTokenType::AttrValue, - this->currpos, - startpos, - this->readNextWord(true) }; + this->currpos, + startpos, + this->readNextWord(true), + this->currcontext }; } if (!this->preserveSpace.empty() && !strncmp(this->attrnametoken.chars, "xml:space", this->attrnametoken.size)) { @@ -369,9 +396,10 @@ namespace QuickXml { // attribute with no value this->hasAttrName = true; XmlToken tmp = { XmlTokenType::AttrName, - this->currpos, - startpos, - this->readUntilFirstOf("= /\t\r\n") }; + this->currpos, + startpos, + this->readUntilFirstOf("= /\t\r\n"), + this->currcontext }; this->attrnametoken = tmp; return tmp; } @@ -379,9 +407,10 @@ namespace QuickXml { else { this->hasAttrName = true; XmlToken tmp = { XmlTokenType::AttrName, - this->currpos, - startpos, - this->readUntilFirstOf("= /\t\r\n") }; + this->currpos, + startpos, + this->readUntilFirstOf("= /\t\r\n"), + this->currcontext }; this->attrnametoken = tmp; return tmp; } @@ -391,14 +420,16 @@ namespace QuickXml { return { XmlTokenType::Text, this->currpos, startpos, - this->readUntilFirstOf("<") }; + this->readUntilFirstOf("<"), + this->currcontext }; } } return { XmlTokenType::Undefined, this->currpos, startpos, - this->readChars(1) }; + this->readChars(1), + this->currcontext }; } size_t XmlParser::readChars(size_t nchars) { @@ -569,6 +600,7 @@ namespace QuickXml { case XmlTokenType::Instruction: return "_INSTRUCTION_"; case XmlTokenType::DeclarationBeg: return "_DECLARATION_"; case XmlTokenType::DeclarationEnd: return "_DECLARATION_END_"; + case XmlTokenType::DeclarationSelfClosing: return "_DECLARATION_SELFCLOSING_"; case XmlTokenType::Comment: return "_COMMENT_"; case XmlTokenType::CDATA: return "_CDATA_"; case XmlTokenType::LineBreak: return "_LINEBREAK_"; diff --git a/QuickXmlLib/QuickXml/src/XmlParser.h b/QuickXmlLib/QuickXml/src/XmlParser.h index 89260dc..cc66c74 100644 --- a/QuickXmlLib/QuickXml/src/XmlParser.h +++ b/QuickXmlLib/QuickXml/src/XmlParser.h @@ -5,6 +5,12 @@ #include namespace QuickXml { + struct XmlContext { + bool inOpeningTag; + bool inClosingTag; + size_t declarationObjects; + }; + enum XmlTokenType { Undefined = 1 << 0, @@ -36,6 +42,7 @@ namespace QuickXml { size_t pos; // the token position in stream const char* chars; // a pointer to token chars size_t size; // the token chars length + XmlContext context; // the token parsing context }; const XmlToken undefinedToken = { @@ -45,12 +52,6 @@ namespace QuickXml { 0 }; - struct XmlContext { - bool inOpeningTag; - bool inClosingTag; - size_t declarationObjects; - }; - class XmlParser { // constant elements (they no vary after having been set) const char* srcText; // pointer to original source text @@ -95,13 +96,13 @@ namespace QuickXml { /* * Getters */ - XmlContext getXmlContext() { return this->currcontext; } XmlToken getPrevToken() { return this->prevtoken; } XmlToken getCurrToken() { return this->currtoken; } XmlToken getNextToken() { return this->nexttoken; } /* * Indicates if the current node is in xml:space="preserve" context + * @param contextualized Should the current parsing context be considered * @return True when xml:space is in preserve mode. */ bool isSpacePreserve(); diff --git a/XMLTools.h b/XMLTools.h index 3ab0c3d..758bec8 100644 --- a/XMLTools.h +++ b/XMLTools.h @@ -21,7 +21,7 @@ //--------------------------------------------------------------------------- -#define XMLTOOLS_VERSION_NUMBER L"3.1.1.7" +#define XMLTOOLS_VERSION_NUMBER L"3.1.1.8" #define XMLTOOLS_HOMEPAGE_URL L"https://github.com/morbac/xmltools" #ifdef V64BIT #define XMLTOOLS_VERSION_STATUS L"unicode 64bit" diff --git a/XMLTools.rc b/XMLTools.rc index 44f6519..a6d69c2 100644 --- a/XMLTools.rc +++ b/XMLTools.rc @@ -25,8 +25,8 @@ LANGUAGE LANG_FRENCH, SUBLANG_FRENCH // VS_VERSION_INFO VERSIONINFO - FILEVERSION 3,1,1,7 - PRODUCTVERSION 3,1,1,7 + FILEVERSION 3,1,1,8 + PRODUCTVERSION 3,1,1,8 FILEFLAGSMASK 0x3fL #ifdef _DEBUG FILEFLAGS 0x1L @@ -42,12 +42,12 @@ BEGIN BLOCK "040004b0" BEGIN VALUE "FileDescription", "XMLTools" - VALUE "FileVersion", "3.1.1.7" + VALUE "FileVersion", "3.1.1.8" VALUE "InternalName", "XMLTools" VALUE "LegalCopyright", "Copyright (C) 2005-2021" VALUE "OriginalFilename", "XMLTools.dll" VALUE "ProductName", "XMLTools" - VALUE "ProductVersion", "3.1.1.7" + VALUE "ProductVersion", "3.1.1.8" END END BLOCK "VarFileInfo"