Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support for 8-bit ASCII and unescaped UTF8 characters #28

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 44 additions & 16 deletions JsonStreamingParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,15 @@ void JsonStreamingParser::reset() {
unicodeEscapeBufferPos = 0;
unicodeBufferPos = 0;
characterCounter = 0;
utf8Length = 0;
utf8Pos = 0;
}

// Registers the listener used by this parser: stores the given pointer in
// myListener, replacing whatever was stored there before.
// NOTE(review): no null check is performed here — confirm that callers always
// pass a valid listener before feeding characters to the parser.
void JsonStreamingParser::setListener(JsonListener* listener) {
myListener = listener;
}

void JsonStreamingParser::parse(char c) {
void JsonStreamingParser::parse(unsigned char c) {
//System.out.print(c);
// valid whitespace characters in JSON (from RFC4627 for JSON) include:
// space, horizontal tab, line feed or new line, and carriage return.
Expand All @@ -58,13 +60,39 @@ void JsonStreamingParser::parse(char c) {
endString();
} else if (c == '\\') {
state = STATE_START_ESCAPE;
} else if ((c < 0x1f) || (c == 0x7f)) {
} else if (c >= 0xc2 && c <= 0xdf) {
state = STATE_UNESCAPED_UTF8;
utf8Pos = 1;
utf8Length = 2;
buffer[bufferPos] = c;
increaseBufferPointer();
} else if (c >= 0xe0 && c <= 0xef) {
state = STATE_UNESCAPED_UTF8;
utf8Pos = 1;
utf8Length = 3;
buffer[bufferPos] = c;
increaseBufferPointer();
} else if (c >= 0xf0 && c <= 0xf4) {
state = STATE_UNESCAPED_UTF8;
utf8Pos = 1;
utf8Length = 4;
buffer[bufferPos] = c;
increaseBufferPointer();
} else if ((c < 0x1f) || (c == 0x7f) || (c >= 0x80 && c <= 0xa0) || (c == 0xad)) {
//throw new RuntimeException("Unescaped control character encountered: " + c + " at position" + characterCounter);
} else {
buffer[bufferPos] = c;
increaseBufferPointer();
}
break;
case STATE_UNESCAPED_UTF8:
utf8Pos++;
buffer[bufferPos] = c;
increaseBufferPointer();
if (utf8Pos == utf8Length) {
state = STATE_IN_STRING;
}
break;
case STATE_IN_ARRAY:
if (c == ']') {
endArray();
Expand Down Expand Up @@ -146,7 +174,7 @@ void JsonStreamingParser::parse(char c) {
buffer[bufferPos] = c;
increaseBufferPointer();
} else if (c == '+' || c == '-') {
char last = buffer[bufferPos - 1];
unsigned char last = buffer[bufferPos - 1];
if (!(last == 'e' || last == 'E')) {
//throw new RuntimeException("Can only have '+' or '-' after the 'e' or 'E' in a number." + characterCounter);
}
Expand Down Expand Up @@ -222,7 +250,7 @@ void JsonStreamingParser::endString() {
}
bufferPos = 0;
}
void JsonStreamingParser::startValue(char c) {
void JsonStreamingParser::startValue(unsigned char c) {
if (c == '[') {
startArray();
} else if (c == '{') {
Expand All @@ -249,7 +277,7 @@ void JsonStreamingParser::startValue(char c) {
}
}

boolean JsonStreamingParser::isDigit(char c) {
boolean JsonStreamingParser::isDigit(unsigned char c) {
  // This check is only applied to the first character of a number, which is
  // why a leading minus sign is accepted alongside the ASCII digits.
  if (c == '-') {
    return true;
  }
  return '0' <= c && c <= '9';
}
Expand Down Expand Up @@ -288,7 +316,7 @@ void JsonStreamingParser::endObject() {
}
}

void JsonStreamingParser::processEscapeCharacters(char c) {
void JsonStreamingParser::processEscapeCharacters(unsigned char c) {
if (c == '"') {
buffer[bufferPos] = '"';
increaseBufferPointer();
Expand Down Expand Up @@ -324,7 +352,7 @@ void JsonStreamingParser::processEscapeCharacters(char c) {
}
}

void JsonStreamingParser::processUnicodeCharacter(char c) {
void JsonStreamingParser::processUnicodeCharacter(unsigned char c) {
if (!isHexCharacter(c)) {
// throw new ParsingError($this->_line_number, $this->_char_number,
// "Expected hex character for escaped Unicode character. Unicode parsed: "
Expand Down Expand Up @@ -360,14 +388,14 @@ void JsonStreamingParser::processUnicodeCharacter(char c) {
}*/
}
}
boolean JsonStreamingParser::isHexCharacter(char c) {
boolean JsonStreamingParser::isHexCharacter(unsigned char c) {
  // Accepts the ASCII hexadecimal digits: 0-9, a-f and A-F.
  if ('0' <= c && c <= '9') {
    return true;
  }
  if ('a' <= c && c <= 'f') {
    return true;
  }
  return 'A' <= c && c <= 'F';
}

int JsonStreamingParser::getHexArrayAsDecimal(char hexArray[], int length) {
int JsonStreamingParser::getHexArrayAsDecimal(unsigned char hexArray[], int length) {
int result = 0;
for (int i = 0; i < length; i++) {
char current = hexArray[length - i - 1];
unsigned char current = hexArray[length - i - 1];
int value = 0;
if (current >= 'a' && current <= 'f') {
value = current - 'a' + 10;
Expand All @@ -381,7 +409,7 @@ int JsonStreamingParser::getHexArrayAsDecimal(char hexArray[], int length) {
return result;
}

boolean JsonStreamingParser::doesCharArrayContain(char myArray[], int length, char c) {
boolean JsonStreamingParser::doesCharArrayContain(unsigned char myArray[], int length, unsigned char c) {
for (int i = 0; i < length; i++) {
if (myArray[i] == c) {
return true;
Expand Down Expand Up @@ -417,10 +445,10 @@ void JsonStreamingParser::endNumber() {
state = STATE_AFTER_VALUE;
}

int JsonStreamingParser::convertDecimalBufferToInt(char myArray[], int length) {
int JsonStreamingParser::convertDecimalBufferToInt(unsigned char myArray[], int length) {
int result = 0;
for (int i = 0; i < length; i++) {
char current = myArray[length - i - 1];
unsigned char current = myArray[length - i - 1];
result += (current - '0') * 10;
}
return result;
Expand Down Expand Up @@ -490,7 +518,7 @@ void JsonStreamingParser::startString() {
state = STATE_IN_STRING;
}

void JsonStreamingParser::startNumber(char c) {
void JsonStreamingParser::startNumber(unsigned char c) {
state = STATE_IN_NUMBER;
buffer[bufferPos] = c;
increaseBufferPointer();
Expand All @@ -504,9 +532,9 @@ void JsonStreamingParser::endUnicodeCharacter(int codepoint) {
state = STATE_IN_STRING;
}

char JsonStreamingParser::convertCodepointToCharacter(int num) {
unsigned char JsonStreamingParser::convertCodepointToCharacter(int num) {
if (num <= 0x7F)
return (char) (num);
return (unsigned char) (num);
// if(num<=0x7FF) return (char)((num>>6)+192) + (char)((num&63)+128);
// if(num<=0xFFFF) return
// chr((num>>12)+224).chr(((num>>6)&63)+128).chr((num&63)+128);
Expand Down
26 changes: 15 additions & 11 deletions JsonStreamingParser.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ See more at http://blog.squix.ch and https://github.com/squix78/json-streaming-p
#define STATE_IN_NULL 11
#define STATE_AFTER_VALUE 12
#define STATE_UNICODE_SURROGATE 13
#define STATE_UNESCAPED_UTF8 14

#define STACK_OBJECT 0
#define STACK_ARRAY 1
Expand All @@ -67,6 +68,9 @@ class JsonStreamingParser {

char unicodeEscapeBuffer[10];
int unicodeEscapeBufferPos = 0;

char utf8Length = 0;
char utf8Pos = 0;

char unicodeBuffer[10];
int unicodeBufferPos = 0;
Expand All @@ -81,21 +85,21 @@ class JsonStreamingParser {

void endArray();

void startValue(char c);
void startValue(unsigned char c);

void startKey();

void processEscapeCharacters(char c);
void processEscapeCharacters(unsigned char c);

boolean isDigit(char c);
boolean isDigit(unsigned char c);

boolean isHexCharacter(char c);
boolean isHexCharacter(unsigned char c);

char convertCodepointToCharacter(int num);
unsigned char convertCodepointToCharacter(int num);

void endUnicodeCharacter(int codepoint);

void startNumber(char c);
void startNumber(unsigned char c);

void startString();

Expand All @@ -111,25 +115,25 @@ class JsonStreamingParser {

void endDocument();

int convertDecimalBufferToInt(char myArray[], int length);
int convertDecimalBufferToInt(unsigned char myArray[], int length);

void endNumber();

void endUnicodeSurrogateInterstitial();

boolean doesCharArrayContain(char myArray[], int length, char c);
boolean doesCharArrayContain(unsigned char myArray[], int length, unsigned char c);

int getHexArrayAsDecimal(char hexArray[], int length);
int getHexArrayAsDecimal(unsigned char hexArray[], int length);

void processUnicodeCharacter(char c);
void processUnicodeCharacter(unsigned char c);

void endObject();



public:
JsonStreamingParser();
void parse(char c);
void parse(unsigned char c);
void setListener(JsonListener* listener);
void reset();
};