From a8530d6da51d2dd7d13802834a1863ea53c269a6 Mon Sep 17 00:00:00 2001 From: Thomas Muldowney Date: Sun, 12 Jul 2015 15:33:32 -0600 Subject: [PATCH] Restart! --- Makefile | 2 + medium.json | 88 +++++++++++++++ src/js0n-cache.cpp | 176 +++++++++++++++++++++++++++++ src/js0n.cpp | 269 +++++++++++++++++++++++++++++++++++++++++++++ src/js0n.h | 46 ++++++++ test.js | 58 ++++++++++ wscript | 28 +++++ 7 files changed, 667 insertions(+) create mode 100644 Makefile create mode 100644 medium.json create mode 100644 src/js0n-cache.cpp create mode 100644 src/js0n.cpp create mode 100644 src/js0n.h create mode 100644 test.js create mode 100644 wscript diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..1c218ef --- /dev/null +++ b/Makefile @@ -0,0 +1,2 @@ +all: + node-waf build diff --git a/medium.json b/medium.json new file mode 100644 index 0000000..b059492 --- /dev/null +++ b/medium.json @@ -0,0 +1,88 @@ +{"web-app": { + "servlet": [ + { + "servlet-name": "cofaxCDS", + "servlet-class": "org.cofax.cds.CDSServlet", + "init-param": { + "configGlossary:installationAt": "Philadelphia, PA", + "configGlossary:adminEmail": "ksm@pobox.com", + "configGlossary:poweredBy": "Cofax", + "configGlossary:poweredByIcon": "/images/cofax.gif", + "configGlossary:staticPath": "/content/static", + "templateProcessorClass": "org.cofax.WysiwygTemplate", + "templateLoaderClass": "org.cofax.FilesTemplateLoader", + "templatePath": "templates", + "templateOverridePath": "", + "defaultListTemplate": "listTemplate.htm", + "defaultFileTemplate": "articleTemplate.htm", + "useJSP": false, + "jspListTemplate": "listTemplate.jsp", + "jspFileTemplate": "articleTemplate.jsp", + "cachePackageTagsTrack": 200, + "cachePackageTagsStore": 200, + "cachePackageTagsRefresh": 60, + "cacheTemplatesTrack": 100, + "cacheTemplatesStore": 50, + "cacheTemplatesRefresh": 15, + "cachePagesTrack": 200, + "cachePagesStore": 100, + "cachePagesRefresh": 10, + "cachePagesDirtyRead": 10, + "searchEngineListTemplate": "forSearchEnginesList.htm", + "searchEngineFileTemplate": "forSearchEngines.htm", + "searchEngineRobotsDb": "WEB-INF/robots.db", + "useDataStore": true, + "dataStoreClass": "org.cofax.SqlDataStore", + "redirectionClass": "org.cofax.SqlRedirection", + "dataStoreName": "cofax", + "dataStoreDriver": "com.microsoft.jdbc.sqlserver.SQLServerDriver", + "dataStoreUrl": "jdbc:microsoft:sqlserver://LOCALHOST:1433;DatabaseName=goon", + "dataStoreUser": "sa", + "dataStorePassword": "dataStoreTestQuery", + "dataStoreTestQuery": "SET NOCOUNT ON;select test='test';", + "dataStoreLogFile": "/usr/local/tomcat/logs/datastore.log", + "dataStoreInitConns": 10, + "dataStoreMaxConns": 100, + "dataStoreConnUsageLimit": 100, + "dataStoreLogLevel": "debug", + "maxUrlLength": 500}}, + { + "servlet-name": "cofaxEmail", + "servlet-class": "org.cofax.cds.EmailServlet", + "init-param": { + "mailHost": "mail1", + "mailHostOverride": "mail2"}}, + { + "servlet-name": "cofaxAdmin", + "servlet-class": "org.cofax.cds.AdminServlet"}, + + { + "servlet-name": "fileServlet", + "servlet-class": "org.cofax.cds.FileServlet"}, + { + "servlet-name": "cofaxTools", + "servlet-class": "org.cofax.cms.CofaxToolsServlet", + "init-param": { + "templatePath": "toolstemplates/", + "log": 1, + "logLocation": "/usr/local/tomcat/logs/CofaxTools.log", + "logMaxSize": "", + "dataLog": 1, + "dataLogLocation": "/usr/local/tomcat/logs/dataLog.log", + "dataLogMaxSize": "", + "removePageCache": "/content/admin/remove?cache=pages&id=", + "removeTemplateCache": "/content/admin/remove?cache=templates&id=", + "fileTransferFolder": "/usr/local/tomcat/webapps/content/fileTransferFolder", + "lookInContext": 1, + "adminGroupID": 4, + "betaServer": true}}], + "servlet-mapping": { + "cofaxCDS": "/", + "cofaxEmail": "/cofaxutil/aemail/*", + "cofaxAdmin": "/admin/*", + "fileServlet": "/static/*", + "cofaxTools": "/tools/*"}, + + "taglib": { + "taglib-uri": "cofax.tld", + "taglib-location": "/WEB-INF/tlds/cofax.tld"}}} diff --git a/src/js0n-cache.cpp b/src/js0n-cache.cpp new file mode 100644 index 0000000..7720903 --- /dev/null +++ b/src/js0n-cache.cpp @@ -0,0 +1,176 @@ +#include +#include + +#include "js0n.h" + +using namespace v8; +using namespace node; + +namespace js0n { namespace cache { + +static Persistent object_template_; + +class object : public ObjectWrap +{ +public: + object(cached_object obj) : cache_(obj) + { + } + + ~object() + { + //printf("Bye bye object!\n"); + } + + static Handle WrapResults(cached_object cache) + { + HandleScope handle_scope; + + + if (object_template_.IsEmpty()) { + Handle raw_template = PrepareTemplate(); + object_template_ = Persistent::New(raw_template); + } + + Handle new_object = object_template_->NewInstance(); + //new_object->SetInternalField(1, External::New(results)); + + //printf("Here: %x\n", buffer); + + object* obj = new object(cache); + obj->Wrap(new_object); + // TODO: Lookup the args id and wrap it + + //printf("Wrapped and returning\n"); + + return handle_scope.Close(new_object); + } + +private: + cached_object cache_; + +private: + static Handle ObjectGet(Local name, const AccessorInfo& info) + { + object* obj = ObjectWrap::Unwrap(info.Holder()); + + // Fetch the map wrapped by this object. + /* + Handle field = Handle::Cast(info.Holder()->GetInternalField(1)); + void* ptr = field->Value(); + js0n::result_vector* members = reinterpret_cast(ptr); + */ + //printf("Going to call find_key\n"); + js0n::js_value* value = js0n::find_key(*(String::Utf8Value(name)), &obj->cache_); + //printf("Found type(%u) length(%u) start(%u) depth(%u)\n", value->type, value->length, value->start, value->depth); + + switch (value->type) { + case JST_string: + return String::New(obj->cache_.buffer + value->start, value->length); + case JST_number: + char tmpBuf[value->length]; + strncpy(tmpBuf, obj->cache_.buffer + value->start, value->length); + // TODO: determine the if this is a float or an integer and return correctly + return Integer::New(atoi(tmpBuf)); + case JST_null: + return Null(); + case JST_boolean: + // This was validated by the parser already so just check the first letter + return Boolean::New((obj->cache_.buffer[value->start] == 't') ? true : false); + case JST_array: + // TODO: make it return a warpped array + return Undefined(); + case JST_object: + { + // TODO: return a new wrapped object + Local new_object = object_template_->NewInstance(); + //new_object->SetInternalField(1, External::New(results)); + + //printf("Here: %x\n", buffer); + cached_object subCache; + // -1 +sizeof(js_value) to shift into the object actually + subCache.count = obj->cache_.count - ((value - obj->cache_.values) / sizeof(js_value)) - 1; + subCache.values = &value[1]; + subCache.buffer_length = obj->cache_.buffer_length; + subCache.buffer = obj->cache_.buffer; + + object* obj = new object(subCache); + obj->Wrap(new_object); + return new_object; + } + default: + return Undefined(); + } + //js0n::js_value member = js0n::find_key(*members, *String::Utf8Value(name)); + //printf("Out of find_key\n"); + //printf("member type(%u) length(%u) start(%u) depth(%u)\n", member.type, member.length, member.start, member.depth); + + /* // Convert the JavaScript string to a std::string. */ + /* string key = ObjectToString(name); */ + + /* // Look up the value if it exists using the standard STL ideom. */ + /* map::iterator iter = obj->find(key); */ + + /* // If the key is not present return an empty handle as signal */ + /* if (iter == obj->end()) return Handle(); */ + + /* // Otherwise fetch the value and wrap it in a JavaScript string */ + /* const string& value = (*iter).second; */ + } + + static Handle ObjectSet(Local name, Local value_obj, const AccessorInfo& info) + { + // TODO: Is this the best thing to return? + return Undefined(); + } + + static Handle PrepareTemplate() + { + HandleScope handle_scope; + + Handle result = ObjectTemplate::New(); + result->SetInternalFieldCount(2); + result->SetNamedPropertyHandler(ObjectGet, ObjectSet); + + return handle_scope.Close(result); + } +}; + + + + +static Handle ParseObject(const Arguments& args) +{ + HandleScope handle_scope; + //printf("Args: %d\n", args.Length()); + if (args.Length() < 1 || !(args[0]->IsString())) { + return ThrowException(Exception::TypeError(String::New("The first argument to parse must be a string."))); + } + Local raw = Local::Cast(args[0]); + String::Utf8Value json(raw); + js0n::result_vector* results = new js0n::result_vector; + char* buffer = (char*)malloc(json.length()); + memcpy(buffer, *json, json.length()); + //printf("Buffer: (%d) %.*s\n", json.length(), json.length(), buffer); + if (!js0n::parse(buffer, json.length(), *results)) { + // TODO: Throw an exception + delete results; + return handle_scope.Close(Undefined()); + } + + cached_object cache; + // Here we shift into the object + cache.count = results->size() - 1; + cache.values = &((*results)[1]); + cache.buffer_length = json.length(); + cache.buffer = buffer; + return handle_scope.Close(object::WrapResults(cache)); +} + +}; }; // namespace js0n::cache + +extern "C" void init(Handle target) +{ + HandleScope scope; + NODE_SET_METHOD(target, "parse", js0n::cache::ParseObject); +} diff --git a/src/js0n.cpp b/src/js0n.cpp new file mode 100644 index 0000000..cee8483 --- /dev/null +++ b/src/js0n.cpp @@ -0,0 +1,269 @@ +#include "js0n.h" + +namespace js0n { + +void reset_value(js_value& value) +{ + value.type = JST_null; + value.depth = 0; + value.start = 0; + value.length = 0; +} + +static void fillmem(void* dest[255], void* address, int start, int end) +{ + for (int i = start; i <= end; ++i) { + dest[i] = address; + } +} + +static void* gostruct[255]; +static void* gobare[255]; +static void* gostring[255]; +static void* goutf8_continue[255]; +static void* goesc[255]; + +// Originally by jeremie miller - 2010 +// public domain; +// contributions/improvements welcome via github + + +bool parse(const char *js, unsigned int len, result_vector& results) +{ + const char *cur, *end; + int depth=0; + int utf8_remain=0; + js_value cur_value; + + static bool init_done = false; + + if (!init_done) { + // Setup gostruct + fillmem(gostruct, &&l_bad, 0, 255); + gostruct['\t'] = &&l_loop; + gostruct[' '] = &&l_loop; + gostruct['\r'] = &&l_loop; + gostruct['\n'] = &&l_loop; + gostruct['"'] = &&l_qup; + gostruct[':'] = &&l_loop; + gostruct[','] = &&l_loop; + gostruct['['] = &&l_arr_up; + gostruct[']'] = &&l_arr_down; // tracking [] and {} individually would allow fuller validation but is really messy + gostruct['{'] = &&l_obj_up; + gostruct['}'] = &&l_obj_down; + gostruct['-'] = &&l_bare; + //gostruct[48 ... 57] = &&l_bare, // 0-9 + fillmem(gostruct, &&l_bare, 48, 57); + gostruct['t'] = &&l_bare; + gostruct['f'] = &&l_bare; + gostruct['n'] = &&l_bare; // true, false, null + + // gobare + fillmem(gobare, &&l_bad, 0, 255); + //[32 ... 126] = &&l_loop, // could be more pedantic/validation-checking + fillmem(gobare, &&l_loop, 32, 126); + gobare['\t'] = &&l_unbare; + gobare[' '] = &&l_unbare; + gobare['\r'] = &&l_unbare; + gobare['\n'] = &&l_unbare; + gobare[','] = &&l_unbare; + gobare[']'] = &&l_unbare; + gobare['}'] = &&l_unbare; + + // gostring + fillmem(gostring, &&l_bad, 0, 255); + // gostring[32 ... 126] = &&l_loop; + fillmem(gostring, &&l_loop, 32, 126); + gostring['\\'] = &&l_esc; + gostring['"'] = &&l_qdown; + //gostring[192 ... 223] = &&l_utf8_2; + fillmem(gostring, &&l_utf8_2, 192, 223); + //[224 ... 239] = &&l_utf8_3, + fillmem(gostring, &&l_utf8_3, 224, 239); + //[240 ... 247] = &&l_utf8_4, + fillmem(gostring, &&l_utf8_4, 240, 247); + + + fillmem(goutf8_continue, &&l_bad, 0, 255); + //[128 ... 191] = &&l_utf_continue, + fillmem(goutf8_continue, &&l_utf_continue, 128, 191); + + // go escape it! + fillmem(goesc, &&l_bad, 0, 255); + goesc['"'] = &&l_unesc; + goesc['\\'] = &&l_unesc; + goesc['/'] = &&l_unesc; + goesc['b'] = &&l_unesc; + goesc['f'] = &&l_unesc; + goesc['n'] = &&l_unesc; + goesc['r'] = &&l_unesc; + goesc['t'] = &&l_unesc; + goesc['u'] = &&l_unesc; + + init_done = true; + } + + static void **go = gostruct; + + //full_obj = malloc(sizeof(struct js_value)); + //cur_value = full_obj; + for(cur=js,end=js+len; cur0 for incomplete data + + l_bad: + return 1; + + l_arr_up: + //printf("arr up at %d\n", ((cur) - js)); + cur_value.type = JST_array; + cur_value.start = cur - js; + cur_value.depth = depth; + cur_value.length = 0; + results.push_back(cur_value); + //PUSH(0); + ++depth; + goto l_loop; + + l_arr_down: + //printf("arr down at %d\n", (cur) - (js + *(out-1)) + 1); + --depth; + //CAP(0); + goto l_loop; + + l_obj_up: + cur_value.type = JST_object; + cur_value.start = cur - js; + cur_value.depth = depth; + cur_value.length = 0; + results.push_back(cur_value); + //printf("obj up at %d\n", ((cur) - js)); + //PUSH(0); + ++depth; + goto l_loop; + + l_obj_down: + //printf("obj down at %d\n", (cur) - (js + *(out-1)) + 1); + --depth; + //CAP(0); + goto l_loop; + + l_qup: + //printf("qup\n"); + cur_value.type = JST_string; + cur_value.start = (cur - js) + 1; + cur_value.depth = depth; + //PUSH(1); + go=gostring; + goto l_loop; + + l_qdown: + //printf("qdown\n"); + cur_value.length = (cur - js) - cur_value.start; + results.push_back(cur_value); + //CAP(-1); + go=gostruct; + goto l_loop; + + l_esc: + //printf("esc\n"); + go = goesc; + goto l_loop; + + l_unesc: + //printf("unesc\n"); + go = gostring; + goto l_loop; + + l_bare: + //printf("bare\n"); + switch(*cur) { + case 'n': + cur_value.type = JST_null; + break; + case 't': + case 'f': + cur_value.type = JST_boolean; + break; + default: + cur_value.type = JST_number; + }; + cur_value.start = (cur - js); + cur_value.depth = depth; + //PUSH(0); + go = gobare; + goto l_loop; + + l_unbare: + //printf("unbare\n"); + cur_value.length = ((cur - js) - cur_value.start); + results.push_back(cur_value); + //CAP(-1); + go = gostruct; + goto *go[*cur]; + + l_utf8_2: + go = goutf8_continue; + utf8_remain = 1; + goto l_loop; + + l_utf8_3: + go = goutf8_continue; + utf8_remain = 2; + goto l_loop; + + l_utf8_4: + go = goutf8_continue; + utf8_remain = 3; + goto l_loop; + + l_utf_continue: + if (!--utf8_remain) + go=gostring; + goto l_loop; + +} + +/// cache->values[0] must be at the first child of the correct depth to iterate +js_value* find_key(const char* key, cached_object* cache) +{ + //printf("Finding a key: %s\n", key); + int keylen = strlen(key); + + // TODO: Needs to watch depth! + // We assume that values[0] is already set to the correct depth here + int curDepth = cache->values[0].depth; + for (int i = 0; i < cache->count; i += 2) { + const js_value& cur(cache->values[i]); + //printf("i(%d) d(%d) Checking: %.*s\n", i, cur.depth, cur.length, cache->buffer + cur.start); + if (cur.type == JST_string && cur.length == keylen && memcmp(key, cache->buffer + cur.start, keylen) == 0) { + return &cache->values[i+1]; + } + const js_value* next = &cache->values[i + 1]; + // On an object or array iterate by 1 until we our out of that depth + if (next->type == JST_array || next->type == JST_object) { + //printf("Skipping an array or object starting at %d\n", i); + do { + next = &cache->values[++i + 1]; + //printf("Skipping curDepth(%d) i(%d) t(%d) d(%d): %.*s\n", curDepth, i, next->type, next->depth, next->length, cache->buffer + next->start); + } while (next->depth > curDepth && i < cache->count); + //printf("Back at depth at i(%d) d(%d): %.*s\n", i, next->depth, next->length, cache->buffer + next->start); + // We have to go back one to be at what should be current because we advance one at the next iteration + --i; + } + } + + return &invalid_js_value; +} + +}; // namespace js0n diff --git a/src/js0n.h b/src/js0n.h new file mode 100644 index 0000000..a0efc02 --- /dev/null +++ b/src/js0n.h @@ -0,0 +1,46 @@ +#ifndef INCL_JS0N_H +#define INCL_JS0N_H + +#include + +namespace js0n { + +typedef enum { + JST_null, + JST_boolean, + JST_number, + JST_string, + JST_array, + JST_object +} js_type; + +typedef struct { + js_type type; + unsigned int depth; + unsigned int start; + unsigned int length; +} js_value; + +static js_value invalid_js_value = {JST_null, 0, 0, 0}; + +typedef struct { + size_t count; + js_value* values; + size_t buffer_length; + const char* buffer; +} cached_object; + +typedef std::vector result_vector; + +typedef struct { + result_vector results; + const char* buffer; +} js0n_buffer; + +bool parse(const char *js, unsigned int len, result_vector& results); + +js_value* find_key(const char* key, cached_object* cache); + +}; // namespace js0n + +#endif // INCL_JS0N_H diff --git a/test.js b/test.js new file mode 100644 index 0000000..705edd4 --- /dev/null +++ b/test.js @@ -0,0 +1,58 @@ +var cache = require("./js0n-cache"); +var fs = require("fs"); + +var doSubKeyTest = false; + +var data = fs.readFileSync("medium.json").toString("utf8"); + + +var startTime = Date.now(); +for (var i = 0; i < 1000; ++i) { + var o = JSON.parse(data); + if (o === undefined) { + console.log("ZOMG"); + } + if (doSubKeyTest && o["web-app"]["servlet-mapping"]["fileServlet"] != "/static/*") { + console.log("ERROR"); + } +} +console.log("JSON took " + (Date.now() - startTime) + " to run 1000 times"); + + +var startTime = Date.now(); +for (var i = 0; i < 1000; ++i) { + var o = cache.parse(data); + if (o === undefined) { + console.log("ZOMG"); + } + if (doSubKeyTest && o["web-app"]["servlet-mapping"]["fileServlet"] != "/static/*") { + console.log("ERROR"); + } +} +console.log("js0n took " + (Date.now() - startTime) + " to run 1000 times"); + + + + +/* + +var jsonStr = '{"test":"strings", "numVal":1234, "someBool":true, "yuck":null, "subObj":{"subValue":1, "array":[1, 2, 3, 4, 5, 6, 5,5,5,5,5,5,5,5], "lastKey":"wrong"}, "lastKey":"last!"}'; +console.log("Json length: " + jsonStr.length); +var o = cache.parse(jsonStr); + +function checkKey(key) { + var val = o[key]; + console.log("-----------------------------------------------------------------------------------"); + console.log(" Key: " + key); + console.log("Value: " + val); + console.log(" Type: " + typeof(val)); + console.log("==================================================================================="); +} + +checkKey("someBool"); +checkKey("test"); +checkKey("numVal"); +checkKey("yuck"); +console.log("subValue should be 1: " + o.subObj.subValue); +checkKey("lastKey"); +*/ diff --git a/wscript b/wscript new file mode 100644 index 0000000..c4f2c77 --- /dev/null +++ b/wscript @@ -0,0 +1,28 @@ +import Options +from os import unlink, symlink, popen +from os.path import exists + +srcdir = "." +blddir = "build" +VERSION = "0.0.1" + +def set_options(opt): + opt.tool_options("compiler_cxx") + +def configure(conf): + conf.check_tool("compiler_cxx") + conf.check_tool("node_addon") + +def build(bld): + obj = bld.new_task_gen("cxx", "shlib", "node_addon") + obj.target = "js0n-cache" + obj.source = ["src/js0n-cache.cpp", "src/js0n.cpp"] + obj.cxxflags = ["-D_FILE_OFFSET_BITS=64", "-D_LARGEFILE_SOURCE"] + +def shutdown(): + if Options.commands['clean']: + if exists('js0n-cache.node'): unlink('js0n-cache.node') + else: + if exists('build/default/js0n-cache.node') and not exists('js0n-cache.node'): + symlink('build/default/js0n-cache.node', 'js0n-cache.node') +