From 592b3e7e99ba894edbb562f17e03800f83ad320a Mon Sep 17 00:00:00 2001 From: uzlopak Date: Thu, 27 Jul 2023 11:16:09 +0200 Subject: [PATCH] add optimizer v2 --- index.js | 53 ++++++++--------- lib/optimize.js | 89 +++++++++++++++++++++++++++++ lib/serializer.js | 128 ++++++++++++++++++++---------------------- test/optimize.test.js | 113 +++++++++++++++++++++++++++++++++++++ 4 files changed, 291 insertions(+), 92 deletions(-) create mode 100644 lib/optimize.js create mode 100644 test/optimize.test.js diff --git a/index.js b/index.js index a2d07dcf..dca0fe9d 100644 --- a/index.js +++ b/index.js @@ -11,6 +11,7 @@ const Serializer = require('./lib/serializer') const Validator = require('./lib/validator') const RefResolver = require('./lib/ref-resolver') const Location = require('./lib/location') +const optimize = require('./lib/optimize') let largeArraySize = 2e4 let largeArrayMechanism = 'default' @@ -27,6 +28,18 @@ const validLargeArrayMechanisms = [ 'json-stringify' ] +const serializerFns = ` +const { + asString, + asInteger, + asNumber, + asBoolean, + asDateTime, + asDate, + asTime, +} = serializer +` + const addComma = '!addComma && (addComma = true) || (json += \',\')' function isValidSchema (schema, name) { @@ -119,21 +132,8 @@ function build (schema, options) { const location = new Location(schema, context.rootSchemaId) const code = buildValue(context, location, 'input') - let contextFunctionCode - - // If we have only the invocation of the 'anonymous0' function, we would - // basically just wrap the 'anonymous0' function in the 'main' function and - // and the overhead of the intermediate variable 'json'. We can avoid the - // wrapping and the unnecessary memory allocation by aliasing 'anonymous0' to - // 'main' - if (code === 'json += anonymous0(input)') { - contextFunctionCode = ` - ${context.functions.join('\n')} - const main = anonymous0 - return main - ` - } else { - contextFunctionCode = ` + let contextFunctionCode = ` + ${serializerFns} function main (input) { let json = '' ${code} @@ -142,7 +142,8 @@ function build (schema, options) { ${context.functions.join('\n')} return main ` - } + + contextFunctionCode = optimize(contextFunctionCode) const serializer = new Serializer(options) const validator = new Validator(options.ajv) @@ -263,7 +264,7 @@ function buildExtraObjectPropertiesSerializer (context, location) { code += ` if (/${propertyKey.replace(/\\*\//g, '\\/')}/.test(key)) { ${addComma} - json += serializer.asString(key) + ':' + json += asString(key) + ':' ${buildValue(context, propertyLocation, 'value')} continue } @@ -278,13 +279,13 @@ function buildExtraObjectPropertiesSerializer (context, location) { if (additionalPropertiesSchema === true) { code += ` ${addComma} - json += serializer.asString(key) + ':' + JSON.stringify(value) + json += asString(key) + ':' + JSON.stringify(value) ` } else { const propertyLocation = location.getPropertyLocation('additionalProperties') code += ` ${addComma} - json += serializer.asString(key) + ':' + json += asString(key) + ':' ${buildValue(context, propertyLocation, 'value')} ` } @@ -743,21 +744,21 @@ function buildSingleTypeSerializer (context, location, input) { return 'json += \'null\'' case 'string': { if (schema.format === 'date-time') { - return `json += serializer.asDateTime(${input})` + return `json += asDateTime(${input})` } else if (schema.format === 'date') { - return `json += serializer.asDate(${input})` + return `json += asDate(${input})` } else if (schema.format === 'time') { - return `json += serializer.asTime(${input})` + return `json += asTime(${input})` } else { - return `json += serializer.asString(${input})` + return `json += asString(${input})` } } case 'integer': - return `json += serializer.asInteger(${input})` + return `json += asInteger(${input})` case 'number': - return `json += serializer.asNumber(${input})` + return `json += asNumber(${input})` case 'boolean': - return `json += serializer.asBoolean(${input})` + return `json += asBoolean(${input})` case 'object': { const funcName = buildObject(context, location) return `json += ${funcName}(${input})` diff --git a/lib/optimize.js b/lib/optimize.js new file mode 100644 index 00000000..fc7a98cf --- /dev/null +++ b/lib/optimize.js @@ -0,0 +1,89 @@ +'use strict' + +const returnFnRE = /^\s+return ([.a-zA-Z0-9]+)\(\w+\)$/ +const fnRE = /^\s*function\s+/ +const fnNameRE = /^\s+function ([a-zA-Z0-9_]+) \(input\) {$/ +const jsonConcatRE = /^\s*json\s*\+=/ +const letJsonRE = /^\s*let json =/ +const returnJsonRE = /^\s*return json\s*$/ +const returnEmptyStringRE = /^\s*return '' \+/ +const closingCurlyBracketRE = /^\s*}\s*$/ +/** + * @param {Array} code + * @returns {Array} + */ +function optimize (raw) { + const code = raw.split('\n') + /** + * @type {Array} + */ + const dedupedLevel1 = [] + + for (let i = 0; i < code.length; i++) { + if (i > 0 && jsonConcatRE.test(code[i]) && jsonConcatRE.test(code[i - 1])) { + const mergedEntry = code[i - 1] + ' +' + code[i].substring(code[i].indexOf('json +=') + 7) + dedupedLevel1.pop() // Remove the previous entry + dedupedLevel1.push(mergedEntry) + } else { + dedupedLevel1.push(code[i]) + } + } + + /** + * @type {Array} + */ + const dedupedLevel2 = [] + for (let i = 0; i < dedupedLevel1.length; i++) { + if (i > 0 && jsonConcatRE.test(dedupedLevel1[i]) && letJsonRE.test(dedupedLevel1[i - 1])) { + const mergedEntry = dedupedLevel1[i - 1] + ' +' + dedupedLevel1[i].substring(dedupedLevel1[i].indexOf('json +=') + 7) + dedupedLevel2.pop() // Remove the previous entry + dedupedLevel2.push(mergedEntry) + } else { + dedupedLevel2.push(dedupedLevel1[i]) + } + } + + /** + * @type {Array} + */ + const dedupedLevel3 = [] + for (let i = 0; i < dedupedLevel2.length; i++) { + if (i > 0 && returnJsonRE.test(dedupedLevel2[i]) && letJsonRE.test(dedupedLevel2[i - 1])) { + const mergedEntry = dedupedLevel2[i].slice(0, dedupedLevel2[i].indexOf('return') + 6) + dedupedLevel2[i - 1].substring(dedupedLevel2[i - 1].indexOf('let json =') + 10) + dedupedLevel3.pop() // Remove the previous entry + dedupedLevel3.push(mergedEntry) + } else { + dedupedLevel3.push(dedupedLevel2[i]) + } + } + + /** + * @type {Array} + */ + for (let i = 0; i < dedupedLevel3.length; i++) { + if (returnEmptyStringRE.test(dedupedLevel3[i])) { + dedupedLevel3[i] = dedupedLevel3[i].replace('return \'\' +', 'return') + } + } + + const dedupedLevel4 = [] + for (let i = 0; i < dedupedLevel3.length; i++) { + if ( + fnRE.test(dedupedLevel3[i]) && + returnFnRE.test(dedupedLevel3[i + 1]) && + closingCurlyBracketRE.test(dedupedLevel3[i + 2]) + ) { + const serializerFnName = dedupedLevel3[i + 1].match(returnFnRE)[1] + const fnName = dedupedLevel3[i].match(fnNameRE)[1] + const whitespace = dedupedLevel3[i].slice(0, dedupedLevel3[i].indexOf('f')) + dedupedLevel4[i] = `${whitespace}const ${fnName} = ${serializerFnName}` + i += 2 + } else { + dedupedLevel4.push(dedupedLevel3[i]) + } + } + + return dedupedLevel4.join('\n') +} + +module.exports = optimize diff --git a/lib/serializer.js b/lib/serializer.js index 922be357..f1a0155f 100644 --- a/lib/serializer.js +++ b/lib/serializer.js @@ -3,27 +3,55 @@ // eslint-disable-next-line const STR_ESCAPE = /[\u0000-\u001f\u0022\u005c\ud800-\udfff]|[\ud800-\udbff](?![\udc00-\udfff])|(?:[^\ud800-\udbff]|^)[\udc00-\udfff]/ -module.exports = class Serializer { - constructor (options) { - switch (options && options.rounding) { - case 'floor': - this.parseInteger = Math.floor - break - case 'ceil': - this.parseInteger = Math.ceil - break - case 'round': - this.parseInteger = Math.round - break - case 'trunc': - default: - this.parseInteger = Math.trunc - break +function asStringSmall (str) { + const len = str.length + let result = '' + let last = -1 + let point = 255 + + // eslint-disable-next-line + for (var i = 0; i < len; i++) { + point = str.charCodeAt(i) + if (point < 32) { + return JSON.stringify(str) + } + if (point >= 0xD800 && point <= 0xDFFF) { + // The current character is a surrogate. + return JSON.stringify(str) + } + if ( + point === 0x22 || // '"' + point === 0x5c // '\' + ) { + last === -1 && (last = 0) + result += str.slice(last, i) + '\\' + last = i } - this._options = options } - asInteger (i) { + return (last === -1 && ('"' + str + '"')) || ('"' + result + str.slice(last) + '"') +} + +function Serializer (options) { + let parseInteger + switch (options && options.rounding) { + case 'floor': + parseInteger = Math.floor + break + case 'ceil': + parseInteger = Math.ceil + break + case 'round': + parseInteger = Math.round + break + case 'trunc': + default: + parseInteger = Math.trunc + break + } + this._options = options + + this.asInteger = function asInteger (i) { if (typeof i === 'number') { if (i === Infinity || i === -Infinity) { throw new Error(`The value "${i}" cannot be converted to an integer.`) @@ -34,14 +62,14 @@ module.exports = class Serializer { if (Number.isNaN(i)) { throw new Error(`The value "${i}" cannot be converted to an integer.`) } - return this.parseInteger(i) + return '' + parseInteger(i) } else if (i === null) { return '0' } else if (typeof i === 'bigint') { return i.toString() } else { /* eslint no-undef: "off" */ - const integer = this.parseInteger(i) + const integer = parseInteger(i) if (Number.isFinite(integer)) { return '' + integer } else { @@ -50,7 +78,7 @@ module.exports = class Serializer { } } - asNumber (i) { + this.asNumber = function asNumber (i) { const num = Number(i) if (Number.isNaN(num)) { throw new Error(`The value "${i}" cannot be converted to a number.`) @@ -61,11 +89,11 @@ module.exports = class Serializer { } } - asBoolean (bool) { + this.asBoolean = function asBoolean (bool) { return bool && 'true' || 'false' // eslint-disable-line } - asDateTime (date) { + this.asDateTime = function asDateTime (date) { if (date === null) return '""' if (date instanceof Date) { return '"' + date.toISOString() + '"' @@ -76,7 +104,7 @@ module.exports = class Serializer { throw new Error(`The value "${date}" cannot be converted to a date-time.`) } - asDate (date) { + this.asDate = function asDate (date) { if (date === null) return '""' if (date instanceof Date) { return '"' + new Date(date.getTime() - (date.getTimezoneOffset() * 60000)).toISOString().slice(0, 10) + '"' @@ -87,7 +115,7 @@ module.exports = class Serializer { throw new Error(`The value "${date}" cannot be converted to a date.`) } - asTime (date) { + this.asTime = function asTime (date) { if (date === null) return '""' if (date instanceof Date) { return '"' + new Date(date.getTime() - (date.getTimezoneOffset() * 60000)).toISOString().slice(11, 19) + '"' @@ -98,7 +126,7 @@ module.exports = class Serializer { throw new Error(`The value "${date}" cannot be converted to a time.`) } - asString (str) { + this.asString = function asString (str) { if (typeof str !== 'string') { if (str === null) { return '""' @@ -114,7 +142,7 @@ module.exports = class Serializer { } if (str.length < 42) { - return this.asStringSmall(str) + return asStringSmall(str) } else if (STR_ESCAPE.test(str) === false) { return '"' + str + '"' } else { @@ -122,46 +150,14 @@ module.exports = class Serializer { } } - // magically escape strings for json - // relying on their charCodeAt - // everything below 32 needs JSON.stringify() - // every string that contain surrogate needs JSON.stringify() - // 34 and 92 happens all the time, so we - // have a fast case for them - asStringSmall (str) { - const len = str.length - let result = '' - let last = -1 - let point = 255 - - // eslint-disable-next-line - for (var i = 0; i < len; i++) { - point = str.charCodeAt(i) - if (point < 32) { - return JSON.stringify(str) - } - if (point >= 0xD800 && point <= 0xDFFF) { - // The current character is a surrogate. - return JSON.stringify(str) - } - if ( - point === 0x22 || // '"' - point === 0x5c // '\' - ) { - last === -1 && (last = 0) - result += str.slice(last, i) + '\\' - last = i - } - } - - return (last === -1 && ('"' + str + '"')) || ('"' + result + str.slice(last) + '"') - } - - getState () { + this.getState = function getState () { return this._options } +} - static restoreFromState (state) { - return new Serializer(state) - } +function restoreFromState (state) { + return new Serializer(state) } + +module.exports = Serializer +module.exports.restoreFromState = restoreFromState diff --git a/test/optimize.test.js b/test/optimize.test.js new file mode 100644 index 00000000..8651fa03 --- /dev/null +++ b/test/optimize.test.js @@ -0,0 +1,113 @@ +'use strict' + +const test = require('tap').test +const optimize = require('../lib/optimize') + +test('optimize consecutive "json +=" lines', (t) => { + t.plan(1) + + const unoptimized = ` + json += "A" + json += "B" + ` + + const optimized = optimize(unoptimized) + + t.equal(optimized, ` + json += "A" + "B" + `) +}) + +test('optimize consecutive "let json" and following "json +="', (t) => { + t.plan(1) + + const unoptimized = ` + let json = "A" + json += "B" + ` + + const optimized = optimize(unoptimized) + + t.equal(optimized, ` + let json = "A" + "B" + `) +}) + +test('optimize consecutive "let json" and following "return json"', (t) => { + t.plan(1) + + const unoptimized = ` + let json = "A" + return json + ` + + const optimized = optimize(unoptimized) + + t.equal(optimized, ` + return "A" + `) +}) + +test('optimize return \'\' + ...', (t) => { + t.plan(1) + + const unoptimized = ` + let json = '' + json += 'B' + return json + ` + + const optimized = optimize(unoptimized) + + t.equal(optimized, ` + return 'B' + `) +}) + +test('optimize function x (input) { return asX() } to const x = asX', (t) => { + t.plan(1) + + const unoptimized = ` + function main (input) { + return anonymous(input) + } + ` + + const optimized = optimize(unoptimized) + + t.equal(optimized, ` + const main = anonymous + `) +}) + +test('optimize function x (input) { return asX() } to const x = asX', (t) => { + t.plan(1) + + const unoptimized = ` + function main (input) { + return anonymous0(input) + } + ` + + const optimized = optimize(unoptimized) + + t.equal(optimized, ` + const main = anonymous0 + `) +}) + +test('optimize all cases at once', (t) => { + t.plan(1) + + const unoptimized = ` + let json = '' + json += "B" + return json + ` + + const optimized = optimize(unoptimized) + + t.equal(optimized, ` + return "B" + `) +})