Skip to content

Commit

Permalink
revert class refactor
Browse files Browse the repository at this point in the history
  • Loading branch information
gurgunday committed Mar 16, 2024
1 parent 9e986cf commit 8b31e4d
Showing 1 changed file with 159 additions and 164 deletions.
323 changes: 159 additions & 164 deletions deps/streamsearch/sbmh.js
Original file line number Diff line number Diff line change
Expand Up @@ -27,206 +27,201 @@
* by Hongli Lai at: https://github.com/FooBarWidget/boyer-moore-horspool
*/

const { EventEmitter } = require('node:events')
const EventEmitter = require('node:events').EventEmitter
const inherits = require('node:util').inherits

class SBMH extends EventEmitter {
constructor (needle) {
super()

if (typeof needle === 'string') {
needle = Buffer.from(needle)
}
function SBMH (needle) {
if (typeof needle === 'string') {
needle = Buffer.from(needle)
}

if (!Buffer.isBuffer(needle)) {
throw new TypeError('The needle has to be a String or a Buffer.')
}
if (!Buffer.isBuffer(needle)) {
throw new TypeError('The needle has to be a String or a Buffer.')
}

const needleLength = needle.length
const needleLength = needle.length

if (needleLength === 0) {
throw new Error('The needle cannot be an empty String/Buffer.')
}
if (needleLength === 0) {
throw new Error('The needle cannot be an empty String/Buffer.')
}

if (needleLength > 256) {
throw new Error('The needle cannot have a length bigger than 256.')
}
if (needleLength > 256) {
throw new Error('The needle cannot have a length bigger than 256.')
}

this.maxMatches = Infinity
this.matches = 0
this.maxMatches = Infinity
this.matches = 0

this._needle = needle
this._bufpos = 0
this._lookbehind_size = 0
this._lookbehind = Buffer.alloc(needleLength)
this._occ = new Array(256).fill(needleLength) // Initialize occurrence table.
this._occ = new Array(256)
.fill(needleLength) // Initialize occurrence table.
this._lookbehind_size = 0
this._needle = needle
this._bufpos = 0

// Populate occurrence table with analysis of the needle, ignoring last letter.
for (var i = 0; i < needleLength - 1; ++i) { // eslint-disable-line no-var
this._occ[needle[i]] = needleLength - 1 - i
}
}
this._lookbehind = Buffer.alloc(needleLength)

reset () {
this.matches = 0
this._bufpos = 0
this._lookbehind_size = 0
// Populate occurrence table with analysis of the needle,
// ignoring last letter.
for (var i = 0; i < needleLength - 1; ++i) { // eslint-disable-line no-var
this._occ[needle[i]] = needleLength - 1 - i
}
}
inherits(SBMH, EventEmitter)

push (chunk, pos = 0) {
if (!Buffer.isBuffer(chunk)) {
chunk = Buffer.from(chunk, 'binary')
}

this._bufpos = pos
/**
 * Puts the searcher back into its initial scanning state.
 *
 * Clears the match counter, the buffer position and the number of bytes
 * considered valid in the lookbehind buffer. The needle, the occurrence
 * table and `maxMatches` are not touched.
 */
SBMH.prototype.reset = function () {
  this.matches = 0
  this._bufpos = 0
  this._lookbehind_size = 0
}

const chlen = chunk.length
let r
while (r !== chlen && this.matches < this.maxMatches) { r = this._sbmh_feed(chunk) }
return r
/**
 * Feeds one chunk of haystack data to the searcher.
 *
 * @param {Buffer|string} chunk - Data to scan. Anything that is not a Buffer
 *   is converted with `Buffer.from(chunk, 'binary')`.
 * @param {number} [pos] - Starting buffer position; any falsy value is
 *   treated as 0.
 * @returns {number|undefined} The last value returned by `_sbmh_feed`, or
 *   `undefined` when `matches` had already reached `maxMatches` and nothing
 *   was fed.
 */
SBMH.prototype.push = function (chunk, pos) {
  const haystack = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk, 'binary')
  const total = haystack.length

  this._bufpos = pos || 0

  // Keep feeding the same chunk until a feed reports the whole chunk as
  // processed or the match budget is used up.
  let consumed
  for (;;) {
    if (consumed === total) { break }
    if (!(this.matches < this.maxMatches)) { break }
    consumed = this._sbmh_feed(haystack)
  }

  return consumed
}

_sbmh_feed (data) {
const len = data.length
const needle = this._needle
const needleLength = needle.length
const lastNeedleChar = needle[needleLength - 1]
SBMH.prototype._sbmh_feed = function (data) {
const len = data.length
const needle = this._needle
const needleLength = needle.length
const lastNeedleChar = needle[needleLength - 1]

// Positive: points to a position in `data`
// pos == 3 points to data[3]
// Negative: points to a position in the lookbehind buffer
// pos == -2 points to lookbehind[lookbehind_size - 2]
let pos = -this._lookbehind_size
let ch

if (pos < 0) {
// Lookbehind buffer is not empty. Perform Boyer-Moore-Horspool
// search with character lookup code that considers both the
// lookbehind buffer and the current round's haystack data.
//
// Loop until
// there is a match.
// or until
// we've moved past the position that requires the
// lookbehind buffer. In this case we switch to the
// optimized loop.
// or until
// the character to look at lies outside the haystack.
while (pos < 0 && pos <= len - needleLength) {
ch = this._sbmh_lookup_char(data, pos + needleLength - 1)

if (
ch === lastNeedleChar &&
this._sbmh_memcmp(data, pos, needleLength - 1)
) {
this._lookbehind_size = 0
++this.matches
this.emit('info', true)

// Positive: points to a position in `data`
// pos == 3 points to data[3]
// Negative: points to a position in the lookbehind buffer
// pos == -2 points to lookbehind[lookbehind_size - 2]
let pos = -this._lookbehind_size
let ch
return (this._bufpos = pos + needleLength)
}
pos += this._occ[ch]
}

// No match.

if (pos < 0) {
// Lookbehind buffer is not empty. Perform Boyer-Moore-Horspool
// search with character lookup code that considers both the
// lookbehind buffer and the current round's haystack data.
//
// Loop until
// there is a match.
// or until
// we've moved past the position that requires the
// lookbehind buffer. In this case we switch to the
// optimized loop.
// There is too little data for Boyer-Moore-Horspool to run,
// so let's use a different algorithm to skip as much as
// we can.
// Forward pos until
// the trailing part of lookbehind + data
// looks like the beginning of the needle
// or until
// the character to look at lies outside the haystack.
while (pos < 0 && pos <= len - needleLength) {
ch = this._sbmh_lookup_char(data, pos + needleLength - 1)

if (
ch === lastNeedleChar &&
this._sbmh_memcmp(data, pos, needleLength - 1)
) {
this._lookbehind_size = 0
++this.matches
this.emit('info', true)

return (this._bufpos = pos + needleLength)
}

pos += this._occ[ch]
}
// pos == 0
while (pos < 0 && !this._sbmh_memcmp(data, pos, len - pos)) { ++pos }
}

// No match.

if (pos < 0) {
// There is too little data for Boyer-Moore-Horspool to run,
// so let's use a different algorithm to skip as much as
// we can.
// Forward pos until
// the trailing part of lookbehind + data
// looks like the beginning of the needle
// or until
// pos == 0
while (pos < 0 && !this._sbmh_memcmp(data, pos, len - pos)) { ++pos }
if (pos >= 0) {
// Discard lookbehind buffer.
this.emit('info', false, this._lookbehind, 0, this._lookbehind_size)
this._lookbehind_size = 0
} else {
// Cut off part of the lookbehind buffer that has
// been processed and append the entire haystack
// into it.
const bytesToCutOff = this._lookbehind_size + pos
if (bytesToCutOff > 0) {
// The cut off data is guaranteed not to contain the needle.
this.emit('info', false, this._lookbehind, 0, bytesToCutOff)
}

if (pos >= 0) {
// Discard lookbehind buffer.
this.emit('info', false, this._lookbehind, 0, this._lookbehind_size)
this._lookbehind_size = 0
} else {
// Cut off part of the lookbehind buffer that has
// been processed and append the entire haystack
// into it.
const bytesToCutOff = this._lookbehind_size + pos

if (bytesToCutOff > 0) {
// The cut off data is guaranteed not to contain the needle.
this.emit('info', false, this._lookbehind, 0, bytesToCutOff)
}

this._lookbehind_size -= bytesToCutOff
this._lookbehind.copy(this._lookbehind, 0, bytesToCutOff, this._lookbehind_size)
this._lookbehind.copy(this._lookbehind, 0, bytesToCutOff,
this._lookbehind_size - bytesToCutOff)
this._lookbehind_size -= bytesToCutOff

data.copy(this._lookbehind, this._lookbehind_size)
this._lookbehind_size += len
data.copy(this._lookbehind, this._lookbehind_size)
this._lookbehind_size += len

this._bufpos = len
return len
}
this._bufpos = len
return len
}
}

// Lookbehind buffer is now empty. We only need to check if the
// needle is in the haystack.
pos = data.indexOf(needle, pos + ((pos >= 0) * this._bufpos))

if (pos !== -1) {
++this.matches

if (pos > 0) { this.emit('info', true, data, this._bufpos, pos) } else { this.emit('info', true) }

return (this._bufpos = pos + needleLength)
}
// Lookbehind buffer is now empty. We only need to check if the
// needle is in the haystack.
pos = data.indexOf(needle, pos + ((pos >= 0) * this._bufpos))

pos = len - needleLength
if (pos !== -1) {
++this.matches
if (pos > 0) { this.emit('info', true, data, this._bufpos, pos) } else { this.emit('info', true) }
return (this._bufpos = pos + needleLength)
}

// There was no match. If there's trailing haystack data that we cannot
// match yet using the Boyer-Moore-Horspool algorithm (because the trailing
// data is less than the needle size) then match using a modified
// algorithm that starts matching from the beginning instead of the end.
// Whatever trailing data is left after running this algorithm is added to
// the lookbehind buffer.
while (
pos < len &&
pos = len - needleLength

// There was no match. If there's trailing haystack data that we cannot
// match yet using the Boyer-Moore-Horspool algorithm (because the trailing
// data is less than the needle size) then match using a modified
// algorithm that starts matching from the beginning instead of the end.
// Whatever trailing data is left after running this algorithm is added to
// the lookbehind buffer.
while (
pos < len &&
(
data[pos] !== needle[0] ||
(
data[pos] !== needle[0] ||
Buffer.compare(
(Buffer.compare(
data.subarray(pos, pos + len - pos),
needle.subarray(0, len - pos)
) !== 0
) !== 0)
)
) {
++pos
}

if (pos < len) {
data.copy(this._lookbehind, 0, pos, pos + (len - pos))
this._lookbehind_size = len - pos
}

// Everything until pos is guaranteed not to contain needle data.
if (pos > 0) { this.emit('info', false, data, this._bufpos, pos < len ? pos : len) }

this._bufpos = len

return len
)
) {
++pos
}

_sbmh_lookup_char (data, pos) {
return pos < 0
? this._lookbehind[this._lookbehind_size + pos]
: data[pos]
if (pos < len) {
data.copy(this._lookbehind, 0, pos, pos + (len - pos))
this._lookbehind_size = len - pos
}

_sbmh_memcmp (data, pos, len) {
for (var i = 0; i < len; ++i) { // eslint-disable-line no-var
if (this._sbmh_lookup_char(data, pos + i) !== this._needle[i]) { return false }
}
// Everything until pos is guaranteed not to contain needle data.
if (pos > 0) { this.emit('info', false, data, this._bufpos, pos < len ? pos : len) }

this._bufpos = len
return len
}

/**
 * Reads one byte from the combined "lookbehind + current chunk" view.
 *
 * @param {Buffer} data - The current chunk.
 * @param {number} pos - A non-negative value indexes `data` directly; a
 *   negative value indexes the lookbehind buffer at `_lookbehind_size + pos`.
 * @returns {number|undefined} The byte at that position, or `undefined` when
 *   the index falls outside the buffer being read.
 */
SBMH.prototype._sbmh_lookup_char = function (data, pos) {
  if (pos < 0) {
    const lookbehindIndex = this._lookbehind_size + pos
    return this._lookbehind[lookbehindIndex]
  }

  return data[pos]
}

return true
/**
 * Checks whether the first `len` bytes of the needle equal the bytes found
 * at `pos .. pos + len - 1` in the combined "lookbehind + current chunk" view.
 *
 * @param {Buffer} data - The current chunk.
 * @param {number} pos - Start position, resolved byte by byte through
 *   `_sbmh_lookup_char`.
 * @param {number} len - Number of bytes to compare.
 * @returns {boolean} `true` if every compared byte matches (also when `len`
 *   is not positive, since nothing is compared), otherwise `false`.
 */
SBMH.prototype._sbmh_memcmp = function (data, pos, len) {
  let offset = 0

  while (offset < len) {
    const actual = this._sbmh_lookup_char(data, pos + offset)
    if (actual !== this._needle[offset]) {
      return false
    }
    ++offset
  }

  return true
}

module.exports = SBMH

0 comments on commit 8b31e4d

Please sign in to comment.