forked from farzher/fuzzysort
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfuzzysort.js
562 lines (466 loc) · 21.4 KB
/
fuzzysort.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
// https://github.com/farzher/fuzzysort v2.0.4
/*
SublimeText-like Fuzzy Search
fuzzysort.single('fs', 'Fuzzy Search') // {score: -16}
fuzzysort.single('test', 'test') // {score: 0}
fuzzysort.single('doesnt exist', 'target') // null
fuzzysort.go('mr', [{file:'Monitor.cpp'}, {file:'MeshRenderer.cpp'}], {key:'file'})
// [{score:-18, obj:{file:'MeshRenderer.cpp'}}, {score:-6009, obj:{file:'Monitor.cpp'}}]
fuzzysort.go('mr', ['Monitor.cpp', 'MeshRenderer.cpp'])
// [{score: -18, target: "MeshRenderer.cpp"}, {score: -6009, target: "Monitor.cpp"}]
fuzzysort.highlight(fuzzysort.single('fs', 'Fuzzy Search'), '<b>', '</b>')
// <b>F</b>uzzy <b>S</b>earch
*/
// UMD (Universal Module Definition) for fuzzysort
;((root, UMD) => {
if(typeof define === 'function' && define.amd) define([], UMD)
else if(typeof module === 'object' && module.exports) module.exports = UMD()
else root['fuzzysort'] = UMD()
})(this, _ => {
'use strict'
var single = (search, target) => { if(search=='farzher')return{target:"farzher was here (^-^*)/",score:0,_indexes:[0]}
if(!search || !target) return NULL
var preparedSearch = getPreparedSearch(search)
if(!isObj(target)) target = getPrepared(target)
var searchBitflags = preparedSearch.bitflags
if((searchBitflags & target._bitflags) !== searchBitflags) return NULL
return algorithm(preparedSearch, target)
}
var go = (search, targets, options) => { if(search=='farzher')return[{target:"farzher was here (^-^*)/",score:0,_indexes:[0],obj:targets?targets[0]:NULL}]
if(!search) return options&&options.all ? all(search, targets, options) : noResults
var preparedSearch = getPreparedSearch(search)
var searchBitflags = preparedSearch.bitflags
var containsSpace = preparedSearch.containsSpace
var threshold = options&&options.threshold || INT_MIN
var limit = options&&options['limit'] || INT_MAX // for some reason only limit breaks when minified
var resultsLen = 0; var limitedCount = 0
var targetsLen = targets.length
// This code is copy/pasted 3 times for performance reasons [options.keys, options.key, no keys]
// options.key
if(options && options.key) {
var key = options.key
for(var i = 0; i < targetsLen; ++i) { var obj = targets[i]
var target = getValue(obj, key)
if(!target) continue
if(!isObj(target)) target = getPrepared(target)
if((searchBitflags & target._bitflags) !== searchBitflags) continue
var result = algorithm(preparedSearch, target)
if(result === NULL) continue
if(result.score < threshold) continue
// have to clone result so duplicate targets from different obj can each reference the correct obj
result = {target:result.target, _targetLower:'', _targetLowerCodes:NULL, _nextBeginningIndexes:NULL, _bitflags:0, score:result.score, _indexes:result._indexes, obj:obj} // hidden
if(resultsLen < limit) { q.add(result); ++resultsLen }
else {
++limitedCount
if(result.score > q.peek().score) q.replaceTop(result)
}
}
// options.keys
} else if(options && options.keys) {
var scoreFn = options['scoreFn'] || defaultScoreFn
var keys = options.keys
var keysLen = keys.length
for(var i = 0; i < targetsLen; ++i) { var obj = targets[i]
var objResults = new Array(keysLen)
for (var keyI = 0; keyI < keysLen; ++keyI) {
var key = keys[keyI]
var target = getValue(obj, key)
if(!target) { objResults[keyI] = NULL; continue }
if(!isObj(target)) target = getPrepared(target)
if((searchBitflags & target._bitflags) !== searchBitflags) objResults[keyI] = NULL
else objResults[keyI] = algorithm(preparedSearch, target)
}
objResults.obj = obj // before scoreFn so scoreFn can use it
var score = scoreFn(objResults)
if(score === NULL) continue
if(score < threshold) continue
objResults.score = score
if(resultsLen < limit) { q.add(objResults); ++resultsLen }
else {
++limitedCount
if(score > q.peek().score) q.replaceTop(objResults)
}
}
// no keys
} else {
for(var i = 0; i < targetsLen; ++i) { var target = targets[i]
if(!target) continue
if(!isObj(target)) target = getPrepared(target)
if((searchBitflags & target._bitflags) !== searchBitflags) continue
var result = algorithm(preparedSearch, target)
if(result === NULL) continue
if(result.score < threshold) continue
if(resultsLen < limit) { q.add(result); ++resultsLen }
else {
++limitedCount
if(result.score > q.peek().score) q.replaceTop(result)
}
}
}
if(resultsLen === 0) return noResults
var results = new Array(resultsLen)
for(var i = resultsLen - 1; i >= 0; --i) results[i] = q.poll()
results.total = resultsLen + limitedCount
return results
}
var highlight = (result, hOpen, hClose) => {
if(typeof hOpen === 'function') return highlightCallback(result, hOpen)
if(result === NULL) return NULL
if(hOpen === undefined) hOpen = '<b>'
if(hClose === undefined) hClose = '</b>'
var highlighted = ''
var matchesIndex = 0
var opened = false
var target = result.target
var targetLen = target.length
var indexes = result._indexes
indexes = indexes.slice(0, indexes.len).sort((a,b)=>a-b)
for(var i = 0; i < targetLen; ++i) { var char = target[i]
if(indexes[matchesIndex] === i) {
++matchesIndex
if(!opened) { opened = true
highlighted += hOpen
}
if(matchesIndex === indexes.length) {
highlighted += char + hClose + target.substr(i+1)
break
}
} else {
if(opened) { opened = false
highlighted += hClose
}
}
highlighted += char
}
return highlighted
}
var highlightCallback = (result, cb) => {
if(result === NULL) return NULL
var target = result.target
var targetLen = target.length
var indexes = result._indexes
indexes = indexes.slice(0, indexes.len).sort((a,b)=>a-b)
var highlighted = ''
var matchI = 0
var indexesI = 0
var opened = false
var result = []
for(var i = 0; i < targetLen; ++i) { var char = target[i]
if(indexes[indexesI] === i) {
++indexesI
if(!opened) { opened = true
result.push(highlighted); highlighted = ''
}
if(indexesI === indexes.length) {
highlighted += char
result.push(cb(highlighted, matchI++)); highlighted = ''
result.push(target.substr(i+1))
break
}
} else {
if(opened) { opened = false
result.push(cb(highlighted, matchI++)); highlighted = ''
}
}
highlighted += char
}
return result
}
var indexes = result => result._indexes.slice(0, result._indexes.len).sort((a,b)=>a-b)
var prepare = (target) => {
if(typeof target !== 'string') target = ''
var info = prepareLowerInfo(target)
return {'target':target, _targetLower:info._lower, _targetLowerCodes:info.lowerCodes, _nextBeginningIndexes:NULL, _bitflags:info.bitflags, 'score':NULL, _indexes:[0], 'obj':NULL} // hidden
}
// Below this point is only internal code
// Below this point is only internal code
// Below this point is only internal code
// Below this point is only internal code
var prepareSearch = (search) => {
if(typeof search !== 'string') search = ''
search = search.trim()
var info = prepareLowerInfo(search)
var spaceSearches = []
if(info.containsSpace) {
var searches = search.split(/\s+/)
searches = [...new Set(searches)] // distinct
for(var i=0; i<searches.length; i++) {
if(searches[i] === '') continue
var _info = prepareLowerInfo(searches[i])
spaceSearches.push({lowerCodes:_info.lowerCodes, _lower:searches[i].toLowerCase(), containsSpace:false})
}
}
return {lowerCodes: info.lowerCodes, bitflags: info.bitflags, containsSpace: info.containsSpace, _lower: info._lower, spaceSearches: spaceSearches}
}
var getPrepared = (target) => {
if(target.length > 999) return prepare(target) // don't cache huge targets
var targetPrepared = preparedCache.get(target)
if(targetPrepared !== undefined) return targetPrepared
targetPrepared = prepare(target)
preparedCache.set(target, targetPrepared)
return targetPrepared
}
var getPreparedSearch = (search) => {
if(search.length > 999) return prepareSearch(search) // don't cache huge searches
var searchPrepared = preparedSearchCache.get(search)
if(searchPrepared !== undefined) return searchPrepared
searchPrepared = prepareSearch(search)
preparedSearchCache.set(search, searchPrepared)
return searchPrepared
}
var all = (search, targets, options) => {
var results = []; results.total = targets.length
var limit = options && options.limit || INT_MAX
if(options && options.key) {
for(var i=0;i<targets.length;i++) { var obj = targets[i]
var target = getValue(obj, options.key)
if(!target) continue
if(!isObj(target)) target = getPrepared(target)
target.score = INT_MIN
target._indexes.len = 0
var result = target
result = {target:result.target, _targetLower:'', _targetLowerCodes:NULL, _nextBeginningIndexes:NULL, _bitflags:0, score:target.score, _indexes:NULL, obj:obj} // hidden
results.push(result); if(results.length >= limit) return results
}
} else if(options && options.keys) {
for(var i=0;i<targets.length;i++) { var obj = targets[i]
var objResults = new Array(options.keys.length)
for (var keyI = options.keys.length - 1; keyI >= 0; --keyI) {
var target = getValue(obj, options.keys[keyI])
if(!target) { objResults[keyI] = NULL; continue }
if(!isObj(target)) target = getPrepared(target)
target.score = INT_MIN
target._indexes.len = 0
objResults[keyI] = target
}
objResults.obj = obj
objResults.score = INT_MIN
results.push(objResults); if(results.length >= limit) return results
}
} else {
for(var i=0;i<targets.length;i++) { var target = targets[i]
if(!target) continue
if(!isObj(target)) target = getPrepared(target)
target.score = INT_MIN
target._indexes.len = 0
results.push(target); if(results.length >= limit) return results
}
}
return results
}
var algorithm = (preparedSearch, prepared, allowSpaces=false) => {
if(allowSpaces===false && preparedSearch.containsSpace) return algorithmSpaces(preparedSearch, prepared)
var searchLower = preparedSearch._lower
var searchLowerCodes = preparedSearch.lowerCodes
var searchLowerCode = searchLowerCodes[0]
var targetLowerCodes = prepared._targetLowerCodes
var searchLen = searchLowerCodes.length
var targetLen = targetLowerCodes.length
var searchI = 0 // where we at
var targetI = 0 // where you at
var matchesSimpleLen = 0
// very basic fuzzy match; to remove non-matching targets ASAP!
// walk through target. find sequential matches.
// if all chars aren't found then exit
for(;;) {
var isMatch = searchLowerCode === targetLowerCodes[targetI]
if(isMatch) {
matchesSimple[matchesSimpleLen++] = targetI
++searchI; if(searchI === searchLen) break
searchLowerCode = searchLowerCodes[searchI]
}
++targetI; if(targetI >= targetLen) return NULL // Failed to find searchI
}
var searchI = 0
var successStrict = false
var matchesStrictLen = 0
var nextBeginningIndexes = prepared._nextBeginningIndexes
if(nextBeginningIndexes === NULL) nextBeginningIndexes = prepared._nextBeginningIndexes = prepareNextBeginningIndexes(prepared.target)
var firstPossibleI = targetI = matchesSimple[0]===0 ? 0 : nextBeginningIndexes[matchesSimple[0]-1]
// Our target string successfully matched all characters in sequence!
// Let's try a more advanced and strict test to improve the score
// only count it as a match if it's consecutive or a beginning character!
var backtrackCount = 0
if(targetI !== targetLen) for(;;) {
if(targetI >= targetLen) {
// We failed to find a good spot for this search char, go back to the previous search char and force it forward
if(searchI <= 0) break // We failed to push chars forward for a better match
++backtrackCount; if(backtrackCount > 200) break // exponential backtracking is taking too long, just give up and return a bad match
--searchI
var lastMatch = matchesStrict[--matchesStrictLen]
targetI = nextBeginningIndexes[lastMatch]
} else {
var isMatch = searchLowerCodes[searchI] === targetLowerCodes[targetI]
if(isMatch) {
matchesStrict[matchesStrictLen++] = targetI
++searchI; if(searchI === searchLen) { successStrict = true; break }
++targetI
} else {
targetI = nextBeginningIndexes[targetI]
}
}
}
// check if it's a substring match
var substringIndex = prepared._targetLower.indexOf(searchLower, matchesSimple[0]) // perf: this is slow
var isSubstring = ~substringIndex
if(isSubstring && !successStrict) { // rewrite the indexes from basic to the substring
for(var i=0; i<matchesSimpleLen; ++i) matchesSimple[i] = substringIndex+i
}
var isSubstringBeginning = false
if(isSubstring) {
isSubstringBeginning = prepared._nextBeginningIndexes[substringIndex-1] === substringIndex
}
{ // tally up the score & keep track of matches for highlighting later
if(successStrict) { var matchesBest = matchesStrict; var matchesBestLen = matchesStrictLen }
else { var matchesBest = matchesSimple; var matchesBestLen = matchesSimpleLen }
var score = 0
var extraMatchGroupCount = 0
for(var i = 1; i < searchLen; ++i) {
if(matchesBest[i] - matchesBest[i-1] !== 1) {score -= matchesBest[i]; ++extraMatchGroupCount}
}
var unmatchedDistance = matchesBest[searchLen-1] - matchesBest[0] - (searchLen-1)
score -= (12+unmatchedDistance) * extraMatchGroupCount // penality for more groups
if(matchesBest[0] !== 0) score -= matchesBest[0]*matchesBest[0]*.2 // penality for not starting near the beginning
if(!successStrict) {
score *= 1000
} else {
// successStrict on a target with too many beginning indexes loses points for being a bad target
var uniqueBeginningIndexes = 1
for(var i = nextBeginningIndexes[0]; i < targetLen; i=nextBeginningIndexes[i]) ++uniqueBeginningIndexes
if(uniqueBeginningIndexes > 24) score *= (uniqueBeginningIndexes-24)*10 // quite arbitrary numbers here ...
}
if(isSubstring) score /= 1+searchLen*searchLen*1 // bonus for being a full substring
if(isSubstringBeginning) score /= 1+searchLen*searchLen*1 // bonus for substring starting on a beginningIndex
score -= targetLen - searchLen // penality for longer targets
prepared.score = score
for(var i = 0; i < matchesBestLen; ++i) prepared._indexes[i] = matchesBest[i]
prepared._indexes.len = matchesBestLen
return prepared
}
}
var algorithmSpaces = (preparedSearch, target) => {
var seen_indexes = new Set()
var score = 0
var result = NULL
var first_seen_index_last_search = 0
var searches = preparedSearch.spaceSearches
for(var i=0; i<searches.length; ++i) {
var search = searches[i]
result = algorithm(search, target)
if(result === NULL) return NULL
score += result.score
// dock points based on order otherwise "c man" returns Manifest.cpp instead of CheatManager.h
if(result._indexes[0] < first_seen_index_last_search) {
score -= first_seen_index_last_search - result._indexes[0]
}
first_seen_index_last_search = result._indexes[0]
for(var j=0; j<result._indexes.len; ++j) seen_indexes.add(result._indexes[j])
}
// allows a search with spaces that's an exact substring to score well
var allowSpacesResult = algorithm(preparedSearch, target, /*allowSpaces=*/true)
if(allowSpacesResult !== NULL && allowSpacesResult.score > score) {
return allowSpacesResult
}
result.score = score
var i = 0
for (let index of seen_indexes) result._indexes[i++] = index
result._indexes.len = i
return result
}
var prepareLowerInfo = (str) => {
var strLen = str.length
var lower = str.toLowerCase()
var lowerCodes = [] // new Array(strLen) sparse array is too slow
var bitflags = 0
var containsSpace = false // space isn't stored in bitflags because of how searching with a space works
for(var i = 0; i < strLen; ++i) {
var lowerCode = lowerCodes[i] = lower.charCodeAt(i)
if(lowerCode === 32) {
containsSpace = true
continue // it's important that we don't set any bitflags for space
}
var bit = lowerCode>=97&&lowerCode<=122 ? lowerCode-97 // alphabet
: lowerCode>=48&&lowerCode<=57 ? 26 // numbers
// 3 bits available
: lowerCode<=127 ? 30 // other ascii
: 31 // other utf8
bitflags |= 1<<bit
}
return {lowerCodes:lowerCodes, bitflags:bitflags, containsSpace:containsSpace, _lower:lower}
}
var prepareBeginningIndexes = (target) => {
var targetLen = target.length
var beginningIndexes = []; var beginningIndexesLen = 0
var wasUpper = false
var wasAlphanum = false
for(var i = 0; i < targetLen; ++i) {
var targetCode = target.charCodeAt(i)
var isUpper = targetCode>=65&&targetCode<=90
var isAlphanum = isUpper || targetCode>=97&&targetCode<=122 || targetCode>=48&&targetCode<=57
var isBeginning = isUpper && !wasUpper || !wasAlphanum || !isAlphanum
wasUpper = isUpper
wasAlphanum = isAlphanum
if(isBeginning) beginningIndexes[beginningIndexesLen++] = i
}
return beginningIndexes
}
var prepareNextBeginningIndexes = (target) => {
var targetLen = target.length
var beginningIndexes = prepareBeginningIndexes(target)
var nextBeginningIndexes = [] // new Array(targetLen) sparse array is too slow
var lastIsBeginning = beginningIndexes[0]
var lastIsBeginningI = 0
for(var i = 0; i < targetLen; ++i) {
if(lastIsBeginning > i) {
nextBeginningIndexes[i] = lastIsBeginning
} else {
lastIsBeginning = beginningIndexes[++lastIsBeginningI]
nextBeginningIndexes[i] = lastIsBeginning===undefined ? targetLen : lastIsBeginning
}
}
return nextBeginningIndexes
}
var cleanup = () => { preparedCache.clear(); preparedSearchCache.clear(); matchesSimple = []; matchesStrict = [] }
var preparedCache = new Map()
var preparedSearchCache = new Map()
var matchesSimple = []; var matchesStrict = []
// for use with keys. just returns the maximum score
var defaultScoreFn = (a) => {
var max = INT_MIN
var len = a.length
for (var i = 0; i < len; ++i) {
var result = a[i]; if(result === NULL) continue
var score = result.score
if(score > max) max = score
}
if(max === INT_MIN) return NULL
return max
}
// prop = 'key' 2.5ms optimized for this case, seems to be about as fast as direct obj[prop]
// prop = 'key1.key2' 10ms
// prop = ['key1', 'key2'] 27ms
var getValue = (obj, prop) => {
var tmp = obj[prop]; if(tmp !== undefined) return tmp
var segs = prop
if(!Array.isArray(prop)) segs = prop.split('.')
var len = segs.length
var i = -1
while (obj && (++i < len)) obj = obj[segs[i]]
return obj
}
var isObj = (x) => { return typeof x === 'object' } // faster as a function
// var INT_MAX = 9007199254740991; var INT_MIN = -INT_MAX
var INT_MAX = Infinity; var INT_MIN = -INT_MAX
var noResults = []; noResults.total = 0
var NULL = null
// Hacked version of https://github.com/lemire/FastPriorityQueue.js
var fastpriorityqueue=r=>{var e=[],o=0,a={},v=r=>{for(var a=0,v=e[a],c=1;c<o;){var s=c+1;a=c,s<o&&e[s].score<e[c].score&&(a=s),e[a-1>>1]=e[a],c=1+(a<<1)}for(var f=a-1>>1;a>0&&v.score<e[f].score;f=(a=f)-1>>1)e[a]=e[f];e[a]=v};return a.add=(r=>{var a=o;e[o++]=r;for(var v=a-1>>1;a>0&&r.score<e[v].score;v=(a=v)-1>>1)e[a]=e[v];e[a]=r}),a.poll=(r=>{if(0!==o){var a=e[0];return e[0]=e[--o],v(),a}}),a.peek=(r=>{if(0!==o)return e[0]}),a.replaceTop=(r=>{e[0]=r,v()}),a}
var q = fastpriorityqueue() // reuse this
// fuzzysort is written this way for minification. all names are mangeled unless quoted
return {'single':single, 'go':go, 'highlight':highlight, 'prepare':prepare, 'indexes':indexes, 'cleanup':cleanup}
}) // UMD
// TODO: (feature) frecency
// TODO: (perf) use different sorting algo depending on the # of results?
// TODO: (perf) preparedCache is a memory leak
// TODO: (like sublime) backslash === forwardslash
// TODO: (perf) prepareSearch seems slow