Skip to content

Commit

Permalink
feat(parser): remove URLs from names (ie. try to save them)
Browse files Browse the repository at this point in the history
  • Loading branch information
missinglink committed Dec 10, 2024
1 parent db9f465 commit ce57c15
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 5 deletions.
9 changes: 4 additions & 5 deletions stream/parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,11 @@ function parser( precision ){
// each connected road can have one or more names
// we select one name to be the default.
// Strip any URLs embedded in the candidate names (rather than discarding
// the whole name) and return the longest remaining name.
//
// @param {string[]} names - candidate names for a single road
// @returns {string} the longest name once URLs are removed; when several
//   names share the greatest length the earliest one wins. Returns ''
//   when `names` is empty or every name consisted solely of a URL.
function selectName( names ){
  return names
    // remove http(s):// and ftp:// URLs up to the next whitespace, then
    // trim the whitespace left behind at either end
    .map(name => name.replace(/(?:https?|ftp):\/\/\S*/g, '').trim())
    // seeding with '' guarantees a string result even for an empty list;
    // the strict '>' keeps the earliest of equally long names
    .reduce((longest, name) => name.length > longest.length ? name : longest, '');
}

module.exports = parser;
20 changes: 20 additions & 0 deletions test/stream/parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,26 @@ module.exports.tests.filter_url = function(test, common) {
stream.write(row);
stream.end();
});

// URLs embedded in names should be stripped before the default name is
// chosen, so the longest remaining (non-URL) text is selected.
test('parse: URL removal', (t) => {
  const expected = 'Longer Example';

  // first field is the encoded geometry, the remaining fields are names
  const fields = [
    'i{s~{AqubwJ{TxV{BlDmBnCiGhJgCbCs@dAaCfHmAnCoBpB',
    'http://foo.com/bar.pdf',
    'Short Example https://foo.com/bar.pdf',
    'Longer Example ftp://foo.com/bar.pdf',
  ];
  const row = fields.join('\0');

  // check the name chosen for each parsed record
  function assert( actual, enc, next ){
    t.deepEqual( actual.properties.name, expected, 'longest non-URL name selected' );
    next();
  }

  // end the test once the stream has flushed
  function done(){
    return t.end();
  }

  const stream = parser(6);
  stream.pipe( through.obj( assert, done ) );
  stream.write(row);
  stream.end();
});
};

module.exports.tests.filter_only_url = function(test, common) {
Expand Down

0 comments on commit ce57c15

Please sign in to comment.