diff --git a/stream/parser.js b/stream/parser.js index 4c3bc68..fbda09f 100644 --- a/stream/parser.js +++ b/stream/parser.js @@ -56,11 +56,9 @@ function selectName( names ){ // filter out URLs // then return the longest name // @todo: can we improve this logic? - return names.filter( function ( name) { - return !name.match(/^http(s)?:\/\//); - }).reduce( function( a, b ){ - return a.length > b.length ? a : b; - }, ''); + return names + .filter(name => !/http(s)?:\/\//.test(name)) + .reduce((a, b) => a.length > b.length ? a : b, ''); } module.exports = parser; diff --git a/test/stream/parser.js b/test/stream/parser.js index 7493272..1345409 100644 --- a/test/stream/parser.js +++ b/test/stream/parser.js @@ -111,6 +111,27 @@ module.exports.tests.filter_url = function(test, common) { stream.write(row); stream.end(); }); + + // real-world example where the URL was included with a valid name + // (i.e. was preceded by a space rather than a NULL character). + test('parse: filter URL within name', (t) => { + const stream = parser(6); + const row = [ + 'i{s~{AqubwJ{TxV{BlDmBnCiGhJgCbCs@dAaCfHmAnCoBpB', + 'Sentier des Chasupes', + 'Mairie Bouxières http://www.mairie-bouxieres-aux-dames.fr/wp-content/uploads/2005/01/Les-sentiers-de-Bouxi%C3%A8res-aux-Dames.pdf', + ].join('\0'); + const expected = 'Sentier des Chasupes'; + + const assert = ( actual, enc, next ) => { + t.deepEqual( actual.properties.name, expected, 'longest non-URL name selected' ); + next(); + }; + + stream.pipe( through.obj( assert, () => t.end() ) ); + stream.write(row); + stream.end(); + }); }; module.exports.tests.filter_only_url = function(test, common) {