Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

remove URL regex caret anchor #280

Merged
merged 2 commits into from
Dec 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 5 additions & 8 deletions stream/parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -53,14 +53,11 @@ function parser( precision ){
// each connected road can have one or more names
// we select one name to be the default.
/**
 * Select the default name for a road from its list of candidate names.
 *
 * URLs (http, https or ftp) are stripped from every candidate wherever they
 * appear in the string, not only at the start, so a name such as
 * 'Town Hall http://example.com/doc.pdf' is reduced to 'Town Hall' before
 * lengths are compared. The longest remaining name is returned.
 *
 * @param {string[]} names - candidate names
 * @returns {string} the longest name after URL removal; '' when the list is
 *   empty or every candidate consisted solely of a URL.
 */
function selectName( names ){
  return names
    .map( name => name.replace(/(?:https?|ftp):\/\/\S*/g, '').trim() )
    // a strictly-greater comparison keeps the earliest candidate on a tie;
    // seeding with '' guarantees a string result even for an empty list
    .reduce( ( longest, name ) => name.length > longest.length ? name : longest, '' );
}

// expose the parser function as this module's export
module.exports = parser;
41 changes: 41 additions & 0 deletions test/stream/parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,47 @@ module.exports.tests.filter_url = function(test, common) {
stream.write(row);
stream.end();
});

// Real-world example in which a URL was appended to an otherwise valid name
// (i.e. it was preceded by a space rather than by a NULL character), so the
// URL and the name arrive together as a single field.
test('parse: filter URL within name', (t) => {
  const stream = parser(6);
  const wantedName = 'Sentier des Chasupes';

  // fields of the input row; they are joined with NULL characters below
  const fields = [
    'i{s~{AqubwJ{TxV{BlDmBnCiGhJgCbCs@dAaCfHmAnCoBpB',
    'Sentier des Chasupes',
    'Mairie Bouxières http://www.mairie-bouxieres-aux-dames.fr/wp-content/uploads/2005/01/Les-sentiers-de-Bouxi%C3%A8res-aux-Dames.pdf',
  ];

  // check the name chosen for each record emitted by the parser
  const checkName = ( record, enc, done ) => {
    t.deepEqual( record.properties.name, wantedName, 'longest non-URL name selected' );
    done();
  };

  // the test ends when the downstream stream is flushed
  stream.pipe( through.obj( checkName, () => t.end() ) );
  stream.write( fields.join('\0') );
  stream.end();
});

// URLs using the http, https and ftp schemes are stripped from the candidate
// names before the longest one is selected.
test('parse: URL removal', (t) => {
  const stream = parser(6);
  const wantedName = 'Longer Example';

  // fields of the input row; they are joined with NULL characters below
  const fields = [
    'i{s~{AqubwJ{TxV{BlDmBnCiGhJgCbCs@dAaCfHmAnCoBpB',
    'http://foo.com/bar.pdf',
    'Short Example https://foo.com/bar.pdf',
    'Longer Example ftp://foo.com/bar.pdf',
  ];

  // check the name chosen for each record emitted by the parser
  const checkName = ( record, enc, done ) => {
    t.deepEqual( record.properties.name, wantedName, 'longest non-URL name selected' );
    done();
  };

  // the test ends when the downstream stream is flushed
  stream.pipe( through.obj( checkName, () => t.end() ) );
  stream.write( fields.join('\0') );
  stream.end();
});
};

module.exports.tests.filter_only_url = function(test, common) {
Expand Down