From 13f065a5d13d1bc7d1644dd9e1120e5011c88755 Mon Sep 17 00:00:00 2001 From: Julian Simioni Date: Wed, 3 Feb 2021 16:41:09 -0800 Subject: [PATCH 1/2] chore(tag_mapper): refactor tag mapping order This makes it easier to add custom logic by working through the tags in a specified order. --- stream/tag_mapper.js | 37 +++++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/stream/tag_mapper.js b/stream/tag_mapper.js index 33099690..f3d413bc 100644 --- a/stream/tag_mapper.js +++ b/stream/tag_mapper.js @@ -30,8 +30,27 @@ module.exports = function(){ return next( null, doc ); } - // Unfortunately we need to iterate over every tag, - // so we only do the iteration once to save CPU. + // handle the most likely source of name.default first + const trimmed_name = trim(tags.name); + if (trimmed_name) { + doc.setName('default', trimmed_name); + } + + // check the other tags that might go into name.default second + Object.entries(NAME_SCHEMA).forEach(([key, value]) => { + if (value === 'default' && key !== 'name') { + const trimmed_value = trim(tags[key]); + if (trimmed_value) { + if (!trimmed_name ) { + doc.setName('default', trim( tags[key])); + } else { + doc.setNameAlias('default', trim( tags[key])); + } + } + } + }); + + // iterate through all tags, catching any address/localized names _.each(tags, (value, key) => { // Map localized names which begin with 'name:' @@ -44,20 +63,6 @@ module.exports = function(){ } } - // Map name data from our name mapping schema - else if( _.has(NAME_SCHEMA, key) ){ - var val2 = trim( value ); - if( val2 ){ - if( key === NAME_SCHEMA._primary ){ - doc.setName( NAME_SCHEMA[key], val2 ); - } else if ( 'default' === NAME_SCHEMA[key] ) { - doc.setNameAlias( NAME_SCHEMA[key], val2 ); - } else { - doc.setName( NAME_SCHEMA[key], val2 ); - } - } - } - // Map address data from our address mapping schema else if( _.has(ADDRESS_SCHEMA, key) ){ var val3 = trim( value ); From 9c364a5e64df572377495381b701b429a22c0896 Mon Sep 17 00:00:00 2001 From: Julian Simioni Date: Thu, 4 Feb 2021 08:07:49 -0800 Subject: [PATCH 2/2] feat(names): Detect near duplicate names This handles the case where one alt name is a substring fully contained in another. --- stream/tag_mapper.js | 2 +- test/fixtures/combined_vancouver_queens.json | 25 ++++----------- test/stream/tag_mapper.js | 32 ++++++++++++++++++++ 3 files changed, 39 insertions(+), 20 deletions(-) diff --git a/stream/tag_mapper.js b/stream/tag_mapper.js index f3d413bc..493a8031 100644 --- a/stream/tag_mapper.js +++ b/stream/tag_mapper.js @@ -43,7 +43,7 @@ module.exports = function(){ if (trimmed_value) { if (!trimmed_name ) { doc.setName('default', trim( tags[key])); - } else { + } else if(!trimmed_name.includes(trimmed_value)) { doc.setNameAlias('default', trim( tags[key])); } } diff --git a/test/fixtures/combined_vancouver_queens.json b/test/fixtures/combined_vancouver_queens.json index 74ce26fe..df45ef5d 100644 --- a/test/fixtures/combined_vancouver_queens.json +++ b/test/fixtures/combined_vancouver_queens.json @@ -856355,16 +856355,10 @@ "_type": "_doc", "data": { "name": { - "default": [ - "Mountain Equipment Co-op (MEC)", - "MEC" - ] + "default": "Mountain Equipment Co-op (MEC)" }, "phrase": { - "default": [ - "Mountain Equipment Co-op (MEC)", - "MEC" - ] + "default": "Mountain Equipment Co-op (MEC)" }, "address_parts": { "number": "212", @@ -892394,8 +892388,7 @@ "_type": "_doc", "data": { "name": { - "default": "IPOH Asian House" - }, + "default": "IPOH Asian House" }, "phrase": { "default": "IPOH Asian House" }, @@ -892429,16 +892422,10 @@ "_type": "_doc", "data": { "name": { - "default": [ - "On Lok Restaurant & Wun Tun House", - "On Lok" - ] + "default": "On Lok Restaurant & Wun Tun House" }, "phrase": { - "default": [ - "On Lok Restaurant & Wun Tun House", - "On Lok" - ] + "default": "On Lok Restaurant & Wun Tun House" }, "address_parts": { "number": "2010", @@ -956784,4 +956771,4 @@ "bounding_box": "{\"min_lat\":49.2174915,\"max_lat\":49.2194865,\"min_lon\":-123.2018987,\"max_lon\":-123.1991481}" } } -] \ No newline at end of file +] diff --git a/test/stream/tag_mapper.js b/test/stream/tag_mapper.js index 6e896089..dba74cac 100644 --- a/test/stream/tag_mapper.js +++ b/test/stream/tag_mapper.js @@ -112,6 +112,38 @@ module.exports.tests.osm_names = function(test, common) { }); }; +// Reject alt names that are a substring of the main name +module.exports.tests.substring_alt_name = function(test, common) { + var doc = new Document('a','b',1); + doc.setMeta('tags', { 'name': 'test place', 'alt_name': 'test pl' }); + + test('rejects - substring alt name', function(t) { + var stream = mapper(); + stream.pipe( through.obj( function( doc, enc, next ){ + t.deepEqual(doc.name, { default: 'test place' }, 'substring name removed'); + t.end(); // test will fail if not called (or called twice). + next(); + })); + stream.write(doc); + }); +}; + +// Reject alt names that are a substring of the main name, even if they appear before the name in list of tags +module.exports.tests.substring_alt_name2 = function(test, common) { + var doc = new Document('a','b',1); + doc.setMeta('tags', { 'alt_name': 'test pl', name: 'test place'}); + + test('rejects - substring alt name', function(t) { + var stream = mapper(); + stream.pipe( through.obj( function( doc, enc, next ){ + t.deepEqual(doc.name, { default: 'test place' }, 'substring name removed'); + t.end(); // test will fail if not called (or called twice). + next(); + })); + stream.write(doc); + }); +}; + // Cover the case of a tag key being 'name:' eg. { 'name:': 'foo' } // Not to be confused with { 'name': 'foo' } (note the extraneous colon) module.exports.tests.extraneous_colon = function(test, common) {