Skip to content

Commit

Permalink
Merge pull request #35 from pelias/osm_housenumber_parsing
Browse files Browse the repository at this point in the history
osm: parse delimited house number lists
  • Loading branch information
missinglink authored Dec 5, 2016
2 parents 5a5682d + b05a114 commit 5759839
Show file tree
Hide file tree
Showing 4 changed files with 171 additions and 1 deletion.
1 change: 1 addition & 0 deletions api/osm.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ function osm(dataStream, addressDbPath, streetDbPath, done){
dataStream
.pipe( stream.split() ) // split file on newline
.pipe( stream.osm.parse() ) // parse openstreetmap json data
.pipe( stream.osm.delimited_ranges() ) // handle delimited ranges such as '1A,1B,1C,1D,1E'
// .pipe( stream.osm.augment( db ) ) // find streets for records with only the housenumber
.pipe( stream.osm.convert() ) // convert openstreetmap data to generic model
.pipe( stream.address.batch() ) // batch records on the same street
Expand Down
45 changes: 45 additions & 0 deletions stream/osm/delimited_ranges.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@

var through = require('through2');

// valid delimiters
var DELIMITER_REGEX = /[,;]/;

/**
 * Create an object-mode transform stream which expands records whose
 * 'addr:housenumber' tag holds a delimited list (such as '1A,1B;1C') into
 * one record per house number.
 *
 * Records without tags, without the tag, with a non-string tag value or with
 * a value that contains no delimiter are pushed downstream unchanged (the
 * same object reference, not a copy).
 *
 * For delimited values each member is trimmed FIRST, then empty members and
 * duplicates are removed, so ' 1A, 1A, ,2B' yields exactly '1A' and '2B'.
 * A value made up solely of delimiters/whitespace yields no members, in which
 * case nothing is pushed for that record.
 *
 * @returns {Object} the stream returned by through.obj()
 */
function streamFactory(){

  // split a raw tag value into trimmed, non-empty, unique house numbers
  function parseHousenumbers( raw ){
    return raw.split( DELIMITER_REGEX )
      // trim before filtering so that whitespace-only members are discarded
      // and members differing only by padding are treated as duplicates
      .map( function( member ){ return member.trim(); })
      .filter( function( member ){ return member.length > 0; })
      .filter( function( member, index, all ){
        return all.indexOf( member ) === index;
      });
  }

  return through.obj( function( json, _, next ){

    var tags = json.tags;
    var hasHousenumber = !!tags &&
      Object.prototype.hasOwnProperty.call( tags, 'addr:housenumber' );

    // no-op, this record doesn't contain a delimited list of house numbers
    // (non-string values are passed through rather than throwing)
    if( !hasHousenumber ||
        typeof tags['addr:housenumber'] !== 'string' ||
        !DELIMITER_REGEX.test( tags['addr:housenumber'] ) ){
      this.push( json );
      return next();
    }

    // emit one deep copy of the record per house number in the list
    parseHousenumbers( tags['addr:housenumber'] ).forEach( function( num ){

      // JSON round-trip gives an independent copy so the copies never
      // share (and mutate) the same tags object
      var copy = JSON.parse( JSON.stringify( json ) );
      copy.tags['addr:housenumber'] = num;

      // push each copy downstream
      this.push( copy );

    }, this);

    // signal that this record is fully processed and request the next one
    next();

  });
}

module.exports = streamFactory;
3 changes: 2 additions & 1 deletion test/_unit.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ var tests = [
require('./lib/project.js'),
require('./lib/interpolate.js'),
require('./lib/Street.js'),
require('./lib/Address.js')
require('./lib/Address.js'),
require('./stream/osm/delimited_ranges.js')
];

tests.map(function(t) {
Expand Down
123 changes: 123 additions & 0 deletions test/stream/osm/delimited_ranges.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@

var through = require('through2'),
delimited_ranges = require('../../../stream/osm/delimited_ranges');

module.exports.street = {};

// records which do not carry a delimited list of house numbers must be
// emitted exactly once and left unmodified
module.exports.street.noop = function(test) {
  test('missing housenumber', function(t) {
    var json = { no: 'housenumber' };
    // stream() reports its results through the callback only
    stream( json, function( records ){
      t.equal( records.length, 1 );
      t.deepEqual( records[0], json );
      t.end();
    });
  });
  test('housenumber does not contain delimiter', function(t) {
    var json = { tags: { 'addr:housenumber': '1A' } };
    stream( json, function( records ){
      t.equal( records.length, 1 );
      t.deepEqual( records[0], json );
      t.end();
    });
  });
};

// a list of two house numbers must produce two records, whichever of the
// supported delimiters separates them
module.exports.street.range = function(test) {

  // both cases share identical expectations, only the delimiter differs
  var cases = [
    { name: 'split on comma', housenumber: '1A,2B' },
    { name: 'split on semi-colon', housenumber: '1A;2B' }
  ];

  cases.forEach( function( testCase ){
    test( testCase.name, function(t) {
      var json = { tags: { 'addr:housenumber': testCase.housenumber }, misc: 'value' };
      // stream() reports its results through the callback only
      stream( json, function( records ){
        t.equal( records.length, 2 );
        t.equal( records[0].tags['addr:housenumber'], '1A' );
        t.equal( records[0].misc, 'value', 'preserve values' );
        t.equal( records[1].tags['addr:housenumber'], '2B' );
        t.equal( records[1].misc, 'value', 'preserve values' );
        t.end();
      });
    });
  });
};

// whitespace surrounding the list members must not end up in the emitted
// house numbers
module.exports.street.range_with_whitespace = function(test) {
  test('superfluous whitespace', function(t) {
    var json = { tags: { 'addr:housenumber': ' 1A, 2B ' }, misc: 'value' };
    // stream() reports its results through the callback only
    stream( json, function( records ){
      t.equal( records.length, 2 );
      t.equal( records[0].tags['addr:housenumber'], '1A' );
      t.equal( records[0].misc, 'value', 'preserve values' );
      t.equal( records[1].tags['addr:housenumber'], '2B' );
      t.equal( records[1].misc, 'value', 'preserve values' );
      t.end();
    });
  });
};

// consecutive or trailing delimiters create empty list members, which must
// not produce records of their own
module.exports.street.empty_members = function(test) {
  test('remove empty members', function(t) {
    var json = { tags: { 'addr:housenumber': '1A,,2B;' }, misc: 'value' };
    // stream() reports its results through the callback only
    stream( json, function( records ){
      t.equal( records.length, 2 );
      t.equal( records[0].tags['addr:housenumber'], '1A' );
      t.equal( records[0].misc, 'value', 'preserve values' );
      t.equal( records[1].tags['addr:housenumber'], '2B' );
      t.equal( records[1].misc, 'value', 'preserve values' );
      t.end();
    });
  });
};

// a house number which is repeated within the list must only be emitted once
module.exports.street.duplicates = function(test) {
  test('remove duplicates', function(t) {
    var json = { tags: { 'addr:housenumber': '1A,1A,2B' }, misc: 'value' };
    // stream() reports its results through the callback only
    stream( json, function( records ){
      t.equal( records.length, 2 );
      t.equal( records[0].tags['addr:housenumber'], '1A' );
      t.equal( records[0].misc, 'value', 'preserve values' );
      t.equal( records[1].tags['addr:housenumber'], '2B' );
      t.equal( records[1].misc, 'value', 'preserve values' );
      t.end();
    });
  });
};

// entry point used by the test runner: registers every test group found on
// module.exports.street, prefixing each test name with the module name
module.exports.all = function (tape) {

  // wrap tape so that every test name identifies this module
  var prefixed = function( name, testFunction ){
    return tape('delimited_ranges: ' + name, testFunction);
  };

  Object.keys( module.exports.street ).forEach( function( groupName ){
    module.exports.street[groupName]( prefixed );
  });
};

// generic stream test runner
// generic stream test runner: writes a single record to a fresh
// delimited_ranges stream, then calls cb with an array of everything the
// stream emitted once it has ended
function stream( json, cb ){

  var collected = [];
  var transform = delimited_ranges();

  // sink which buffers each emitted chunk and reports the lot on flush
  var sink = through.obj(
    function( chunk, _, next ){
      collected.push( chunk );
      next();
    },
    function( next ){
      cb( collected );
      next();
    }
  );

  transform.pipe( sink );
  transform.write( json );
  transform.end();
}

0 comments on commit 5759839

Please sign in to comment.