From 89ef34a41c56c4f4c4f867177319416395a4dea4 Mon Sep 17 00:00:00 2001 From: Peter Johnson Date: Mon, 5 Dec 2016 19:28:25 +0100 Subject: [PATCH] analyze: remove common mandarin labels --- lib/analyze.js | 6 +++++- test/lib/analyze.js | 11 ++++++++--- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/lib/analyze.js b/lib/analyze.js index e25c9655..5cf4dd43 100644 --- a/lib/analyze.js +++ b/lib/analyze.js @@ -51,9 +51,13 @@ function housenumber( num ){ .replace(' 1/2', '½') .replace(' 3/4', '¾'); - // remove common labels + // remove common english labels number = number.replace(/\s+(apartment|apt|lot|space|ste|suite|unit)\s+/gi, ''); + // remove common mandarin labels + // see: https://eastasiastudent.net/china/mandarin/postal-address/ + number = number.replace(/(号|號|室|宅|楼)/g, ''); + // remove spaces from housenumber. eg: '2 A' -> '2A' number = number.replace(/\s+/g, '').toLowerCase(); diff --git a/test/lib/analyze.js b/test/lib/analyze.js index cb2a23d0..67aa3e8f 100644 --- a/test/lib/analyze.js +++ b/test/lib/analyze.js @@ -59,8 +59,9 @@ module.exports.analyze.housenumber = function(test) { t.false(isNaN(analyze.housenumber('326 1/2')), 'half suffix'); t.false(isNaN(analyze.housenumber('8¼')), 'quarter suffix'); t.false(isNaN(analyze.housenumber('4701 #B')), 'hash delimited suffix'); - t.false(isNaN(analyze.housenumber('1434 UNIT #B')), 'remove common label "unit"'); - t.false(isNaN(analyze.housenumber('1434 SUITE #B')), 'remove common label "suite"'); + t.false(isNaN(analyze.housenumber('1434 UNIT #B')), 'remove common english label "unit"'); + t.false(isNaN(analyze.housenumber('1434 SUITE #B')), 'remove common english label "suite"'); + t.false(isNaN(analyze.housenumber('158號')), 'remove common mandarin label'); t.end(); }); @@ -161,13 +162,17 @@ module.exports.analyze.housenumber = function(test) { // t.equal(float, 5285.03); // t.end(); // }); - test('housenumber: removes common labels', function(t) { + test('housenumber: removes common english labels', function(t) { var float = analyze.housenumber('4701 UNIT #B'); t.equal(float, 4701.06); var float2 = analyze.housenumber('1 APT A'); t.equal(float2, 1.03); t.end(); }); + test('housenumber: removes common mandarin labels', function(t) { + t.equal(analyze.housenumber('158號'), 158.00); + t.end(); + }); }; module.exports.analyze.housenumberFloatToString = function(test) { test('housenumberFloatToString: invalid', function(t) {