diff --git a/classification/StreetNumericClassification.js b/classification/StreetNumericClassification.js
new file mode 100644
index 00000000..da61e177
--- /dev/null
+++ b/classification/StreetNumericClassification.js
@@ -0,0 +1,11 @@
+const Classification = require('./Classification')
+
+// marks a span as part of a street name which starts with a number (eg. '11 Listopada')
+class StreetNumericClassification extends Classification {
+  constructor (confidence, meta) {
+    super(confidence, meta)
+    this.label = 'street_numeric'
+  }
+}
+
+module.exports = StreetNumericClassification
diff --git a/classifier/StreetNumericClassifier.js b/classifier/StreetNumericClassifier.js
new file mode 100644
index 00000000..c088e884
--- /dev/null
+++ b/classifier/StreetNumericClassifier.js
@@ -0,0 +1,34 @@
+const PhraseClassifier = require('./super/PhraseClassifier')
+const StreetNumericClassification = require('../classification/StreetNumericClassification')
+const libpostal = require('../resources/libpostal/libpostal')
+
+// languages whose street names may start with a number (eg. '11 Listopada')
+const languages = ['pl']
+
+/**
+ * Classifies spans found in the per-language 'numeric_streets.txt'
+ * dictionaries, ie. words which follow the leading number in a
+ * street name such as '11 Listopada'.
+ */
+class StreetNumericClassifier extends PhraseClassifier {
+  // build the lookup index from the dictionaries of the supported languages
+  setup () {
+    this.index = {}
+    libpostal.load(this.index, languages, 'numeric_streets.txt', {
+      lowercase: true
+    })
+  }
+
+  // classify a span when its normalized form is a dictionary entry
+  each (span) {
+    // skip spans which contain numbers
+    if (span.contains.numerals) { return }
+
+    // use an inverted index for full token matching as it's O(1)
+    if (this.index.hasOwnProperty(span.norm)) {
+      span.classify(new StreetNumericClassification(1))
+    }
+  }
+}
+
+module.exports = StreetNumericClassifier
diff --git a/classifier/StreetNumericClassifier.test.js b/classifier/StreetNumericClassifier.test.js
new file mode 100644
index 00000000..746eab5f
--- /dev/null
+++ b/classifier/StreetNumericClassifier.test.js
@@ -0,0 +1,52 @@
+const StreetNumericClassifier = require('./StreetNumericClassifier')
+const StreetNumericClassification = require('../classification/StreetNumericClassification')
+const Span = require('../tokenization/Span')
+const classifier = new StreetNumericClassifier()
+
+module.exports.tests = {}
+
+// run the classifier over a single span built from $body
+function classify (body) {
+  let s = new Span(body)
+  classifier.each(s, null, 1)
+  return s
+}
+
+module.exports.tests.contains_numerals = (test) => {
+  test('contains numerals: honours contains.numerals boolean', (t) => {
+    // use a dictionary word so that only the numerals guard can prevent a match
+    let s = new Span('listopada')
+    s.contains.numerals = true
+    classifier.each(s, null, 1)
+    t.deepEqual(s.classifications, {})
+    t.end()
+  })
+}
+
+module.exports.tests.polish_numeric_street = (test) => {
+  let valid = [
+    'listopada', 'maja', 'czerwca',
+    'pułku', 'strzelców', 'piechoty'
+  ]
+
+  valid.forEach(token => {
+    test(`polish numeric street: ${token}`, (t) => {
+      let s = classify(token)
+      // the classifier assigns a fixed confidence of 1 to every match
+      t.deepEqual(s.classifications, {
+        StreetNumericClassification: new StreetNumericClassification(1.0)
+      })
+      t.end()
+    })
+  })
+}
+
+module.exports.all = (tape, common) => {
+  function test (name, testFunction){
+    return tape(`StreetNumericClassifier: ${name}`, testFunction)
+  }
+
+  for(var testCase in module.exports.tests){
+    module.exports.tests[testCase](test, common)
+  }
+}
diff --git a/classifier/scheme/street.js b/classifier/scheme/street.js
index d06f1a89..5e093b83 100644
--- a/classifier/scheme/street.js
+++ b/classifier/scheme/street.js
@@ -254,6 +254,90 @@ module.exports = [
       }
     ]
   },
+  {
+    // 11 Listopada
+    confidence: 0.81,
+    Class: StreetClassification,
+    scheme: [
+      {
+        is: ['NumericClassification'],
+        not: ['StreetClassification', 'IntersectionClassification']
+      },
+      {
+        is: ['StreetNumericClassification'],
+        not: ['StreetClassification', 'StreetPrefixClassification']
+      }
+    ]
+  },
+  {
+    // 11 Pułku Szwoleżerów
+    confidence: 0.79,
+    Class: StreetClassification,
+    scheme: [
+      {
+        is: ['NumericClassification'],
+        not: ['StreetClassification', 'IntersectionClassification']
+      },
+      {
+        is: ['StreetNumericClassification'],
+        not: ['StreetClassification', 'StreetPrefixClassification']
+      },
+      {
+        is: ['StreetNumericClassification', 'AlphaClassification'],
+        not: ['StreetClassification', 'StreetPrefixClassification']
+      }
+    ]
+  },
+  {
+    // 11 Pułku Strzelców Podhalańskich
+    confidence: 0.79,
+    Class: StreetClassification,
+    scheme: [
+      {
+        is: ['NumericClassification'],
+        not: ['StreetClassification', 'IntersectionClassification']
+      },
+      {
+        is: ['StreetNumericClassification'],
+        not: ['StreetClassification', 'StreetPrefixClassification']
+      },
+      {
+        is: ['StreetNumericClassification', 'AlphaClassification'],
+        not: ['StreetClassification', 'StreetPrefixClassification']
+      },
+      {
+        is: ['StreetNumericClassification', 'AlphaClassification'],
+        not: ['StreetClassification', 'StreetPrefixClassification']
+      }
+    ]
+  },
+  {
+    // 4 Pułku Piechoty Wojska Polskiego
+    confidence: 0.79,
+    Class: StreetClassification,
+    scheme: [
+      {
+        is: ['NumericClassification'],
+        not: ['StreetClassification', 'IntersectionClassification']
+      },
+      {
+        is: ['StreetNumericClassification'],
+        not: ['StreetClassification', 'StreetPrefixClassification']
+      },
+      {
+        is: ['StreetNumericClassification', 'AlphaClassification'],
+        not: ['StreetClassification', 'StreetPrefixClassification']
+      },
+      {
+        is: ['StreetNumericClassification', 'AlphaClassification'],
+        not: ['StreetClassification', 'StreetPrefixClassification']
+      },
+      {
+        is: ['StreetNumericClassification', 'AlphaClassification'],
+        not: ['StreetClassification', 'StreetPrefixClassification']
+      }
+    ]
+  },
   {
     // Boulevard du Général Charles De Gaulle
     confidence: 0.81,
diff --git a/parser/AddressParser.js b/parser/AddressParser.js
index 34a84cf7..2a1937b4 100644
--- a/parser/AddressParser.js
+++ b/parser/AddressParser.js
@@ -3,6 +3,7 @@ const AlphaNumericClassifier = require('../classifier/AlphaNumericClassifier')
 const TokenPositionClassifier = require('../classifier/TokenPositionClassifier')
 const HouseNumberClassifier = require('../classifier/HouseNumberClassifier')
 const PostcodeClassifier = require('../classifier/PostcodeClassifier')
+const StreetNumericClassifier = require('../classifier/StreetNumericClassifier')
 const StreetPrefixClassifier = require('../classifier/StreetPrefixClassifier')
 const StreetSuffixClassifier = require('../classifier/StreetSuffixClassifier')
 const StreetProperNameClassifier = require('../classifier/StreetProperNameClassifier')
@@ -52,6 +53,7 @@ class AddressParser extends Parser {
         new PostcodeClassifier(),
         new StreetPrefixClassifier(),
         new StreetSuffixClassifier(),
+        new StreetNumericClassifier(),
         new StreetProperNameClassifier(),
         new RoadTypeClassifier(),
         new ToponymClassifier(),
diff --git a/resources/pelias/dictionaries/libpostal/pl/numeric_streets.txt b/resources/pelias/dictionaries/libpostal/pl/numeric_streets.txt
new file mode 100644
index 00000000..56c0cea0
--- /dev/null
+++ b/resources/pelias/dictionaries/libpostal/pl/numeric_streets.txt
@@ -0,0 +1,65 @@
+stycznia
+lutego
+marca
+kwietnia
+maja
+czerwca
+lipca
+sierpnia
+września
+października
+listopada
+grudnia
+pułku
+brygady
+batalionu
+poznańskiego
+kresowej
+kompanii
+morskiego
+armii
+dywizji
+dywizjonu
+wołyńskiej
+korpusu
+PLM
+KDL
+KDD
+PP
+strzelców
+piechoty
+eskadry
+artylerii
+zaodrzańskiego
+lekkiej
+szwoleżerów
+drezdeńskiego
+wspaniałych
+żródeł
+lotnictwa
+lotniczego
+lotniczej
+wojska
+polskiego
+stawów
+straconych
+morskiego
+kamienic
+roku
+elbląskiego
+kaszubskiego
+warszawskiego
+sudeckiej
+wileńskiej
+praskiego
+maja/łódzka
+parkingowa
+lecia
+stufen
+maja/piłsudskiego
+zaodrzańskiego
+południka
+zakrętów
+górnośląskiego
+poznańskiego
+kamienic
diff --git a/test/address.pol.test.js b/test/address.pol.test.js
index 19c52b48..2839dfcb 100644
--- a/test/address.pol.test.js
+++ b/test/address.pol.test.js
@@ -27,6 +27,22 @@ const testcase = (test, common) => {
     { street: 'Żorska' }, { housenumber: '11' },
     { postcode: '47-400' }
   ])
+
+  assert('11 listopada 2/4', [
+    { street: '11 listopada' }, { housenumber: '2/4' }
+  ])
+
+  assert('1 Pułku Strzelców Bytomskich 2-4', [
+    { street: '1 Pułku Strzelców Bytomskich' }, { housenumber: '2-4' }
+  ])
+
+  assert('3 Warszawskiego Pułku Strzelców Polskich 11', [
+    { street: '3 Warszawskiego Pułku Strzelców Polskich' }, { housenumber: '11' }
+  ])
+
+  assert('1 Pułku Szwoleżerów 178', [
+    { street: '1 Pułku Szwoleżerów' }, { housenumber: '178' }
+  ])
 }
 
 module.exports.all = (tape, common) => {