From cc8475c01da66747d0228e2d5fd5fa796e2a8f20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Fio=C5=82ka?= <35661939+JanF01@users.noreply.github.com> Date: Tue, 25 Jul 2023 17:12:56 +0200 Subject: [PATCH 01/11] Adding Poland to the place classifier --- classifier/PlaceClassifier.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/classifier/PlaceClassifier.js b/classifier/PlaceClassifier.js index 67967896..9820b6bc 100644 --- a/classifier/PlaceClassifier.js +++ b/classifier/PlaceClassifier.js @@ -9,7 +9,7 @@ class PlaceClassifier extends WordClassifier { setup () { // load index tokens this.index = {} - libpostal.load(this.index, ['fr', 'de', 'en'], 'place_names.txt') + libpostal.load(this.index, ['fr', 'de', 'en', 'pl'], 'place_names.txt') libpostal.generatePlurals(this.index) } From a746b342405865177eca16eec020118e4a269d5b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Fio=C5=82ka?= <35661939+JanF01@users.noreply.github.com> Date: Tue, 25 Jul 2023 17:18:46 +0200 Subject: [PATCH 02/11] feat(poland): Added support for case prefix-place-adjective --- classifier/scheme/street.js | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/classifier/scheme/street.js b/classifier/scheme/street.js index 9df7ae43..93aa267a 100644 --- a/classifier/scheme/street.js +++ b/classifier/scheme/street.js @@ -216,6 +216,25 @@ module.exports = [ } ] }, + { + // Aleja Wojska Polskiego + confidence: 0.91, + Class: StreetClassification, + scheme: [ + { + is: ['StreetPrefixClassification'], + not: ['StreetClassification', 'IntersectionClassification'] + }, + { + is: ['PlaceClassification'], + not: ['StreetClassification', 'IntersectionClassification'] + }, + { + is: ['AlphaClassification', 'GivenNameClassification', 'PersonClassification'], + not: ['StreetClassification', 'StreetPrefixClassification'] + } + ] + }, { // Boulevard du Général Charles De Gaulle confidence: 0.81, From 5c0412b92dce44c157b09e01f200a79c4294e6ce Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Jan=20Fio=C5=82ka?= <35661939+JanF01@users.noreply.github.com> Date: Tue, 1 Aug 2023 14:24:23 +0200 Subject: [PATCH 03/11] feat(poland): Added keywords support for numeric streets --- .../libpostal/pl/numeric_streets.txt | 65 +++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 resources/pelias/dictionaries/libpostal/pl/numeric_streets.txt diff --git a/resources/pelias/dictionaries/libpostal/pl/numeric_streets.txt b/resources/pelias/dictionaries/libpostal/pl/numeric_streets.txt new file mode 100644 index 00000000..56c0cea0 --- /dev/null +++ b/resources/pelias/dictionaries/libpostal/pl/numeric_streets.txt @@ -0,0 +1,65 @@ +stycznia +lutego +marca +kwietnia +maja +czerwca +lipca +sierpnia +września +października +listopada +grudnia +pułku +brygady +batalionu +poznańskiego +kresowej +kompanii +morskiego +armii +dywizji +dywizjonu +wołyńskiej +korpusu +PLM +KDL +KDD +PP +strzelców +piechoty +eskadry +artylerii +zaodrzańskiego +lekkiej +szwoleżerów +drezdeńskiego +wspaniałych +żródeł +lotnictwa +lotniczego +lotniczej +wojska +polskiego +stawów +straconych +morskiego +kamienic +roku +elbląskiego +kaszubskiego +warszawskiego +sudeckiej +wileńskiej +praskiego +maja/łódzka +parkingowa +lecia +stufen +maja/piłsudskiego +zaodrzańskiego +południka +zakrętów +górnośląskiego +poznańskiego +kamienic From 2b81f99af24b9ecaf9573ae0724b9ce41c386a52 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Fio=C5=82ka?= <35661939+JanF01@users.noreply.github.com> Date: Tue, 1 Aug 2023 14:25:57 +0200 Subject: [PATCH 04/11] feat: Added new NumericStreet Classification --- classification/StreetNumericClassification.js | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 classification/StreetNumericClassification.js diff --git a/classification/StreetNumericClassification.js b/classification/StreetNumericClassification.js new file mode 100644 index 00000000..da61e177 --- /dev/null +++ b/classification/StreetNumericClassification.js @@ -0,0 +1,10 @@ +const 
Classification = require('./Classification') + +class StreetNumericClassification extends Classification { + constructor (confidence, meta) { + super(confidence, meta) + this.label = 'street_numeric' + } +} + +module.exports = StreetNumericClassification From 994faaf2f7f9837a7f37e53ce66c986e2ce3a3c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Fio=C5=82ka?= <35661939+JanF01@users.noreply.github.com> Date: Tue, 1 Aug 2023 14:27:30 +0200 Subject: [PATCH 05/11] Feat: Added a classifier for the NumericStreets --- classifier/StreetNumericClassifier.js | 29 +++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 classifier/StreetNumericClassifier.js diff --git a/classifier/StreetNumericClassifier.js b/classifier/StreetNumericClassifier.js new file mode 100644 index 00000000..d28e6078 --- /dev/null +++ b/classifier/StreetNumericClassifier.js @@ -0,0 +1,29 @@ +const PhraseClassifier = require('./super/PhraseClassifier') +const StreetNumericClassification = require('../classification/StreetNumericClassification') +const libpostal = require('../resources/libpostal/libpostal') + + +// numeric streets languages +// languages which +const prefix = ['pl'] + +class StreetNumericClassifier extends PhraseClassifier { + setup () { + this.index = {} + libpostal.load(this.index, ['pl'], 'numeric_streets.txt', { + lowercase: true, + }) + } + + each (span) { + // skip spans which contain numbers + if (span.contains.numerals) { return } + + // use an inverted index for full token matching as it's O(1) + if (this.index.hasOwnProperty(span.norm)) { + span.classify(new StreetNumericClassification(1)) + } + } +} + +module.exports = StreetNumericClassifier From 936660bb9ae98307fc01d3718667ae5b46c6593f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Fio=C5=82ka?= <35661939+JanF01@users.noreply.github.com> Date: Tue, 1 Aug 2023 14:29:58 +0200 Subject: [PATCH 06/11] feat: Added tests for the NumericClassifier --- classifier/StreetNumericClassifier.test.js | 49 
++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 classifier/StreetNumericClassifier.test.js diff --git a/classifier/StreetNumericClassifier.test.js b/classifier/StreetNumericClassifier.test.js new file mode 100644 index 00000000..746eab5f --- /dev/null +++ b/classifier/StreetNumericClassifier.test.js @@ -0,0 +1,49 @@ +const StreetNumericClassifier = require('./StreetNumericClassifier') +const StreetNumericClassification = require('../classification/StreetNumericClassification') +const Span = require('../tokenization/Span') +const classifier = new StreetNumericClassifier() + +module.exports.tests = {} + +function classify (body) { + let s = new Span(body) + classifier.each(s, null, 1) + return s +} + +module.exports.tests.contains_numerals = (test) => { + test('contains numerals: honours contains.numerals boolean', (t) => { + let s = new Span('example') + s.contains.numerals = true + classifier.each(s, null, 1) + t.deepEqual(s.classifications, {}) + t.end() + }) +} + +module.exports.tests.polish_numeric_street = (test) => { + let valid = [ + 'listopada', 'maja', 'czerwca', + 'pułku', 'strzelców', 'piechoty' + ] + + valid.forEach(token => { + test(`polish numeric street: ${token}`, (t) => { + let s = classify(token) + t.deepEqual(s.classifications, { + StreetNumericClassification: new StreetNumericClassification(token.length > 1 ? 
1.0 : 0.2) + }) + t.end() + }) + }) +} + +module.exports.all = (tape, common) => { + function test (name, testFunction){ + return tape(`StreetNumericClassifier: ${name}`, testFunction) + } + + for(var testCase in module.exports.tests){ + module.exports.tests[testCase](test, common) + } +} From c725d88f9af916cef9c0ab241d30a96a08f9af39 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Fio=C5=82ka?= <35661939+JanF01@users.noreply.github.com> Date: Tue, 1 Aug 2023 14:49:27 +0200 Subject: [PATCH 07/11] feat: Added schemes for NumericStreets --- classifier/scheme/street.js | 57 +++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/classifier/scheme/street.js b/classifier/scheme/street.js index 93aa267a..d582f02e 100644 --- a/classifier/scheme/street.js +++ b/classifier/scheme/street.js @@ -61,6 +61,63 @@ module.exports = [ } ] }, + { + // 11 Listopada + confidence: 0.81, + Class: StreetClassification, + scheme: [ + { + is: ['NumericClassification'], + not: ['StreetClassification', 'IntersectionClassification'] + }, + { + is: ['StreetNumericClassification'], + not: ['StreetClassification', 'StreetPrefixClassification'] + } + ] + }, + { + // 11 Pułku Swoleżerów + confidence: 0.79, + Class: StreetClassification, + scheme: [ + { + is: ['NumericClassification'], + not: ['StreetClassification', 'IntersectionClassification'] + }, + { + is: ['StreetNumericClassification'], + not: ['StreetClassification', 'StreetPrefixClassification'] + }, + { + is: ['StreetNumericClassification', 'AlphaClassification'], + not: ['StreetClassification', 'StreetPrefixClassification'] + } + ] + }, + { + // 11 Pułku Strzelców Podhalańskich + confidence: 0.79, + Class: StreetClassification, + scheme: [ + { + is: ['NumericClassification'], + not: ['StreetClassification', 'IntersectionClassification'] + }, + { + is: ['StreetNumericClassification'], + not: ['StreetClassification', 'StreetPrefixClassification'] + }, + { + is: ['StreetNumericClassification', 
'AlphaClassification'], + not: ['StreetClassification', 'StreetPrefixClassification'] + }, + { + is: ['StreetNumericClassification', 'AlphaClassification'], + not: ['StreetClassification', 'StreetPrefixClassification'] + } + ] + }, { // The Stables confidence: 0.82, From be244ec8862d0e382d269a1a0ef07538a4e70feb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Fio=C5=82ka?= <35661939+JanF01@users.noreply.github.com> Date: Tue, 1 Aug 2023 15:03:05 +0200 Subject: [PATCH 08/11] feat(poland): Added tests for Poland numeric streets --- test/address.pol.test.js | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/test/address.pol.test.js b/test/address.pol.test.js index 981b3f48..041d4cf4 100644 --- a/test/address.pol.test.js +++ b/test/address.pol.test.js @@ -5,6 +5,26 @@ const testcase = (test, common) => { { street: 'Szewska' }, { housenumber: '6' }, { locality: 'Kraków' } ]) + + assert('11 listopada 2/4', [ + { street: '11 listopada' }, { housenumber: '2/4' } + ]) + + assert('1 Pułku Strzelców Bytomskich 2-4', [ + { street: '1 Pułku Strzelców Bytomskich' }, { housenumber: '2-4' } + ]) + + assert('3 Warszawskiego Pułku Strzelców Polskich 11', [ + { street: '3 Warszawskiego Pułku Strzelców Polskich' }, { housenumber: '11' } + ]) + + assert('1 Pułku Szwoleżerów 178', [ + { street: '1 Pułku Szwoleżerów' }, { housenumber: '178' } + ]) + + assert('11 listopada 2-4', [ + { street: '11 listopada' }, { housenumber: '2-4' } + ]) } module.exports.all = (tape, common) => { From 02deec643a87b1d1029bb24f5d49ce5c3ac6c7fd Mon Sep 17 00:00:00 2001 From: JanF01 Date: Fri, 18 Aug 2023 09:58:46 +0200 Subject: [PATCH 09/11] fix --- classifier/scheme/street.js | 141 +++++++++++++++++++++--------------- test/address.pol.test.js | 36 ++++----- 2 files changed, 100 insertions(+), 77 deletions(-) diff --git a/classifier/scheme/street.js b/classifier/scheme/street.js index 0c0ab399..5e093b83 100644 --- a/classifier/scheme/street.js +++ b/classifier/scheme/street.js @@ -61,63 
+61,6 @@ module.exports = [ } ] }, - { - // 11 Listopada - confidence: 0.81, - Class: StreetClassification, - scheme: [ - { - is: ['NumericClassification'], - not: ['StreetClassification', 'IntersectionClassification'] - }, - { - is: ['StreetNumericClassification'], - not: ['StreetClassification', 'StreetPrefixClassification'] - } - ] - }, - { - // 11 Pułku Swoleżerów - confidence: 0.79, - Class: StreetClassification, - scheme: [ - { - is: ['NumericClassification'], - not: ['StreetClassification', 'IntersectionClassification'] - }, - { - is: ['StreetNumericClassification'], - not: ['StreetClassification', 'StreetPrefixClassification'] - }, - { - is: ['StreetNumericClassification', 'AlphaClassification'], - not: ['StreetClassification', 'StreetPrefixClassification'] - } - ] - }, - { - // 11 Pułku Strzelców Podhalańskich - confidence: 0.79, - Class: StreetClassification, - scheme: [ - { - is: ['NumericClassification'], - not: ['StreetClassification', 'IntersectionClassification'] - }, - { - is: ['StreetNumericClassification'], - not: ['StreetClassification', 'StreetPrefixClassification'] - }, - { - is: ['StreetNumericClassification', 'AlphaClassification'], - not: ['StreetClassification', 'StreetPrefixClassification'] - }, - { - is: ['StreetNumericClassification', 'AlphaClassification'], - not: ['StreetClassification', 'StreetPrefixClassification'] - } - ] - }, { // The Stables confidence: 0.82, @@ -311,6 +254,90 @@ module.exports = [ } ] }, + { + // 11 Listopada + confidence: 0.81, + Class: StreetClassification, + scheme: [ + { + is: ['NumericClassification'], + not: ['StreetClassification', 'IntersectionClassification'] + }, + { + is: ['StreetNumericClassification'], + not: ['StreetClassification', 'StreetPrefixClassification'] + } + ] + }, + { + // 11 Pułku Szwoleżerów + confidence: 0.79, + Class: StreetClassification, + scheme: [ + { + is: ['NumericClassification'], + not: ['StreetClassification', 'IntersectionClassification'] + }, + { + is: 
['StreetNumericClassification'], + not: ['StreetClassification', 'StreetPrefixClassification'] + }, + { + is: ['StreetNumericClassification', 'AlphaClassification'], + not: ['StreetClassification', 'StreetPrefixClassification'] + } + ] + }, + { + // 11 Pułku Strzelców Podhalańskich + confidence: 0.79, + Class: StreetClassification, + scheme: [ + { + is: ['NumericClassification'], + not: ['StreetClassification', 'IntersectionClassification'] + }, + { + is: ['StreetNumericClassification'], + not: ['StreetClassification', 'StreetPrefixClassification'] + }, + { + is: ['StreetNumericClassification', 'AlphaClassification'], + not: ['StreetClassification', 'StreetPrefixClassification'] + }, + { + is: ['StreetNumericClassification', 'AlphaClassification'], + not: ['StreetClassification', 'StreetPrefixClassification'] + } + ] + }, + { + // 4 Pułku Piechoty Wojska Polskiego + confidence: 0.79, + Class: StreetClassification, + scheme: [ + { + is: ['NumericClassification'], + not: ['StreetClassification', 'IntersectionClassification'] + }, + { + is: ['StreetNumericClassification'], + not: ['StreetClassification', 'StreetPrefixClassification'] + }, + { + is: ['StreetNumericClassification', 'AlphaClassification'], + not: ['StreetClassification', 'StreetPrefixClassification'] + }, + { + is: ['StreetNumericClassification', 'AlphaClassification'], + not: ['StreetClassification', 'StreetPrefixClassification'] + }, + { + is: ['StreetNumericClassification', 'AlphaClassification'], + not: ['StreetClassification', 'StreetPrefixClassification'] + } + ] + }, { // Boulevard du Général Charles De Gaulle confidence: 0.81, diff --git a/test/address.pol.test.js b/test/address.pol.test.js index adeffa96..2839dfcb 100644 --- a/test/address.pol.test.js +++ b/test/address.pol.test.js @@ -6,26 +6,6 @@ const testcase = (test, common) => { { locality: 'Kraków' } ]) - assert('11 listopada 2/4', [ - { street: '11 listopada' }, { housenumber: '2/4' } - ]) - - assert('1 Pułku Strzelców Bytomskich 2-4', [ 
- { street: '1 Pułku Strzelców Bytomskich' }, { housenumber: '2-4' } - ]) - - assert('3 Warszawskiego Pułku Strzelców Polskich 11', [ - { street: '3 Warszawskiego Pułku Strzelców Polskich' }, { housenumber: '11' } - ]) - - assert('1 Pułku Szwoleżerów 178', [ - { street: '1 Pułku Szwoleżerów' }, { housenumber: '178' } - ]) - - assert('11 listopada 2-4', [ - { street: '11 listopada' }, { housenumber: '2-4' } - ]) - assert('aleja Wojska Polskiego 178', [ { street: 'aleja Wojska Polskiego' }, { housenumber: '178' } ]) @@ -47,6 +27,22 @@ const testcase = (test, common) => { { street: 'Żorska' }, { housenumber: '11' }, { postcode: '47-400' } ]) + + assert('11 listopada 2/4', [ + { street: '11 listopada' }, { housenumber: '2/4' } + ]) + + assert('1 Pułku Strzelców Bytomskich 2-4', [ + { street: '1 Pułku Strzelców Bytomskich' }, { housenumber: '2-4' } + ]) + + assert('3 Warszawskiego Pułku Strzelców Polskich 11', [ + { street: '3 Warszawskiego Pułku Strzelców Polskich' }, { housenumber: '11' } + ]) + + assert('1 Pułku Szwoleżerów 178', [ + { street: '1 Pułku Szwoleżerów' }, { housenumber: '178' } + ]) } module.exports.all = (tape, common) => { From 82b26024bb9109f1a42e371f3bcd597025967262 Mon Sep 17 00:00:00 2001 From: JanF01 Date: Fri, 18 Aug 2023 10:06:00 +0200 Subject: [PATCH 10/11] feat: Added Street Numeric classifier to AddressParser --- parser/AddressParser.js | 2 ++ 1 file changed, 2 insertions(+) diff --git a/parser/AddressParser.js b/parser/AddressParser.js index 34a84cf7..2a1937b4 100644 --- a/parser/AddressParser.js +++ b/parser/AddressParser.js @@ -3,6 +3,7 @@ const AlphaNumericClassifier = require('../classifier/AlphaNumericClassifier') const TokenPositionClassifier = require('../classifier/TokenPositionClassifier') const HouseNumberClassifier = require('../classifier/HouseNumberClassifier') const PostcodeClassifier = require('../classifier/PostcodeClassifier') +const StreetNumericClassifier = require('../classifier/StreetNumericClassifier') const 
StreetPrefixClassifier = require('../classifier/StreetPrefixClassifier') const StreetSuffixClassifier = require('../classifier/StreetSuffixClassifier') const StreetProperNameClassifier = require('../classifier/StreetProperNameClassifier') @@ -52,6 +53,7 @@ class AddressParser extends Parser { new PostcodeClassifier(), new StreetPrefixClassifier(), new StreetSuffixClassifier(), + new StreetNumericClassifier(), new StreetProperNameClassifier(), new RoadTypeClassifier(), new ToponymClassifier(), From 5f28f314dc25723cb3bf38819c1877d7d285e6a3 Mon Sep 17 00:00:00 2001 From: JanF01 Date: Fri, 18 Aug 2023 10:13:18 +0200 Subject: [PATCH 11/11] fix: comments --- classifier/StreetNumericClassifier.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/classifier/StreetNumericClassifier.js b/classifier/StreetNumericClassifier.js index d28e6078..c088e884 100644 --- a/classifier/StreetNumericClassifier.js +++ b/classifier/StreetNumericClassifier.js @@ -4,7 +4,7 @@ const libpostal = require('../resources/libpostal/libpostal') // numeric streets languages -// languages which +// languages which use street names that start with numbers const prefix = ['pl'] class StreetNumericClassifier extends PhraseClassifier {