From 392e584d1bc0ebe76da348561071767c6460d7d4 Mon Sep 17 00:00:00 2001 From: Hamza Kwisaba Date: Fri, 18 Jan 2019 00:52:49 -0800 Subject: [PATCH] swahili numerical support Summary: Adding support for swahili numerals Reviewed By: patapizza Differential Revision: D13637092 fbshipit-source-id: 6fe0facfa74caae6fd00e0e84b09571aca616f21 --- Duckling/Dimensions.hs | 2 + Duckling/Dimensions/SW.hs | 18 +++++ Duckling/Locale.hs | 1 + Duckling/Numeral/SW/Corpus.hs | 87 +++++++++++++++++++++ Duckling/Numeral/SW/Rules.hs | 105 ++++++++++++++++++++++++++ Duckling/Ranking/Classifiers.hs | 2 + Duckling/Ranking/Classifiers/SW_XX.hs | 22 ++++++ Duckling/Rules.hs | 4 + Duckling/Rules/SW.hs | 46 +++++++++++ duckling.cabal | 5 ++ exe/Duckling/Ranking/Generate.hs | 1 + tests/Duckling/Numeral/SW/Tests.hs | 22 ++++++ tests/Duckling/Numeral/Tests.hs | 2 + 13 files changed, 317 insertions(+) create mode 100644 Duckling/Dimensions/SW.hs create mode 100644 Duckling/Numeral/SW/Corpus.hs create mode 100644 Duckling/Numeral/SW/Rules.hs create mode 100644 Duckling/Ranking/Classifiers/SW_XX.hs create mode 100644 Duckling/Rules/SW.hs create mode 100644 tests/Duckling/Numeral/SW/Tests.hs diff --git a/Duckling/Dimensions.hs b/Duckling/Dimensions.hs index 6c11729b0..aafce5c7a 100644 --- a/Duckling/Dimensions.hs +++ b/Duckling/Dimensions.hs @@ -58,6 +58,7 @@ import qualified Duckling.Dimensions.PT as PTDimensions import qualified Duckling.Dimensions.RO as RODimensions import qualified Duckling.Dimensions.RU as RUDimensions import qualified Duckling.Dimensions.SV as SVDimensions +import qualified Duckling.Dimensions.SW as SWDimensions import qualified Duckling.Dimensions.TA as TADimensions import qualified Duckling.Dimensions.TR as TRDimensions import qualified Duckling.Dimensions.UK as UKDimensions @@ -131,6 +132,7 @@ langDimensions PT = PTDimensions.allDimensions langDimensions RO = RODimensions.allDimensions langDimensions RU = RUDimensions.allDimensions langDimensions SV = 
SVDimensions.allDimensions +langDimensions SW = SWDimensions.allDimensions langDimensions TA = TADimensions.allDimensions langDimensions TR = TRDimensions.allDimensions langDimensions UK = UKDimensions.allDimensions diff --git a/Duckling/Dimensions/SW.hs b/Duckling/Dimensions/SW.hs new file mode 100644 index 000000000..0ed7fe4ec --- /dev/null +++ b/Duckling/Dimensions/SW.hs @@ -0,0 +1,18 @@ +-- Copyright (c) 2016-present, Facebook, Inc. +-- All rights reserved. +-- +-- This source code is licensed under the BSD-style license found in the +-- LICENSE file in the root directory of this source tree. An additional grant +-- of patent rights can be found in the PATENTS file in the same directory. + +-- | Dimensions listed for the SW language; 'allDimensions' holds only 'Numeral'. +module Duckling.Dimensions.SW + ( allDimensions + ) where + +import Duckling.Dimensions.Types + +allDimensions :: [Some Dimension] +allDimensions = + [ This Numeral + ] diff --git a/Duckling/Locale.hs b/Duckling/Locale.hs index 10c3eb95d..edca3405f 100644 --- a/Duckling/Locale.hs +++ b/Duckling/Locale.hs @@ -70,6 +70,7 @@ data Lang | RO | RU | SV + | SW | TA | TR | UK diff --git a/Duckling/Numeral/SW/Corpus.hs b/Duckling/Numeral/SW/Corpus.hs new file mode 100644 index 000000000..f5e0bab77 --- /dev/null +++ b/Duckling/Numeral/SW/Corpus.hs @@ -0,0 +1,87 @@ +-- Copyright (c) 2016-present, Facebook, Inc. +-- All rights reserved. +-- +-- This source code is licensed under the BSD-style license found in the +-- LICENSE file in the root directory of this source tree. An additional grant +-- of patent rights can be found in the PATENTS file in the same directory. 
+ +-- Example corpus for SW numerals: 0..10 and the tens 20..90; it has no composite ("tens na unit") examples. +{-# LANGUAGE OverloadedStrings #-} + +module Duckling.Numeral.SW.Corpus + ( corpus ) where + +import Data.String +import Prelude + +import Duckling.Locale +import Duckling.Numeral.Types +import Duckling.Resolve +import Duckling.Testing.Types + +corpus :: Corpus +corpus = (testContext {locale = makeLocale SW Nothing}, testOptions, allExamples) + +allExamples :: [Example] +allExamples = concat + [ examples (NumeralValue 0) + [ "0" + , "sufuri" + , "zero" + ] + , examples (NumeralValue 1) + [ "moja" + ] + , examples (NumeralValue 2) + [ "mbili" + ] + , examples (NumeralValue 3) + [ "tatu" + ] + , examples (NumeralValue 4) + [ "nne" + ] + , examples (NumeralValue 5) + [ "tano" + ] + , examples (NumeralValue 6) + [ "sita" + ] + , examples (NumeralValue 7) + [ "saba" + ] + , examples (NumeralValue 8) + [ "nane" + ] + , examples (NumeralValue 9) + [ "tisa" + ] + , examples (NumeralValue 10) + [ "kumi" + ] + , examples (NumeralValue 20) + [ "ishirini" + ] + , examples (NumeralValue 30) + [ "thelathini" + ] + , examples (NumeralValue 40) + [ "arubaini" + , "arobaini" + ] + , examples (NumeralValue 50) + [ "hamsini" + ] + , examples (NumeralValue 60) + [ "sitini" + ] + , examples (NumeralValue 70) + [ "sabini" + ] + , examples (NumeralValue 80) + [ "themanini" + ] + , examples (NumeralValue 90) + [ "tisini" + ] + ] diff --git a/Duckling/Numeral/SW/Rules.hs b/Duckling/Numeral/SW/Rules.hs new file mode 100644 index 000000000..f98fa5d33 --- /dev/null +++ b/Duckling/Numeral/SW/Rules.hs @@ -0,0 +1,105 @@ +-- Copyright (c) 2016-present, Facebook, Inc. +-- All rights reserved. +-- +-- This source code is licensed under the BSD-style license found in the +-- LICENSE file in the root directory of this source tree. An additional grant +-- of patent rights can be found in the PATENTS file in the same directory. 
+
+
+{-# LANGUAGE GADTs #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE NoRebindableSyntax #-}
+
+module Duckling.Numeral.SW.Rules
+  ( rules
+  ) where
+
+import Data.HashMap.Strict (HashMap)
+import Data.Maybe
+import Data.String
+import Data.Text (Text)
+import Prelude
+import qualified Data.HashMap.Strict as HashMap
+import qualified Data.Text as Text
+
+import Duckling.Dimensions.Types
+import Duckling.Numeral.Helpers
+import Duckling.Numeral.Types (NumeralData (..))
+import Duckling.Regex.Types
+import Duckling.Types
+import qualified Duckling.Numeral.Types as TNumeral
+
+ruleNumeralMap :: HashMap Text Integer
+ruleNumeralMap = HashMap.fromList
+  [ ( "sufuri", 0 )
+  , ( "zero", 0 )
+  , ( "moja", 1 )
+  , ( "mbili", 2 )
+  , ( "tatu", 3 )
+  , ( "nne", 4 )
+  , ( "tano", 5)
+  , ( "sita", 6)
+  , ( "saba", 7)
+  , ( "nane", 8)
+  , ( "tisa", 9)
+  , ( "kumi", 10)
+  ]
+
+ruleNumeral :: Rule
+ruleNumeral = Rule
+  { name = "number (0..10)"
+  , pattern =
+    [ regex "(sufuri|zero|moja|mbili|tatu|nne|tano|sita|saba|nane|tisa|kumi)"
+    ]
+  , prod = \tokens -> case tokens of
+      (Token RegexMatch (GroupMatch (match:_)):_) ->
+        HashMap.lookup (Text.toLower match) ruleNumeralMap >>= integer
+      _ -> Nothing
+  }
+
+tensMap :: HashMap Text Integer
+tensMap = HashMap.fromList
+  [ ( "ishirini", 20 )
+  , ( "thelathini", 30 )
+  , ( "arubaini", 40 )
+  , ( "arobaini", 40 )
+  , ( "hamsini", 50 )
+  , ( "sitini", 60 )
+  , ( "sabini", 70 )
+  , ( "themanini", 80 )
+  , ( "tisini", 90 )
+  ]
+
+ruleTens :: Rule
+ruleTens = Rule
+  { name = "integer (20,30..90)"
+  , pattern =
+    [ regex "(ishirini|thelathini|arubaini|arobaini|hamsini|sitini|sabini|themanini|tisini)"
+    ]
+  , prod = \tokens -> case tokens of
+      (Token RegexMatch (GroupMatch (match:_)):_) ->
+        HashMap.lookup (Text.toLower match) tensMap >>= integer
+      _ -> Nothing
+  }
+
+-- | Sums a multiple of ten (10..90) and the number after the connector "na",
+-- e.g. "ishirini na moja" = 20 + 1; 'prod' gets one token per pattern item.
+ruleCompositeTens :: Rule
+ruleCompositeTens = Rule
+  { name = "integer 11..19 21..29 .. 91..99"
+  , pattern = [oneOf [10,20..90], regex "-?na-?", numberBetween 1 10]
+  , prod = \tokens -> case tokens of
+      (Token Numeral NumeralData{TNumeral.value = v1}:
+       _: -- token matched by the "na" regex; it carries no numeric value
+       Token Numeral NumeralData{TNumeral.value = v2}:
+       _) -> double $ v1 + v2
+      _ -> Nothing
+  }
+
+-- | All numeral rules defined in this module for SW.
+rules :: [Rule]
+rules =
+  [ ruleNumeral
+  , ruleTens
+  , ruleCompositeTens
+  ]
diff --git a/Duckling/Ranking/Classifiers.hs b/Duckling/Ranking/Classifiers.hs
index f50b0a030..85c4106bc 100644
--- a/Duckling/Ranking/Classifiers.hs
+++ b/Duckling/Ranking/Classifiers.hs
@@ -52,6 +52,7 @@ import qualified Duckling.Ranking.Classifiers.PT_XX as PT_XXClassifiers
 import qualified Duckling.Ranking.Classifiers.RO_XX as RO_XXClassifiers
 import qualified Duckling.Ranking.Classifiers.RU_XX as RU_XXClassifiers
 import qualified Duckling.Ranking.Classifiers.SV_XX as SV_XXClassifiers
+import qualified Duckling.Ranking.Classifiers.SW_XX as SW_XXClassifiers
 import qualified Duckling.Ranking.Classifiers.TA_XX as TA_XXClassifiers
 import qualified Duckling.Ranking.Classifiers.TR_XX as TR_XXClassifiers
 import qualified Duckling.Ranking.Classifiers.UK_XX as UK_XXClassifiers
@@ -97,6 +98,7 @@ classifiers (Locale PT _) = PT_XXClassifiers.classifiers
 classifiers (Locale RO _) = RO_XXClassifiers.classifiers
 classifiers (Locale RU _) = RU_XXClassifiers.classifiers
 classifiers (Locale SV _) = SV_XXClassifiers.classifiers
+classifiers (Locale SW _) = SW_XXClassifiers.classifiers
 classifiers (Locale TA _) = TA_XXClassifiers.classifiers
 classifiers (Locale TR _) = TR_XXClassifiers.classifiers
 classifiers (Locale UK _) = UK_XXClassifiers.classifiers
diff --git a/Duckling/Ranking/Classifiers/SW_XX.hs b/Duckling/Ranking/Classifiers/SW_XX.hs
new file mode 100644
index 000000000..d6ba35ad1
--- /dev/null
+++ b/Duckling/Ranking/Classifiers/SW_XX.hs
@@ -0,0 +1,22 @@
+-- Copyright (c) 2016-present, Facebook, Inc.
+-- All rights reserved.
+--
+-- This source code is licensed under the BSD-style license found in the
+-- LICENSE file in the root directory of this source tree.
An additional grant +-- of patent rights can be found in the PATENTS file in the same directory. + +----------------------------------------------------------------- +-- Auto-generated by regenClassifiers +-- +-- DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING +-- @generated +----------------------------------------------------------------- +{-# LANGUAGE OverloadedStrings #-} +module Duckling.Ranking.Classifiers.SW_XX (classifiers) where +import Data.String +import Prelude +import qualified Data.HashMap.Strict as HashMap +import Duckling.Ranking.Types +-- | Classifier table for SW; it is built from an empty list. +classifiers :: Classifiers +classifiers = HashMap.fromList [] \ No newline at end of file diff --git a/Duckling/Rules.hs b/Duckling/Rules.hs index 44df0ba37..2d6a8cbab 100644 --- a/Duckling/Rules.hs +++ b/Duckling/Rules.hs @@ -59,6 +59,7 @@ import qualified Duckling.Rules.PT as PTRules import qualified Duckling.Rules.RO as RORules import qualified Duckling.Rules.RU as RURules import qualified Duckling.Rules.SV as SVRules +import qualified Duckling.Rules.SW as SWRules import qualified Duckling.Rules.TA as TARules import qualified Duckling.Rules.TR as TRRules import qualified Duckling.Rules.UK as UKRules @@ -122,6 +123,7 @@ defaultRules PT = PTRules.defaultRules defaultRules RO = RORules.defaultRules defaultRules RU = RURules.defaultRules defaultRules SV = SVRules.defaultRules +defaultRules SW = SWRules.defaultRules defaultRules TA = TARules.defaultRules defaultRules TR = TRRules.defaultRules defaultRules UK = UKRules.defaultRules @@ -165,6 +167,7 @@ localeRules PT = PTRules.localeRules localeRules RO = RORules.localeRules localeRules RU = RURules.localeRules localeRules SV = SVRules.localeRules +localeRules SW = SWRules.localeRules localeRules TA = TARules.localeRules localeRules TR = TRRules.localeRules localeRules UK = UKRules.localeRules @@ -208,6 +211,7 @@ langRules PT = PTRules.langRules langRules RO = RORules.langRules langRules RU = RURules.langRules langRules SV = SVRules.langRules 
+langRules SW = SWRules.langRules langRules TA = TARules.langRules langRules TR = TRRules.langRules langRules UK = UKRules.langRules diff --git a/Duckling/Rules/SW.hs b/Duckling/Rules/SW.hs new file mode 100644 index 000000000..5962bb6c3 --- /dev/null +++ b/Duckling/Rules/SW.hs @@ -0,0 +1,46 @@ +-- Copyright (c) 2016-present, Facebook, Inc. +-- All rights reserved. +-- +-- This source code is licensed under the BSD-style license found in the +-- LICENSE file in the root directory of this source tree. An additional grant +-- of patent rights can be found in the PATENTS file in the same directory. + + +{-# LANGUAGE GADTs #-} + +-- | Rule tables for SW: 'langRules' is non-empty only for 'Numeral'; custom dimensions use their own rules. +module Duckling.Rules.SW + ( defaultRules + , langRules + , localeRules + ) where + +import Duckling.Dimensions.Types +import Duckling.Locale +import Duckling.Types +import qualified Duckling.Numeral.SW.Rules as Numeral + +defaultRules :: Some Dimension -> [Rule] +defaultRules = langRules + +localeRules :: Region -> Some Dimension -> [Rule] +localeRules region (This (CustomDimension dim)) = dimLocaleRules region dim +localeRules _ _ = [] + +langRules :: Some Dimension -> [Rule] +langRules (This AmountOfMoney) = [] +langRules (This CreditCardNumber) = [] +langRules (This Distance) = [] +langRules (This Duration) = [] +langRules (This Email) = [] +langRules (This Numeral) = Numeral.rules +langRules (This Ordinal) = [] +langRules (This PhoneNumber) = [] +langRules (This Quantity) = [] +langRules (This RegexMatch) = [] +langRules (This Temperature) = [] +langRules (This Time) = [] +langRules (This TimeGrain) = [] +langRules (This Url) = [] +langRules (This Volume) = [] +langRules (This (CustomDimension dim)) = dimLangRules SW dim diff --git a/duckling.cabal b/duckling.cabal index 2ec285ba6..44294ac2c 100644 --- a/duckling.cabal +++ b/duckling.cabal @@ -85,6 +85,7 @@ library , Duckling.Rules.RO , Duckling.Rules.RU , Duckling.Rules.SV + , Duckling.Rules.SW , Duckling.Rules.TA , Duckling.Rules.TR , Duckling.Rules.UK @@ -138,6 +139,7 @@ library , 
Duckling.Ranking.Classifiers.RO_XX , Duckling.Ranking.Classifiers.RU_XX , Duckling.Ranking.Classifiers.SV_XX + , Duckling.Ranking.Classifiers.SW_XX , Duckling.Ranking.Classifiers.TA_XX , Duckling.Ranking.Classifiers.TR_XX , Duckling.Ranking.Classifiers.UK_XX @@ -442,6 +444,8 @@ library , Duckling.Numeral.RU.Rules , Duckling.Numeral.SV.Corpus , Duckling.Numeral.SV.Rules + , Duckling.Numeral.SW.Corpus + , Duckling.Numeral.SW.Rules , Duckling.Numeral.TA.Corpus , Duckling.Numeral.TA.Rules , Duckling.Numeral.TR.Corpus @@ -924,6 +928,7 @@ test-suite duckling-test , Duckling.Numeral.RO.Tests , Duckling.Numeral.RU.Tests , Duckling.Numeral.SV.Tests + , Duckling.Numeral.SW.Tests , Duckling.Numeral.TA.Tests , Duckling.Numeral.TR.Tests , Duckling.Numeral.UK.Tests diff --git a/exe/Duckling/Ranking/Generate.hs b/exe/Duckling/Ranking/Generate.hs index a2e63953d..cfecc057f 100644 --- a/exe/Duckling/Ranking/Generate.hs +++ b/exe/Duckling/Ranking/Generate.hs @@ -206,6 +206,7 @@ getCorpusForLang PT = PTTime.corpus getCorpusForLang RO = ROTime.corpus getCorpusForLang RU = (testContext, testOptions, []) getCorpusForLang SV = SVTime.corpus +getCorpusForLang SW = (testContext, testOptions, []) getCorpusForLang TA = (testContext, testOptions, []) getCorpusForLang TR = (testContext, testOptions, []) getCorpusForLang UK = UKTime.corpus diff --git a/tests/Duckling/Numeral/SW/Tests.hs b/tests/Duckling/Numeral/SW/Tests.hs new file mode 100644 index 000000000..89f437537 --- /dev/null +++ b/tests/Duckling/Numeral/SW/Tests.hs @@ -0,0 +1,22 @@ +-- Copyright (c) 2016-present, Facebook, Inc. +-- All rights reserved. +-- +-- This source code is licensed under the BSD-style license found in the +-- LICENSE file in the root directory of this source tree. An additional grant +-- of patent rights can be found in the PATENTS file in the same directory. 
+ +module Duckling.Numeral.SW.Tests + ( tests ) where + +import Data.String +import Prelude +import Test.Tasty + +import Duckling.Dimensions.Types +import Duckling.Numeral.SW.Corpus +import Duckling.Testing.Asserts +-- | "SW Tests" group: passes the SW numeral 'corpus' to 'makeCorpusTest' for the 'Numeral' dimension. +tests :: TestTree +tests = testGroup "SW Tests" + [ makeCorpusTest [This Numeral] corpus + ] diff --git a/tests/Duckling/Numeral/Tests.hs b/tests/Duckling/Numeral/Tests.hs index 84002d46d..0e65b20c8 100644 --- a/tests/Duckling/Numeral/Tests.hs +++ b/tests/Duckling/Numeral/Tests.hs @@ -48,6 +48,7 @@ import qualified Duckling.Numeral.PT.Tests as PT import qualified Duckling.Numeral.RO.Tests as RO import qualified Duckling.Numeral.RU.Tests as RU import qualified Duckling.Numeral.SV.Tests as SV +import qualified Duckling.Numeral.SW.Tests as SW import qualified Duckling.Numeral.TA.Tests as TA import qualified Duckling.Numeral.TR.Tests as TR import qualified Duckling.Numeral.UK.Tests as UK @@ -92,6 +93,7 @@ tests = testGroup "Numeral Tests" , RO.tests , RU.tests , SV.tests + , SW.tests , TA.tests , TR.tests , UK.tests