Initial commit

fbshipit-source-id: 301a10f448e9623aa1c953544f42de562909e192
reinfer · Mar 8, 2017 · 3f8e52e · 3f8e52e
commit 3f8e52e
Show file tree

Hide file tree

Showing 514 changed files with 87,609 additions and 0 deletions.
diff --git a/Duckling/Api.hs b/Duckling/Api.hs
@@ -0,0 +1,62 @@
+-- Copyright (c) 2016-present, Facebook, Inc.
+-- All rights reserved.
+--
+-- This source code is licensed under the BSD-style license found in the
+-- LICENSE file in the root directory of this source tree. An additional grant
+-- of patent rights can be found in the PATENTS file in the same directory.
+
+
+{-# LANGUAGE RecordWildCards #-}
+{-# LANGUAGE NoRebindableSyntax #-}
+
+module Duckling.Api
+  ( analyze
+  , formatToken
+  , parse
+  , supportedDimensions
+  ) where
+
+import Data.HashMap.Strict (HashMap)
+import qualified Data.HashMap.Strict as HashMap
+import Data.HashSet (HashSet)
+import qualified Data.HashSet as HashSet
+import Data.Text (Text)
+import qualified Data.Text as Text
+import Prelude
+import TextShow
+
+import Duckling.Dimensions.Types
+import Duckling.Dimensions
+import Duckling.Engine
+import Duckling.Lang
+import Duckling.Ranking.Classifiers
+import Duckling.Ranking.Rank
+import Duckling.Resolve
+import Duckling.Rules
+import Duckling.Types
+
+-- | Parses `input` and returns a curated list of entities found.
+parse :: Text -> Context -> [Some Dimension] -> [Entity]
+parse input ctx = map (formatToken input) . analyze input ctx . HashSet.fromList
+
+supportedDimensions :: HashMap Lang [Some Dimension]
+supportedDimensions =
+  HashMap.fromList [ (l, allDimensions l) | l <- [minBound..maxBound] ]
+
+-- | Returns a curated list of resolved tokens found
+-- When `targets` is non-empty, returns only tokens of such dimensions.
+analyze :: Text -> Context -> HashSet (Some Dimension) -> [ResolvedToken]
+analyze input context@Context{..} targets =
+  rank (classifiers lang) targets
+  . filter (\(Resolved{node = Node{token = (Token d _)}}) ->
+      HashSet.null targets || HashSet.member (Some d) targets
+    )
+  $ parseAndResolve (rulesFor lang targets) input context
+
+-- | Converts the resolved token to the API format
+formatToken :: Text -> ResolvedToken -> Entity
+formatToken sentence (Resolved (Range start end) (Node{token=Token dimension _}) jsonValue) =
+  Entity (showt dimension) body val start end
+  where
+    body = Text.drop start $ Text.take end sentence
+    val = toJText jsonValue
diff --git a/Duckling/Api/Tests.hs b/Duckling/Api/Tests.hs
@@ -0,0 +1,137 @@
+-- Copyright (c) 2016-present, Facebook, Inc.
+-- All rights reserved.
+--
+-- This source code is licensed under the BSD-style license found in the
+-- LICENSE file in the root directory of this source tree. An additional grant
+-- of patent rights can be found in the PATENTS file in the same directory.
+
+
+{-# LANGUAGE NoRebindableSyntax #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE TupleSections #-}
+
+module Duckling.Api.Tests (tests) where
+
+import qualified Data.HashMap.Strict as HashMap
+import qualified Data.HashSet as HashSet
+import Data.List (sortOn)
+import Data.Text (Text)
+import Prelude
+import Test.Tasty
+import Test.Tasty.HUnit
+
+import Duckling.Api
+import Duckling.Dimensions.Types
+import Duckling.Lang
+import qualified Duckling.Number.Types as TNumber
+import Duckling.Testing.Asserts
+import Duckling.Testing.Types
+import Duckling.Types
+
+tests :: TestTree
+tests = testGroup "API Tests"
+  [ parseTest
+  , rankTest
+  , rangeTest
+  , supportedDimensionsTest
+  ]
+
+parseTest :: TestTree
+parseTest = testCase "Parse Test" $
+  case parse sentence testContext [Some DNumber] of
+    [] -> assertFailure "empty result"
+    (Entity dim body value start end:_) -> do
+      assertEqual "dim" "number" dim
+      assertEqual "body" "42" body
+      assertEqual "value" val value
+      assertEqual "start" 4 start
+      assertEqual "end" 6 end
+  where
+    sentence = "hey 42 there"
+    val = toJText TNumber.NumberValue {TNumber.vValue = 42.0}
+
+rankTest :: TestTree
+rankTest = testGroup "Rank Tests"
+  [ rankFilterTest
+  , rankOrderTest
+  ]
+
+rankFilterTest :: TestTree
+rankFilterTest = testCase "Rank Filter Tests" $ do
+  mapM_ check
+    [ ( "in 2 minutes"
+      , [Some DNumber, Some Duration, Some Time]
+      , [Some Time]
+      )
+    , ( "in 2 minutes, about 42 degrees"
+      , [Some DNumber, Some Temperature, Some Time]
+      , [Some Time, Some Temperature]
+      )
+    , ( "today works... and tomorrow at 9pm too"
+      , [Some DNumber, Some Time]
+      , [Some Time, Some Time]
+      )
+    , ( "between 9:30 and 11:00 on thursday or Saturday and Thanksgiving Day"
+      , [Some DNumber, Some Time]
+      , [Some Time, Some Time, Some Time]
+      )
+    , ("the day after tomorrow 5pm", [Some Time], [Some Time])
+    , ("the day after tomorrow 5pm", [Some Time, Some DNumber], [Some Time])
+    , ("the day after tomorrow 5pm", [], [Some Time])
+    ]
+  where
+    check :: (Text, [Some Dimension], [Some Dimension]) -> IO ()
+    check (sentence, targets, expected) =
+      let go = analyze sentence testContext $ HashSet.fromList targets
+          actual = flip map go $
+                     \(Resolved{node=Node{token=Token d _}}) -> Some d
+      in assertEqual ("wrong winners for " ++ show sentence) expected actual
+
+rankOrderTest :: TestTree
+rankOrderTest = testCase "Rank Order Tests" $ do
+  mapM_ check
+    [ ("tomorrow at 5PM or 8PM", [Some Time])
+    , ("321 12 3456 ... 7", [Some DNumber])
+    , ("42 today 23 tomorrow", [Some DNumber, Some Time])
+    ]
+  where
+    check (s, targets) =
+      let tokens = analyze s testContext $ HashSet.fromList targets
+        in assertEqual "wrong ordering" (sortOn range tokens) tokens
+
+rangeTest :: TestTree
+rangeTest = testCase "Range Tests" $ do
+  mapM_ (analyzedFirstTest testContext) xs
+  where
+    xs = map (\(input, targets, range) -> (input, targets, f range))
+             [ ( "order status 3233763377", [Some PhoneNumber], Range 13 23 )
+             , ( "  3233763377  "         , [Some PhoneNumber], Range  2 12 )
+             , ( " -3233763377"           , [Some PhoneNumber], Range  2 12 )
+             , ( "  now"                  , [Some Time]       , Range  2  5 )
+             , ( "   Monday  "            , [Some Time]       , Range  3  9 )
+             , ( "  next   week "         , [Some Time]       , Range  2 13 )
+             , ( "   42\n\n"              , [Some DNumber]    , Range  3  5 )
+             ]
+    f :: Range -> TestPredicate
+    f expected _ (Resolved {range = actual}) = expected == actual
+
+supportedDimensionsTest :: TestTree
+supportedDimensionsTest = testCase "Supported Dimensions Test" $ do
+  mapM_ check
+    [ ( AR
+      , [ Some Email, Some Finance, Some PhoneNumber, Some Url, Some DNumber
+        , Some Ordinal
+        ]
+      )
+    , ( PL
+      , [ Some Email, Some Finance, Some PhoneNumber, Some Url, Some Duration
+        , Some DNumber, Some Ordinal, Some Time
+        ]
+      )
+    ]
+  where
+    check :: (Lang, [Some Dimension]) -> IO ()
+    check (l, expected) = case HashMap.lookup l supportedDimensions of
+      Nothing -> assertFailure $ "no dimensions for " ++ show l
+      Just actual ->
+        assertEqual ("wrong dimensions for " ++ show l) expected actual
diff --git a/Duckling/Core.hs b/Duckling/Core.hs
@@ -0,0 +1,56 @@
+-- Copyright (c) 2016-present, Facebook, Inc.
+-- All rights reserved.
+--
+-- This source code is licensed under the BSD-style license found in the
+-- LICENSE file in the root directory of this source tree. An additional grant
+-- of patent rights can be found in the PATENTS file in the same directory.
+
+
+{-# LANGUAGE NoRebindableSyntax #-}
+
+-- | Everything needed to run Duckling.
+
+module Duckling.Core
+  ( Context(..)
+  , Dimension(..)
+  , Entity(..)
+  , Lang(..)
+  , Some(..)
+
+  -- Duckling API
+  , parse
+  , supportedDimensions
+
+  -- Reference time builders
+  , currentReftime
+  , makeReftime
+  ) where
+
+import Data.HashMap.Strict (HashMap)
+import qualified Data.HashMap.Strict as HashMap
+import Data.Maybe
+import Data.Text (Text)
+import Data.Time
+import Data.Time.LocalTime.TimeZone.Series
+import Prelude
+
+import Duckling.Api
+import Duckling.Dimensions.Types
+import Duckling.Lang
+import Duckling.Resolve
+import Duckling.Types
+
+-- | Builds a `DucklingTime` for timezone `tz` at `utcTime`.
+-- If no `series` found for `tz`, uses UTC.
+makeReftime :: HashMap Text TimeZoneSeries -> Text -> UTCTime -> DucklingTime
+makeReftime series tz utcTime = DucklingTime $ ZoneSeriesTime ducklingTime tzs
+  where
+    tzs = fromMaybe (TimeZoneSeries utc []) $ HashMap.lookup tz series
+    ducklingTime = toUTC $ utcToLocalTime' tzs utcTime
+
+-- | Builds a `DucklingTime` for timezone `tz` at current time.
+-- If no `series` found for `tz`, uses UTC.
+currentReftime :: HashMap Text TimeZoneSeries -> Text -> IO DucklingTime
+currentReftime series tz = do
+  utcNow <- getCurrentTime
+  return $ makeReftime series tz utcNow
diff --git a/Duckling/Data/TimeZone.hs b/Duckling/Data/TimeZone.hs
@@ -0,0 +1,71 @@
+-- Copyright (c) 2016-present, Facebook, Inc.
+-- All rights reserved.
+--
+-- This source code is licensed under the BSD-style license found in the
+-- LICENSE file in the root directory of this source tree. An additional grant
+-- of patent rights can be found in the PATENTS file in the same directory.
+
+
+{-# LANGUAGE OverloadedStrings #-}
+
+module Duckling.Data.TimeZone
+  ( loadTimeZoneSeries
+  ) where
+
+import qualified Control.Exception as E
+import Control.Monad.Extra
+import Data.Either
+import Data.HashMap.Strict (HashMap)
+import qualified Data.HashMap.Strict as HashMap
+import qualified Data.HashSet as HashSet
+import Data.String
+import qualified Data.Text as Text
+import Data.Time (TimeZone(..))
+import Data.Text (Text)
+import Data.Time.LocalTime.TimeZone.Olson
+import Data.Time.LocalTime.TimeZone.Series
+import System.Directory
+import System.FilePath
+
+import Prelude
+
+-- | Reference implementation for pulling TimeZoneSeries data from local
+-- Olson files.
+-- Many linux distros have Olson data in "/usr/share/zoneinfo/"
+loadTimeZoneSeries :: FilePath -> IO (HashMap Text TimeZoneSeries)
+loadTimeZoneSeries base = do
+  files <- getFiles base
+  tzSeries <- mapM parseOlsonFile files
+  -- This data is large, will live a long time, and essentially be constant,
+  -- so it's a perfect candidate for compact regions
+  return $ HashMap.fromList $ rights tzSeries
+  where
+    -- Multiple versions of the data can exist. We intentionally ignore the
+    -- posix and right formats
+    ignored_dirs = HashSet.fromList $ map (base </>)
+      [ "posix", "right" ]
+
+    -- Recursively crawls a directory to list every file underneath it,
+    -- ignoring certain directories as needed
+    getFiles :: FilePath -> IO [FilePath]
+    getFiles dir = do
+      fsAll <- getDirectoryContents dir
+      let
+        fs = filter notDotFile fsAll
+        full_fs = map (dir </>) fs
+      (dirs, files) <- partitionM doesDirectoryExist full_fs
+
+      subdirs <- concatMapM getFiles
+        [ d | d <- dirs, not $ HashSet.member d ignored_dirs ]
+
+      return $ files ++ subdirs
+
+    -- Attempts to read a file in Olson format and returns its
+    -- canonical name (file path relative to the base) and the data
+    parseOlsonFile :: FilePath
+                   -> IO (Either E.ErrorCall (Text, TimeZoneSeries))
+    parseOlsonFile f = E.try $ do
+      r <- getTimeZoneSeriesFromOlsonFile f
+      return (Text.pack $ makeRelative base f, r)
+
+    notDotFile s = not $ elem s [".", ".."]