Skip to content

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
fbshipit-source-id: 301a10f448e9623aa1c953544f42de562909e192
  • Loading branch information
FBShipIt committed Mar 8, 2017
0 parents commit 3f8e52e
Show file tree
Hide file tree
Showing 514 changed files with 87,609 additions and 0 deletions.
62 changes: 62 additions & 0 deletions Duckling/Api.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.


{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE NoRebindableSyntax #-}

module Duckling.Api
( analyze
, formatToken
, parse
, supportedDimensions
) where

import Data.HashMap.Strict (HashMap)
import qualified Data.HashMap.Strict as HashMap
import Data.HashSet (HashSet)
import qualified Data.HashSet as HashSet
import Data.Text (Text)
import qualified Data.Text as Text
import Prelude
import TextShow

import Duckling.Dimensions.Types
import Duckling.Dimensions
import Duckling.Engine
import Duckling.Lang
import Duckling.Ranking.Classifiers
import Duckling.Ranking.Rank
import Duckling.Resolve
import Duckling.Rules
import Duckling.Types

-- | Parses `input` and returns a curated list of entities found.
parse :: Text -> Context -> [Some Dimension] -> [Entity]
parse input ctx = map (formatToken input) . analyze input ctx . HashSet.fromList

supportedDimensions :: HashMap Lang [Some Dimension]
supportedDimensions =
HashMap.fromList [ (l, allDimensions l) | l <- [minBound..maxBound] ]

-- | Returns a curated list of resolved tokens found
-- When `targets` is non-empty, returns only tokens of such dimensions.
analyze :: Text -> Context -> HashSet (Some Dimension) -> [ResolvedToken]
analyze input context@Context{..} targets =
rank (classifiers lang) targets
. filter (\(Resolved{node = Node{token = (Token d _)}}) ->
HashSet.null targets || HashSet.member (Some d) targets
)
$ parseAndResolve (rulesFor lang targets) input context

-- | Converts the resolved token to the API format
formatToken :: Text -> ResolvedToken -> Entity
formatToken sentence (Resolved (Range start end) (Node{token=Token dimension _}) jsonValue) =
Entity (showt dimension) body val start end
where
body = Text.drop start $ Text.take end sentence
val = toJText jsonValue
137 changes: 137 additions & 0 deletions Duckling/Api/Tests.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.


{-# LANGUAGE NoRebindableSyntax #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TupleSections #-}

module Duckling.Api.Tests (tests) where

import qualified Data.HashMap.Strict as HashMap
import qualified Data.HashSet as HashSet
import Data.List (sortOn)
import Data.Text (Text)
import Prelude
import Test.Tasty
import Test.Tasty.HUnit

import Duckling.Api
import Duckling.Dimensions.Types
import Duckling.Lang
import qualified Duckling.Number.Types as TNumber
import Duckling.Testing.Asserts
import Duckling.Testing.Types
import Duckling.Types

tests :: TestTree
tests = testGroup "API Tests"
[ parseTest
, rankTest
, rangeTest
, supportedDimensionsTest
]

parseTest :: TestTree
parseTest = testCase "Parse Test" $
case parse sentence testContext [Some DNumber] of
[] -> assertFailure "empty result"
(Entity dim body value start end:_) -> do
assertEqual "dim" "number" dim
assertEqual "body" "42" body
assertEqual "value" val value
assertEqual "start" 4 start
assertEqual "end" 6 end
where
sentence = "hey 42 there"
val = toJText TNumber.NumberValue {TNumber.vValue = 42.0}

rankTest :: TestTree
rankTest = testGroup "Rank Tests"
[ rankFilterTest
, rankOrderTest
]

rankFilterTest :: TestTree
rankFilterTest = testCase "Rank Filter Tests" $ do
mapM_ check
[ ( "in 2 minutes"
, [Some DNumber, Some Duration, Some Time]
, [Some Time]
)
, ( "in 2 minutes, about 42 degrees"
, [Some DNumber, Some Temperature, Some Time]
, [Some Time, Some Temperature]
)
, ( "today works... and tomorrow at 9pm too"
, [Some DNumber, Some Time]
, [Some Time, Some Time]
)
, ( "between 9:30 and 11:00 on thursday or Saturday and Thanksgiving Day"
, [Some DNumber, Some Time]
, [Some Time, Some Time, Some Time]
)
, ("the day after tomorrow 5pm", [Some Time], [Some Time])
, ("the day after tomorrow 5pm", [Some Time, Some DNumber], [Some Time])
, ("the day after tomorrow 5pm", [], [Some Time])
]
where
check :: (Text, [Some Dimension], [Some Dimension]) -> IO ()
check (sentence, targets, expected) =
let go = analyze sentence testContext $ HashSet.fromList targets
actual = flip map go $
\(Resolved{node=Node{token=Token d _}}) -> Some d
in assertEqual ("wrong winners for " ++ show sentence) expected actual

rankOrderTest :: TestTree
rankOrderTest = testCase "Rank Order Tests" $ do
mapM_ check
[ ("tomorrow at 5PM or 8PM", [Some Time])
, ("321 12 3456 ... 7", [Some DNumber])
, ("42 today 23 tomorrow", [Some DNumber, Some Time])
]
where
check (s, targets) =
let tokens = analyze s testContext $ HashSet.fromList targets
in assertEqual "wrong ordering" (sortOn range tokens) tokens

rangeTest :: TestTree
rangeTest = testCase "Range Tests" $ do
mapM_ (analyzedFirstTest testContext) xs
where
xs = map (\(input, targets, range) -> (input, targets, f range))
[ ( "order status 3233763377", [Some PhoneNumber], Range 13 23 )
, ( " 3233763377 " , [Some PhoneNumber], Range 2 12 )
, ( " -3233763377" , [Some PhoneNumber], Range 2 12 )
, ( " now" , [Some Time] , Range 2 5 )
, ( " Monday " , [Some Time] , Range 3 9 )
, ( " next week " , [Some Time] , Range 2 13 )
, ( " 42\n\n" , [Some DNumber] , Range 3 5 )
]
f :: Range -> TestPredicate
f expected _ (Resolved {range = actual}) = expected == actual

supportedDimensionsTest :: TestTree
supportedDimensionsTest = testCase "Supported Dimensions Test" $ do
mapM_ check
[ ( AR
, [ Some Email, Some Finance, Some PhoneNumber, Some Url, Some DNumber
, Some Ordinal
]
)
, ( PL
, [ Some Email, Some Finance, Some PhoneNumber, Some Url, Some Duration
, Some DNumber, Some Ordinal, Some Time
]
)
]
where
check :: (Lang, [Some Dimension]) -> IO ()
check (l, expected) = case HashMap.lookup l supportedDimensions of
Nothing -> assertFailure $ "no dimensions for " ++ show l
Just actual ->
assertEqual ("wrong dimensions for " ++ show l) expected actual
56 changes: 56 additions & 0 deletions Duckling/Core.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.


{-# LANGUAGE NoRebindableSyntax #-}

-- | Everything needed to run Duckling.

module Duckling.Core
( Context(..)
, Dimension(..)
, Entity(..)
, Lang(..)
, Some(..)

-- Duckling API
, parse
, supportedDimensions

-- Reference time builders
, currentReftime
, makeReftime
) where

import Data.HashMap.Strict (HashMap)
import qualified Data.HashMap.Strict as HashMap
import Data.Maybe
import Data.Text (Text)
import Data.Time
import Data.Time.LocalTime.TimeZone.Series
import Prelude

import Duckling.Api
import Duckling.Dimensions.Types
import Duckling.Lang
import Duckling.Resolve
import Duckling.Types

-- | Builds a `DucklingTime` for timezone `tz` at `utcTime`.
-- If no `series` found for `tz`, uses UTC.
makeReftime :: HashMap Text TimeZoneSeries -> Text -> UTCTime -> DucklingTime
makeReftime series tz utcTime = DucklingTime $ ZoneSeriesTime ducklingTime tzs
where
tzs = fromMaybe (TimeZoneSeries utc []) $ HashMap.lookup tz series
ducklingTime = toUTC $ utcToLocalTime' tzs utcTime

-- | Builds a `DucklingTime` for timezone `tz` at current time.
-- If no `series` found for `tz`, uses UTC.
currentReftime :: HashMap Text TimeZoneSeries -> Text -> IO DucklingTime
currentReftime series tz = do
utcNow <- getCurrentTime
return $ makeReftime series tz utcNow
71 changes: 71 additions & 0 deletions Duckling/Data/TimeZone.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.


{-# LANGUAGE OverloadedStrings #-}

module Duckling.Data.TimeZone
( loadTimeZoneSeries
) where

import qualified Control.Exception as E
import Control.Monad.Extra
import Data.Either
import Data.HashMap.Strict (HashMap)
import qualified Data.HashMap.Strict as HashMap
import qualified Data.HashSet as HashSet
import Data.String
import qualified Data.Text as Text
import Data.Time (TimeZone(..))
import Data.Text (Text)
import Data.Time.LocalTime.TimeZone.Olson
import Data.Time.LocalTime.TimeZone.Series
import System.Directory
import System.FilePath

import Prelude

-- | Reference implementation for pulling TimeZoneSeries data from local
-- Olson files.
-- Many linux distros have Olson data in "/usr/share/zoneinfo/"
loadTimeZoneSeries :: FilePath -> IO (HashMap Text TimeZoneSeries)
loadTimeZoneSeries base = do
files <- getFiles base
tzSeries <- mapM parseOlsonFile files
-- This data is large, will live a long time, and essentially be constant,
-- so it's a perfect candidate for compact regions
return $ HashMap.fromList $ rights tzSeries
where
-- Multiple versions of the data can exist. We intentionally ignore the
-- posix and right formats
ignored_dirs = HashSet.fromList $ map (base </>)
[ "posix", "right" ]

-- Recursively crawls a directory to list every file underneath it,
-- ignoring certain directories as needed
getFiles :: FilePath -> IO [FilePath]
getFiles dir = do
fsAll <- getDirectoryContents dir
let
fs = filter notDotFile fsAll
full_fs = map (dir </>) fs
(dirs, files) <- partitionM doesDirectoryExist full_fs

subdirs <- concatMapM getFiles
[ d | d <- dirs, not $ HashSet.member d ignored_dirs ]

return $ files ++ subdirs

-- Attempts to read a file in Olson format and returns its
-- canonical name (file path relative to the base) and the data
parseOlsonFile :: FilePath
-> IO (Either E.ErrorCall (Text, TimeZoneSeries))
parseOlsonFile f = E.try $ do
r <- getTimeZoneSeriesFromOlsonFile f
return (Text.pack $ makeRelative base f, r)

notDotFile s = not $ elem s [".", ".."]
Loading

0 comments on commit 3f8e52e

Please sign in to comment.