forked from facebook/duckling
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
fbshipit-source-id: 301a10f448e9623aa1c953544f42de562909e192
- Loading branch information
FBShipIt
committed
Mar 8, 2017
0 parents
commit 3f8e52e
Showing
514 changed files
with
87,609 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
-- Copyright (c) 2016-present, Facebook, Inc. | ||
-- All rights reserved. | ||
-- | ||
-- This source code is licensed under the BSD-style license found in the | ||
-- LICENSE file in the root directory of this source tree. An additional grant | ||
-- of patent rights can be found in the PATENTS file in the same directory. | ||
|
||
|
||
{-# LANGUAGE RecordWildCards #-} | ||
{-# LANGUAGE NoRebindableSyntax #-} | ||
|
||
module Duckling.Api | ||
( analyze | ||
, formatToken | ||
, parse | ||
, supportedDimensions | ||
) where | ||
|
||
import Data.HashMap.Strict (HashMap) | ||
import qualified Data.HashMap.Strict as HashMap | ||
import Data.HashSet (HashSet) | ||
import qualified Data.HashSet as HashSet | ||
import Data.Text (Text) | ||
import qualified Data.Text as Text | ||
import Prelude | ||
import TextShow | ||
|
||
import Duckling.Dimensions.Types | ||
import Duckling.Dimensions | ||
import Duckling.Engine | ||
import Duckling.Lang | ||
import Duckling.Ranking.Classifiers | ||
import Duckling.Ranking.Rank | ||
import Duckling.Resolve | ||
import Duckling.Rules | ||
import Duckling.Types | ||
|
||
-- | Parses `input` and returns a curated list of entities found. | ||
parse :: Text -> Context -> [Some Dimension] -> [Entity] | ||
parse input ctx = map (formatToken input) . analyze input ctx . HashSet.fromList | ||
|
||
supportedDimensions :: HashMap Lang [Some Dimension] | ||
supportedDimensions = | ||
HashMap.fromList [ (l, allDimensions l) | l <- [minBound..maxBound] ] | ||
|
||
-- | Returns a curated list of resolved tokens found | ||
-- When `targets` is non-empty, returns only tokens of such dimensions. | ||
analyze :: Text -> Context -> HashSet (Some Dimension) -> [ResolvedToken] | ||
analyze input context@Context{..} targets = | ||
rank (classifiers lang) targets | ||
. filter (\(Resolved{node = Node{token = (Token d _)}}) -> | ||
HashSet.null targets || HashSet.member (Some d) targets | ||
) | ||
$ parseAndResolve (rulesFor lang targets) input context | ||
|
||
-- | Converts the resolved token to the API format | ||
formatToken :: Text -> ResolvedToken -> Entity | ||
formatToken sentence (Resolved (Range start end) (Node{token=Token dimension _}) jsonValue) = | ||
Entity (showt dimension) body val start end | ||
where | ||
body = Text.drop start $ Text.take end sentence | ||
val = toJText jsonValue |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,137 @@ | ||
-- Copyright (c) 2016-present, Facebook, Inc. | ||
-- All rights reserved. | ||
-- | ||
-- This source code is licensed under the BSD-style license found in the | ||
-- LICENSE file in the root directory of this source tree. An additional grant | ||
-- of patent rights can be found in the PATENTS file in the same directory. | ||
|
||
|
||
{-# LANGUAGE NoRebindableSyntax #-} | ||
{-# LANGUAGE OverloadedStrings #-} | ||
{-# LANGUAGE TupleSections #-} | ||
|
||
module Duckling.Api.Tests (tests) where | ||
|
||
import qualified Data.HashMap.Strict as HashMap | ||
import qualified Data.HashSet as HashSet | ||
import Data.List (sortOn) | ||
import Data.Text (Text) | ||
import Prelude | ||
import Test.Tasty | ||
import Test.Tasty.HUnit | ||
|
||
import Duckling.Api | ||
import Duckling.Dimensions.Types | ||
import Duckling.Lang | ||
import qualified Duckling.Number.Types as TNumber | ||
import Duckling.Testing.Asserts | ||
import Duckling.Testing.Types | ||
import Duckling.Types | ||
|
||
tests :: TestTree | ||
tests = testGroup "API Tests" | ||
[ parseTest | ||
, rankTest | ||
, rangeTest | ||
, supportedDimensionsTest | ||
] | ||
|
||
parseTest :: TestTree | ||
parseTest = testCase "Parse Test" $ | ||
case parse sentence testContext [Some DNumber] of | ||
[] -> assertFailure "empty result" | ||
(Entity dim body value start end:_) -> do | ||
assertEqual "dim" "number" dim | ||
assertEqual "body" "42" body | ||
assertEqual "value" val value | ||
assertEqual "start" 4 start | ||
assertEqual "end" 6 end | ||
where | ||
sentence = "hey 42 there" | ||
val = toJText TNumber.NumberValue {TNumber.vValue = 42.0} | ||
|
||
rankTest :: TestTree | ||
rankTest = testGroup "Rank Tests" | ||
[ rankFilterTest | ||
, rankOrderTest | ||
] | ||
|
||
rankFilterTest :: TestTree | ||
rankFilterTest = testCase "Rank Filter Tests" $ do | ||
mapM_ check | ||
[ ( "in 2 minutes" | ||
, [Some DNumber, Some Duration, Some Time] | ||
, [Some Time] | ||
) | ||
, ( "in 2 minutes, about 42 degrees" | ||
, [Some DNumber, Some Temperature, Some Time] | ||
, [Some Time, Some Temperature] | ||
) | ||
, ( "today works... and tomorrow at 9pm too" | ||
, [Some DNumber, Some Time] | ||
, [Some Time, Some Time] | ||
) | ||
, ( "between 9:30 and 11:00 on thursday or Saturday and Thanksgiving Day" | ||
, [Some DNumber, Some Time] | ||
, [Some Time, Some Time, Some Time] | ||
) | ||
, ("the day after tomorrow 5pm", [Some Time], [Some Time]) | ||
, ("the day after tomorrow 5pm", [Some Time, Some DNumber], [Some Time]) | ||
, ("the day after tomorrow 5pm", [], [Some Time]) | ||
] | ||
where | ||
check :: (Text, [Some Dimension], [Some Dimension]) -> IO () | ||
check (sentence, targets, expected) = | ||
let go = analyze sentence testContext $ HashSet.fromList targets | ||
actual = flip map go $ | ||
\(Resolved{node=Node{token=Token d _}}) -> Some d | ||
in assertEqual ("wrong winners for " ++ show sentence) expected actual | ||
|
||
rankOrderTest :: TestTree | ||
rankOrderTest = testCase "Rank Order Tests" $ do | ||
mapM_ check | ||
[ ("tomorrow at 5PM or 8PM", [Some Time]) | ||
, ("321 12 3456 ... 7", [Some DNumber]) | ||
, ("42 today 23 tomorrow", [Some DNumber, Some Time]) | ||
] | ||
where | ||
check (s, targets) = | ||
let tokens = analyze s testContext $ HashSet.fromList targets | ||
in assertEqual "wrong ordering" (sortOn range tokens) tokens | ||
|
||
rangeTest :: TestTree | ||
rangeTest = testCase "Range Tests" $ do | ||
mapM_ (analyzedFirstTest testContext) xs | ||
where | ||
xs = map (\(input, targets, range) -> (input, targets, f range)) | ||
[ ( "order status 3233763377", [Some PhoneNumber], Range 13 23 ) | ||
, ( " 3233763377 " , [Some PhoneNumber], Range 2 12 ) | ||
, ( " -3233763377" , [Some PhoneNumber], Range 2 12 ) | ||
, ( " now" , [Some Time] , Range 2 5 ) | ||
, ( " Monday " , [Some Time] , Range 3 9 ) | ||
, ( " next week " , [Some Time] , Range 2 13 ) | ||
, ( " 42\n\n" , [Some DNumber] , Range 3 5 ) | ||
] | ||
f :: Range -> TestPredicate | ||
f expected _ (Resolved {range = actual}) = expected == actual | ||
|
||
supportedDimensionsTest :: TestTree | ||
supportedDimensionsTest = testCase "Supported Dimensions Test" $ do | ||
mapM_ check | ||
[ ( AR | ||
, [ Some Email, Some Finance, Some PhoneNumber, Some Url, Some DNumber | ||
, Some Ordinal | ||
] | ||
) | ||
, ( PL | ||
, [ Some Email, Some Finance, Some PhoneNumber, Some Url, Some Duration | ||
, Some DNumber, Some Ordinal, Some Time | ||
] | ||
) | ||
] | ||
where | ||
check :: (Lang, [Some Dimension]) -> IO () | ||
check (l, expected) = case HashMap.lookup l supportedDimensions of | ||
Nothing -> assertFailure $ "no dimensions for " ++ show l | ||
Just actual -> | ||
assertEqual ("wrong dimensions for " ++ show l) expected actual |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
-- Copyright (c) 2016-present, Facebook, Inc. | ||
-- All rights reserved. | ||
-- | ||
-- This source code is licensed under the BSD-style license found in the | ||
-- LICENSE file in the root directory of this source tree. An additional grant | ||
-- of patent rights can be found in the PATENTS file in the same directory. | ||
|
||
|
||
{-# LANGUAGE NoRebindableSyntax #-} | ||
|
||
-- | Everything needed to run Duckling. | ||
|
||
module Duckling.Core | ||
( Context(..) | ||
, Dimension(..) | ||
, Entity(..) | ||
, Lang(..) | ||
, Some(..) | ||
|
||
-- Duckling API | ||
, parse | ||
, supportedDimensions | ||
|
||
-- Reference time builders | ||
, currentReftime | ||
, makeReftime | ||
) where | ||
|
||
import Data.HashMap.Strict (HashMap) | ||
import qualified Data.HashMap.Strict as HashMap | ||
import Data.Maybe | ||
import Data.Text (Text) | ||
import Data.Time | ||
import Data.Time.LocalTime.TimeZone.Series | ||
import Prelude | ||
|
||
import Duckling.Api | ||
import Duckling.Dimensions.Types | ||
import Duckling.Lang | ||
import Duckling.Resolve | ||
import Duckling.Types | ||
|
||
-- | Builds a `DucklingTime` for timezone `tz` at `utcTime`. | ||
-- If no `series` found for `tz`, uses UTC. | ||
makeReftime :: HashMap Text TimeZoneSeries -> Text -> UTCTime -> DucklingTime | ||
makeReftime series tz utcTime = DucklingTime $ ZoneSeriesTime ducklingTime tzs | ||
where | ||
tzs = fromMaybe (TimeZoneSeries utc []) $ HashMap.lookup tz series | ||
ducklingTime = toUTC $ utcToLocalTime' tzs utcTime | ||
|
||
-- | Builds a `DucklingTime` for timezone `tz` at current time. | ||
-- If no `series` found for `tz`, uses UTC. | ||
currentReftime :: HashMap Text TimeZoneSeries -> Text -> IO DucklingTime | ||
currentReftime series tz = do | ||
utcNow <- getCurrentTime | ||
return $ makeReftime series tz utcNow |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
-- Copyright (c) 2016-present, Facebook, Inc. | ||
-- All rights reserved. | ||
-- | ||
-- This source code is licensed under the BSD-style license found in the | ||
-- LICENSE file in the root directory of this source tree. An additional grant | ||
-- of patent rights can be found in the PATENTS file in the same directory. | ||
|
||
|
||
{-# LANGUAGE OverloadedStrings #-} | ||
|
||
module Duckling.Data.TimeZone | ||
( loadTimeZoneSeries | ||
) where | ||
|
||
import qualified Control.Exception as E | ||
import Control.Monad.Extra | ||
import Data.Either | ||
import Data.HashMap.Strict (HashMap) | ||
import qualified Data.HashMap.Strict as HashMap | ||
import qualified Data.HashSet as HashSet | ||
import Data.String | ||
import qualified Data.Text as Text | ||
import Data.Time (TimeZone(..)) | ||
import Data.Text (Text) | ||
import Data.Time.LocalTime.TimeZone.Olson | ||
import Data.Time.LocalTime.TimeZone.Series | ||
import System.Directory | ||
import System.FilePath | ||
|
||
import Prelude | ||
|
||
-- | Reference implementation for pulling TimeZoneSeries data from local | ||
-- Olson files. | ||
-- Many linux distros have Olson data in "/usr/share/zoneinfo/" | ||
loadTimeZoneSeries :: FilePath -> IO (HashMap Text TimeZoneSeries) | ||
loadTimeZoneSeries base = do | ||
files <- getFiles base | ||
tzSeries <- mapM parseOlsonFile files | ||
-- This data is large, will live a long time, and essentially be constant, | ||
-- so it's a perfect candidate for compact regions | ||
return $ HashMap.fromList $ rights tzSeries | ||
where | ||
-- Multiple versions of the data can exist. We intentionally ignore the | ||
-- posix and right formats | ||
ignored_dirs = HashSet.fromList $ map (base </>) | ||
[ "posix", "right" ] | ||
|
||
-- Recursively crawls a directory to list every file underneath it, | ||
-- ignoring certain directories as needed | ||
getFiles :: FilePath -> IO [FilePath] | ||
getFiles dir = do | ||
fsAll <- getDirectoryContents dir | ||
let | ||
fs = filter notDotFile fsAll | ||
full_fs = map (dir </>) fs | ||
(dirs, files) <- partitionM doesDirectoryExist full_fs | ||
|
||
subdirs <- concatMapM getFiles | ||
[ d | d <- dirs, not $ HashSet.member d ignored_dirs ] | ||
|
||
return $ files ++ subdirs | ||
|
||
-- Attempts to read a file in Olson format and returns its | ||
-- canonical name (file path relative to the base) and the data | ||
parseOlsonFile :: FilePath | ||
-> IO (Either E.ErrorCall (Text, TimeZoneSeries)) | ||
parseOlsonFile f = E.try $ do | ||
r <- getTimeZoneSeriesFromOlsonFile f | ||
return (Text.pack $ makeRelative base f, r) | ||
|
||
notDotFile s = not $ elem s [".", ".."] |
Oops, something went wrong.