From a707be9eae0fdd98b66ed1b658be92c51151f065 Mon Sep 17 00:00:00 2001 From: Enzo Lebrun Date: Mon, 14 Jan 2019 09:25:38 -0800 Subject: [PATCH] Value before month in zh can be an integers or a chinese char (#301) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Summary: May 2019 is more often written like' 2019年5月' than ' 2019年五月' so the corresponding integer are added in the regex's rule ruleMonths. Pull Request resolved: https://github.com/facebook/duckling/pull/301 Reviewed By: haoxuany Differential Revision: D13606127 Pulled By: patapizza fbshipit-source-id: 7dd2536e36711b4be42a93419b08dbd66cf31523 --- Duckling/Ranking/Classifiers/ZH_CN.hs | 226 ++++++++++++-------------- Duckling/Ranking/Classifiers/ZH_HK.hs | 226 ++++++++++++-------------- Duckling/Ranking/Classifiers/ZH_MO.hs | 226 ++++++++++++-------------- Duckling/Ranking/Classifiers/ZH_TW.hs | 226 ++++++++++++-------------- Duckling/Ranking/Classifiers/ZH_XX.hs | 224 ++++++++++++------------- Duckling/Time/ZH/Corpus.hs | 13 +- Duckling/Time/ZH/Rules.hs | 21 +-- 7 files changed, 514 insertions(+), 648 deletions(-) diff --git a/Duckling/Ranking/Classifiers/ZH_CN.hs b/Duckling/Ranking/Classifiers/ZH_CN.hs index 8ba4e8947..8ef1e9aa7 100644 --- a/Duckling/Ranking/Classifiers/ZH_CN.hs +++ b/Duckling/Ranking/Classifiers/ZH_CN.hs @@ -41,10 +41,11 @@ classifiers likelihoods = HashMap.fromList [], n = 0}}), ("integer (numeric)", Classifier{okData = - ClassData{prior = -0.5020919437972361, unseen = -3.871201010907891, - likelihoods = HashMap.fromList [("", 0.0)], n = 46}, + ClassData{prior = -0.48550781578170077, + unseen = -3.912023005428146, + likelihoods = HashMap.fromList [("", 0.0)], n = 48}, koData = - ClassData{prior = -0.9295359586241757, + ClassData{prior = -0.9555114450274363, unseen = -3.4657359027997265, likelihoods = HashMap.fromList [("", 0.0)], n = 30}}), ("the day before yesterday", @@ -100,22 +101,15 @@ classifiers koData = ClassData{prior = -infinity, unseen = -0.6931471805599453, likelihoods = HashMap.fromList [], n = 0}}), - ("October", - Classifier{okData = - ClassData{prior = 0.0, unseen = -1.791759469228055, - likelihoods = HashMap.fromList [("", 0.0)], n = 4}, - koData = - ClassData{prior = -infinity, unseen = -0.6931471805599453, - likelihoods = HashMap.fromList [], n = 0}}), ("month (grain)", Classifier{okData = - ClassData{prior = -1.0076405104623831, + ClassData{prior = -1.0541605260972757, unseen = -3.2188758248682006, likelihoods = HashMap.fromList [("", 0.0)], n = 23}, koData = - ClassData{prior = -0.45425527227759643, - unseen = -3.7376696182833684, - likelihoods = HashMap.fromList [("", 0.0)], n = 40}}), + ClassData{prior = -0.42845462633286313, + unseen = -3.8066624897703196, + likelihoods = HashMap.fromList [("", 0.0)], n = 43}}), (" o'clock", Classifier{okData = ClassData{prior = -0.4418327522790392, unseen = -3.044522437723423, @@ -169,45 +163,45 @@ classifiers likelihoods = HashMap.fromList [], n = 0}}), ("intersect", Classifier{okData = - ClassData{prior = -0.3519764231571781, unseen = -4.983606621708336, + ClassData{prior = -0.6061358035703156, unseen = -4.61512051684126, likelihoods = HashMap.fromList [("\20799\31461\33410 ", - -3.367295829986474), + -2.995732273553991), ("year (numeric with year symbol) ", - -1.7578579175523736), - ("dayday", -1.6094379124341003), - ("hourhour", -3.367295829986474), - ("hourminute", -3.367295829986474), + -1.7719568419318752), + ("dayday", -1.8971199848858813), + ("hourhour", -2.995732273553991), + ("hourminute", -2.995732273553991), ("absorption of , after named day ", - -1.6094379124341003), - ("dayminute", -3.367295829986474), - ("tonight o'clock", -3.367295829986474), + -1.8971199848858813), + ("dayminute", -2.995732273553991), + ("tonight o'clock", -2.995732273553991), (" relative minutes after|past (hour-of-day)", - -3.367295829986474), - ("yearday", -1.7578579175523736)], - n = 64}, + -2.995732273553991), + ("yearday", -1.7719568419318752)], + n = 42}, koData = - ClassData{prior = -1.215022640512521, unseen = -4.276666119016055, + ClassData{prior = -0.7884573603642702, unseen = -4.465908118654584, likelihoods = HashMap.fromList [("\20799\31461\33410 ", - -2.065455299705096), - ("dayhour", -2.653241964607215), - ("daymonth", -2.1832383353614793), - ("year (numeric with year symbol)February", -3.164067588373206), + -2.257122718917288), + ("dayhour", -2.8449093838194073), + ("daymonth", -2.374905754573672), (" o'clock", - -3.164067588373206), - ("hourhour", -3.164067588373206), - ("year (numeric with year symbol)March", -2.653241964607215), - ("hourminute", -2.653241964607215), - ("yearmonth", -2.316769727986002), - ("dayminute", -2.653241964607215), + -3.355735007585398), + ("year (numeric with year symbol)month (numeric with month symbol)", + -1.7462970951512977), + ("hourhour", -3.355735007585398), + ("hourminute", -2.8449093838194073), + ("absorption of , after named daymonth (numeric with month symbol)", + -2.374905754573672), + ("yearmonth", -1.7462970951512977), + ("dayminute", -2.8449093838194073), (" relative minutes after|past (hour-of-day)", - -2.653241964607215), - ("absorption of , after named dayFebruary", - -2.1832383353614793)], - n = 27}}), + -2.8449093838194073)], + n = 35}}), ("year (grain)", Classifier{okData = ClassData{prior = -0.8209805520698302, @@ -329,33 +323,33 @@ classifiers n = 50}}), ("nth