Skip to content

Commit

Permalink
Support Chinese next next week day - first commit (#3184)
Browse files Browse the repository at this point in the history
Co-authored-by: Michael Wang (Centific Technologies Inc) <[email protected]>
  • Loading branch information
MichaelMWW and Michael Wang (Centific Technologies Inc) authored Dec 4, 2024
1 parent 25afa66 commit 90e968e
Show file tree
Hide file tree
Showing 13 changed files with 344 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ public static class DateTimeDefinitions
public static readonly string DateThisRegex = $@"(这个|这一个|这|这一|本){WeekDayRegex}";
public static readonly string DateLastRegex = $@"(上一个|上个|上一|上|最后一个|最后)(的)?{WeekDayRegex}";
public static readonly string DateNextRegex = $@"(下一个|下个|下一|下)(的)?{WeekDayRegex}";
public static readonly string DateNextNextRegex = $@"(下下|下下[个個]){WeekDayRegex}";
public static readonly string DateLastLastRegex = $@"(上上|上上[个個]){WeekDayRegex}";
public const string WeekWithWeekDayRangeRegex = @"^[.]";
public const string WoMLastRegex = @"最后一";
public const string WoMPreviousRegex = @"上个";
Expand Down Expand Up @@ -78,7 +80,8 @@ public static class DateTimeDefinitions
public const string DatePeriodThisRegex = @"这个|这一个|这|这一|本";
public const string DatePeriodLastRegex = @"上个|上一个|上|上一";
public const string DatePeriodNextRegex = @"下个|下一个|下|下一";
public const string DatePeriodNextNextRegex = @"下下";
public const string DatePeriodNextNextRegex = @"下下|下下[个個]";
public const string DatePeriodLastLastRegex = @"上上|上上[个個]";
public static readonly string RelativeMonthRegex = $@"(?<relmonth>({DatePeriodThisRegex}|{DatePeriodLastRegex}|{DatePeriodNextRegex})\s*月)";
public const string HalfYearRegex = @"((?<firstHalf>(上|前)半年)|(?<secondHalf>(下|后)半年))";
public static readonly string YearRegex = $@"(({YearNumRegex})(\s*年)?|({SimpleYearRegex})\s*年){HalfYearRegex}?";
Expand All @@ -90,7 +93,7 @@ public static class DateTimeDefinitions
public static readonly string YearAndMonth = $@"({DatePeriodYearInCJKRegex}|{YearRegex}|(?<yearrel>明年|今年|去年))\s*({MonthRegex}|的?(?<cardinal>第一|第二|第三|第四|第五|第六|第七|第八|第九|第十|第十一|第十二|最后一)\s*个月\s*)";
public static readonly string SimpleYearAndMonth = $@"({YearNumRegex}[/\\\-]{MonthNumRegex}\b$)";
public static readonly string PureNumYearAndMonth = $@"({YearRegexInNumber}\s*[-\.\/]\s*{MonthNumRegex})|({MonthNumRegex}\s*\/\s*{YearRegexInNumber})";
public static readonly string OneWordPeriodRegex = $@"(((?<yearrel>(明|今|去)年)\s*)?{MonthRegex}|({DatePeriodThisRegex}|{DatePeriodLastRegex}|{DatePeriodNextNextRegex}|{DatePeriodNextRegex})(?<halfTag>半)?\s*(周末|周|月|年)|周末|(今|明|去|前|后)年(\s*{HalfYearRegex})?)";
public static readonly string OneWordPeriodRegex = $@"(((?<yearrel>(明|今|去)年)\s*)?{MonthRegex}|({DatePeriodThisRegex}|{DatePeriodLastLastRegex}|{DatePeriodLastRegex}|{DatePeriodNextNextRegex}|{DatePeriodNextRegex})(?<halfTag>半)?\s*([周週]末|[周週]|月|年)|[周週]末|(今|明|去|前|后)年(\s*{HalfYearRegex})?)";
public const string LaterEarlyPeriodRegex = @"^[.]";
public const string DatePointWithAgoAndLater = @"^[.]";
public static readonly string WeekOfMonthRegex = $@"(?<wom>{MonthSuffixRegex}的(?<cardinal>第一|第二|第三|第四|第五|最后一)\s*周\s*)";
Expand Down Expand Up @@ -286,7 +289,8 @@ public static class DateTimeDefinitions
};
public static readonly IList<string> WeekendTerms = new List<string>
{
@"周末"
@"周末",
@"週末"
};
public static readonly IList<string> WeekTerms = new List<string>
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@ public class ChineseDateExtractorConfiguration : BaseDateTimeOptionsConfiguratio

public static readonly Regex NextRegex = new Regex(DateTimeDefinitions.DateNextRegex, RegexFlags, RegexTimeOut);

// Compiled from DateTimeDefinitions.DateNextNextRegex: the "下下" (next-next) prefix followed by a weekday.
public static readonly Regex NextNextRegex = new Regex(DateTimeDefinitions.DateNextNextRegex, RegexFlags, RegexTimeOut);

// Compiled from DateTimeDefinitions.DateLastLastRegex: the "上上" (last-last) prefix followed by a weekday.
public static readonly Regex LastLastRegex = new Regex(DateTimeDefinitions.DateLastLastRegex, RegexFlags, RegexTimeOut);

public static readonly Regex SpecialDayRegex = new Regex(DateTimeDefinitions.SpecialDayRegex, RegexFlags, RegexTimeOut);

public static readonly Regex WeekDayOfMonthRegex = new Regex(DateTimeDefinitions.WeekDayOfMonthRegex, RegexFlags, RegexTimeOut);
Expand Down Expand Up @@ -78,7 +82,7 @@ public ChineseDateExtractorConfiguration(IDateTimeOptionsConfiguration config)

ImplicitDateList = new List<Regex>
{
LunarRegex, SpecialDayRegex, ThisRegex, LastRegex, NextRegex,
LunarRegex, SpecialDayRegex, ThisRegex, LastLastRegex, LastRegex, NextNextRegex, NextRegex,
WeekDayRegex, WeekDayOfMonthRegex, SpecialDate,
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ public class ChineseDatePeriodExtractorConfiguration : BaseDateTimeOptionsConfig
public static readonly Regex DateUnitRegex = new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags, RegexTimeOut);
public static readonly Regex LastRegex = new Regex(DateTimeDefinitions.DatePeriodLastRegex, RegexFlags, RegexTimeOut);
public static readonly Regex NextNextRegex = new Regex(DateTimeDefinitions.DatePeriodNextNextRegex, RegexFlags, RegexTimeOut);
public static readonly Regex LastLastRegex = new Regex(DateTimeDefinitions.DatePeriodLastLastRegex, RegexFlags, RegexTimeOut);
public static readonly Regex NextRegex = new Regex(DateTimeDefinitions.DatePeriodNextRegex, RegexFlags, RegexTimeOut);
public static readonly Regex RelativeMonthRegex = new Regex(DateTimeDefinitions.RelativeMonthRegex, RegexFlags, RegexTimeOut);
public static readonly Regex LaterEarlyPeriodRegex = new Regex(DateTimeDefinitions.LaterEarlyPeriodRegex, RegexFlags, RegexTimeOut);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ public ChineseDateParserConfiguration(ICJKCommonDateTimeParserConfiguration conf
WeekDayAndDayRegex = ChineseDateExtractorConfiguration.WeekDayAndDayRegex;
DurationRelativeDurationUnitRegex = ChineseDateExtractorConfiguration.DurationRelativeDurationUnitRegex;
SpecialDayWithNumRegex = ChineseDateExtractorConfiguration.SpecialDayWithNumRegex;
NextNextRegex = ChineseDateExtractorConfiguration.NextNextRegex;
LastLastRegex = ChineseDateExtractorConfiguration.LastLastRegex;

CardinalMap = config.CardinalMap;
UnitMap = config.UnitMap;
Expand Down Expand Up @@ -94,6 +96,10 @@ public ChineseDateParserConfiguration(ICJKCommonDateTimeParserConfiguration conf

public Regex NextRegex { get; }

public Regex NextNextRegex { get; }

public Regex LastLastRegex { get; }

public Regex ThisRegex { get; }

public Regex LastRegex { get; }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ public ChineseDatePeriodParserConfiguration(ICJKCommonDateTimeParserConfiguratio
SimpleCasesRegex = ChineseDatePeriodExtractorConfiguration.SimpleCasesRegex;
ThisRegex = ChineseDatePeriodExtractorConfiguration.ThisRegex;
NextNextRegex = ChineseDatePeriodExtractorConfiguration.NextNextRegex;
LastLastRegex = ChineseDatePeriodExtractorConfiguration.LastLastRegex;
NextRegex = ChineseDatePeriodExtractorConfiguration.NextRegex;
LastRegex = ChineseDatePeriodExtractorConfiguration.LastRegex;
YearToYear = ChineseDatePeriodExtractorConfiguration.YearToYear;
Expand Down Expand Up @@ -124,6 +125,8 @@ public ChineseDatePeriodParserConfiguration(ICJKCommonDateTimeParserConfiguratio

public Regex NextNextRegex { get; }

public Regex LastLastRegex { get; }

public Regex NextRegex { get; }

public Regex LastRegex { get; }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
using System.Collections.Generic;
using System.Globalization;
using System.Text.RegularExpressions;
using Microsoft.Recognizers.Text.DateTime.Chinese;
using Microsoft.Recognizers.Text.Utilities;
using DateObject = System.DateTime;

Expand Down Expand Up @@ -355,11 +356,21 @@ protected DateTimeResolutionResult ParseImplicitDate(string text, DateObject ref
ret = MatchThisWeekday(text, referenceDate);
}

if (!ret.Success)
{
ret = MatchNextNextWeekday(text, referenceDate);
}

if (!ret.Success)
{
ret = MatchNextWeekday(text, referenceDate);
}

if (!ret.Success)
{
ret = MatchLastLastWeekday(text, referenceDate);
}

if (!ret.Success)
{
ret = MatchLastWeekday(text, referenceDate);
Expand Down Expand Up @@ -497,6 +508,28 @@ protected DateTimeResolutionResult MatchNextWeekday(string text, DateObject refe
return result;
}

/// <summary>
/// Resolves a Chinese "next next weekday" expression (for example "下下周日") relative to a reference date.
/// </summary>
/// <param name="text">Text that must match the Chinese NextNextRegex in full once trimmed.</param>
/// <param name="reference">Date from which the weekday is advanced.</param>
/// <returns>
/// A result with Success set and Timex, FutureValue and PastValue populated when the text matches;
/// otherwise the freshly constructed result is returned untouched.
/// </returns>
protected DateTimeResolutionResult MatchNextNextWeekday(string text, DateObject reference)
{
    var resolution = new DateTimeResolutionResult();

    // NextNextRegex is read from ChineseDateParserConfiguration; with any other configuration no match is attempted.
    var chineseConfig = this.config as ChineseDateParserConfiguration;
    if (chineseConfig == null)
    {
        return resolution;
    }

    var exactMatch = chineseConfig.NextNextRegex.MatchExact(text, trim: true);
    if (!exactMatch.Success)
    {
        return resolution;
    }

    var targetDay = (DayOfWeek)this.config.DayOfWeek[exactMatch.Groups["weekday"].Value];

    // "Next next" is expressed as two consecutive Next steps toward the same weekday.
    var resolvedDate = reference.Next(targetDay).Next(targetDay);

    resolution.Timex = DateTimeFormatUtil.LuisDate(resolvedDate);
    resolution.FutureValue = resolution.PastValue = DateObject.MinValue.SafeCreateFromValue(resolvedDate.Year, resolvedDate.Month, resolvedDate.Day);
    resolution.Success = true;

    return resolution;
}

protected DateTimeResolutionResult MatchThisWeekday(string text, DateObject reference)
{
var result = new DateTimeResolutionResult();
Expand Down Expand Up @@ -533,6 +566,29 @@ protected DateTimeResolutionResult MatchLastWeekday(string text, DateObject refe
return result;
}

/// <summary>
/// Resolves a Chinese "last last weekday" expression (for example "上上個週三") relative to a reference date.
/// </summary>
/// <param name="text">Text that must match the Chinese LastLastRegex in full once trimmed.</param>
/// <param name="reference">Date from which the weekday is moved backwards.</param>
/// <returns>
/// A result with Success set and Timex, FutureValue and PastValue populated when the text matches;
/// otherwise the freshly constructed result is returned untouched.
/// </returns>
protected DateTimeResolutionResult MatchLastLastWeekday(string text, DateObject reference)
{
    var resolution = new DateTimeResolutionResult();

    // LastLastRegex is read from ChineseDateParserConfiguration; with any other configuration no match is attempted.
    var chineseConfig = this.config as ChineseDateParserConfiguration;
    if (chineseConfig == null)
    {
        return resolution;
    }

    var exactMatch = chineseConfig.LastLastRegex.MatchExact(text, trim: true);
    if (!exactMatch.Success)
    {
        return resolution;
    }

    var targetDay = (DayOfWeek)this.config.DayOfWeek[exactMatch.Groups["weekday"].Value];

    // "Last last" is expressed as two consecutive Last steps toward the same weekday.
    var resolvedDate = reference.Last(targetDay).Last(targetDay);

    resolution.Timex = DateTimeFormatUtil.LuisDate(resolvedDate);
    resolution.FutureValue = resolution.PastValue = DateObject.MinValue.SafeCreateFromValue(resolvedDate.Year, resolvedDate.Month, resolvedDate.Day);
    resolution.Success = true;

    return resolution;
}

protected DateTimeResolutionResult MatchWeekdayAlone(string text, DateObject reference)
{
var result = new DateTimeResolutionResult();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1023,8 +1023,10 @@ private DateTimeResolutionResult ParseOneWordPeriod(string text, DateObject refe
}

// In Chinese, "下" means "next": "下下周" is "next next week" and "下下周末" is "next next weekend", so check whether the text matches "下下".
// Likewise, "上" means "last": "上上周" is "last last week" and "上上周末" is "last last weekend", so check whether the text matches "上上".
ChineseDatePeriodParserConfiguration config = this.config as ChineseDatePeriodParserConfiguration;
bool nextNextMatch = config == null ? false : config.NextNextRegex.Match(trimmedText).Success;
bool lastlastMatch = config == null ? false : config.LastLastRegex.Match(trimmedText).Success;

var nextMatch = this.config.NextRegex.Match(trimmedText);
var lastMatch = this.config.LastRegex.Match(trimmedText);
Expand Down Expand Up @@ -1081,6 +1083,11 @@ private DateTimeResolutionResult ParseOneWordPeriod(string text, DateObject refe
// If it is Chinese "下下周" (next next week), "下下周末" (next next weekend), then swift is 2
swift = 2;
}
else if (lastlastMatch)
{
// If it is Chinese "上上周" (last last week), "上上周末" (last last weekend), then swift is -2
swift = -2;
}
else if (nextMatch.Success)
{
if (nextMatch.Groups[Constants.AfterGroupName].Success)
Expand Down
15 changes: 12 additions & 3 deletions Patterns/Chinese/Chinese-DateTime.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,12 @@ DateLastRegex: !nestedRegex
DateNextRegex: !nestedRegex
def: (下一个|下个|下一|下)(的)?{WeekDayRegex}
references: [WeekDayRegex]
# "下下" (next-next), optionally followed by the measure word 个/個, then a weekday, e.g. "下下周日".
# NOTE(review): the alternation appears equivalent to 下下[个個]? — the longer branch is only reached by backtracking; confirm before simplifying.
DateNextNextRegex: !nestedRegex
def: (下下|下下[个個]){WeekDayRegex}
references: [WeekDayRegex]
# "上上" (last-last), optionally followed by the measure word 个/個, then a weekday, e.g. "上上個週三".
# NOTE(review): same alternation shape as DateNextNextRegex; appears equivalent to 上上[个個]? — confirm before simplifying.
DateLastLastRegex: !nestedRegex
def: (上上|上上[个個]){WeekDayRegex}
references: [WeekDayRegex]
WeekWithWeekDayRangeRegex: !simpleRegex
# TODO: modify below regex according to the counterpart in Japanese
def: ^[.]
Expand Down Expand Up @@ -150,7 +156,9 @@ DatePeriodLastRegex: !simpleRegex
DatePeriodNextRegex: !simpleRegex
def: 下个|下一个|下|下一
DatePeriodNextNextRegex: !simpleRegex
def: 下下
def: 下下|下下[个個]
DatePeriodLastLastRegex: !simpleRegex
def: 上上|上上[个個]
RelativeMonthRegex: !nestedRegex
def: (?<relmonth>({DatePeriodThisRegex}|{DatePeriodLastRegex}|{DatePeriodNextRegex})\s*月)
references: [DatePeriodThisRegex, DatePeriodLastRegex, DatePeriodNextRegex]
Expand Down Expand Up @@ -183,8 +191,8 @@ PureNumYearAndMonth: !nestedRegex
def: ({YearRegexInNumber}\s*[-\.\/]\s*{MonthNumRegex})|({MonthNumRegex}\s*\/\s*{YearRegexInNumber})
references: [YearRegexInNumber, MonthNumRegex]
OneWordPeriodRegex: !nestedRegex
def: (((?<yearrel>(明|今|去)年)\s*)?{MonthRegex}|({DatePeriodThisRegex}|{DatePeriodLastRegex}|{DatePeriodNextNextRegex}|{DatePeriodNextRegex})(?<halfTag>半)?\s*(周末|周|月|年)|周末|(今|明|去|前|后)年(\s*{HalfYearRegex})?)
references: [MonthRegex, DatePeriodThisRegex, DatePeriodLastRegex, DatePeriodNextNextRegex, DatePeriodNextRegex, HalfYearRegex]
def: (((?<yearrel>(明|今|去)年)\s*)?{MonthRegex}|({DatePeriodThisRegex}|{DatePeriodLastLastRegex}|{DatePeriodLastRegex}|{DatePeriodNextNextRegex}|{DatePeriodNextRegex})(?<halfTag>半)?\s*([周週]末|[周週]|月|年)|[周週]末|(今|明|去|前|后)年(\s*{HalfYearRegex})?)
references: [MonthRegex, DatePeriodThisRegex, DatePeriodLastLastRegex, DatePeriodLastRegex, DatePeriodNextNextRegex, DatePeriodNextRegex, HalfYearRegex]
LaterEarlyPeriodRegex: !simpleRegex
# TODO: modify below regex according to the counterpart in Japanese
def: ^[.]
Expand Down Expand Up @@ -567,6 +575,7 @@ WeekendTerms: !list
types: [ string ]
entries:
- 周末
- 週末
WeekTerms: !list
types: [ string ]
entries:
Expand Down
24 changes: 24 additions & 0 deletions Specs/DateTime/Chinese/DateExtractor.json
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,30 @@
}
]
},
{
"Input": "马拉松在下下周日举行",
"NotSupported": "java, javascript, python",
"Results": [
{
"Text": "下下周日",
"Type": "date",
"Start": 4,
"Length": 4
}
]
},
{
"Input": "任務是在上上個週三完成的",
"NotSupported": "java, javascript, python",
"Results": [
{
"Text": "上上個週三",
"Type": "date",
"Start": 4,
"Length": 5
}
]
},
{
"Input": "下次的12号",
"Results": [
Expand Down
48 changes: 48 additions & 0 deletions Specs/DateTime/Chinese/DateParser.json
Original file line number Diff line number Diff line change
Expand Up @@ -581,6 +581,54 @@
}
]
},
{
"Input": "马拉松在下下周日举行",
"Context": {
"ReferenceDateTime": "2024-11-15T00:00:00"
},
"NotSupported": "java, javascript, python",
"Results": [
{
"Text": "下下周日",
"Type": "date",
"Value": {
"Timex": "2024-12-01",
"FutureResolution": {
"date": "2024-12-01"
},
"PastResolution": {
"date": "2024-12-01"
}
},
"Start": 4,
"Length": 4
}
]
},
{
"Input": "任務是在上上個週三完成的",
"Context": {
"ReferenceDateTime": "2024-11-15T00:00:00"
},
"NotSupported": "java, javascript, python",
"Results": [
{
"Text": "上上個週三",
"Type": "date",
"Value": {
"Timex": "2024-10-30",
"FutureResolution": {
"date": "2024-10-30"
},
"PastResolution": {
"date": "2024-10-30"
}
},
"Start": 4,
"Length": 5
}
]
},
{
"Input": "12号",
"Context": {
Expand Down
24 changes: 24 additions & 0 deletions Specs/DateTime/Chinese/DatePeriodExtractor.json
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,30 @@
}
]
},
{
"Input": "我是上上周回来的",
"NotSupported": "java, javascript, python",
"Results": [
{
"Text": "上上周",
"Type": "daterange",
"Start": 2,
"Length": 3
}
]
},
{
"Input": "你上上個週末干嘛了",
"NotSupported": "java, javascript, python",
"Results": [
{
"Text": "上上個週末",
"Type": "daterange",
"Start": 1,
"Length": 5
}
]
},
{
"Input": "下个月完工",
"Results": [
Expand Down
Loading

0 comments on commit 90e968e

Please sign in to comment.