From 338e5e75d85b2b61beb7de257bbb17babfa3cf16 Mon Sep 17 00:00:00 2001 From: Bearscience Date: Sun, 21 Jan 2018 17:30:38 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E4=BA=86=E5=8E=9F=E4=BD=9C?= =?UTF-8?q?=E8=80=85=E7=9A=84=E9=83=A8=E5=88=86=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- LunarSolarConverter.py | 222 +++++++++++ README.md | 24 ++ RangeTimeEnum.py | 23 ++ StringPreHandler.py | 183 +++++++++ Test.py | 78 ++++ TimeNormalizer.py | 133 +++++++ TimePoint.py | 15 + TimeUnit.py | 823 +++++++++++++++++++++++++++++++++++++++ __init__.py | 6 + resource/__init__.py | 6 + resource/holi_lunar.json | 10 + resource/holi_solar.json | 15 + resource/reg.pkl | Bin 0 -> 86547 bytes resource/regex.txt | 1 + setup.py | 31 ++ 15 files changed, 1570 insertions(+) create mode 100644 LunarSolarConverter.py create mode 100644 README.md create mode 100644 RangeTimeEnum.py create mode 100644 StringPreHandler.py create mode 100644 Test.py create mode 100644 TimeNormalizer.py create mode 100644 TimePoint.py create mode 100644 TimeUnit.py create mode 100644 __init__.py create mode 100644 resource/__init__.py create mode 100644 resource/holi_lunar.json create mode 100644 resource/holi_solar.json create mode 100644 resource/reg.pkl create mode 100644 resource/regex.txt create mode 100644 setup.py diff --git a/LunarSolarConverter.py b/LunarSolarConverter.py new file mode 100644 index 0000000..19b90ae --- /dev/null +++ b/LunarSolarConverter.py @@ -0,0 +1,222 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# @Time : 2017/12/11 11:08 +# @Author : zhm +# @File : LunarSolarConverter.py +# @Software: PyCharm +from pprint import pprint + + +class Lunar: + def __init__(self, lunarYear, lunarMonth, lunarDay, isleap): + self.isleap = isleap + self.lunarDay = lunarDay + self.lunarMonth = lunarMonth + self.lunarYear = lunarYear + + +class Solar: + def __init__(self, solarYear, solarMonth, solarDay): + self.solarDay = solarDay + self.solarMonth = solarMonth + self.solarYear = solarYear + + +def GetBitInt(data, length, shift): + return (data & (((1 << length) - 1) << shift)) >> shift + + +def SolarToInt(y, m, d): + m = (m + 9) % 12 + y -= m / 10 + return 365 * y + y / 4 - y / 100 + y / 400 + (m * 306 + 5) / 10 + (d - 1) + + +def SolarFromInt(g): + y = (10000 * g + 14780) / 3652425 + ddd = g - (365 * y + y / 4 - y / 100 + y / 400) + if ddd < 0: + y -= 1 + ddd = g - (365 * y + y / 4 - y / 100 + y / 400) + + mi = (100 * ddd + 52) / 3060 + mm = (mi + 2) % 12 + 1 + y += (mi + 2) / 12 + dd = ddd - (mi * 306 + 5) / 10 + 1 + solar = Solar(y, mm, dd) + return solar + + +class LunarSolarConverter: + ##################################################################################### + # 1888~2111年农历数据表 + # 农历数据 每个元素的存储格式如下: + # 16~13 12 11~0 + # 闰几月 闰月日数 1~12月份农历日数(大小月) + # 注:1、bit0表示农历1月份日数,为1表示30天,为0表示29天。bit1表示农历2月份日数,依次类推。 + # 2、bit12表示闰月日数,1为30天,0为29天。bit16~bit13表示第几月是闰月(注:为0表示该年无闰月) + # 数据来源参考: http://data.weather.gov.hk/gts/time/conversion1_text_c.htm + ##################################################################################### + lunar_month_days = [1887, 0x1694, 0x16aa, 0x4ad5, 0xab6, 0xc4b7, 0x4ae, 0xa56, 0xb52a, + 0x1d2a, 0xd54, 0x75aa, 0x156a, 0x1096d, 0x95c, 0x14ae, 0xaa4d, 0x1a4c, 0x1b2a, 0x8d55, + 0xad4, 0x135a, 0x495d, + 0x95c, 0xd49b, 0x149a, 0x1a4a, 0xbaa5, 0x16a8, 0x1ad4, 0x52da, 0x12b6, 0xe937, 0x92e, + 0x1496, 0xb64b, 0xd4a, + 0xda8, 0x95b5, 0x56c, 0x12ae, 0x492f, 0x92e, 0xcc96, 0x1a94, 0x1d4a, 0xada9, 0xb5a, 0x56c, + 0x726e, 0x125c, + 0xf92d, 0x192a, 0x1a94, 0xdb4a, 0x16aa, 0xad4, 0x955b, 0x4ba, 0x125a, 0x592b, 0x152a, + 0xf695, 0xd94, 0x16aa, + 0xaab5, 0x9b4, 0x14b6, 0x6a57, 0xa56, 0x1152a, 0x1d2a, 0xd54, 0xd5aa, 0x156a, 0x96c, + 0x94ae, 0x14ae, 0xa4c, + 0x7d26, 0x1b2a, 0xeb55, 0xad4, 0x12da, 0xa95d, 0x95a, 0x149a, 0x9a4d, 0x1a4a, 0x11aa5, + 0x16a8, 0x16d4, + 0xd2da, 0x12b6, 0x936, 0x9497, 0x1496, 0x1564b, 0xd4a, 0xda8, 0xd5b4, 0x156c, 0x12ae, + 0xa92f, 0x92e, 0xc96, + 0x6d4a, 0x1d4a, 0x10d65, 0xb58, 0x156c, 0xb26d, 0x125c, 0x192c, 0x9a95, 0x1a94, 0x1b4a, + 0x4b55, 0xad4, + 0xf55b, 0x4ba, 0x125a, 0xb92b, 0x152a, 0x1694, 0x96aa, 0x15aa, 0x12ab5, 0x974, 0x14b6, + 0xca57, 0xa56, 0x1526, + 0x8e95, 0xd54, 0x15aa, 0x49b5, 0x96c, 0xd4ae, 0x149c, 0x1a4c, 0xbd26, 0x1aa6, 0xb54, + 0x6d6a, 0x12da, 0x1695d, + 0x95a, 0x149a, 0xda4b, 0x1a4a, 0x1aa4, 0xbb54, 0x16b4, 0xada, 0x495b, 0x936, 0xf497, + 0x1496, 0x154a, 0xb6a5, + 0xda4, 0x15b4, 0x6ab6, 0x126e, 0x1092f, 0x92e, 0xc96, 0xcd4a, 0x1d4a, 0xd64, 0x956c, + 0x155c, 0x125c, 0x792e, + 0x192c, 0xfa95, 0x1a94, 0x1b4a, 0xab55, 0xad4, 0x14da, 0x8a5d, 0xa5a, 0x1152b, 0x152a, + 0x1694, 0xd6aa, + 0x15aa, 0xab4, 0x94ba, 0x14b6, 0xa56, 0x7527, 0xd26, 0xee53, 0xd54, 0x15aa, 0xa9b5, 0x96c, + 0x14ae, 0x8a4e, + 0x1a4c, 0x11d26, 0x1aa4, 0x1b54, 0xcd6a, 0xada, 0x95c, 0x949d, 0x149a, 0x1a2a, 0x5b25, + 0x1aa4, 0xfb52, + 0x16b4, 0xaba, 0xa95b, 0x936, 0x1496, 0x9a4b, 0x154a, 0x136a5, 0xda4, 0x15ac] + # 额外添加数据,方便快速计算阴历转阳历 每个元素的存储格式如下: + # 12~7 6~5 4~0 + # 离元旦多少天 春节月 春节日 + ##################################################################################### + solar_1_1 = [1887, 0xec04c, 0xec23f, 0xec435, 0xec649, 0xec83e, 0xeca51, 0xecc46, 0xece3a, + 0xed04d, 0xed242, 0xed436, 0xed64a, 0xed83f, 0xeda53, 0xedc48, 0xede3d, 0xee050, 0xee244, 0xee439, + 0xee64d, + 0xee842, 0xeea36, 0xeec4a, 0xeee3e, 0xef052, 0xef246, 0xef43a, 0xef64e, 0xef843, 0xefa37, 0xefc4b, + 0xefe41, + 0xf0054, 0xf0248, 0xf043c, 0xf0650, 0xf0845, 0xf0a38, 0xf0c4d, 0xf0e42, 0xf1037, 0xf124a, 0xf143e, + 0xf1651, + 0xf1846, 0xf1a3a, 0xf1c4e, 0xf1e44, 0xf2038, 0xf224b, 0xf243f, 0xf2653, 0xf2848, 0xf2a3b, 0xf2c4f, + 0xf2e45, + 0xf3039, 0xf324d, 0xf3442, 0xf3636, 0xf384a, 0xf3a3d, 0xf3c51, 0xf3e46, 0xf403b, 0xf424e, 0xf4443, + 0xf4638, + 0xf484c, 0xf4a3f, 0xf4c52, 0xf4e48, 0xf503c, 0xf524f, 0xf5445, 0xf5639, 0xf584d, 0xf5a42, 0xf5c35, + 0xf5e49, + 0xf603e, 0xf6251, 0xf6446, 0xf663b, 0xf684f, 0xf6a43, 0xf6c37, 0xf6e4b, 0xf703f, 0xf7252, 0xf7447, + 0xf763c, + 0xf7850, 0xf7a45, 0xf7c39, 0xf7e4d, 0xf8042, 0xf8254, 0xf8449, 0xf863d, 0xf8851, 0xf8a46, 0xf8c3b, + 0xf8e4f, + 0xf9044, 0xf9237, 0xf944a, 0xf963f, 0xf9853, 0xf9a47, 0xf9c3c, 0xf9e50, 0xfa045, 0xfa238, 0xfa44c, + 0xfa641, + 0xfa836, 0xfaa49, 0xfac3d, 0xfae52, 0xfb047, 0xfb23a, 0xfb44e, 0xfb643, 0xfb837, 0xfba4a, 0xfbc3f, + 0xfbe53, + 0xfc048, 0xfc23c, 0xfc450, 0xfc645, 0xfc839, 0xfca4c, 0xfcc41, 0xfce36, 0xfd04a, 0xfd23d, 0xfd451, + 0xfd646, + 0xfd83a, 0xfda4d, 0xfdc43, 0xfde37, 0xfe04b, 0xfe23f, 0xfe453, 0xfe648, 0xfe83c, 0xfea4f, 0xfec44, + 0xfee38, + 0xff04c, 0xff241, 0xff436, 0xff64a, 0xff83e, 0xffa51, 0xffc46, 0xffe3a, 0x10004e, 0x100242, + 0x100437, + 0x10064b, 0x100841, 0x100a53, 0x100c48, 0x100e3c, 0x10104f, 0x101244, 0x101438, 0x10164c, + 0x101842, 0x101a35, + 0x101c49, 0x101e3d, 0x102051, 0x102245, 0x10243a, 0x10264e, 0x102843, 0x102a37, 0x102c4b, + 0x102e3f, 0x103053, + 0x103247, 0x10343b, 0x10364f, 0x103845, 0x103a38, 0x103c4c, 0x103e42, 0x104036, 0x104249, + 0x10443d, 0x104651, + 0x104846, 0x104a3a, 0x104c4e, 0x104e43, 0x105038, 0x10524a, 0x10543e, 0x105652, 0x105847, + 0x105a3b, 0x105c4f, + 0x105e45, 0x106039, 0x10624c, 0x106441, 0x106635, 0x106849, 0x106a3d, 0x106c51, 0x106e47, + 0x10703c, 0x10724f, + 0x107444, 0x107638, 0x10784c, 0x107a3f, 0x107c53, 0x107e48] + + def LunarToSolar(self, lunar): + days = LunarSolarConverter.lunar_month_days[lunar.lunarYear - LunarSolarConverter.lunar_month_days[0]] + leap = GetBitInt(days, 4, 13) + offset = 0 + loopend = leap + if not lunar.isleap: + + if lunar.lunarMonth <= leap or leap == 0: + + loopend = lunar.lunarMonth - 1 + + else: + + loopend = lunar.lunarMonth + + for i in range(0, loopend): + offset += GetBitInt(days, 1, 12 - i) == 1 and 30 or 29 + + offset += lunar.lunarDay + + solar11 = LunarSolarConverter.solar_1_1[lunar.lunarYear - LunarSolarConverter.solar_1_1[0]] + + y = GetBitInt(solar11, 12, 9) + m = GetBitInt(solar11, 4, 5) + d = GetBitInt(solar11, 5, 0) + + return SolarFromInt(SolarToInt(y, m, d) + offset - 1) + + def SolarToLunar(self, solar): + + lunar = Lunar(0, 0, 0, False) + index = solar.solarYear - LunarSolarConverter.solar_1_1[0] + data = (solar.solarYear << 9) | (solar.solarMonth << 5) | solar.solarDay + if LunarSolarConverter.solar_1_1[index] > data: + index -= 1 + + solar11 = LunarSolarConverter.solar_1_1[index] + y = GetBitInt(solar11, 12, 9) + m = GetBitInt(solar11, 4, 5) + d = GetBitInt(solar11, 5, 0) + offset = SolarToInt(solar.solarYear, solar.solarMonth, solar.solarDay) - SolarToInt(y, m, d) + + days = LunarSolarConverter.lunar_month_days[index] + leap = GetBitInt(days, 4, 13) + + lunarY = index + LunarSolarConverter.solar_1_1[0] + lunarM = 1 + offset += 1 + + for i in range(0, 13): + + dm = GetBitInt(days, 1, 12 - i) == 1 and 30 or 29 + if offset > dm: + + lunarM += 1 + offset -= dm + + else: + + break + + lunarD = int(offset) + lunar.lunarYear = lunarY + lunar.lunarMonth = lunarM + lunar.isleap = False + if leap != 0 and lunarM > leap: + + lunar.lunarMonth = lunarM - 1 + if lunarM == leap + 1: + lunar.isleap = True + + lunar.lunarDay = lunarD + return lunar + + def __init__(self): + pass + + +if __name__ == '__main__': + converter = LunarSolarConverter() + solar = Solar(2111, 1, 25) + pprint(vars(solar)) + lunar = converter.SolarToLunar(solar) + pprint(vars(lunar)) + solar = converter.LunarToSolar(lunar) + pprint(vars(solar)) + print(len(converter.solar_1_1)) + print("Done") \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..0a36923 --- /dev/null +++ b/README.md @@ -0,0 +1,24 @@ +## 说明: +Time-NLP的python3版本,由于原作者sunfiyes的是python2版本,无法在python3上使用,故修改部分代码,使其可在Python3上使用(本人新手,可能有bug) +原项目地址:https://github.com/sunfiyes/Time-NLPY + +## 安装方式: +1) cd到当前目录 +2) python setup.py install + +PS~ : +window下可能出现安装regex错误,可到 +https://www.lfd.uci.edu/~gohlke/pythonlibs/#regex +下载对应版本的regex手动安装。 + +## 使用方法 +将中文时间描述转换为三种标准的时间格式的时间字符串: +1) 时间点(timestamp,表示某一具体时间时间描述); +2) 时间量(timedelta,表示时间的增量的时间描述); +3) 时间区间(timespan,有具体起始和结束时间点的时间区间)。 +调用示例见Test.py + +关于节假日的增加方法: +1) 在resource目录下的holi_lunar(阴历)或holi_solar(阳历)文件内按照格式加入新增的节日名称和日期 +2) 在resource目录下的regex.txt文件内加入相应节日的正则匹配,并删除regex.pkl缓存文件 +3) 在TimeUnit类中的norm_setHoliday方法同样加入节日的正则匹配 \ No newline at end of file diff --git a/RangeTimeEnum.py b/RangeTimeEnum.py new file mode 100644 index 0000000..7040102 --- /dev/null +++ b/RangeTimeEnum.py @@ -0,0 +1,23 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# @Time : 2017/11/20 16:27 +# @Author : zhm +# @File : RangeTimeEnum.py +# @Software: PyCharm + + + +# 范围时间的默认时间点 +class RangeTimeEnum(): + day_break = 3 # 黎明 + early_morning = 8 # 早 + morning = 10 # 上午 + noon = 12 # 中午、午间 + afternoon = 15 # 下午、午后 + night = 18 # 晚上、傍晚 + lateNight = 20 # 晚、晚间 + midNight = 23 # 深夜 + + +if __name__ == "__main__": + print(RangeTimeEnum.afternoon) diff --git a/StringPreHandler.py b/StringPreHandler.py new file mode 100644 index 0000000..2d24c03 --- /dev/null +++ b/StringPreHandler.py @@ -0,0 +1,183 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# @Time : 2017/11/20 15:42 +# @Author : zhm +# @File : StringPreHandler.py +# @Software: PyCharm +import regex as re + +# * 字符串预处理模块,为分析器TimeNormalizer提供相应的字符串预处理服务 +class StringPreHandler: + @classmethod + def delKeyword(cls, target, rules): + """ + 该方法删除一字符串中所有匹配某一规则字串 + 可用于清理一个字符串中的空白符和语气助词 + :param target: 待处理字符串 + :param rules: 删除规则 + :return: 清理工作完成后的字符串 + """ + pattern = re.compile(rules) + res = pattern.sub('', target) + # print res + return res + + + @classmethod + def numberTranslator(cls, target): + """ + 该方法可以将字符串中所有的用汉字表示的数字转化为用阿拉伯数字表示的数字 + 如"这里有一千两百个人,六百零五个来自中国"可以转化为 + "这里有1200个人,605个来自中国" + 此外添加支持了部分不规则表达方法 + 如两万零六百五可转化为20650 + 两百一十四和两百十四都可以转化为214 + 一六零加一五八可以转化为160+158 + 该方法目前支持的正确转化范围是0-99999999 + 该功能模块具有良好的复用性 + :param target: 待转化的字符串 + :return: 转化完毕后的字符串 + """ + pattern = re.compile("[一二两三四五六七八九123456789]万[一二两三四五六七八九123456789](?!(千|百|十))") + match = pattern.finditer(target) + for m in match: + group = m.group() + s = group.split("万") + s = [_f for _f in s if _f] + num = 0 + if len(s) == 2: + num += cls.wordToNumber(s[0]) * 10000 + cls.wordToNumber(s[1]) * 1000 + target = pattern.sub(str(num), target, 1) + + pattern = re.compile("[一二两三四五六七八九123456789]千[一二两三四五六七八九123456789](?!(百|十))") + match = pattern.finditer(target) + for m in match: + group = m.group() + s = group.split("千") + s = [_f for _f in s if _f] + num = 0 + if len(s) == 2: + num += cls.wordToNumber(s[0]) * 1000 + cls.wordToNumber(s[1]) * 100 + target = pattern.sub(str(num), target, 1) + + pattern = re.compile("[一二两三四五六七八九123456789]百[一二两三四五六七八九123456789](?!十)") + match = pattern.finditer(target) + for m in match: + group = m.group() + s = group.split("百") + s = [_f for _f in s if _f] + num = 0 + if len(s) == 2: + num += cls.wordToNumber(s[0]) * 100 + cls.wordToNumber(s[1]) * 10 + target = pattern.sub(str(num), target, 1) + + pattern = re.compile("[零一二两三四五六七八九]") + match = pattern.finditer(target) + for m in match: + target = pattern.sub(str(cls.wordToNumber(m.group())), target, 1) + + pattern = re.compile("(?<=(周|星期))[末天日]") + match = pattern.finditer(target) + for m in match: + target = pattern.sub(str(cls.wordToNumber(m.group())), target, 1) + + pattern = re.compile("(?> file_out, json.dumps(out, indent=2, ensure_ascii=False).encode('utf-8') +# +# with open('resource/holi_lunar.json') as file_out: +# print json.load(file_out) + + +# dset = [] +# with open('C:/Users/zhm/Desktop/test.txt') as testfile: +# for each in testfile: +# dset.append(each) +# +# def run(query): +# tn = TimeNormalizer() +# res = tn.parse(target=query, timeBase='2013-02-28 16:30:29') +# print res +# if __name__ == '__main__': +# while True: +# query = random.choice(dset) +# lp = LineProfiler() +# lp_wrapper = lp(run) +# lp_wrapper(query) +# lp.print_stats() +# cProfile.run("run(query)") + +# with open(os.path.dirname(__file__) + '/resource/regex.txt', 'wb') as f: +# f.write(u'((前|昨|今|明|后)(天|日)?(早|晚)(晨|上|间)?)|(\\d+个?[年月日天][以之]?[前后])|(\\d+个?半?(小时|钟头|h|H))|(半个?(小时|钟头))|(\\d+(分钟|min))|([13]刻钟)|((上|这|本|下)+(周|星期)([一二三四五六七天日]|[1-7])?)|((周|星期)([一二三四五六七天日]|[1-7]))|((早|晚)?([0-2]?[0-9](点|时)半)(am|AM|pm|PM)?)|((早|晚)?(\\d+[::]\\d+([::]\\d+)*)\\s*(am|AM|pm|PM)?)|((早|晚)?([0-2]?[0-9](点|时)[13一三]刻)(am|AM|pm|PM)?)|((早|晚)?(\\d+[时点](\\d+)?分?(\\d+秒?)?)\\s*(am|AM|pm|PM)?)|(大+(前|后)天)|(([零一二三四五六七八九十百千万]+|\\d+)世)|([0-9]?[0-9]?[0-9]{2}\\.((10)|(11)|(12)|([1-9]))\\.((? 0: + days += 365 * self.tp.tunit[0] + if self.tp.tunit[1] > 0: + days += 30 * self.tp.tunit[1] + if self.tp.tunit[2] > 0: + days += self.tp.tunit[2] + tunit = self.tp.tunit + for i in range(3, 6): + if self.tp.tunit[i] < 0: + tunit[i] = 0 + seconds = tunit[3] * 3600 + tunit[4] * 60 + tunit[5] + if seconds == 0 and days == 0: + self.normalizer.invalidSpan = True + self.normalizer.timeSpan = self.genSpan(days, seconds) + return + + time_grid = self.normalizer.timeBase.split('-') + tunitpointer = 5 + while tunitpointer >= 0 and self.tp.tunit[tunitpointer] < 0: + tunitpointer -= 1 + for i in range(0, tunitpointer): + if self.tp.tunit[i] < 0: + self.tp.tunit[i] = int(time_grid[i]) + + self.time = self.genTime(self.tp.tunit) + + def genSpan(self, days, seconds): + day = seconds // (3600*24) + h = (seconds % (3600*24)) // 3600 + m = ((seconds % (3600*24)) % 3600) // 60 + s = ((seconds % (3600*24)) % 3600) % 60 + return str(days+day) + ' days, ' + "%d:%02d:%02d" % (h, m, s) + + def genTime(self, tunit): + time = arrow.get('1970-01-01 00:00:00') + if tunit[0] > 0: + time = time.replace(year=tunit[0]) + if tunit[1] > 0: + time = time.replace(month=tunit[1]) + if tunit[2] > 0: + time = time.replace(day=tunit[2]) + if tunit[3] > 0: + time = time.replace(hour=tunit[3]) + if tunit[4] > 0: + time = time.replace(minute=tunit[4]) + if tunit[5] > 0: + time = time.replace(second=tunit[5]) + return time + + def norm_setyear(self): + """ + 年-规范化方法--该方法识别时间表达式单元的年字段 + :return: + """ + # 一位数表示的年份 + rule = "(? weekday: + cur = cur.shift(days=7) + return cur + + def preferFuture(self, checkTimeIndex): + """ + 如果用户选项是倾向于未来时间,检查checkTimeIndex所指的时间是否是过去的时间,如果是的话,将大一级的时间设为当前时间的+1。 + 如在晚上说“早上8点看书”,则识别为明天早上; + 12月31日说“3号买菜”,则识别为明年1月的3号。 + :param checkTimeIndex: _tp.tunit时间数组的下标 + :return: + """ + # 1. 检查被检查的时间级别之前,是否没有更高级的已经确定的时间,如果有,则不进行处理. + for i in range(0, checkTimeIndex): + if self.tp.tunit[i] != -1: + return + # 2. 根据上下文补充时间 + self.checkContextTime(checkTimeIndex) + # 3. 根据上下文补充时间后再次检查被检查的时间级别之前,是否没有更高级的已经确定的时间,如果有,则不进行倾向处理. + for i in range(0, checkTimeIndex): + if self.tp.tunit[i] != -1: + return + # 4. 确认用户选项 + if not self.normalizer.isPreferFuture: + return + # 5. 获取当前时间,如果识别到的时间小于当前时间,则将其上的所有级别时间设置为当前时间,并且其上一级的时间步长+1 + time_arr = self.normalizer.timeBase.split('-') + cur = arrow.get(self.normalizer.timeBase, "YYYY-M-D-H-m-s") + cur_unit = int(time_arr[checkTimeIndex]) + if cur_unit < self.tp.tunit[checkTimeIndex]: + return + # 准备增加的时间单位是被检查的时间的上一级,将上一级时间+1 + cur = self.addTime(cur, checkTimeIndex - 1) + time_arr = cur.format("YYYY-M-D-H-m-s").split('-') + for i in range(0, checkTimeIndex): + self.tp.tunit[i] = int(time_arr[i]) + # if i == 1: + # self.tp.tunit[i] += 1 + + def checkContextTime(self, checkTimeIndex): + """ + 根据上下文时间补充时间信息 + :param checkTimeIndex: + :return: + """ + for i in range(0, checkTimeIndex): + if self.tp.tunit[i] == -1 and self.tp_origin.tunit[i] != -1: + self.tp.tunit[i] = self.tp_origin.tunit[i] + # 在处理小时这个级别时,如果上文时间是下午的且下文没有主动声明小时级别以上的时间,则也把下文时间设为下午 + if self.isFirstTimeSolveContext is True and checkTimeIndex == 3 and self.tp_origin.tunit[ + checkTimeIndex] >= 12 and self.tp.tunit[checkTimeIndex] < 12: + self.tp.tunit[checkTimeIndex] += 12 + self.isFirstTimeSolveContext = False + + def addTime(self, cur, fore_unit): + if fore_unit == 0: + cur = cur.shift(years=1) + elif fore_unit == 1: + cur = cur.shift(months=1) + elif fore_unit == 2: + cur = cur.shift(days=1) + elif fore_unit == 3: + cur = cur.shift(hours=1) + elif fore_unit == 4: + cur = cur.shift(minutes=1) + elif fore_unit == 5: + cur = cur.shift(seconds=1) + return cur diff --git a/__init__.py b/__init__.py new file mode 100644 index 0000000..ce9ccd7 --- /dev/null +++ b/__init__.py @@ -0,0 +1,6 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# @Time : 2017/11/23 13:22 +# @Author : zhm +# @File : __init__.py +# @Software: PyCharm \ No newline at end of file diff --git a/resource/__init__.py b/resource/__init__.py new file mode 100644 index 0000000..065f46b --- /dev/null +++ b/resource/__init__.py @@ -0,0 +1,6 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# @Time : 2017/12/5 17:29 +# @Author : zhm +# @File : __init__.py +# @Software: PyCharm \ No newline at end of file diff --git a/resource/holi_lunar.json b/resource/holi_lunar.json new file mode 100644 index 0000000..ec58527 --- /dev/null +++ b/resource/holi_lunar.json @@ -0,0 +1,10 @@ +{ + "中和节": "02-02", + "中秋节": "08-15", + "中元节": "07-15", + "端午节": "05-05", + "春节": "01-01", + "元宵节": "01-15", + "重阳节": "09-09", + "七夕节": "07-07" +} diff --git a/resource/holi_solar.json b/resource/holi_solar.json new file mode 100644 index 0000000..729c4b3 --- /dev/null +++ b/resource/holi_solar.json @@ -0,0 +1,15 @@ +{ + "植树节": "03-12", + "圣诞节": "12-25", + "青年节": "05-04", + "教师节": "09-10", + "儿童节": "06-01", + "元旦节": "01-01", + "国庆节": "10-01", + "劳动节": "05-01", + "妇女节": "03-08", + "建军节": "08-01", + "航海日节": "07-11", + "建党节": "07-01", + "记者节": "11-08" +} diff --git a/resource/reg.pkl b/resource/reg.pkl new file mode 100644 index 0000000000000000000000000000000000000000..0798ac8f3b13c5bd54f39fe79ef7205aa886f521 GIT binary patch literal 86547 zcmeHw30$4ob>G}?|GuYYouo}$zdFq%m1a8jjGfl8n^tksrmj;bZj!cXHA?KnZk#4g zJW1N74Q@dQu?Pf6NFV`1h(!niLMsRaXy5mJUwIGEzVA!_=id8m4>Xc^&(Ujt__OAH z-(Al==iGD8J@=eTruqlJ_vhdK!*BnU^9O(S-S7R;pM2Z-_^t83qkiwLx5kf;%m9`1o9u=`6??DqlHa;)IOxAARe+ z{?6u!lVWRD#;fvB7hEsoUo(9&lJkqu(qzA2tf*k(_*i>!ynJ=W(9D`x{mP8* zWPD`;3LuAC3TILVx)>kJn}*UE-~FRMrIbk@etvQ+uNx`&7*F5~_RY?eEM6lz??|#-tD5|H5QGkk_t6UAt(R zIDXOx?|%;C{owsCP98rnWew1_dICl?Vf;70oAKqZWqj|u8DIaJ=*O>vfld0-^|kXR z(+It{3GYt$=r7*=q_ZfrFoAL={^C!6^wGB_fR-P;|M|(hJ`s+xGsX|hXpdDj z;U%`LfRLUs9=rfX0ypA)e0+9$My!4w{fo7iX2cqLVz~=4_`i;}{>~!G8|*`d7#!#A zSVv>5c*O+b{0uPsy#B6Av>eNvi%-lCusU9bX7>?)QAz_U<7KcfH#VaiFDNXs@Y3I1 zhm=^)EEl%Ycr`UU3iQS5t3*?xJAjwW=m-WP3itr8VpyX93U$$n$GkLAGl(p_?Jx2F ziXxvyG5UwEcvZ7RLM$gI-d#J;JEOm|=VE47tg|p)Rf(5)VQDS*v!U6!VH@H-XtMV0WszsnwUHZk^-m|Q)%+Mgs-K=p;bbl z8TUPb|>* zfyw|eTs4$iNI#Ilg}jN5Wm2$J&X4uXB%Z+prwVs%1KJ12RtHMo`6b%A^0*e2`B+#Fr~fmP97iYnhl*|372NCPXmv~2>o&_$GRE`{$$oPp%uy2W}`LGe>z_-TPD9ZxT-SNO6!$e4hUy1Mj_Iu?}Vge zU;%fC=J={Dx=dVcJL?HH7ih7brcIz_ps5IuyZC|V$5)a~0w2k= zAXuMkDpDVEHJD^t1u?wU5$ka>O(jzoQvtXZ0>neQKcOX8dHm{WgT!N`_$M_RiQFwM zFA}Op&1zm zqku-LdQ!R)`leOIW=z9NKNo=>B4|x7Z5l-nXbM}F{xvyxiFf8;>BWnX5?owSaz(>n z!4&>NduJi>V^wv7OXpMBWR{(_RcL{>YGUTnc8E{UB-(^dAPfLNnro@Z0B*2mLkB@W z<4FsFJ|Xra@j1DJ`CYO4qF4^pkdBq{m6fra>6AY>y?8LIjNULcGUD@A#tJH9Ghs29 z70X+l5zi|@W0)?$2vq<2fu8cA#VtrK?eA`h=V!+1r(&NHpIscES~KB2QI4=hq?D=? zWDZvXf%Wpm$OB0Q9+E*RR!=mQ66N9zP9)9_saWcO3&E*^gJZ3YVhav3(DuM%mK3NE z9A>HZb5jT@wOqDKL^k*<)9dSvw14U%#f;VsVQJw~LClvISV>R`HIiYcWa*f&485vigH#j^u_y68`8EPPtgBAEfJAvQ7f_?Mpn+abOoByw8u zosHim88C8TualJlGZ%idk)A6IOE4+XmWRm3jia9|ouGgU2{o8q70X*pMh^(bSZfJJ zI8;3oLRoNfQlk<)o1Ee^ibV&?LNZ_j1Kis8*FXP#!DA5bv?BxQUw>jj3rq$UZv`Gd zi7Z4AFcUIjdDYnSPRtluQo(;=CPn(hjDZDZFW*;lZiT@WS{Dr8(RWk|iMb#CqYD zl&2z8#tl4SP?5| z5?Nn})z9`by1QaI^F&4r@6#&%wq_Q}bfHYTZJ`@5p4Wn>f1+>jo0mSA*BoEfA=S~T zeFICRni{L@iFHl$ipA@T;tNV;M>*N?ax#_-WMKG@bin3*HfT?5i6 zW*yK+$2@#rNAF~$q1D@)U_xO>N^l?eH*60~iWj-f3}f^JAnkjEW%~7nFe(B?zy=X! zurUkDdM4TXCX#%@!PTfFJJv@b@Mw8(4{eeeGr%BiM%W==e~`iLHJuU&LHpt7aCG%W zaeRd%FKN%4ICk_347}!fEwKrw=p5NQ+|-sp4^x#*MjH+BRYuw#uK!Fs2e z-d(5!{sv}c9 zhj6$hiFqR2nB|E*KZ%Cp7sWE+ESejQ-<*%T=d|uA%@ptgN1=(xK|n4yXOq9{gn+Z! z2##uS;BhgL%t9|o$bAx%YDYNi@QpcK-Cjf?5Hv~OK@57-UKZ$@s=JX%|euKSm;z76uH$JDzdoSKz+TYnNJoXa3xS^gD^>^nYzkf|WT)<;F zt7C21v7%+MwiW$dT?5Oy!LkEcy+iYQyS zd18|)P9EG=MMnt2b&n)Q*R%^V)+BcUps-|j7|5oX{rXS~N4t-{HIYKj#oG1yjga(r zquyMIMgfahZ^ii%oJ?EcWJ-?Wm{rNha1M}&14^bDsSJz?Im!1MD=8cWLIXMnqWnC? zg(3iQ3yC55aac0Y`EGUn|cwVW=3}X~y5ZH`$H1zjW2?2(%P3h#vwbbH{WFV4Li{EW98t8OR-tDWb@A$#nt-4hT)+9thk5Fp;nlz6HQV6|wc}bCu941+LwZkE$68=o?i<+Kq<8 zXptZ{&96Tmsc8`Jgmi}_I5;VKavqQ8FN&AZH95jPdlI?8=NCbLghEG-(c~Ewh(6&Y zPeh8R0VV^E%PX{9=2P5Pu11AwX`xF3xk3oUE6T)5B=iOtC4}Ia6f0_vwXe;H6_)pR zwPer^p|WT4#9#;kXO{V*HoVN-)mS8qQgh`*<8y)nx40bQw$@3A9Hz}DZh8su8BM%o zFc1=uxQ#?i$=I|G+ElvQ&M2mqK&|wtag}H>(||OwIrZXuFf0*XH)^p5Wjg2(#nCnz zsKr(x34{tBk}H=K1fKjK1wxrwhOT}-4 zOQHSbfhy$u$_&(v* zlL?5K8~L(G(3KaW(jR?m(np{9=zU!7`J&PyUsM=7_!>L-8u=DbAvs9}O2vc}=J0@t zoU@-;9^BXD4^Bp}%Z?p~V+UVj2VZ2?CCA3GgD-9^Pe!jDJNSB~zh?tx z8$0+)!axuFjU9a9ynLj;l?2?39elm^UznmJAHKgFJNTjt=fOlc^PxZcgPAsV0>)3y z{>DzgK4xG0^rwG$@A)DK5>}}_(Z_(>J z_(3B5palIOlm9fax8MHMTWCO4sR3p2au(Yu>fx1&(C6E4N13Ma0z*653E41SsQ`V7 zHguhes0CGQa2LC?MB&}zl}aIzekc|Hdnd{a<5TQ8{`bQ_%WSGXjx|@Yx4+E3%CtLc zS>7J%7$8s?<)d0+aAYI+XE}g9ET4d#Zh;}Hbl)xBmhp7jT^_SeRo>DsDxiMfu2P7z^AQ}cQGmtiRjg%?ppe8V(g}2=(hu^{ zZj}AVj|hNwu|Z&p7y-X1pw#SQHE2MP>_>mpq2BGxDppv{@~ha%R#sNct}bNRRfK+` z{yXoo55Ism3929au_*iTAAd>9QMCT*&7#$x_z7S-%6{@EdBZPYF^uVN|810emwoWL zxAC7ZM%mx_JG{c0^F&QPM!XUd=<{t(LNUyr`Y9@?effJ||0bv1H@{1m`1052zu)8U zU;mmM;otqc9G1WL_g(_a-~amoLURg!Y2GvS?LgwE-s1cnWuN-gi23MH7WrA0mH90J zX*BxEkp6>zz#;wgr(Xi;PycinYya>c@(M4&8iA^P_<1k{xB)l>M+(S(=4W0RvUlF$ zkp1k>z67$L`?&zRIF3L#z$H5TNB=00%lUO2qj|*J9zQ;iiIj2dz-?lD)E8 zR|fpucV(;N*doZDD0}ZcJ_IRm0^*d5pK)A5fr0<=KaR5Z-*;qX8p#jcYgI;^RF@aB z(^YJJBYSp*ZER$@G+o~bjD`4i4ieGoXFekc_eB~qas1n#QWX9>j(sM|KKoe+{|`O@ z3xD_l760%eXa&>RYU z@I{*BR0=iz=|4r~_5s$^#-aGa7x1}*3rx!WgZZG2VolC1I`)B{`t8ZyomMW`B5X@b`q5Q>Pq+wJuEG`7}KmX_WeBP{( zD8%Q#_!s!Z5`zxtbK*pNZmm(e(JnrJ>6h?{rextSe_4E9tme7D{LA<}*vo|u3jfNl z;1lr1M9cs3Uq;!#`d2AqjVhoKo&4&riasU0|N38#BT*e?U-`$fn*X6P`22Y&mvxy<_a|Ng6QMK&*F=TEa4 z&sk3u%lby)T2{FSzolDP#U?iOD%dzw^FRCtd}i(;{-<{TWpS3yA~HlUZx{fC%hA{Zl?3%#w*2@6uifd@be+ zdLPG@pb8cHvp-Y-~T(B5yhkIfBcUWK}i7q)n7%~|NNgRYS3fi|MkC!I^(dn z0>fbojb$E`!aGC{umSx5Tv%7DKp`S-p~5d^1y1iEYjVu|^B% zV=ecPJq=GV+f(IpSuV2k@PxDFJjiGH$S%MW&ITM&HXTnmS*pl|tdPb-lvlws;2g5f z2w5OF0or&G9=`0&?%~s($c5fpFi;R&E30l?R zfp!dW<&(dj)f2?9Jc2u-tO3uPhnRqE!~?*J?r>`_5*Fc!GAxN9a-j%RZICC*7UM|| z*qd21gg!VY%9h}H^FT#e3!X5WV-X-cx8eaO3-gL^yJ%x=fPN{SHxE5{7R>2y;o4a{ z(2FfwDE6u`31?FHSeI}I>IxLAvat7H;-S{@@K?jJnE>3ec5 zlxt=W>!CSAnt2;-iRg$44fo=a)4q@OrNL;UY#p8yscjx}t!L|rxx{{e#=QX#&;b;# zIvPO7Mz&F$qhY}*fc1!*APCu?uvy#0Hc{JxaieTAo*<8T7QHQO3zdUn%eCOGY->U} z&cWN*wuEHK!Q0t(s*bw17jh}MgYBS%DJw{7)0lSRiLzaI-u$!%V7~C%&2|%{;5{sQ z1nC|;QMMOP5K}0EvGR3sAKMqo@^!&v4QG?X z>~I1mIGY?{M-q}Hn;d0FfuLh}QiLGS3U{0xN6RPh1c}C#ZKCN(Jb=5Zj!hHC0jJn0 zBuIVJXNJ@4G?l{ygNherXYiy5Qi9?v9<+`li_OCLdybu>gq%~fQKu5;@kH4LJi%e0 zh|tNX$VEIzXxPQ3kaU(Sz)c0R0M{0EwdPDjtH7ES}&Uz>ldw!y*#^ z(#QmWMEXHC{UD!ys0=Iz*YHqLVd^2}DmKsO-w! ztaKCaOGEObsY=muc4r~AucA!l3AR&wpy=Klqv)e><%mOTdRPzoVBw#a0$U(wgJW+NJ&zolhPZr*1_X$eZ((-^k zNEhg$>>-{Y*1aX_5qktoKE{(GxfW$l@T3D-B(FSWPhF+)8GDunkVV;ZJV9y@OJ^#X z3Z|8i&{C3TDOpHU;;Y?L>M6<;B~X!@Q|SN#g?v$ztz=Uz*G4i`naUl>-tt zL*o=C9h&hN`!r=5K@5|$RP=L|Tm=SOB`-Kyj8cWf^!Hr(N;s%t-?{ysc5WQcl@XKEDMzWL6Q?jcRJQVpJ%ugp)VjX4!%HA+oF zGB_#97AOl6k}-LrtX8Q_15|j6=0atm3nlEQN}WPFkI+KMdlAY7!?I65a9yJxI5W~u znaG1vBYnZKk$zxKpda{R(GUEw=;zrL{sX@(`hss3{Rn>~*Aoew=dimLop1@~)hMf1 z>N&zjtH?Dd4SY}mMGgSK9xU~_Mx_z7U8F1mW2>jKl*RSjX9TPadhHWu2u(^8A zd`i=#|DdEnyV6cF61xg=G(f3k$}(`ua%Fky@Bl+(l&w%!qyZiUuhPpO|rM6`iF|%z!f}(kEt=-Qe!0BzRwX%+H%%t*mxM7Pner zpF@?UBexJ8QEnmCjVr0ImC5t2N3RA~_CKr2^HZbWuX>5qHPbt#p$F z!6^i4N%zUM%37pp!UzHv2+2P^N{?8EAZv=3vz@nDOC9;m_A0%c+!RnicG|#3mOlB` ztxxI0FxM&TLKyN6;@2zd35tyi*{TlLT(LpffGQi6jcLG3!WkJ;W|OiBKy6kwCyaxy ztXq^V3CVnA-KuO&iy_FP^=-;FSN++pY)=EoqHKqMwo(?oe9b0OadzGQg#s+ z7uC9^*xkx*jES4#_9%N&HI=BVy~D0j8GMqW10dxt_^fi4(vI(7d0yH% zxg<<2F%^$W@cBzdi0R4!7j6%DMZ!L8M=^i!=b%*Ixf*NG{y zV4b)Ur#g>%*a@$v0cAj@Ro1!f3@U?^28dublMoFlLr8P2uDtb2$|WRu5M5R-Qwh|9 zm=*nBQLY3_Tve`83E#YXO}R!1LWYVe*OlwRDmRoH_*9#xD)&#ghSQtMO-k!q$YOV0 z@NOx$sHLZ^Y#LlzIC!^}+rgIZD0iq<(E(Pv&#iS=xl6Use_UAS?kV@E(C#{AMT;Uw zeqXsyX}K%d?d5L82g(CVYcE%FGTm|ym50oND%IXhH2_FpdnYyO%)o@I+_N} zsr5WHPv!icujY#&$;>?r3`{zWg5RuJ+#rrba)@;8*c3K}$^~ixK%1^kPa$a7(@D*( zP%R`(09Gg!)LoHUq)Ic!40T2t5R!l@R*MNrOeEX4#_4!dWnGpR=` z9Fm5!)LAOmyAcz`e*>{pu%fADs>97jpz}CJ-i0u#sj~s(9CZ%IViJ+HqrW zrhqC;pLBjxp;m|@*bLJ_G8Mrq86W6Vh}*g9TwdrE<7?-s^VCH1FLq*l_`cDI%#|24 z$4Tr35w?|T<%lhDWRb9hZ=qVnVSBwO+iJBM+=>=>O9UM3R_c6#ikC*()~Gc>uwcYe z7l^8#6m46p)=ExDh__v+E_9oNaY?NcyLfX0=%~ z{{l7yd6uY4s4R>KZVYaV+9E2wAfA*HSPZ*WZRJH@DGs+yZ4*!}RhPaDs&=*Ar5)|E zIXAyB8h4qxOmIMA36OAj@aS@N`K!acLR}$XUa78p8O*EHRSB3}t*#c8UVup&)`*e0 z9css`!`P{I3K-X@YhDIpm)bQ9uh>^mARm|@%DUC=V7`oUp)Cf_UaPJRX7gG8!YJJy zwdd6b(W~~lExVDreQKW=4o;?NouRphSDK!DUH0q>>(q6aO6%42sak!!c-;-^21(Kv zMC@)x=B*`#iwb*t(f_#obQo4QR!^zL}_1dp=q z>UK3T-i;*14s{1L;m7apRCiK>AHlmz-Gzj34DW7rH;}SN-NSMCrjb0gVXC z)9PuA|BQOZor=;glf(nE&#Gr>5bp8YIrUts@IHv+|kNh1}2Zgj9QiqW9n$f?P)JxPH zwiYt}_p*AK60kl)40_PwrNM?yFf_=b9e3i;c* zo9aza=9YSEm{^Ol+v;s0+~LYb!hvXtUlQ&-wwF+GN4-N0Lg|-h@ORa_LE4Gv85F#y z-a|+C)%(Lb;#`Dwd7CKtKz)Fc57mchfLPC4@saunKs{C;rvXr~lBiEoHLbma`={zt zmvBUpzk%4{XX-Odqt}fdey%=u;e=^N%hUqcTP@omKK+#ZMBdU11Hl3Pst!#x_VXB}dxQetQpkRhJBUO!h zj9#o2bE^GK#21%nCHw}j^o}qt)k;ChVKK%twV5t!MA8;mn9)5^kGv=M@JQo1+8j{3Tq_Tjrt?`$HK`g_Xcfe6*!=S&zPZ}mP+8)cdD=XUhqb_u zN2?5FdmE7|tqMI=Yt;$aa(>L$<|DgCt4YWP9I$n03lfq63Uyqo)oR#i@d@<8NaKat zLe2{cY}$10Ps=G@-@y;RZ-5%A)9TQDy;h$pt_Q|^vC^P55DH!Y*G8?81N5dh9`ZPJ zk+z7S1Ew2c3j{Knv?d^9v9>r3paft-&=FwGTC;>y9#1UMmbi?;dEiBn#w}WlTTZS3 zoR?d*)`VnIv*5;}wI!tU;!Cxqp=6Y7*V+jpSM6V>Een;9A8>1>m$JY5axB-I&j*0ZGDJb zC?djT5J{(PNE6sMY8$bdU`oMuq-{zApy2MJZ4Tkh(`mM7TY%?bAj;xAlzKEooN7vN7P-~E}A^PJN<5LH^{k1+cTUA;2)#y zO-Sa_d!M#1Az8}&{n~ysc0fB2%0_>n5^B=yJ*XW70KpLEL)xJMPY7W?t(~T}1>?e4shtV&m>0r)Ry#}O+z{q-+PQ>soP*D6 z=M$182Vc-GP<1GbGKBe}c99bN5axcZA3Pq@Vrf7|0Okw7xE3c!!Fw`?MfOT%!@#{s$C^yxgpHgv}*~N;B0bT zyPl9N+2n?H0|>gQ-AomNJfq+(?G{?Tt=&%OSc=a(+8u)14`IHm-9^H%5axT@Jt~I@ z1{DuBQtf`KASHSBKzl%~`ytE^wTG18hcG|V9$^@dwa1}hpoq}PsF5ey6ROkTg)l$W zo+2R}!u(8oMuqCU5a#FFb2OZ(XYwn`-}E+9hA?O8SvsKfLzt)NQz!v*Q8G-no~`35 zGkt0rfDOQWkIo!DM-Ny#;e(}5<1mgk@+DW#)yWKjv)Xs?LnpU5j|aW;^gOqxD9hLL zc`yGjgfJKA1)$w@eYy)i-)HkpIo1hGOgg5Z5fMeaZ{EQGmAucBJ6jig$y zPJ=;v%I$o8K8;gY?Bu~@jb1|#`ytE=^abF`TD>+n+F>Eg3-yI4Rj1dbigy76#E>)` z*6Z~I6xJRIWrN;;?i%&R5KvwS^CEo_vYYfK9Ud)<^~F?Kc(gR@&HRIVv@Fq=@DJ|M z(xSKMo=1yROUlt;UAfOCkUal|4#OE6N z3VlTyi~?p{ePswoKA!AZCQO<$9c3>_-U zy7aCzK!vwxcI(|Pl(_p$U(0o#vb;4I9P~NZgQY&#r}u%j>-2SC z>=%VFuh-X8ZYYF#gT4XvHtHMGf}C=(-K1{61g4_v!mUgZ=vcRM7y#_MJ5k=m&`8ehBkH{U9ay zA{k*IG zT+lD10c5ysPrsOeIIu9BPxbzUWb#shc#G*V;$lC9Ij+YsCT@xw&<9dAm8h#heGmj6 z(udLjQS#vYl71-xEgV~y^~(v#5?fdFD}*gSg!!s|6=S-lUrQ*@wZ`lE^-wZu`g@-n z`VG{)sozX2;v2kf>9?Z&-7=h zY8277`t!8lVd=G%X=EA@`5=kEN@p2aNDBn4PBEq!@Yyu7jTBj-&^n}qR7$5BQw^>Z zyU9LwCp-~8=6d6USLrOScb%rs6dh`QUi;ZG~Ygr=(tdeV}*}*^RiUV57k#meWR1SKO zH2Rbq<$$!ps7NT!^^Lj4+)y&66S&3ib)GTL%_kHYI4kfx9YHV%I{Hf)vg=Bt5!E|TTCghT~+9|HjU1%&s19e7S zsEBlgt2gQslI6^3Fd9&+(P#{112TlyNP)k|SOie%56WP$(u~(+G*KF6JWv6{0%szO zgyPX@c#Dn2U>)x3(rh%R!GuFhsj(yt-~eWQcas*Qg_uB`AM!~?cU2JnT8&l`uH$&j zFMmNTgyXdtZ9>+<(hKoMF16B4la8|}1%;LxOL-v+guyj*xU&uy)qyy;nXaswZmZ*- zxM6e9D_mtqjLLy)H`>87%Zz0qp22lq#_}}4us0`H7%K?7!qf~ap|LV8ppzSkRmLis zLE?-b7Zk(4)L0!tsGPbTMn^)joVuMxC&;kISQE;|h>_3jvt3448jP6Ox!dR_Qn-;5 zYmK$3TRWsbNsrM(UC2KphCX|ZUP3UAy?ytXKBLb?7D+6+;{f;E@VI)WiUg< zCJMxr*P84!cKTzW9(NhLkf4gAW;ajcQrh|x~rJD5D`n7 zo^XvjlHSI>@mujX5v$mCezDa({1N#McC(SK=%9ukKcKrxpPgg9t;(WxtoJh8e3_lR z%663~NJo7eo!2Z-o^N6~jcnB=cC(k-+PH_RA=?0i=;2bfg0AuRb36KYCDC$i2PNSi z`bSIIx*M#wTq_toa0k~RefuglbA{|j z0U9vrXpBHbJcpnYNtpFOBxo$R^9PNC#Har4R)>s3m>GwS!zr8@;G(s+*s0B&v5pu= zs7Fi$xzrprj*5K(AST^<9K+R>#Dd3+V*us2aoi;}*LzVH1u-DRm>VZTS^l59Fiuh* zV*R70P8p}r)M?{%>PAJj7kO-X#yCUWxksF5jkCkSin4RYxrAhj(FJhljq}0k7mN$3 zg3?3CMdKpI-*5DXAOOr5gw(oYMvR~kao>mAyw}H4j#+@_((?jlE<1T=@ zXWUC2+4x#`-?*Pn!+c;o0H45qj1Yh0Vd};z?FNsGN5JA^<8g@cm^qMKzA@v8@q{D? ze1Y)ycWEj;HJ(y}_+yr^l^V~|0A~Qm*B_o6&j~`IjSdl%QGY2hCGpCusjRF__FsPcj3CY~lm1pK9 zB+F$j-^@p21!jQ>@2ctMbl_KdR~4Ft{DXT}6`4i+gL_xaFlU%S?<&4Sp;eZ~U2GPc zu0fHm07rZN$wi4-g5FBa(lj6v44CH3)DcHc##!bpAWDsOvs3e3-KoqhBjh0>P?~vX zo3nw2Ip&<B%gP_futjFC5+&1o@CD84YGn@bYlm)3|DvxSC_ zKf-nqivfUdHCrhG1Cr9Q&1?%+4Vbo;noIcz$)E%cfSa|2CV>61v@%KCv)K-eEHjq{ zTax!?$n%Qj=5p#+m`Vtc73K=mS!u3J6|0JUiLa`xGFQ3akc4{a{W^dSqD{)b)#hpf z*EOYem>sDC6HcsVXS#3>5n_$GhK34;kyE|P>~awYBeB`-e$WIL?oHxWm9^&Da2D-r zL{^X46V4*P4UyGr_J*^(3z7QFK5)c3b6qeWJ#wkP-dvw53V5E#8_W&paHF{~)M1DZ zHkq4HWV5+Bb)8E~>K1bgTHb1IOX#<;BAO2dV zXD2^k9&mBWr5g^^pnqXsMxG11NUIIGG|GvK>3+~W803ya<{`Ho*Gh zpv5RVVjf8YtdP7qY90l*95auF;P#?nkDJFaK3cJT20LM%K$Vl`$y6~orVTj!qF7Q- znWxbDY4da%Ajo5zGv*m0bD3u?K5L#OJ`$P?7G&$K-bOd#0edBtYQ*Xxp_))afmJ{Wu4X#jnw~23cuH-d zRS47+Sw#t`!Gy*=p4NDGG;^)F!1Fw7 zUI=a!@fGAstCH%JEO1?Qs;nv?9%g0ODXr=>8QFYmJ_b@_)r1D(tvU;=1vChZN}g!f zTD3?B|B--&)qQa*Nf11gQ)8{&THXtE`BY z;CX7b1#t_#Rm!iW)>4qG-D)464MUx^EFoE1JC|F_QEP>@B9sld0F?am1S_qTZYKVC zsl#dG_rkL660ln+k;@MW#ZegB|D%h2ZB!?lmYS^f$j4>O%AK9Brijwc1)u zBgRp!#9fEg0U$fAPTsYK-3pBF&~ESS7X6Ct3-yLX(XWa;DEGpbNJsH)PucQ1mUWlh zVO+-^>43JzT7&+`1=MHZE~|@RfU6;(C0WsJbpwXA)>;=AeDBO{rz73a-DCBjc(2u) z2KLYmNtYMeD3} zgb#S0$UAb^TkDZ9On2U3ZJ=@@bb#=-(b`A}K(4etZ?ZN~0!S-hhN0P!$k~Y zp{+BahKXWlt+S%0Pr~WQj7ySM%S{MEP`mKJyznB$s`*RzC z*Ps;-cPS3m2CM-LbkG{~Fh*5==w2(WMTIXS`xvp>lpy*H!B(AiZW?ODK=| z2aA(+J(LW=1C7FWgt}qfaPw(=pB&Y7)4GYCZdte7Dtzsgq`Ph14kZ)m?pSvSRzIri zu5}kJ+_Ub5iUxr~pssa4AvX}!^}u?71|C`uLq#MAkE}-t$x@|zY&}M;C)SftHXy^c zU9Nslt)~Dr9M$#AdPZr`TCm)K?3`&J(&)5~hWFfh4ync+5HjsdJ00vES$0+$zyZwq zn$8q^ij9Oiek&wb)ZD19Y&+ZL2^4X{Ur6165y_s)2SIj5is2eDsw>CN;e~!sQC-vQ zX*Mi9cCH@q@dD5`6=J=;YVNi4Nsgs84L_8dDL)m3hn+i4KhRbf|fa*jT#Ypy+)aD=lA zX_A{~&!YrCs;knjM8dGBt}45V%K1@U)pj){xCEVV&rcme<#x8lt^s>5uotANWiSKI zmi*J|TD#UC1F_ITdm$2nQC)R*U7F0Gdb^%r3q^G`*bU%REDQ+Cwj0v`P;i9r>a@sS z=mgS zRlq2|(q2j3!HfXNU?j9x4F?N>)%NOyWNC!#used)JMGR?LFpl6jlBlr@3Ol>5CCQ% zT-w09?QVj?^|4-SuML&;G@l;3M=W8m@7cYngH%R!_1S%-oD!)4jUUyu&R&NRthd*v zsz(Ss@TFK}h-Ws~8_>ZA^T7o;0yry`om%S zFhM9BZ-|MH*hi2sEUN3MeH8fRQC-LEV`(r7g7vt498@`BpGY0OY28K3C+(AH`ILPs zL~kzo;qPakPDtjat~2(Tgk-s_owd)Rv2*r08>YhZ_IX0MFil>tFYpg;n!IRVCsTLDM9+PPkEBF*_y{1zZy~EUGJR$I;t>J&*=Of&tSWOdWCLWE`@GfT(a(*CqQB zAasmam~I)z`Gj9b^Chia0W$;82W~N!$rhsqq=U| zH-XVx_N~+b2gMh+E84fyi5Pe6JCJnTsCw7Fn+A}=J6C~3R$E5sIF`$n-ct}uBpz{VAX(GJIBf4 zBP4?oG=NWt>YCzdMvoT5~L3Ex6zM!Ikgu}HB~OhX03$f;i9l(>k4k=QA9KZvu01HFjP zn(538XVJb!WX*DBg|qyqt}>@AoaIGz&30ykBjz}Bg8Ar?OZ{@EJXI9%qPi-a3UoNv znH%a5xbUO<<~j3Fq|&KOUFXu0TIE!s$*9IdZ#R-^%fJhoZstRyn~QC+K? zRm4Y$QC+K@)ehEt;-OcM>gsShsBu54tJCSE1UIT{jk6|!I8roqIbC2V9@W+Dbf=0M z-k{eyYe9(~rzfFz9_`TU^d=-rm;OGd&*2A;BSdwrbJn57_0IZ)s(ek_;A}_>lJlsp zjn2jd7^Moh$=O7-@uRvnJDZ6U5~I4dI9o8(t>e}vXC&0uyMKgPc zvxCYdMs@9Uc2YS%s%w|CixOPrY`3#JRUo5xEYG2+u075kpk=SKH-ufd2s-;x)vI?z zu;1B_z79AC(txcJcRdH4gMjUjb0~y|M5X(%a~P$LI7d>28_*f{V`*kP>KrA)`%zuT zoMV*WM|B-{j-&q*&WSX@Ak2c!$sY(FPC2JA%G1v25G`b!%o*oQ8VsK1^I7LCQOA$! zI_I3D1XnXX@0?E+dOW4}f^z|=x#(O>Kn;W;ZfA7*6O!c;5OZP#m><;@cj9Pfz!^vb zRAP>WqPhm1LBKZT428x5@UbXK7m`cPC9Jdj80@lhISs}D)2eeN1h3@QtIpLl0FSu! znsbeKz>n&>?pz0+Z#XwXaHEK?Am4OuQXN04>y~p1i04sVx1HN*GO|0)9Sr2Ib2l^) zZ`HZy+@nEYRPsdozH=W5;i#?$&I7dZ(0Mq#4cPpgN1sSq>x7JM|BlNiU8z{$PC_f%A>l9BgN=19Mx44DIpmAsIJmTDPWix znd#z!@142rbfl=RS&>;NUKT0iAivo^U4wD>b8~iNc7(P*uNl=fCo+fd;YW3qN6L{f zOn0t`R8ToTs%vg!E+zO;UGpOIC;`kY{n{%dl@ULxt1423?1ZSU>PR)!@uRxtN9F^) zHIbSSHKgHiL1aN1VBI^rtBuqG)`gLUX#kW^-hDeyND73uQ(>yC8${jH6xb^CK0fh{Z26YlbD_ObH)kJayg ftbPAuee%b~ue~+l*WUVCkN%e*o4@(7^_l+*fLIk} literal 0 HcmV?d00001 diff --git a/resource/regex.txt b/resource/regex.txt new file mode 100644 index 0000000..7afef24 --- /dev/null +++ b/resource/regex.txt @@ -0,0 +1 @@ +((前|昨|今|明|后)(天|日)?(早|晚)(晨|上|间)?)|(\d+个?[年月日天][以之]?[前后])|(\d+个?半?(小时|钟头|h|H))|(半个?(小时|钟头))|(\d+(分钟|min))|([13]刻钟)|((上|这|本|下)+(周|星期)([一二三四五六七天日]|[1-7])?)|((周|星期)([一二三四五六七天日]|[1-7]))|((早|晚)?([0-2]?[0-9](点|时)半)(am|AM|pm|PM)?)|((早|晚)?(\d+[::]\d+([::]\d+)*)\s*(am|AM|pm|PM)?)|((早|晚)?([0-2]?[0-9](点|时)[13一三]刻)(am|AM|pm|PM)?)|((早|晚)?(\d+[时点](\d+)?分?(\d+秒?)?)\s*(am|AM|pm|PM)?)|(大+(前|后)天)|(([零一二三四五六七八九十百千万]+|\d+)世)|([0-9]?[0-9]?[0-9]{2}\.((10)|(11)|(12)|([1-9]))\.((?=2017', + 'arrow>=0.10'], + zip_safe=False, + classifiers=[ + 'Programming Language :: Python :: 2.6', + 'Programming Language :: Python :: 2.7' + ] +) \ No newline at end of file