From e205527cb11148b19ba4061d8503e7866c3f25dd Mon Sep 17 00:00:00 2001 From: Liangsheng Yin Date: Tue, 13 Aug 2024 21:14:05 -0700 Subject: [PATCH] Fix jump forward final state circular path bug. (#1084) --- python/sglang/srt/constrained/jump_forward.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/python/sglang/srt/constrained/jump_forward.py b/python/sglang/srt/constrained/jump_forward.py index 7b694318e49..b00c48d4784 100644 --- a/python/sglang/srt/constrained/jump_forward.py +++ b/python/sglang/srt/constrained/jump_forward.py @@ -62,16 +62,22 @@ def _init_state_to_jump_forward(regex_string): id_to_symbol.setdefault(id_, []).append(symbol) transitions = fsm_info.transitions + outgoings_ct = defaultdict(int) - state_to_jump_forward = {} + # NOTE(lsyin): Final states can lead to termination, so they naturally have one outgoing edge + for s in fsm_info.finals: + outgoings_ct[s] = 1 + state_to_jump_forward = {} for (state, id_), next_state in transitions.items(): if id_ == fsm_info.alphabet_anything_value: + # An arbitrary symbol cannot be recognized as jump forward continue + symbols = id_to_symbol[id_] for c in symbols: if len(c) > 1: - # Skip byte level transitions + # Skip byte level transitions like c = "5E" continue outgoings_ct[state] += 1 @@ -87,6 +93,9 @@ def _init_state_to_jump_forward(regex_string): # Process the byte level jump forward outgoings_ct = defaultdict(int) + for s in fsm_info.finals: + outgoings_ct[s] = 1 + for (state, id_), next_state in transitions.items(): if id_ == fsm_info.alphabet_anything_value: continue @@ -177,3 +186,5 @@ def test_main(regex_string): test_main(r"霍格沃茨特快列车|霍比特人比尔博") # 霍格: \xe9\x9c\x8d \xe6\xa0\xbc ... # 霍比: \xe9\x9c\x8d \xe6\xaf\x94 ... + + test_main(r"[-+]?[0-9]+[ ]*")