Skip to content

Commit

Permalink
Support array of types and arrays without specified types
Browse files Browse the repository at this point in the history
  • Loading branch information
rlouf committed Nov 8, 2023
1 parent 8dc0b20 commit 1e628b3
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 5 deletions.
28 changes: 24 additions & 4 deletions outlines/text/json_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,7 @@ def build_regex_from_schema(schema: str):
resolver = registry.resolver()

content = schema.contents
regex = to_regex(resolver, content)
return regex
return to_regex(resolver, content)


def to_regex(resolver: Resolver, instance: dict):
Expand Down Expand Up @@ -164,15 +163,36 @@ def to_regex(resolver: Resolver, instance: dict):
return type_to_regex["integer"]

elif type == "array":
items_regex = to_regex(resolver, instance["items"])
return rf"\[({items_regex})(,({items_regex}))*\]"
if "items" in instance:
items_regex = to_regex(resolver, instance["items"])
return rf"\[({items_regex})(,({items_regex}))*\]"
else:
# Here we need to make the choice to exclude generating lists of objects
# if the specification of the object is not given, even though a JSON
# array that contains an object here would be valid under the specification.
types = [
{"type": "boolean"},
{"type": "null"},
{"type": "number"},
{"type": "integer"},
{"type": "string"},
]
regexes = [to_regex(resolver, t) for t in types]
return rf"\[({'|'.join(regexes)})(,({'|'.join(regexes)}))*\]"

elif type == "boolean":
return type_to_regex["boolean"]

elif type == "null":
return type_to_regex["null"]

elif isinstance(type, list):
# Here we need to make the choice to exclude generating an object
# if the specification of the object is not given, even though a JSON
# object that contains an object here would be valid under the specification.
regexes = [to_regex(resolver, {"type": t}) for t in type if t != "object"]
return rf"({'|'.join(regexes)})"

raise NotImplementedError(
f"""Could not translate the instance {instance} to a
regular expression. Make sure it is valid to the JSON Schema specification. If
Expand Down
2 changes: 1 addition & 1 deletion tests/text/test_json_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ def test_match_number(pattern, does_match):
"title": "Foo",
"anyOf": [{"type": "string"}, {"type": "integer"}],
},
rf'(("(?:[^"\\\x00-\x1f\x7f-\x9f]|\\.)*")|((0|[1-9][0-9]*))|("(?:[^"\\\x00-\x1f\x7f-\x9f]|\\.)*"(0|[1-9][0-9]*))|((0|[1-9][0-9]*)"(?:[^"\\\x00-\x1f\x7f-\x9f]|\\.)*"))',
r'(("(?:[^"\\\x00-\x1f\x7f-\x9f]|\\.)*")|((0|[1-9][0-9]*))|("(?:[^"\\\x00-\x1f\x7f-\x9f]|\\.)*"(0|[1-9][0-9]*))|((0|[1-9][0-9]*)"(?:[^"\\\x00-\x1f\x7f-\x9f]|\\.)*"))',
[("12", True), ('"a"', True), ('1"a"', True)],
),
# allOf
Expand Down

0 comments on commit 1e628b3

Please sign in to comment.