Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Expose limiting for * and + in Xeger.xeger() #37

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 17 additions & 11 deletions rstr/xeger.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,6 @@
import sre_parse


# The * and + characters in a regular expression
# match up to any number of repeats in theory
# (and actually 65535 repeats in Python), but you
# probably don't want that many repeats in your
# generated strings. This sets an upper bound on
# repeats generated from + and * characters.
STAR_PLUS_LIMIT = 100


class Xeger(RstrBase):
'''Inspired by the Java library Xeger: http://code.google.com/p/xeger/
This class adds functionality to Rstr allowing users to generate a
Expand Down Expand Up @@ -61,12 +52,27 @@ def __init__(
'negate': lambda x: [False],
}

def xeger(self, string_or_regex: Union[str, Pattern[str]]) -> str:
def xeger(
self,
string_or_regex: Union[str, Pattern[str]],
star_plus_limit: int = 100
) -> str:
'''Generate a random string from a regular expression

By default, * and + metacharacters will generate a maximum of 100
repetitions of the character or group of characters that they modify
for each occurrence in the regular expression. You can provide a second
argument (star_plus_limit) to change this limit (note that the maximum
number of repeats in Python is 65535).

'''
try:
pattern = typing.cast(Pattern[str], string_or_regex).pattern
except AttributeError:
pattern = typing.cast(str, string_or_regex)

self.star_plus_limit = star_plus_limit

parsed = sre_parse.parse(pattern)
result = self._build_string(parsed)
self._cache.clear()
Expand Down Expand Up @@ -99,7 +105,7 @@ def _handle_in(self, value: Any) -> Any:

def _handle_repeat(self, start_range: int, end_range: int, value: str) -> str:
result = []
end_range = min((end_range, STAR_PLUS_LIMIT))
end_range = min((end_range, self.star_plus_limit))
times = self._random.randint(start_range, end_range)
for i in range(times):
result.append(''.join(self._handle_state(i) for i in value))
Expand Down