From 669f2ec371838670b7d8659646ba6a895eb1d8f3 Mon Sep 17 00:00:00 2001 From: msm Date: Wed, 18 Sep 2024 17:14:54 +0200 Subject: [PATCH] Fix nested alternative parsing --- docs/yara.md | 30 +++++++++++++++++-- src/lib/yaraparse.py | 2 +- src/tests/test_yaraparse.py | 14 +++++++++ .../testdata/parse_exception_example.txt | 1 + .../testdata/parse_exception_example.yar | 7 +++++ 5 files changed, 50 insertions(+), 4 deletions(-) create mode 100644 src/tests/yararules/testdata/parse_exception_example.txt create mode 100644 src/tests/yararules/testdata/parse_exception_example.yar diff --git a/docs/yara.md b/docs/yara.md index 929b98d4..4af35c89 100644 --- a/docs/yara.md +++ b/docs/yara.md @@ -231,9 +231,33 @@ rule UnluckyExample } ``` -This is not necessarily a bad rule, but there's not a single full 3gram that can +This is not necessarily a bad rule, but it will generate a following ursadb query: + +``` +{} +``` + +In other words, we ask for every file in the malware collection. That's because there's not a single full 3gram that can be used to narrow the set of suspected files. Due to how mquery works, this will -yara scan every malware file in the dataset, and will be very slow. Becaue of this, -such queries are by defauly disasllowed. They can be enabled by setting +cause a yara scan of every file in the dataset, and will be usually very slow. Becaue of this, +such queries are disallowed by default. They can be enabled by setting `query_allow_slow` config key to true. In this case mquery will allow such queries, but it'll ask for confirmation first. + +## Caveats and advanced topics + +There are some things that could be parsed better, but currently aren't. + +**Mquery ignores alternatives in hex strings** + +`` +rule alternative_edge_case { + strings: + $test1 = { 11 (22 | 33) 44 } + $test2 = { ( 11 11 11 | 22 22 22 ) } + condition: + all of them +} +``` + +The first string could be parsed as `{11 22 44} | {11 33 44}`, and the second as `{11 11 11} | {22 22 22}`, but as of mquery v1.4 everything that's a part of alternative is ignored. diff --git a/src/lib/yaraparse.py b/src/lib/yaraparse.py index 4b1f4c56..44be487d 100644 --- a/src/lib/yaraparse.py +++ b/src/lib/yaraparse.py @@ -132,7 +132,7 @@ def ursify_hex(hex_str: str) -> UrsaExpression: hex_str = hex_str.replace(" ", "") # alternatives, are nested alternatives a thing? - hex_parts = re.split(r"\(.*?\)", hex_str) + hex_parts = re.split(r"\(.*\)", hex_str) hex_parts = [x for y in hex_parts for x in re.split(r"\[[\d-]+\]", y)] output: List[bytes] = [] diff --git a/src/tests/test_yaraparse.py b/src/tests/test_yaraparse.py index b7f18834..c9bda913 100644 --- a/src/tests/test_yaraparse.py +++ b/src/tests/test_yaraparse.py @@ -11,6 +11,20 @@ def test_literal(): assert result.query == "({3f2504e0})" +def test_literal_wildcard(): + hex_str = "3F25??04E0" + result = ursify_hex(hex_str) + + assert result.query == "({3f25} & {04e0})" + + +def test_literal_alternative(): + hex_str = "11(22|33)44" + result = ursify_hex(hex_str) + + assert result.query == "({11} & {44})" + + def test_literal_to_hex(): rule = yaramod.YaraRuleBuilder().with_plain_string("$str", "abc").get() diff --git a/src/tests/yararules/testdata/parse_exception_example.txt b/src/tests/yararules/testdata/parse_exception_example.txt new file mode 100644 index 00000000..4c3dd89a --- /dev/null +++ b/src/tests/yararules/testdata/parse_exception_example.txt @@ -0,0 +1 @@ +(min 2 of (({020000}), ({ffff68747470}))) diff --git a/src/tests/yararules/testdata/parse_exception_example.yar b/src/tests/yararules/testdata/parse_exception_example.yar new file mode 100644 index 00000000..5469f9ac --- /dev/null +++ b/src/tests/yararules/testdata/parse_exception_example.yar @@ -0,0 +1,7 @@ +rule parse_exception_example { + strings: + $xor_key_size = { ((BB)|(68))??020000} + $c2 = { FF FF 68 74 74 70 } + condition: + all of them +}