From 054a204aa064729ccb7c264337433595a59acbd2 Mon Sep 17 00:00:00 2001 From: Simon Michael Date: Tue, 3 Dec 2024 17:25:43 -1000 Subject: [PATCH] imp:csv:if: support & ! (AND NOT) --- hledger-lib/Hledger/Read/RulesReader.hs | 60 ++++++++++++++++--------- hledger/hledger.m4.md | 4 +- hledger/test/csv.test | 58 +++++++++++++++++------- 3 files changed, 81 insertions(+), 41 deletions(-) diff --git a/hledger-lib/Hledger/Read/RulesReader.hs b/hledger-lib/Hledger/Read/RulesReader.hs index 9dcaa3c962c..64c4086bf86 100644 --- a/hledger-lib/Hledger/Read/RulesReader.hs +++ b/hledger-lib/Hledger/Read/RulesReader.hs @@ -308,8 +308,13 @@ type MatchGroupReference = Text -- | A strptime date parsing pattern, as supported by Data.Time.Format. type DateFormat = Text --- | A prefix for a matcher test, either & or none (implicit or). -data MatcherPrefix = And | Not | None +-- | A representation of a matcher's prefix, which indicates how it should be +-- interpreted or combined with other matchers. +data MatcherPrefix = + Or -- ^ no prefix + | And -- ^ & + | Not -- ^ ! + | AndNot -- ^ & ! deriving (Show, Eq) -- | A single test for matching a CSV record, in one way or another. @@ -318,6 +323,14 @@ data Matcher = | FieldMatcher MatcherPrefix CsvFieldReference Regexp -- ^ match if this regexp matches the referenced CSV field's value deriving (Show, Eq) +matcherPrefix :: Matcher -> MatcherPrefix +matcherPrefix (RecordMatcher prefix _) = prefix +matcherPrefix (FieldMatcher prefix _ _) = prefix + +matcherSetPrefix :: MatcherPrefix -> Matcher -> Matcher +matcherSetPrefix p (RecordMatcher _ r) = RecordMatcher p r +matcherSetPrefix p (FieldMatcher _ f r) = FieldMatcher p f r + -- | A conditional block: a set of CSV record matchers, and a sequence -- of rules which will be enabled only if one or more of the matchers -- succeeds. 
@@ -682,7 +695,11 @@ fieldmatcherp end = do matcherprefixp :: CsvRulesParser MatcherPrefix matcherprefixp = do lift $ dbgparse 8 "trying matcherprefixp" - (char '&' >> lift skipNonNewlineSpaces >> return And) <|> (char '!' >> lift skipNonNewlineSpaces >> return Not) <|> return None + (do + char '&' >> lift skipNonNewlineSpaces + fromMaybe And <$> optional (char '!' >> lift skipNonNewlineSpaces >> return AndNot)) + <|> (char '!' >> lift skipNonNewlineSpaces >> return Not) + <|> return Or csvfieldreferencep :: CsvRulesParser CsvFieldReference csvfieldreferencep = do @@ -744,7 +761,7 @@ lastCBAssignmentTemplate f = snd . last . filter ((==f).fst) . cbAssignments maybeNegate :: MatcherPrefix -> Bool -> Bool maybeNegate Not origbool = not origbool -maybeNegate _ origbool = origbool +maybeNegate _ origbool = origbool -- | Given the conversion rules, a CSV record and a hledger field name, find -- either the last applicable `ConditionalBlock`, or the final value template @@ -789,7 +806,7 @@ isBlockActive rules record CB{..} = any (all matcherMatches) $ groupedMatchers c -- matcherMatches :: Matcher -> Bool matcherMatches = \case - RecordMatcher prefix pat -> maybeNegate prefix $ match pat $ T.intercalate "," record + RecordMatcher prefix pat -> maybeNegate prefix $ match pat $ T.intercalate "," record FieldMatcher prefix csvfieldref pat -> maybeNegate prefix $ match pat $ fromMaybe (warn "'if %CSVFIELD' should use a name declared with 'fields', or a number" "") $ replaceCsvFieldReference rules record csvfieldref @@ -803,14 +820,13 @@ isBlockActive rules record CB{..} = any (all matcherMatches) $ groupedMatchers c -- D -- & E -- => [[A, B], [C], [D, E]] + -- & ! M (and not M) are converted to ! M (not M) within the and groups. 
groupedMatchers :: [Matcher] -> [[Matcher]] groupedMatchers [] = [] - groupedMatchers (x:xs) = (x:ys) : groupedMatchers zs + groupedMatchers (m:ms) = (m:ands) : groupedMatchers rest where - (ys, zs) = span (\y -> matcherPrefix y == And) xs - matcherPrefix :: Matcher -> MatcherPrefix - matcherPrefix (RecordMatcher prefix _) = prefix - matcherPrefix (FieldMatcher prefix _ _) = prefix + (andandnots, rest) = span (\a -> matcherPrefix a `elem` [And, AndNot]) ms + ands = [matcherSetPrefix p a | a <- andandnots, let p = if matcherPrefix a == AndNot then Not else And] -- | Render a field assignment's template, possibly interpolating referenced -- CSV field values or match groups. Outer whitespace is removed from interpolated values. @@ -1514,12 +1530,12 @@ tests_RulesReader = testGroup "RulesReader" [ ,testCase "assignment with empty value" $ parseWithState' defrules rulesp "account1 \nif foo\n account2 foo\n" @?= - (Right (mkrules $ defrules{rassignments = [("account1","")], rconditionalblocks = [CB{cbMatchers=[RecordMatcher None (toRegex' "foo")],cbAssignments=[("account2","foo")]}]})) + (Right (mkrules $ defrules{rassignments = [("account1","")], rconditionalblocks = [CB{cbMatchers=[RecordMatcher Or (toRegex' "foo")],cbAssignments=[("account2","foo")]}]})) ] ,testGroup "conditionalblockp" [ testCase "space after conditional" $ parseWithState' defrules conditionalblockp "if a\n account2 b\n \n" @?= - (Right $ CB{cbMatchers=[RecordMatcher None $ toRegexCI' "a"],cbAssignments=[("account2","b")]}) + (Right $ CB{cbMatchers=[RecordMatcher Or $ toRegexCI' "a"],cbAssignments=[("account2","b")]}) ], testGroup "csvfieldreferencep" [ @@ -1531,16 +1547,16 @@ tests_RulesReader = testGroup "RulesReader" [ ,testGroup "matcherp" [ testCase "recordmatcherp" $ - parseWithState' defrules matcherp "A A\n" @?= (Right $ RecordMatcher None $ toRegexCI' "A A") + parseWithState' defrules matcherp "A A\n" @?= (Right $ RecordMatcher Or $ toRegexCI' "A A") ,testCase "recordmatcherp.starts-with-&" 
$ parseWithState' defrules matcherp "& A A\n" @?= (Right $ RecordMatcher And $ toRegexCI' "A A") ,testCase "fieldmatcherp.starts-with-%" $ - parseWithState' defrules matcherp "description A A\n" @?= (Right $ RecordMatcher None $ toRegexCI' "description A A") + parseWithState' defrules matcherp "description A A\n" @?= (Right $ RecordMatcher Or $ toRegexCI' "description A A") ,testCase "fieldmatcherp" $ - parseWithState' defrules matcherp "%description A A\n" @?= (Right $ FieldMatcher None "%description" $ toRegexCI' "A A") + parseWithState' defrules matcherp "%description A A\n" @?= (Right $ FieldMatcher Or "%description" $ toRegexCI' "A A") ,testCase "fieldmatcherp.starts-with-&" $ parseWithState' defrules matcherp "& %description A A\n" @?= (Right $ FieldMatcher And "%description" $ toRegexCI' "A A") @@ -1555,7 +1571,7 @@ tests_RulesReader = testGroup "RulesReader" [ in testCase "toplevel" $ hledgerField rules ["a","b"] "date" @?= (Just "%csvdate") - ,let rules = mkrules $ defrules{rcsvfieldindexes=[("csvdate",1)], rconditionalblocks=[CB [FieldMatcher None "%csvdate" $ toRegex' "a"] [("date","%csvdate")]]} + ,let rules = mkrules $ defrules{rcsvfieldindexes=[("csvdate",1)], rconditionalblocks=[CB [FieldMatcher Or "%csvdate" $ toRegex' "a"] [("date","%csvdate")]]} in testCase "conditional" $ hledgerField rules ["a","b"] "date" @?= (Just "%csvdate") ,let rules = mkrules $ defrules{rcsvfieldindexes=[("csvdate",1)], rconditionalblocks=[CB [FieldMatcher Not "%csvdate" $ toRegex' "a"] [("date","%csvdate")]]} @@ -1564,16 +1580,16 @@ tests_RulesReader = testGroup "RulesReader" [ ,let rules = mkrules $ defrules{rcsvfieldindexes=[("csvdate",1)], rconditionalblocks=[CB [FieldMatcher Not "%csvdate" $ toRegex' "b"] [("date","%csvdate")]]} in testCase "negated-conditional-true" $ hledgerField rules ["a","b"] "date" @?= (Just "%csvdate") - ,let rules = mkrules $ defrules{rcsvfieldindexes=[("csvdate",1),("description",2)], rconditionalblocks=[CB [FieldMatcher None "%csvdate" $ 
toRegex' "a", FieldMatcher None "%description" $ toRegex' "b"] [("date","%csvdate")]]} + ,let rules = mkrules $ defrules{rcsvfieldindexes=[("csvdate",1),("description",2)], rconditionalblocks=[CB [FieldMatcher Or "%csvdate" $ toRegex' "a", FieldMatcher Or "%description" $ toRegex' "b"] [("date","%csvdate")]]} in testCase "conditional-with-or-a" $ hledgerField rules ["a"] "date" @?= (Just "%csvdate") - ,let rules = mkrules $ defrules{rcsvfieldindexes=[("csvdate",1),("description",2)], rconditionalblocks=[CB [FieldMatcher None "%csvdate" $ toRegex' "a", FieldMatcher None "%description" $ toRegex' "b"] [("date","%csvdate")]]} + ,let rules = mkrules $ defrules{rcsvfieldindexes=[("csvdate",1),("description",2)], rconditionalblocks=[CB [FieldMatcher Or "%csvdate" $ toRegex' "a", FieldMatcher Or "%description" $ toRegex' "b"] [("date","%csvdate")]]} in testCase "conditional-with-or-b" $ hledgerField rules ["_", "b"] "date" @?= (Just "%csvdate") - ,let rules = mkrules $ defrules{rcsvfieldindexes=[("csvdate",1),("description",2)], rconditionalblocks=[CB [FieldMatcher None "%csvdate" $ toRegex' "a", FieldMatcher And "%description" $ toRegex' "b"] [("date","%csvdate")]]} + ,let rules = mkrules $ defrules{rcsvfieldindexes=[("csvdate",1),("description",2)], rconditionalblocks=[CB [FieldMatcher Or "%csvdate" $ toRegex' "a", FieldMatcher And "%description" $ toRegex' "b"] [("date","%csvdate")]]} in testCase "conditional.with-and" $ hledgerField rules ["a", "b"] "date" @?= (Just "%csvdate") - ,let rules = mkrules $ defrules{rcsvfieldindexes=[("csvdate",1),("description",2)], rconditionalblocks=[CB [FieldMatcher None "%csvdate" $ toRegex' "a", FieldMatcher And "%description" $ toRegex' "b", FieldMatcher None "%description" $ toRegex' "c"] [("date","%csvdate")]]} + ,let rules = mkrules $ defrules{rcsvfieldindexes=[("csvdate",1),("description",2)], rconditionalblocks=[CB [FieldMatcher Or "%csvdate" $ toRegex' "a", FieldMatcher And "%description" $ toRegex' "b", FieldMatcher Or 
"%description" $ toRegex' "c"] [("date","%csvdate")]]} in testCase "conditional.with-and-or" $ hledgerField rules ["_", "c"] "date" @?= (Just "%csvdate") ] @@ -1584,9 +1600,9 @@ tests_RulesReader = testGroup "RulesReader" [ { rcsvfieldindexes=[ ("date",1), ("description",2) ] , rassignments=[ ("account2","equity"), ("amount1","1") ] -- ConditionalBlocks here are in reverse order: mkrules reverses the list - , rconditionalblocks=[ CB { cbMatchers=[FieldMatcher None "%description" (toRegex' "PREFIX (.*) - (.*)")] + , rconditionalblocks=[ CB { cbMatchers=[FieldMatcher Or "%description" (toRegex' "PREFIX (.*) - (.*)")] , cbAssignments=[("account1","account:\\1:\\2")] } - , CB { cbMatchers=[FieldMatcher None "%description" (toRegex' "PREFIX (.*)")] + , CB { cbMatchers=[FieldMatcher Or "%description" (toRegex' "PREFIX (.*)")] , cbAssignments=[("account1","account:\\1"), ("comment1","\\1")] } ] } diff --git a/hledger/hledger.m4.md b/hledger/hledger.m4.md index 06106184204..394932c871f 100644 --- a/hledger/hledger.m4.md +++ b/hledger/hledger.m4.md @@ -3693,8 +3693,8 @@ When an if block has multiple matchers, each on its own line, - By default they are OR'd (any of them can match). - Matcher lines beginning with `&` (and optional space) are AND'ed with the matcher above (all in the AND'ed group must match). -You can't use both `&` and `!` on the same line (you can't AND a negated matcher), -[currently](https://github.com/simonmichael/hledger/pull/2088#issuecomment-1844200398). +You can use a negated `!` matcher on a `&` line, meaning AND NOT. +*Since 1.41.* ### Match groups diff --git a/hledger/test/csv.test b/hledger/test/csv.test index 9581fd7e65c..966a9eb1262 100644 --- a/hledger/test/csv.test +++ b/hledger/test/csv.test @@ -884,7 +884,7 @@ start of conditional block found, but no assignment rules afterward >=1 # XXX -# ** 44. handle conditions with & operator +# ** 44. 
handle matchers with & prefix < 10/2009/09,Flubber Co,50 10/2009/09,Blubber Co,50 @@ -908,7 +908,31 @@ $ ./csvtest.sh >=0 -# ** 45. decimal-mark helps parse ambiguous decimals correctly. +# ** 45. handle matchers with both & and ! +< +10/2009/09,Flubber Co,50 +10/2009/09,Blubber Co,50 + +RULES +fields date, description, amount +date-format %d/%Y/%m +currency $ +account1 assets:myacct +if Flubber +&!%amount 50 + account2 acct +$ ./csvtest.sh +2009-09-10 Flubber Co + assets:myacct $50 + income:unknown $-50 + +2009-09-10 Blubber Co + assets:myacct $50 + income:unknown $-50 + +>=0 + +# ** 46. decimal-mark helps parse ambiguous decimals correctly. # Here it's one thousand, one. < 2020-01-01,"1,000" @@ -929,7 +953,7 @@ $ ./csvtest.sh >= -# ** 46. Again, this time with comma as decimal mark. +# ** 47. Again, this time with comma as decimal mark. # Here it's one, one thousand. < 2020-01-01,"1,000" @@ -950,7 +974,7 @@ $ ./csvtest.sh >= -# ** 47. Account aliases work when reading from CSV. +# ** 48. Account aliases work when reading from CSV. < 2020-01-01,10 @@ -964,7 +988,7 @@ $ ./csvtest.sh --alias expenses=FOO >= -# ** 48. Allow for whitespace in csv amounts +# ** 49. Allow for whitespace in csv amounts < 2009-09-10,+ $20 2009-09-10, $ +30 @@ -1000,7 +1024,7 @@ $ ./csvtest.sh >=0 -# ** 49. Handle an entry with all zeros +# ** 50. Handle an entry with all zeros < Date;Description;Category;Debit;Credit;Balance "2020-01-21","Client card point of sale fee",Fees,"0","0","1068.94" @@ -1018,7 +1042,7 @@ $ ./csvtest.sh >=0 -# ** 50. Allow unicode field references (#1809) +# ** 51. Allow unicode field references (#1809) < Date, Description, Id, Amount 12/11/2019, Foo, 123, 10.23 @@ -1036,7 +1060,7 @@ $ ./csvtest.sh >=0 -# ** 51. Throw an error when unable to substitute csv templates +# ** 52. 
Throw an error when unable to substitute csv templates < "2021-12-23","caffe_siciliaexpenses:cibo:dolce","-10.5" @@ -1048,7 +1072,7 @@ $ ./csvtest.sh >2 /transaction is unbalanced/ >=1 -# ** 52. We can't parse double quotes inside an unquoted field, or other non-RFC4180 data. (#1966) +# ** 53. We can't parse double quotes inside an unquoted field, or other non-RFC4180 data. (#1966) < 2022-01-01,B"B",C RULES @@ -1057,7 +1081,7 @@ $ ./csvtest.sh >2 /unexpected '"'/ >=1 -# ** 53. A top-level skip directive is able to skip lines which would fail to parse as CSV. (#1967) +# ** 54. A top-level skip directive is able to skip lines which would fail to parse as CSV. (#1967) < 2022-01-01,B"B",C RULES @@ -1066,7 +1090,7 @@ fields date, b, c $ ./csvtest.sh >= -# ** 54. Empty (zero length) or blank (containing only spaces, tabs, etc.) lines +# ** 55. Empty (zero length) or blank (containing only spaces, tabs, etc.) lines # are skipped automatically, including inner ones; skip's argument # counts only the non-empty/blank lines. < @@ -1087,7 +1111,7 @@ $ ./csvtest.sh >= -# ** 55. Some validation is done on account name assignments; trying to +# ** 56. Some validation is done on account name assignments; trying to # also set an amount there (with 2+ spaces) will be rejected. (#1978) < 2022-01-01,1 @@ -1098,7 +1122,7 @@ $ ./csvtest.sh >2 /unexpected space/ >=1 -# ** 56. make sure transaction tags are functional (#2114), including ones on subsequent lines (#2241) +# ** 57. make sure transaction tags are functional (#2114), including ones on subsequent lines (#2241) < 2020-01-01, 1 RULES @@ -1116,7 +1140,7 @@ $ ./csvtest.sh tag:ttag2 >= -# ** 57. and also posting tags, and (primary, yearful) posting dates (#2114, #2241) +# ** 58. and also posting tags, and (primary, yearful) posting dates (#2114, #2241) $ ./csvtest.sh tag:date 2020-01-01 ; ttag:tval ; ttag2: @@ -1125,7 +1149,7 @@ $ ./csvtest.sh tag:date >= -# ** 58. 
handle newlines in an assignment value, without breaking interpolations (#2134) +# ** 59. handle newlines in an assignment value, without breaking interpolations (#2134) < 2023-01-01,1 RULES @@ -1139,7 +1163,7 @@ $ ./csvtest.sh >= -# ** 59. specify ssv prefix and no extension +# ** 60. specify ssv prefix and no extension < 12/11/2019;Foo;123;10.23 RULES @@ -1152,7 +1176,7 @@ $ ./ssvtest.sh >= -# ** 60. tabular rules with comments +# ** 61. tabular rules with comments < 10/2009/09,Flubber Co,50 10/2009/09,Blubber Co,150