imp:csv:if: support & ! (AND NOT)

simonmichael · Dec 4, 2024 · 054a204 · 054a204
1 parent f73888d
commit 054a204
Show file tree

Hide file tree

Showing 3 changed files with 81 additions and 41 deletions.
diff --git a/hledger-lib/Hledger/Read/RulesReader.hs b/hledger-lib/Hledger/Read/RulesReader.hs
@@ -308,8 +308,13 @@ type MatchGroupReference = Text
 -- | A strptime date parsing pattern, as supported by Data.Time.Format.
 type DateFormat       = Text
 
--- | A prefix for a matcher test, either & or none (implicit or).
-data MatcherPrefix = And | Not | None
+-- | A representation of a matcher's prefix, which indicates how it should be
+-- interpreted or combined with other matchers.
+data MatcherPrefix =
+    Or      -- ^ no prefix
+  | And     -- ^ &
+  | Not     -- ^ !
+  | AndNot  -- ^ & !
   deriving (Show, Eq)
 
 -- | A single test for matching a CSV record, in one way or another.
@@ -318,6 +323,14 @@ data Matcher =
   | FieldMatcher MatcherPrefix CsvFieldReference Regexp         -- ^ match if this regexp matches the referenced CSV field's value
   deriving (Show, Eq)
 
+matcherPrefix :: Matcher -> MatcherPrefix
+matcherPrefix (RecordMatcher prefix _) = prefix
+matcherPrefix (FieldMatcher prefix _ _) = prefix
+
+matcherSetPrefix :: MatcherPrefix -> Matcher -> Matcher
+matcherSetPrefix p (RecordMatcher _ r)  = RecordMatcher p r
+matcherSetPrefix p (FieldMatcher _ f r) = FieldMatcher p f r
+
 -- | A conditional block: a set of CSV record matchers, and a sequence
 -- of rules which will be enabled only if one or more of the matchers
 -- succeeds.
@@ -682,7 +695,11 @@ fieldmatcherp end = do
 matcherprefixp :: CsvRulesParser MatcherPrefix
 matcherprefixp = do
   lift $ dbgparse 8 "trying matcherprefixp"
-  (char '&' >> lift skipNonNewlineSpaces >> return And) <|> (char '!' >> lift skipNonNewlineSpaces >> return Not) <|> return None
+  (do
+    char '&' >> lift skipNonNewlineSpaces
+    fromMaybe And <$> optional (char '!' >> lift skipNonNewlineSpaces >> return AndNot))
+  <|> (char '!' >> lift skipNonNewlineSpaces >> return Not)
+  <|> return Or
 
 csvfieldreferencep :: CsvRulesParser CsvFieldReference
 csvfieldreferencep = do
@@ -744,7 +761,7 @@ lastCBAssignmentTemplate f = snd . last . filter ((==f).fst) . cbAssignments
 
 maybeNegate :: MatcherPrefix -> Bool -> Bool
 maybeNegate Not origbool = not origbool
-maybeNegate _ origbool = origbool
+maybeNegate _   origbool = origbool
 
 -- | Given the conversion rules, a CSV record and a hledger field name, find
 -- either the last applicable `ConditionalBlock`, or the final value template
@@ -789,7 +806,7 @@ isBlockActive rules record CB{..} = any (all matcherMatches) $ groupedMatchers c
     --
     matcherMatches :: Matcher -> Bool
     matcherMatches = \case
-      RecordMatcher prefix pat -> maybeNegate prefix $ match pat $ T.intercalate "," record
+      RecordMatcher prefix             pat -> maybeNegate prefix $ match pat $ T.intercalate "," record
       FieldMatcher  prefix csvfieldref pat -> maybeNegate prefix $ match pat $
         fromMaybe (warn "'if %CSVFIELD' should use a name declared with 'fields', or a number" "") $
         replaceCsvFieldReference rules record csvfieldref
@@ -803,14 +820,13 @@ isBlockActive rules record CB{..} = any (all matcherMatches) $ groupedMatchers c
     --   D
     --   & E
     --   => [[A, B], [C], [D, E]]
+    -- Within an AND group, "& ! M" (AND NOT M) matchers are converted to "! M" (NOT M).
     groupedMatchers :: [Matcher] -> [[Matcher]]
     groupedMatchers [] = []
-    groupedMatchers (x:xs) = (x:ys) : groupedMatchers zs
+    groupedMatchers (m:ms) = (m:ands) : groupedMatchers rest
       where
-        (ys, zs) = span (\y -> matcherPrefix y == And) xs
-        matcherPrefix :: Matcher -> MatcherPrefix
-        matcherPrefix (RecordMatcher prefix _) = prefix
-        matcherPrefix (FieldMatcher prefix _ _) = prefix
+        (andandnots, rest) = span (\a -> matcherPrefix a `elem` [And, AndNot]) ms
+        ands = [matcherSetPrefix p a | a <- andandnots, let p = if matcherPrefix a == AndNot then Not else And]
 
 -- | Render a field assignment's template, possibly interpolating referenced
 -- CSV field values or match groups. Outer whitespace is removed from interpolated values.
@@ -1514,12 +1530,12 @@ tests_RulesReader = testGroup "RulesReader" [
 
     ,testCase "assignment with empty value" $
       parseWithState' defrules rulesp "account1 \nif foo\n  account2 foo\n" @?=
-        (Right (mkrules $ defrules{rassignments = [("account1","")], rconditionalblocks = [CB{cbMatchers=[RecordMatcher None (toRegex' "foo")],cbAssignments=[("account2","foo")]}]}))
+        (Right (mkrules $ defrules{rassignments = [("account1","")], rconditionalblocks = [CB{cbMatchers=[RecordMatcher Or (toRegex' "foo")],cbAssignments=[("account2","foo")]}]}))
    ]
   ,testGroup "conditionalblockp" [
     testCase "space after conditional" $
       parseWithState' defrules conditionalblockp "if a\n account2 b\n \n" @?=
-        (Right $ CB{cbMatchers=[RecordMatcher None $ toRegexCI' "a"],cbAssignments=[("account2","b")]})
+        (Right $ CB{cbMatchers=[RecordMatcher Or $ toRegexCI' "a"],cbAssignments=[("account2","b")]})
   ],
 
   testGroup "csvfieldreferencep" [
@@ -1531,16 +1547,16 @@ tests_RulesReader = testGroup "RulesReader" [
   ,testGroup "matcherp" [
 
     testCase "recordmatcherp" $
-      parseWithState' defrules matcherp "A A\n" @?= (Right $ RecordMatcher None $ toRegexCI' "A A")
+      parseWithState' defrules matcherp "A A\n" @?= (Right $ RecordMatcher Or $ toRegexCI' "A A")
 
    ,testCase "recordmatcherp.starts-with-&" $
       parseWithState' defrules matcherp "& A A\n" @?= (Right $ RecordMatcher And $ toRegexCI' "A A")
 
    ,testCase "fieldmatcherp.starts-with-%" $
-      parseWithState' defrules matcherp "description A A\n" @?= (Right $ RecordMatcher None $ toRegexCI' "description A A")
+      parseWithState' defrules matcherp "description A A\n" @?= (Right $ RecordMatcher Or $ toRegexCI' "description A A")
 
    ,testCase "fieldmatcherp" $
-      parseWithState' defrules matcherp "%description A A\n" @?= (Right $ FieldMatcher None "%description" $ toRegexCI' "A A")
+      parseWithState' defrules matcherp "%description A A\n" @?= (Right $ FieldMatcher Or "%description" $ toRegexCI' "A A")
 
    ,testCase "fieldmatcherp.starts-with-&" $
       parseWithState' defrules matcherp "& %description A A\n" @?= (Right $ FieldMatcher And "%description" $ toRegexCI' "A A")
@@ -1555,7 +1571,7 @@ tests_RulesReader = testGroup "RulesReader" [
 
     in testCase "toplevel" $ hledgerField rules ["a","b"] "date" @?= (Just "%csvdate")
 
-   ,let rules = mkrules $ defrules{rcsvfieldindexes=[("csvdate",1)], rconditionalblocks=[CB [FieldMatcher None "%csvdate" $ toRegex' "a"] [("date","%csvdate")]]}
+   ,let rules = mkrules $ defrules{rcsvfieldindexes=[("csvdate",1)], rconditionalblocks=[CB [FieldMatcher Or "%csvdate" $ toRegex' "a"] [("date","%csvdate")]]}
     in testCase "conditional" $ hledgerField rules ["a","b"] "date" @?= (Just "%csvdate")
 
    ,let rules = mkrules $ defrules{rcsvfieldindexes=[("csvdate",1)], rconditionalblocks=[CB [FieldMatcher Not "%csvdate" $ toRegex' "a"] [("date","%csvdate")]]}
@@ -1564,16 +1580,16 @@ tests_RulesReader = testGroup "RulesReader" [
    ,let rules = mkrules $ defrules{rcsvfieldindexes=[("csvdate",1)], rconditionalblocks=[CB [FieldMatcher Not "%csvdate" $ toRegex' "b"] [("date","%csvdate")]]}
     in testCase "negated-conditional-true" $ hledgerField rules ["a","b"] "date" @?= (Just "%csvdate")
 
-   ,let rules = mkrules $ defrules{rcsvfieldindexes=[("csvdate",1),("description",2)], rconditionalblocks=[CB [FieldMatcher None "%csvdate" $ toRegex' "a", FieldMatcher None "%description" $ toRegex' "b"] [("date","%csvdate")]]}
+   ,let rules = mkrules $ defrules{rcsvfieldindexes=[("csvdate",1),("description",2)], rconditionalblocks=[CB [FieldMatcher Or "%csvdate" $ toRegex' "a", FieldMatcher Or "%description" $ toRegex' "b"] [("date","%csvdate")]]}
     in testCase "conditional-with-or-a" $ hledgerField rules ["a"] "date" @?= (Just "%csvdate")
 
-   ,let rules = mkrules $ defrules{rcsvfieldindexes=[("csvdate",1),("description",2)], rconditionalblocks=[CB [FieldMatcher None "%csvdate" $ toRegex' "a", FieldMatcher None "%description" $ toRegex' "b"] [("date","%csvdate")]]}
+   ,let rules = mkrules $ defrules{rcsvfieldindexes=[("csvdate",1),("description",2)], rconditionalblocks=[CB [FieldMatcher Or "%csvdate" $ toRegex' "a", FieldMatcher Or "%description" $ toRegex' "b"] [("date","%csvdate")]]}
     in testCase "conditional-with-or-b" $ hledgerField rules ["_", "b"] "date" @?= (Just "%csvdate")
 
-   ,let rules = mkrules $ defrules{rcsvfieldindexes=[("csvdate",1),("description",2)], rconditionalblocks=[CB [FieldMatcher None "%csvdate" $ toRegex' "a", FieldMatcher And "%description" $ toRegex' "b"] [("date","%csvdate")]]}
+   ,let rules = mkrules $ defrules{rcsvfieldindexes=[("csvdate",1),("description",2)], rconditionalblocks=[CB [FieldMatcher Or "%csvdate" $ toRegex' "a", FieldMatcher And "%description" $ toRegex' "b"] [("date","%csvdate")]]}
     in testCase "conditional.with-and" $ hledgerField rules ["a", "b"] "date" @?= (Just "%csvdate")
 
-   ,let rules = mkrules $ defrules{rcsvfieldindexes=[("csvdate",1),("description",2)], rconditionalblocks=[CB [FieldMatcher None "%csvdate" $ toRegex' "a", FieldMatcher And "%description" $ toRegex' "b", FieldMatcher None "%description" $ toRegex' "c"] [("date","%csvdate")]]}
+   ,let rules = mkrules $ defrules{rcsvfieldindexes=[("csvdate",1),("description",2)], rconditionalblocks=[CB [FieldMatcher Or "%csvdate" $ toRegex' "a", FieldMatcher And "%description" $ toRegex' "b", FieldMatcher Or "%description" $ toRegex' "c"] [("date","%csvdate")]]}
     in testCase "conditional.with-and-or" $ hledgerField rules ["_", "c"] "date" @?= (Just "%csvdate")
 
    ]
@@ -1584,9 +1600,9 @@ tests_RulesReader = testGroup "RulesReader" [
           { rcsvfieldindexes=[ ("date",1), ("description",2) ]
           , rassignments=[ ("account2","equity"), ("amount1","1") ]
           -- ConditionalBlocks here are in reverse order: mkrules reverses the list
-          , rconditionalblocks=[ CB { cbMatchers=[FieldMatcher None "%description" (toRegex' "PREFIX (.*) - (.*)")] 
+          , rconditionalblocks=[ CB { cbMatchers=[FieldMatcher Or "%description" (toRegex' "PREFIX (.*) - (.*)")]
                                     , cbAssignments=[("account1","account:\\1:\\2")] }
-                               , CB { cbMatchers=[FieldMatcher None "%description" (toRegex' "PREFIX (.*)")]
+                               , CB { cbMatchers=[FieldMatcher Or "%description" (toRegex' "PREFIX (.*)")]
                                     , cbAssignments=[("account1","account:\\1"), ("comment1","\\1")] }
                                ]
           }

diff --git a/hledger/hledger.m4.md b/hledger/hledger.m4.md
@@ -3693,8 +3693,8 @@ When an if block has multiple matchers, each on its own line,
 - By default they are OR'd (any of them can match).
 - Matcher lines beginning with `&` (and optional space) are AND'ed with the matcher above (all in the AND'ed group must match).
 
-You can't use both `&` and `!` on the same line (you can't AND a negated matcher),
-[currently](https://github.com/simonmichael/hledger/pull/2088#issuecomment-1844200398).
+A matcher line can begin with `&` followed by `!` (in that order, with optional spaces, e.g. `& ! PATTERN` or `&!PATTERN`), meaning AND NOT.
+*Since 1.41.*
 
 ### Match groups
 

diff --git a/hledger/test/csv.test b/hledger/test/csv.test
@@ -884,7 +884,7 @@ start of conditional block found, but no assignment rules afterward
 >=1
 # XXX
 
-# ** 44. handle conditions with & operator
+# ** 44. handle matchers with & prefix
 <
 10/2009/09,Flubber Co,50
 10/2009/09,Blubber Co,50
@@ -908,7 +908,31 @@ $  ./csvtest.sh
 
 >=0
 
-# ** 45. decimal-mark helps parse ambiguous decimals correctly.
+# ** 45. handle matchers with both & and !
+<
+10/2009/09,Flubber Co,50
+10/2009/09,Blubber Co,50
+
+RULES
+fields date, description, amount
+date-format %d/%Y/%m
+currency $
+account1 assets:myacct
+if Flubber
+&!%amount 50
+  account2 acct
+$  ./csvtest.sh
+2009-09-10 Flubber Co
+    assets:myacct              $50
+    income:unknown            $-50
+
+2009-09-10 Blubber Co
+    assets:myacct              $50
+    income:unknown            $-50
+
+>=0
+
+# ** 46. decimal-mark helps parse ambiguous decimals correctly.
 # Here it's one thousand, one.
 <
 2020-01-01,"1,000"
@@ -929,7 +953,7 @@ $  ./csvtest.sh
 
 >=
 
-# ** 46. Again, this time with comma as decimal mark.
+# ** 47. Again, this time with comma as decimal mark.
 # Here it's one, one thousand.
 <
 2020-01-01,"1,000"
@@ -950,7 +974,7 @@ $  ./csvtest.sh
 
 >=
 
-# ** 47. Account aliases work when reading from CSV.
+# ** 48. Account aliases work when reading from CSV.
 <
 2020-01-01,10
 
@@ -964,7 +988,7 @@ $  ./csvtest.sh --alias expenses=FOO
 
 >=
 
-# ** 48. Allow for whitespace in csv amounts
+# ** 49. Allow for whitespace in csv amounts
 <
 2009-09-10,+ $20
 2009-09-10, $ +30
@@ -1000,7 +1024,7 @@ $  ./csvtest.sh
 
 >=0
 
-# ** 49. Handle an entry with all zeros
+# ** 50. Handle an entry with all zeros
 <
 Date;Description;Category;Debit;Credit;Balance
 "2020-01-21","Client card point of sale fee",Fees,"0","0","1068.94"
@@ -1018,7 +1042,7 @@ $  ./csvtest.sh
 
 >=0
 
-# ** 50. Allow unicode field references (#1809)
+# ** 51. Allow unicode field references (#1809)
 <
 Date, Description, Id, Amount
 12/11/2019, Foo, 123, 10.23
@@ -1036,7 +1060,7 @@ $  ./csvtest.sh
 
 >=0
 
-# ** 51. Throw an error when unable to substitute csv templates
+# ** 52. Throw an error when unable to substitute csv templates
 <
 "2021-12-23","caffe_siciliaexpenses:cibo:dolce","-10.5"
 
@@ -1048,7 +1072,7 @@ $  ./csvtest.sh
 >2 /transaction is unbalanced/
 >=1
 
-# ** 52. We can't parse double quotes inside an unquoted field, or other non-RFC4180 data. (#1966)
+# ** 53. We can't parse double quotes inside an unquoted field, or other non-RFC4180 data. (#1966)
 <
 2022-01-01,B"B",C
 RULES
@@ -1057,7 +1081,7 @@ $  ./csvtest.sh
 >2 /unexpected '"'/
 >=1
 
-# ** 53. A top-level skip directive is able to skip lines which would fail to parse as CSV. (#1967)
+# ** 54. A top-level skip directive is able to skip lines which would fail to parse as CSV. (#1967)
 <
 2022-01-01,B"B",C
 RULES
@@ -1066,7 +1090,7 @@ fields date, b, c
 $  ./csvtest.sh
 >=
 
-# ** 54. Empty (zero length) or blank (containing only spaces, tabs, etc.) lines
+# ** 55. Empty (zero length) or blank (containing only spaces, tabs, etc.) lines
 # are skipped automatically, including inner ones; skip's argument
 # counts only the non-empty/blank lines.
 <
@@ -1087,7 +1111,7 @@ $  ./csvtest.sh
 
 >=
 
-# ** 55. Some validation is done on account name assignments; trying to
+# ** 56. Some validation is done on account name assignments; trying to
 # also set an amount there (with 2+ spaces) will be rejected. (#1978)
 <
 2022-01-01,1
@@ -1098,7 +1122,7 @@ $  ./csvtest.sh
 >2 /unexpected space/
 >=1
 
-# ** 56. make sure transaction tags are functional (#2114), including ones on subsequent lines (#2241)
+# ** 57. make sure transaction tags are functional (#2114), including ones on subsequent lines (#2241)
 <
 2020-01-01, 1
 RULES
@@ -1116,7 +1140,7 @@ $  ./csvtest.sh tag:ttag2
 
 >=
 
-# ** 57. and also posting tags, and (primary, yearful) posting dates (#2114, #2241)
+# ** 58. and also posting tags, and (primary, yearful) posting dates (#2114, #2241)
 $  ./csvtest.sh tag:date
 2020-01-01  ; ttag:tval
     ; ttag2:
@@ -1125,7 +1149,7 @@ $  ./csvtest.sh tag:date
 
 >=
 
-# ** 58. handle newlines in an assignment value, without breaking interpolations (#2134)
+# ** 59. handle newlines in an assignment value, without breaking interpolations (#2134)
 <
 2023-01-01,1
 RULES
@@ -1139,7 +1163,7 @@ $  ./csvtest.sh
 
 >=
 
-# ** 59. specify ssv prefix and no extension
+# ** 60. specify ssv prefix and no extension
 <
 12/11/2019;Foo;123;10.23
 RULES
@@ -1152,7 +1176,7 @@ $  ./ssvtest.sh
 
 >=
 
-# ** 60. tabular rules with comments
+# ** 61. tabular rules with comments
 <
 10/2009/09,Flubber Co,50
 10/2009/09,Blubber Co,150