Skip to content

Commit

Permalink
imp:csv:if: support & ! (AND NOT)
Browse files Browse the repository at this point in the history
  • Loading branch information
simonmichael committed Dec 4, 2024
1 parent f73888d commit 054a204
Show file tree
Hide file tree
Showing 3 changed files with 81 additions and 41 deletions.
60 changes: 38 additions & 22 deletions hledger-lib/Hledger/Read/RulesReader.hs
Original file line number Diff line number Diff line change
Expand Up @@ -308,8 +308,13 @@ type MatchGroupReference = Text
-- | A strptime date parsing pattern, as supported by Data.Time.Format.
type DateFormat = Text

-- | A prefix for a matcher test, either & or none (implicit or).
data MatcherPrefix = And | Not | None
-- | A representation of a matcher's prefix, which indicates how it should be
-- interpreted or combined with other matchers.
data MatcherPrefix =
Or -- ^ no prefix
| And -- ^ &
| Not -- ^ !
| AndNot -- ^ & !
deriving (Show, Eq)

-- | A single test for matching a CSV record, in one way or another.
Expand All @@ -318,6 +323,14 @@ data Matcher =
| FieldMatcher MatcherPrefix CsvFieldReference Regexp -- ^ match if this regexp matches the referenced CSV field's value
deriving (Show, Eq)

-- | Get the prefix of a matcher, whichever kind of matcher it is.
matcherPrefix :: Matcher -> MatcherPrefix
matcherPrefix = \case
  RecordMatcher p _  -> p
  FieldMatcher p _ _ -> p

-- | Replace a matcher's prefix with the given one, leaving its regexp
-- (and, for a field matcher, its field reference) unchanged.
matcherSetPrefix :: MatcherPrefix -> Matcher -> Matcher
matcherSetPrefix newprefix = \case
  RecordMatcher _ pat         -> RecordMatcher newprefix pat
  FieldMatcher _ fieldref pat -> FieldMatcher newprefix fieldref pat

-- | A conditional block: a set of CSV record matchers, and a sequence
-- of rules which will be enabled only if one or more of the matchers
-- succeeds.
Expand Down Expand Up @@ -682,7 +695,11 @@ fieldmatcherp end = do
matcherprefixp :: CsvRulesParser MatcherPrefix
matcherprefixp = do
lift $ dbgparse 8 "trying matcherprefixp"
(char '&' >> lift skipNonNewlineSpaces >> return And) <|> (char '!' >> lift skipNonNewlineSpaces >> return Not) <|> return None
(do
char '&' >> lift skipNonNewlineSpaces
fromMaybe And <$> optional (char '!' >> lift skipNonNewlineSpaces >> return AndNot))
<|> (char '!' >> lift skipNonNewlineSpaces >> return Not)
<|> return Or

csvfieldreferencep :: CsvRulesParser CsvFieldReference
csvfieldreferencep = do
Expand Down Expand Up @@ -744,7 +761,7 @@ lastCBAssignmentTemplate f = snd . last . filter ((==f).fst) . cbAssignments

-- | Apply the negation implied by a matcher prefix, if any, to a match result.
-- Both negating prefixes, 'Not' (!) and 'AndNot' (& !), invert the result;
-- any other prefix leaves it unchanged.
-- 'AndNot' is handled here too, so that a matcher still carrying that prefix
-- when it is tested (eg the first matcher of a group, which groupedMatchers
-- leaves as written) is negated rather than silently treated as positive.
maybeNegate :: MatcherPrefix -> Bool -> Bool
maybeNegate Not    origbool = not origbool
maybeNegate AndNot origbool = not origbool
maybeNegate _      origbool = origbool

-- | Given the conversion rules, a CSV record and a hledger field name, find
-- either the last applicable `ConditionalBlock`, or the final value template
Expand Down Expand Up @@ -789,7 +806,7 @@ isBlockActive rules record CB{..} = any (all matcherMatches) $ groupedMatchers c
--
matcherMatches :: Matcher -> Bool
matcherMatches = \case
RecordMatcher prefix pat -> maybeNegate prefix $ match pat $ T.intercalate "," record
RecordMatcher prefix pat -> maybeNegate prefix $ match pat $ T.intercalate "," record
FieldMatcher prefix csvfieldref pat -> maybeNegate prefix $ match pat $
fromMaybe (warn "'if %CSVFIELD' should use a name declared with 'fields', or a number" "") $
replaceCsvFieldReference rules record csvfieldref
Expand All @@ -803,14 +820,13 @@ isBlockActive rules record CB{..} = any (all matcherMatches) $ groupedMatchers c
-- D
-- & E
-- => [[A, B], [C], [D, E]]
-- Matchers written as & ! M (AND NOT M) are converted to ! M (NOT M) within their AND group.
groupedMatchers :: [Matcher] -> [[Matcher]]
groupedMatchers [] = []
groupedMatchers (x:xs) = (x:ys) : groupedMatchers zs
groupedMatchers (m:ms) = (m:ands) : groupedMatchers rest
where
(ys, zs) = span (\y -> matcherPrefix y == And) xs
matcherPrefix :: Matcher -> MatcherPrefix
matcherPrefix (RecordMatcher prefix _) = prefix
matcherPrefix (FieldMatcher prefix _ _) = prefix
(andandnots, rest) = span (\a -> matcherPrefix a `elem` [And, AndNot]) ms
ands = [matcherSetPrefix p a | a <- andandnots, let p = if matcherPrefix a == AndNot then Not else And]

-- | Render a field assignment's template, possibly interpolating referenced
-- CSV field values or match groups. Outer whitespace is removed from interpolated values.
Expand Down Expand Up @@ -1514,12 +1530,12 @@ tests_RulesReader = testGroup "RulesReader" [

,testCase "assignment with empty value" $
parseWithState' defrules rulesp "account1 \nif foo\n account2 foo\n" @?=
(Right (mkrules $ defrules{rassignments = [("account1","")], rconditionalblocks = [CB{cbMatchers=[RecordMatcher None (toRegex' "foo")],cbAssignments=[("account2","foo")]}]}))
(Right (mkrules $ defrules{rassignments = [("account1","")], rconditionalblocks = [CB{cbMatchers=[RecordMatcher Or (toRegex' "foo")],cbAssignments=[("account2","foo")]}]}))
]
,testGroup "conditionalblockp" [
testCase "space after conditional" $
parseWithState' defrules conditionalblockp "if a\n account2 b\n \n" @?=
(Right $ CB{cbMatchers=[RecordMatcher None $ toRegexCI' "a"],cbAssignments=[("account2","b")]})
(Right $ CB{cbMatchers=[RecordMatcher Or $ toRegexCI' "a"],cbAssignments=[("account2","b")]})
],

testGroup "csvfieldreferencep" [
Expand All @@ -1531,16 +1547,16 @@ tests_RulesReader = testGroup "RulesReader" [
,testGroup "matcherp" [

testCase "recordmatcherp" $
parseWithState' defrules matcherp "A A\n" @?= (Right $ RecordMatcher None $ toRegexCI' "A A")
parseWithState' defrules matcherp "A A\n" @?= (Right $ RecordMatcher Or $ toRegexCI' "A A")

,testCase "recordmatcherp.starts-with-&" $
parseWithState' defrules matcherp "& A A\n" @?= (Right $ RecordMatcher And $ toRegexCI' "A A")

,testCase "fieldmatcherp.starts-with-%" $
parseWithState' defrules matcherp "description A A\n" @?= (Right $ RecordMatcher None $ toRegexCI' "description A A")
parseWithState' defrules matcherp "description A A\n" @?= (Right $ RecordMatcher Or $ toRegexCI' "description A A")

,testCase "fieldmatcherp" $
parseWithState' defrules matcherp "%description A A\n" @?= (Right $ FieldMatcher None "%description" $ toRegexCI' "A A")
parseWithState' defrules matcherp "%description A A\n" @?= (Right $ FieldMatcher Or "%description" $ toRegexCI' "A A")

,testCase "fieldmatcherp.starts-with-&" $
parseWithState' defrules matcherp "& %description A A\n" @?= (Right $ FieldMatcher And "%description" $ toRegexCI' "A A")
Expand All @@ -1555,7 +1571,7 @@ tests_RulesReader = testGroup "RulesReader" [

in testCase "toplevel" $ hledgerField rules ["a","b"] "date" @?= (Just "%csvdate")

,let rules = mkrules $ defrules{rcsvfieldindexes=[("csvdate",1)], rconditionalblocks=[CB [FieldMatcher None "%csvdate" $ toRegex' "a"] [("date","%csvdate")]]}
,let rules = mkrules $ defrules{rcsvfieldindexes=[("csvdate",1)], rconditionalblocks=[CB [FieldMatcher Or "%csvdate" $ toRegex' "a"] [("date","%csvdate")]]}
in testCase "conditional" $ hledgerField rules ["a","b"] "date" @?= (Just "%csvdate")

,let rules = mkrules $ defrules{rcsvfieldindexes=[("csvdate",1)], rconditionalblocks=[CB [FieldMatcher Not "%csvdate" $ toRegex' "a"] [("date","%csvdate")]]}
Expand All @@ -1564,16 +1580,16 @@ tests_RulesReader = testGroup "RulesReader" [
,let rules = mkrules $ defrules{rcsvfieldindexes=[("csvdate",1)], rconditionalblocks=[CB [FieldMatcher Not "%csvdate" $ toRegex' "b"] [("date","%csvdate")]]}
in testCase "negated-conditional-true" $ hledgerField rules ["a","b"] "date" @?= (Just "%csvdate")

,let rules = mkrules $ defrules{rcsvfieldindexes=[("csvdate",1),("description",2)], rconditionalblocks=[CB [FieldMatcher None "%csvdate" $ toRegex' "a", FieldMatcher None "%description" $ toRegex' "b"] [("date","%csvdate")]]}
,let rules = mkrules $ defrules{rcsvfieldindexes=[("csvdate",1),("description",2)], rconditionalblocks=[CB [FieldMatcher Or "%csvdate" $ toRegex' "a", FieldMatcher Or "%description" $ toRegex' "b"] [("date","%csvdate")]]}
in testCase "conditional-with-or-a" $ hledgerField rules ["a"] "date" @?= (Just "%csvdate")

,let rules = mkrules $ defrules{rcsvfieldindexes=[("csvdate",1),("description",2)], rconditionalblocks=[CB [FieldMatcher None "%csvdate" $ toRegex' "a", FieldMatcher None "%description" $ toRegex' "b"] [("date","%csvdate")]]}
,let rules = mkrules $ defrules{rcsvfieldindexes=[("csvdate",1),("description",2)], rconditionalblocks=[CB [FieldMatcher Or "%csvdate" $ toRegex' "a", FieldMatcher Or "%description" $ toRegex' "b"] [("date","%csvdate")]]}
in testCase "conditional-with-or-b" $ hledgerField rules ["_", "b"] "date" @?= (Just "%csvdate")

,let rules = mkrules $ defrules{rcsvfieldindexes=[("csvdate",1),("description",2)], rconditionalblocks=[CB [FieldMatcher None "%csvdate" $ toRegex' "a", FieldMatcher And "%description" $ toRegex' "b"] [("date","%csvdate")]]}
,let rules = mkrules $ defrules{rcsvfieldindexes=[("csvdate",1),("description",2)], rconditionalblocks=[CB [FieldMatcher Or "%csvdate" $ toRegex' "a", FieldMatcher And "%description" $ toRegex' "b"] [("date","%csvdate")]]}
in testCase "conditional.with-and" $ hledgerField rules ["a", "b"] "date" @?= (Just "%csvdate")

,let rules = mkrules $ defrules{rcsvfieldindexes=[("csvdate",1),("description",2)], rconditionalblocks=[CB [FieldMatcher None "%csvdate" $ toRegex' "a", FieldMatcher And "%description" $ toRegex' "b", FieldMatcher None "%description" $ toRegex' "c"] [("date","%csvdate")]]}
,let rules = mkrules $ defrules{rcsvfieldindexes=[("csvdate",1),("description",2)], rconditionalblocks=[CB [FieldMatcher Or "%csvdate" $ toRegex' "a", FieldMatcher And "%description" $ toRegex' "b", FieldMatcher Or "%description" $ toRegex' "c"] [("date","%csvdate")]]}
in testCase "conditional.with-and-or" $ hledgerField rules ["_", "c"] "date" @?= (Just "%csvdate")

]
Expand All @@ -1584,9 +1600,9 @@ tests_RulesReader = testGroup "RulesReader" [
{ rcsvfieldindexes=[ ("date",1), ("description",2) ]
, rassignments=[ ("account2","equity"), ("amount1","1") ]
-- ConditionalBlocks here are in reverse order: mkrules reverses the list
, rconditionalblocks=[ CB { cbMatchers=[FieldMatcher None "%description" (toRegex' "PREFIX (.*) - (.*)")]
, rconditionalblocks=[ CB { cbMatchers=[FieldMatcher Or "%description" (toRegex' "PREFIX (.*) - (.*)")]
, cbAssignments=[("account1","account:\\1:\\2")] }
, CB { cbMatchers=[FieldMatcher None "%description" (toRegex' "PREFIX (.*)")]
, CB { cbMatchers=[FieldMatcher Or "%description" (toRegex' "PREFIX (.*)")]
, cbAssignments=[("account1","account:\\1"), ("comment1","\\1")] }
]
}
Expand Down
4 changes: 2 additions & 2 deletions hledger/hledger.m4.md
Original file line number Diff line number Diff line change
Expand Up @@ -3693,8 +3693,8 @@ When an if block has multiple matchers, each on its own line,
- By default they are OR'd (any of them can match).
- Matcher lines beginning with `&` (and optional space) are AND'ed with the matcher above (all in the AND'ed group must match).

You can't use both `&` and `!` on the same line (you can't AND a negated matcher),
[currently](https://github.com/simonmichael/hledger/pull/2088#issuecomment-1844200398).
You can use a negated `!` matcher on a `&` line, meaning AND NOT.
*Since 1.41.*

### Match groups

Expand Down
58 changes: 41 additions & 17 deletions hledger/test/csv.test
Original file line number Diff line number Diff line change
Expand Up @@ -884,7 +884,7 @@ start of conditional block found, but no assignment rules afterward
>=1
# XXX

# ** 44. handle conditions with & operator
# ** 44. handle matchers with & prefix
<
10/2009/09,Flubber Co,50
10/2009/09,Blubber Co,50
Expand All @@ -908,7 +908,31 @@ $ ./csvtest.sh

>=0

# ** 45. decimal-mark helps parse ambiguous decimals correctly.
# ** 45. handle matchers with both & and !
<
10/2009/09,Flubber Co,50
10/2009/09,Blubber Co,50

RULES
fields date, description, amount
date-format %d/%Y/%m
currency $
account1 assets:myacct
if Flubber
&!%amount 50
account2 acct
$ ./csvtest.sh
2009-09-10 Flubber Co
assets:myacct $50
income:unknown $-50

2009-09-10 Blubber Co
assets:myacct $50
income:unknown $-50

>=0

# ** 46. decimal-mark helps parse ambiguous decimals correctly.
# Here it's one thousand, one.
<
2020-01-01,"1,000"
Expand All @@ -929,7 +953,7 @@ $ ./csvtest.sh

>=

# ** 46. Again, this time with comma as decimal mark.
# ** 47. Again, this time with comma as decimal mark.
# Here it's one, one thousand.
<
2020-01-01,"1,000"
Expand All @@ -950,7 +974,7 @@ $ ./csvtest.sh

>=

# ** 47. Account aliases work when reading from CSV.
# ** 48. Account aliases work when reading from CSV.
<
2020-01-01,10

Expand All @@ -964,7 +988,7 @@ $ ./csvtest.sh --alias expenses=FOO

>=

# ** 48. Allow for whitespace in csv amounts
# ** 49. Allow for whitespace in csv amounts
<
2009-09-10,+ $20
2009-09-10, $ +30
Expand Down Expand Up @@ -1000,7 +1024,7 @@ $ ./csvtest.sh

>=0

# ** 49. Handle an entry with all zeros
# ** 50. Handle an entry with all zeros
<
Date;Description;Category;Debit;Credit;Balance
"2020-01-21","Client card point of sale fee",Fees,"0","0","1068.94"
Expand All @@ -1018,7 +1042,7 @@ $ ./csvtest.sh

>=0

# ** 50. Allow unicode field references (#1809)
# ** 51. Allow unicode field references (#1809)
<
Date, Description, Id, Amount
12/11/2019, Foo, 123, 10.23
Expand All @@ -1036,7 +1060,7 @@ $ ./csvtest.sh

>=0

# ** 51. Throw an error when unable to substitute csv templates
# ** 52. Throw an error when unable to substitute csv templates
<
"2021-12-23","caffe_siciliaexpenses:cibo:dolce","-10.5"

Expand All @@ -1048,7 +1072,7 @@ $ ./csvtest.sh
>2 /transaction is unbalanced/
>=1

# ** 52. We can't parse double quotes inside an unquoted field, or other non-RFC4180 data. (#1966)
# ** 53. We can't parse double quotes inside an unquoted field, or other non-RFC4180 data. (#1966)
<
2022-01-01,B"B",C
RULES
Expand All @@ -1057,7 +1081,7 @@ $ ./csvtest.sh
>2 /unexpected '"'/
>=1

# ** 53. A top-level skip directive is able to skip lines which would fail to parse as CSV. (#1967)
# ** 54. A top-level skip directive is able to skip lines which would fail to parse as CSV. (#1967)
<
2022-01-01,B"B",C
RULES
Expand All @@ -1066,7 +1090,7 @@ fields date, b, c
$ ./csvtest.sh
>=

# ** 54. Empty (zero length) or blank (containing only spaces, tabs, etc.) lines
# ** 55. Empty (zero length) or blank (containing only spaces, tabs, etc.) lines
# are skipped automatically, including inner ones; skip's argument
# counts only the non-empty/blank lines.
<
Expand All @@ -1087,7 +1111,7 @@ $ ./csvtest.sh

>=

# ** 55. Some validation is done on account name assignments; trying to
# ** 56. Some validation is done on account name assignments; trying to
# also set an amount there (with 2+ spaces) will be rejected. (#1978)
<
2022-01-01,1
Expand All @@ -1098,7 +1122,7 @@ $ ./csvtest.sh
>2 /unexpected space/
>=1

# ** 56. make sure transaction tags are functional (#2114), including ones on subsequent lines (#2241)
# ** 57. make sure transaction tags are functional (#2114), including ones on subsequent lines (#2241)
<
2020-01-01, 1
RULES
Expand All @@ -1116,7 +1140,7 @@ $ ./csvtest.sh tag:ttag2

>=

# ** 57. and also posting tags, and (primary, yearful) posting dates (#2114, #2241)
# ** 58. and also posting tags, and (primary, yearful) posting dates (#2114, #2241)
$ ./csvtest.sh tag:date
2020-01-01 ; ttag:tval
; ttag2:
Expand All @@ -1125,7 +1149,7 @@ $ ./csvtest.sh tag:date

>=

# ** 58. handle newlines in an assignment value, without breaking interpolations (#2134)
# ** 59. handle newlines in an assignment value, without breaking interpolations (#2134)
<
2023-01-01,1
RULES
Expand All @@ -1139,7 +1163,7 @@ $ ./csvtest.sh

>=

# ** 59. specify ssv prefix and no extension
# ** 60. specify ssv prefix and no extension
<
12/11/2019;Foo;123;10.23
RULES
Expand All @@ -1152,7 +1176,7 @@ $ ./ssvtest.sh

>=

# ** 60. tabular rules with comments
# ** 61. tabular rules with comments
<
10/2009/09,Flubber Co,50
10/2009/09,Blubber Co,150
Expand Down

0 comments on commit 054a204

Please sign in to comment.