Add example for optional clearing of postings

dumbPy · Jun 8, 2024 · 0698811 · 0698811
1 parent 3a25d01
commit 0698811
Showing 12 changed files with 206 additions and 5 deletions.
diff --git a/beancount_import/source/generic_importer_source.py b/beancount_import/source/generic_importer_source.py
@@ -36,7 +36,8 @@ class ImporterSource(DescriptionBasedSource):
     def __init__(self,
                  directory: str,
                  importer: ImporterProtocol,
-                 account: Optional[str]=None, # use None for importers that are not authoritative and would not clear any postings
+                 # use None for importers that are not authoritative and would not clear any postings
+                 account: Optional[str]=None,
                  **kwargs) -> None:
         super().__init__(**kwargs)
         self.directory = os.path.expanduser(directory)

diff --git a/examples/README.md b/examples/README.md
@@ -10,3 +10,8 @@ Examples:
   - `fresh`: Example of importing transactions starting with an empty journal.
   - `manually_entered`: Example of importing transactions corresponding to
     existing, manually-entered transactions.
+  - `multiple_imports`: Example of importing the same transactions from multiple
+    importers, e.g. transaction emails arrive the same day while the monthly
+    statement arrives at the end of the month. Here, the transaction is
+    imported from the email but not cleared (by setting `account=None` in
+    `run.py`); it is cleared only at month end by the monthly statement.
diff --git a/examples/data/importers/bank.csv b/examples/data/importers/bank.csv
@@ -1,8 +1,4 @@
 "Date","Description","Amount"
-2020-01-01,by debit card-OTHPG 063441 GOOGLE CLOUD,-1
-2020-01-01,by debit card-OTHPG 063444 GOOGLE CLOUD,-1
-2020-01-02,BULK POSTING- 00000008237 250120 GOOGLE,1
 2020-01-02,ATM-WD Some Random ATM Machine,-500
-2020-01-02,BULK POSTING- 00000008237 250120 GOOGLE,1
 2020-01-05,Transfer to 1234567890123,300
 2020-01-14,Transfer to Amex 431145642232,-30
diff --git a/examples/data/importers/single_transaction_email.html b/examples/data/importers/single_transaction_email.html
@@ -0,0 +1,30 @@
+<!DOCTYPE html>
+<html>
+<head>
+    <title>Tables Example</title>
+</head>
+<body>
+    <b>FooBar Bank Transaction Alert</b>
+    <table>
+        <tr>
+            <th>Account</th>
+        </tr>
+        <tr>
+            <td>********9876</td>
+        </tr>
+    </table>
+    <br>
+    <table>
+        <tr>
+            <th>Date</th>
+            <th>Description</th>
+            <th>Amount</th>
+        </tr>
+        <tr>
+            <td>2020-01-14</td>
+            <td>Cleared Credit Card Bill</td>
+            <td>-30.00</td>
+        </tr>
+    </table>
+</body>
+</html>
diff --git a/examples/multiple_imports/accounts.beancount b/examples/multiple_imports/accounts.beancount
@@ -0,0 +1,5 @@
+1900-01-01 open Assets:FooBarBank EUR
+
+1900-01-01 open Liabilities:Amex-Credit-Card EUR
+
+2020-01-14 open Expenses:Misc EUR
diff --git a/examples/multiple_imports/config.py b/examples/multiple_imports/config.py
@@ -0,0 +1,46 @@
+"""
+This config is where you would initialize your importers with personal info
+like the account number or the last four digits of a credit card.
+
+You may also define CONFIG: List[ImporterProtocol] for other beancount tools
+such as bean-identify, bean-file, and other beancount scripts to use,
+e.g. `bean-identify config.py ~/Downloads`
+to identify the files that the importers defined here can process.
+
+beancount-import should have its own run.py where you invoke
+`beancount_import.webserver.main` but import the Importer objects from this config.
+"""
+from beancount.ingest.importers.csv import Importer as CSVImporter, Col
+from foo_bar_email_importer import FooBarTransactionEmailImporter
+
+my_foobar_bank_importer = CSVImporter({
+                        Col.DATE: 'Date',
+                        Col.NARRATION1: 'Description',
+                        Col.AMOUNT: 'Amount',
+                        },
+                       'Assets:FooBarBank', # account
+                       'EUR', # currency
+                        # regexps used by ImporterProtocol.identify() to identify the correct file
+                       '"Date","Description","Amount"',
+                       )
+
+foobar_email_importer = FooBarTransactionEmailImporter(filing_account='Assets:FooBarBank')
+
+
+my_amex_cc_importer = CSVImporter({
+                        Col.DATE: 'Date',
+                        Col.NARRATION1: 'Description',
+                        Col.AMOUNT: 'Amount',
+                        Col.BALANCE:'Balance'
+                        },
+                       'Liabilities:Amex-Credit-Card', # account
+                       'EUR', # currency
+                        # regexps used by ImporterProtocol.identify() to identify the correct file
+                       ('Date,Description,Amount,Balance',
+                       'Credit.*7890'
+                       ),
+                       skip_lines=1
+                       )
+
+# beancount's scripts use this
+CONFIG = [my_foobar_bank_importer, foobar_email_importer, my_amex_cc_importer]
diff --git a/examples/multiple_imports/foo_bar_email_importer.py b/examples/multiple_imports/foo_bar_email_importer.py
@@ -0,0 +1,53 @@
+"""
+Imports a single transaction from a received transaction email.
+The same transaction also appears in the monthly CSV statement,
+so this importer does not clear the transaction;
+this is achieved by setting `self.account = None`.
+"""
+
+import re
+from beancount.ingest import importer
+from beancount.core import data, flags
+from pathlib import Path
+from dateutil.parser import parse as date_parse
+
+
+class FooBarTransactionEmailImporter(importer.ImporterProtocol):
+    def __init__(self, filing_account='Assets:FooBarBank'):
+        self._filing_account = filing_account
+        self.account = None
+
+    def identify(self, f):
+        return (
+            f.name.endswith(".html")
+            and re.search(r"FooBar Bank Transaction Alert", Path(f.name).read_text())
+            is not None
+        )
+
+    def extract(self, f, existing_entries=None):
+        pattern = r"<tr>\s*<th>Date</th>\s*<th>Description</th>\s*<th>Amount</th>\s*</tr>\s*<tr>\s*<td>(?P<DATE>.*)</td>\s*<td>(?P<DESCRIPTION>.*)</td>\s*<td>(?P<AMOUNT>.*)</td>\s*</tr>"
+        match = re.search(pattern, Path(f.name).read_text())
+        if not match:
+            return []
+        groups = match.groupdict()
+        txn = data.Transaction(
+            meta=data.new_metadata(f.name, 0),
+            date=date_parse(groups["DATE"]).date(),
+            flag=flags.FLAG_OKAY,
+            payee=None,
+            narration=groups["DESCRIPTION"],
+            tags=set(),
+            links=set(),
+            postings=[
+                data.Posting(
+                    account=self._filing_account,
+                    units= data.Amount(data.D(groups["AMOUNT"]), "EUR"),
+                    cost=None,
+                    price=None,
+                    flag=None,
+                    meta={},
+                )
+            ],
+        )
+        # returns the single transaction imported from the transaction email
+        return [txn]
diff --git a/examples/multiple_imports/ignored.beancount b/examples/multiple_imports/ignored.beancount
diff --git a/examples/multiple_imports/journal.beancount b/examples/multiple_imports/journal.beancount
@@ -0,0 +1,3 @@
+include "accounts.beancount"
+include "transactions.beancount"
+include "prices.beancount"
diff --git a/examples/multiple_imports/prices.beancount b/examples/multiple_imports/prices.beancount
diff --git a/examples/multiple_imports/run.py b/examples/multiple_imports/run.py
@@ -0,0 +1,62 @@
+#!/usr/bin/env python3
+
+import glob
+import os
+import json
+import sys
+
+from config import my_foobar_bank_importer, my_amex_cc_importer, foobar_email_importer
+
+
+def run_reconcile(extra_args):
+    import beancount_import.webserver
+
+    journal_dir = os.path.dirname(__file__)
+    data_dir = os.path.join(os.path.dirname(__file__), "..", "data")
+
+    data_sources = [
+        dict(
+            module="beancount_import.source.generic_importer_source",
+            # imports monthly bank statements
+            importer=my_foobar_bank_importer,
+            account="Assets:FooBarBank",
+            directory=os.path.join(data_dir, "importers"),
+        ),
+        dict(
+            module="beancount_import.source.generic_importer_source",
+            # imports individual transactions from email
+            importer=foobar_email_importer,
+            # This importer only imports transactions from email
+            # and does not clear the postings, hence account=None.
+            # Note that the importer just above this one clears the postings
+            # imported by this importer.
+            account=None,
+            directory=os.path.join(data_dir, "importers"),
+        ),
+        dict(
+            module="beancount_import.source.generic_importer_source",
+            # imports monthly credit card statements
+            importer=my_amex_cc_importer,
+            account="Liabilities:Amex-Credit-Card",
+            directory=os.path.join(data_dir, "importers"),
+        ),
+    ]
+
+    beancount_import.webserver.main(
+        extra_args,
+        journal_input=os.path.join(journal_dir, "journal.beancount"),
+        ignored_journal=os.path.join(journal_dir, "ignored.beancount"),
+        default_output=os.path.join(journal_dir, "transactions.beancount"),
+        open_account_output_map=[
+            (".*", os.path.join(journal_dir, "accounts.beancount")),
+        ],
+        balance_account_output_map=[
+            (".*", os.path.join(journal_dir, "accounts.beancount")),
+        ],
+        price_output=os.path.join(journal_dir, "prices.beancount"),
+        data_sources=data_sources,
+    )
+
+
+if __name__ == "__main__":
+    run_reconcile(sys.argv[1:])
diff --git a/examples/multiple_imports/transactions.beancount b/examples/multiple_imports/transactions.beancount