Skip to content

Commit

Permalink
Add example for optional clearing of postings
Browse files Browse the repository at this point in the history
  • Loading branch information
Sufiyan Adhikari committed Jun 8, 2024
1 parent 3a25d01 commit 0698811
Showing 12 changed files with 206 additions and 5 deletions.
3 changes: 2 additions & 1 deletion beancount_import/source/generic_importer_source.py
Original file line number Diff line number Diff line change
@@ -36,7 +36,8 @@ class ImporterSource(DescriptionBasedSource):
def __init__(self,
directory: str,
importer: ImporterProtocol,
account: Optional[str]=None, # use None for importers that are not authoritative and would not clear any postings
# use None for importers that are not authoritative and would not clear any postings
account: Optional[str]=None,
**kwargs) -> None:
super().__init__(**kwargs)
self.directory = os.path.expanduser(directory)
5 changes: 5 additions & 0 deletions examples/README.md
Original file line number Diff line number Diff line change
@@ -10,3 +10,8 @@ Examples:
- `fresh`: Example of importing transactions starting with an empty journal.
- `manually_entered`: Example of importing transactions corresponding to
existing, manually-entered transactions.
- `multiple_imports`: Example of importing the same transactions from multiple
importers, e.g. you receive transaction emails the same day, while the monthly
statement is received at the end of the month. Here, the transaction is
imported from the email but not cleared (by setting `account=None` in run.py);
it is cleared only at the end of the month by the monthly statement.
4 changes: 0 additions & 4 deletions examples/data/importers/bank.csv
Original file line number Diff line number Diff line change
@@ -1,8 +1,4 @@
"Date","Description","Amount"
2020-01-01,by debit card-OTHPG 063441 GOOGLE CLOUD,-1
2020-01-01,by debit card-OTHPG 063444 GOOGLE CLOUD,-1
2020-01-02,BULK POSTING- 00000008237 250120 GOOGLE,1
2020-01-02,ATM-WD Some Random ATM Machine,-500
2020-01-02,BULK POSTING- 00000008237 250120 GOOGLE,1
2020-01-05,Transfer to 1234567890123,300
2020-01-14,Transfer to Amex 431145642232,-30
30 changes: 30 additions & 0 deletions examples/data/importers/single_transaction_email.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
<!DOCTYPE html>
<html>
<head>
<title>Tables Example</title>
</head>
<body>
<b>FooBar Bank Transaction Alert</b>
<table>
<tr>
<th>Account</th>
</tr>
<tr>
<td>********9876</td>
</tr>
</table>
<br>
<table>
<tr>
<th>Date</th>
<th>Description</th>
<th>Amount</th>
</tr>
<tr>
<td>2020-01-14</td>
<td>Cleared Credit Card Bill</td>
<td>-30.00</td>
</tr>
</table>
</body>
</html>
5 changes: 5 additions & 0 deletions examples/multiple_imports/accounts.beancount
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
1900-01-01 open Assets:FooBarBank EUR

1900-01-01 open Liabilities:Amex-Credit-Card EUR

2020-01-14 open Expenses:Misc EUR
46 changes: 46 additions & 0 deletions examples/multiple_imports/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
"""
This config is where you would initialize your importers with personal info
like an account number or the last four digits of a credit card.
You may also define CONFIG: List[ImporterProtocol] for other beancount tools like
bean-identify, bean-file, and other beancount scripts to use,
e.g. `bean-identify config.py ~/Downloads`
to identify the files that the importers defined here can process.
beancount-import should have its own run.py where you invoke
`beancount_import.webserver.main` but import the Importer objects from this config.
"""
from beancount.ingest.importers.csv import Importer as CSVImporter, Col
from foo_bar_email_importer import FooBarTransactionEmailImporter

# Column mapping shared by both CSV statement layouts; each importer receives
# its own dict built from this one.
_STATEMENT_COLUMNS = {
    Col.DATE: 'Date',
    Col.NARRATION1: 'Description',
    Col.AMOUNT: 'Amount',
}

# Regexps used by ImporterProtocol.identify() to identify the correct file.
_FOOBAR_BANK_REGEXPS = '"Date","Description","Amount"'
_AMEX_CC_REGEXPS = (
    'Date,Description,Amount,Balance',
    'Credit.*7890',
)

# Monthly bank statement (CSV) for the FooBar bank account.
my_foobar_bank_importer = CSVImporter(
    dict(_STATEMENT_COLUMNS),
    'Assets:FooBarBank',  # account
    'EUR',  # currency
    _FOOBAR_BANK_REGEXPS,
)

# Single-transaction alert emails for the same bank account.
foobar_email_importer = FooBarTransactionEmailImporter(
    filing_account='Assets:FooBarBank',
)

# Monthly credit card statement (CSV); it additionally carries a balance column.
my_amex_cc_importer = CSVImporter(
    {**_STATEMENT_COLUMNS, Col.BALANCE: 'Balance'},
    'Liabilities:Amex-Credit-Card',  # account
    'EUR',  # currency
    _AMEX_CC_REGEXPS,
    skip_lines=1,
)

# beancount's scripts use this
CONFIG = [
    my_foobar_bank_importer,
    foobar_email_importer,
    my_amex_cc_importer,
]
53 changes: 53 additions & 0 deletions examples/multiple_imports/foo_bar_email_importer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
"""
Imports a single transaction from a received transaction email.
The same transaction will also exist in the monthly CSV statement,
so this importer does not clear the transaction,
which is done by setting `self.account=None`.
"""

import re
from beancount.ingest import importer
from beancount.core import data, flags
from pathlib import Path
from dateutil.parser import parse as date_parse


class FooBarTransactionEmailImporter(importer.ImporterProtocol):
    """Importer for FooBar Bank transaction alert emails saved as ``.html``.

    Each alert email is expected to contain one table whose header row is
    ``Date`` / ``Description`` / ``Amount`` followed by a single data row.
    That row is turned into one transaction with a single posting on the
    filing account.
    """

    # Text that marks a file as a FooBar Bank alert email.
    _ALERT_MARKER = "FooBar Bank Transaction Alert"

    # Header row followed by the data row. The captures are non-greedy so a
    # cell can never swallow the neighbouring ``</td><td>`` markup when several
    # cells or rows share one line. Compiled once instead of on every call.
    _ROW_PATTERN = re.compile(
        r"<tr>\s*<th>Date</th>\s*<th>Description</th>\s*<th>Amount</th>\s*</tr>"
        r"\s*<tr>\s*<td>(?P<DATE>.*?)</td>"
        r"\s*<td>(?P<DESCRIPTION>.*?)</td>"
        r"\s*<td>(?P<AMOUNT>.*?)</td>\s*</tr>"
    )

    def __init__(self, filing_account='Assets:FooBarBank', currency='EUR'):
        """Create the importer.

        Args:
            filing_account: Account used for the posting of each extracted
                transaction.
            currency: Currency attached to the extracted amount.
        """
        self._filing_account = filing_account
        self._currency = currency
        # Left as None on purpose: per the module docstring, this importer
        # must not clear the transactions it imports.
        self.account = None

    def identify(self, f):
        """Return True if ``f`` is an ``.html`` file containing the alert marker."""
        if not f.name.endswith(".html"):
            return False
        return self._ALERT_MARKER in Path(f.name).read_text()

    def extract(self, f, existing_entries=None):
        """Return a list with the one transaction found in ``f``.

        Returns an empty list when the expected table is not present.
        ``existing_entries`` is accepted but not used.
        """
        match = self._ROW_PATTERN.search(Path(f.name).read_text())
        if match is None:
            return []

        posting = data.Posting(
            account=self._filing_account,
            units=data.Amount(data.D(match.group("AMOUNT")), self._currency),
            cost=None,
            price=None,
            flag=None,
            meta={},
        )
        txn = data.Transaction(
            meta=data.new_metadata(f.name, 0),
            date=date_parse(match.group("DATE")).date(),
            flag=flags.FLAG_OKAY,
            payee=None,
            narration=match.group("DESCRIPTION"),
            tags=set(),
            links=set(),
            postings=[posting],
        )
        # returns the single transaction imported from the transaction email
        return [txn]
Empty file.
3 changes: 3 additions & 0 deletions examples/multiple_imports/journal.beancount
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
include "accounts.beancount"
include "transactions.beancount"
include "prices.beancount"
Empty file.
62 changes: 62 additions & 0 deletions examples/multiple_imports/run.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
#!/usr/bin/env python3

import glob
import os
import json
import sys

from config import my_foobar_bank_importer, my_amex_cc_importer, foobar_email_importer


def run_reconcile(extra_args):
    """Launch the beancount-import web server for this example.

    ``extra_args`` is passed through unchanged as the first argument of
    ``beancount_import.webserver.main``. The journal files live next to this
    script; the input data is read from ``../data/importers``.
    """
    import beancount_import.webserver

    here = os.path.dirname(__file__)
    importers_dir = os.path.join(here, "..", "data", "importers")

    def in_journal(filename):
        # Path of a journal file that sits beside this script.
        return os.path.join(here, filename)

    # (importer, account) pairs, in the order the sources are registered.
    source_specs = (
        # imports monthly bank statements
        (my_foobar_bank_importer, "Assets:FooBarBank"),
        # imports individual transactions from email;
        # this importer just imports transactions from email
        # but does not clear the postings, hence account=None.
        # Note that the importer just above this one clears the postings
        # imported by this importer.
        (foobar_email_importer, None),
        # imports monthly credit card statements
        (my_amex_cc_importer, "Liabilities:Amex-Credit-Card"),
    )
    data_sources = [
        {
            "module": "beancount_import.source.generic_importer_source",
            "importer": source_importer,
            "account": cleared_account,
            "directory": importers_dir,
        }
        for source_importer, cleared_account in source_specs
    ]

    beancount_import.webserver.main(
        extra_args,
        journal_input=in_journal("journal.beancount"),
        ignored_journal=in_journal("ignored.beancount"),
        default_output=in_journal("transactions.beancount"),
        open_account_output_map=[(".*", in_journal("accounts.beancount"))],
        balance_account_output_map=[(".*", in_journal("accounts.beancount"))],
        price_output=in_journal("prices.beancount"),
        data_sources=data_sources,
    )


if __name__ == "__main__":
run_reconcile(sys.argv[1:])
Empty file.

0 comments on commit 0698811

Please sign in to comment.