diff options
Diffstat (limited to 'beancount_extras_kris7t/importers/transferwise/transferwise_json.py')
-rw-r--r-- | beancount_extras_kris7t/importers/transferwise/transferwise_json.py | 377 |
1 files changed, 377 insertions, 0 deletions
diff --git a/beancount_extras_kris7t/importers/transferwise/transferwise_json.py b/beancount_extras_kris7t/importers/transferwise/transferwise_json.py new file mode 100644 index 0000000..c42de90 --- /dev/null +++ b/beancount_extras_kris7t/importers/transferwise/transferwise_json.py | |||
@@ -0,0 +1,377 @@ | |||
1 | ''' | ||
2 | Importer for Transferwise API transaction history. | ||
3 | ''' | ||
4 | __copyright__ = 'Copyright (c) 2020 Kristóf Marussy <kristof@marussy.com>' | ||
5 | __license__ = 'GNU GPLv2' | ||
6 | |||
7 | from collections import defaultdict | ||
8 | import datetime as dt | ||
9 | import json | ||
10 | import logging | ||
11 | import re | ||
12 | from os import path | ||
13 | from typing import Any, Callable, Dict, Iterable, List, NamedTuple, Optional, Set | ||
14 | |||
15 | from beancount.core import account, data | ||
16 | import beancount.core.amount as am | ||
17 | from beancount.core.amount import Amount | ||
18 | from beancount.core.flags import FLAG_WARNING | ||
19 | from beancount.core.inventory import Inventory | ||
20 | from beancount.core.number import ZERO | ||
21 | from beancount.ingest.cache import _FileMemo as FileMemo | ||
22 | from beancount.ingest.importer import ImporterProtocol | ||
23 | |||
24 | import beancount_extras_kris7t.importers.utils as utils | ||
25 | from beancount_extras_kris7t.importers.utils import COMMENT_META, InvalidEntry, PAYEE_META | ||
26 | |||
27 | DATE_FORMAT = '%Y-%m-%dT%H:%M:%SZ' | ||
28 | DATE_FORMAT_FRACTIONAL = '%Y-%m-%dT%H:%M:%S.%fZ' | ||
29 | CATEGORY_META = 'transferwise-category' | ||
30 | ENTRY_TYPE_META = 'transferwise-entry-type' | ||
31 | TRANSFERWISE_JSON_TAG = 'transferwise-json' | ||
32 | CD_DEBIT = 'DEBIT' | ||
33 | CD_CREDIT = 'CREDIT' | ||
34 | MONEY_ADDED_TYPE = 'MONEY_ADDED' | ||
35 | CARD_TYPE = 'CARD' | ||
36 | CARD_REGEX = re.compile(r'^Card transaction of.*issued by', re.IGNORECASE) | ||
37 | |||
38 | |||
39 | def _parse_date(date_str: str) -> dt.date: | ||
40 | # TODO Handle time zones accurately | ||
41 | try: | ||
42 | return utils.parse_date(date_str, DATE_FORMAT) | ||
43 | except InvalidEntry: | ||
44 | return utils.parse_date(date_str, DATE_FORMAT_FRACTIONAL) | ||
45 | |||
46 | |||
47 | def _parse_json_amount(data: Any, cd: Optional[str] = None) -> Amount: | ||
48 | # TODO Handle precision better | ||
49 | amount = Amount(round(utils.parse_number(data['value']), 2), data['currency']) | ||
50 | if cd == CD_DEBIT: | ||
51 | return -amount | ||
52 | else: | ||
53 | return amount | ||
54 | |||
55 | |||
56 | def _validate_cd(amount: Amount, cd: str) -> None: | ||
57 | if cd == CD_DEBIT: | ||
58 | if amount.number >= ZERO: | ||
59 | raise InvalidEntry(f'Invalid debit amount: {amount}') | ||
60 | elif cd == CD_CREDIT: | ||
61 | if amount.number <= ZERO: | ||
62 | raise InvalidEntry(f'Invalid credit amount: {amount}') | ||
63 | else: | ||
64 | raise InvalidEntry(f'Invalid credit/debit type: {cd}') | ||
65 | |||
66 | |||
67 | class Reference(NamedTuple): | ||
68 | type: str | ||
69 | reference_number: str | ||
70 | |||
71 | |||
72 | class _ConversionResult(NamedTuple): | ||
73 | converted_fraction: Amount | ||
74 | price: Optional[Amount] | ||
75 | fudge: Optional[Amount] | ||
76 | |||
77 | |||
78 | class _Conversion(NamedTuple): | ||
79 | native_amount: Amount | ||
80 | converted_amount: Optional[Amount] | ||
81 | |||
82 | def get_fraction(self, | ||
83 | total_converted_amount: Amount, | ||
84 | assigned_amount: Amount, | ||
85 | diverted_fees: Inventory) -> _ConversionResult: | ||
86 | fees = diverted_fees.get_currency_units(self.native_amount.currency) | ||
87 | native_amount = am.add(self.native_amount, fees) | ||
88 | fraction = am.div(assigned_amount, total_converted_amount.number).number | ||
89 | if self.converted_amount: | ||
90 | converted_fraction = am.mul(self.converted_amount, fraction) | ||
91 | price = am.div(native_amount, self.converted_amount.number) | ||
92 | assert price.number is not None | ||
93 | price = price._replace(number=round(price.number, 4)) | ||
94 | # TODO Do we have to calculate the fudge here? | ||
95 | return _ConversionResult(converted_fraction, price, None) | ||
96 | else: | ||
97 | return _ConversionResult(am.mul(native_amount, fraction), None, None) | ||
98 | |||
99 | |||
100 | class Accounts(NamedTuple): | ||
101 | borderless_root_asset: str | ||
102 | fees_expense: str | ||
103 | |||
104 | def get_borderless_account(self, currency: str) -> str: | ||
105 | return account.sep.join([self.borderless_root_asset, currency]) | ||
106 | |||
107 | |||
108 | class Row(utils.Row): | ||
109 | cd: str | ||
110 | _transacted_amount: Amount | ||
111 | date: dt.date | ||
112 | _conversions: List[_Conversion] | ||
113 | _inputs: Inventory | ||
114 | _fees: Inventory | ||
115 | divert_fees: Optional[str] | ||
116 | |||
117 | def __init__(self, reference: Reference, transaction_list: List[Any]): | ||
118 | assert len(transaction_list) >= 1 | ||
119 | first_transaction = transaction_list[0] | ||
120 | details = first_transaction['details'] | ||
121 | entry_type = details.get('type', None) | ||
122 | merchant = details.get('merchant', None) | ||
123 | if merchant: | ||
124 | payee = merchant['name'] | ||
125 | else: | ||
126 | payee = None | ||
127 | comment = details['description'] | ||
128 | super().__init__('<transferwise>', 0, entry_type, payee, comment) | ||
129 | if entry_type: | ||
130 | self.meta[ENTRY_TYPE_META] = entry_type | ||
131 | category = details.get('category', None) | ||
132 | if category: | ||
133 | self.meta[CATEGORY_META] = category | ||
134 | if merchant: | ||
135 | if 'category' in merchant and merchant['category'] == category: | ||
136 | del merchant['category'] | ||
137 | not_null_elements = [str(value).strip() for _, value in merchant.items() if value] | ||
138 | self.meta[PAYEE_META] = '; '.join(not_null_elements) | ||
139 | self.meta[COMMENT_META] = comment | ||
140 | self.tags.add(TRANSFERWISE_JSON_TAG) | ||
141 | self.links.add(f'transferwise_{reference.reference_number}') | ||
142 | self.date = _parse_date(first_transaction['date']) | ||
143 | self.cd = reference.type | ||
144 | self.divert_fees = None | ||
145 | self._conversions = [] | ||
146 | self._inputs = Inventory() | ||
147 | self._fees = Inventory() | ||
148 | self._compute_transacted_amount(transaction_list) | ||
149 | for transaction in transaction_list: | ||
150 | self._add_json_transaction(transaction) | ||
151 | |||
152 | def _compute_transacted_amount(self, transaction_list: List[Any]) -> None: | ||
153 | first_transaction = transaction_list[0] | ||
154 | details = first_transaction['details'] | ||
155 | transacted_json = details.get('amount', None) | ||
156 | if transacted_json: | ||
157 | self._transacted_amount = _parse_json_amount(transacted_json, self.cd) | ||
158 | else: | ||
159 | if len(transaction_list) != 1: | ||
160 | raise InvalidEntry('Cannot determine transaction amount') | ||
161 | if exchange_details := first_transaction.get('exchangeDetails', None): | ||
162 | self._transacted_amount = _parse_json_amount(exchange_details, self.cd) | ||
163 | else: | ||
164 | self._transacted_amount = _parse_json_amount(first_transaction['amount']) | ||
165 | _validate_cd(self._transacted_amount, self.cd) | ||
166 | |||
167 | def _add_json_transaction(self, transaction: Any) -> None: | ||
168 | if exchange := transaction.get('exchangeDetails', None): | ||
169 | native = _parse_json_amount(exchange['fromAmount'], self.cd) | ||
170 | converted = _parse_json_amount(exchange['toAmount'], self.cd) | ||
171 | _validate_cd(converted, self.cd) | ||
172 | else: | ||
173 | native = _parse_json_amount(transaction['amount']) | ||
174 | converted = None | ||
175 | _validate_cd(native, self.cd) | ||
176 | if total_fees := transaction.get('totalFees', None): | ||
177 | fee = -_parse_json_amount(total_fees) | ||
178 | if fee.number > ZERO: | ||
179 | raise InvalidEntry(f'Invalid transaction fee: {fee}') | ||
180 | if fee.number != ZERO: | ||
181 | self._fees.add_amount(fee) | ||
182 | native_after_fees = am.sub(native, fee) | ||
183 | else: | ||
184 | native_after_fees = native | ||
185 | self._conversions.append(_Conversion(native_after_fees, converted)) | ||
186 | self._inputs.add_amount(native) | ||
187 | |||
188 | @property | ||
189 | def transacted_amount(self) -> Amount: | ||
190 | return self._transacted_amount | ||
191 | |||
192 | def _to_transaction(self, accounts: Accounts) -> data.Transaction: | ||
193 | postings = self._get_postings(accounts) | ||
194 | return data.Transaction(self.meta, self.date, self.flag, self.payee, self.comment, | ||
195 | self.tags, self.links, postings) | ||
196 | |||
197 | def _get_postings(self, accounts: Accounts) -> List[data.Posting]: | ||
198 | postings: List[data.Posting] = [] | ||
199 | for units, cost in self._inputs: | ||
200 | assert cost is None | ||
201 | postings.append(data.Posting( | ||
202 | accounts.get_borderless_account(units.currency), units, None, None, None, None)) | ||
203 | if self.divert_fees: | ||
204 | for units, cost in self._fees: | ||
205 | assert cost is None | ||
206 | postings.append(data.Posting( | ||
207 | self.divert_fees, units, None, None, None, None)) | ||
208 | diverted_fees = self._fees | ||
209 | else: | ||
210 | diverted_fees = Inventory() | ||
211 | # Also add the "fudge" amounts to the fees generated by rounding currency conversions. | ||
212 | all_fees = Inventory() | ||
213 | all_fees.add_inventory(self._fees) | ||
214 | for acc, assigned_units in self.postings: | ||
215 | for conversion in self._conversions: | ||
216 | units, price, fudge = conversion.get_fraction( | ||
217 | self._transacted_amount, assigned_units, diverted_fees) | ||
218 | postings.append(data.Posting(acc, -units, None, price, None, None)) | ||
219 | if fudge: | ||
220 | all_fees.add_amount(-fudge) | ||
221 | for units, cost in all_fees: | ||
222 | assert cost is None | ||
223 | postings.append(data.Posting( | ||
224 | accounts.fees_expense, -units, None, None, None, None)) | ||
225 | return postings | ||
226 | |||
227 | |||
228 | Extractor = Callable[[Row], None] | ||
229 | |||
230 | |||
231 | def extract_card_transaction(payment_processors: Dict[str, Optional[str]] = {}) -> Extractor: | ||
232 | regexes = [(re.compile(f'^\\s*{key}\\s*\\*', re.IGNORECASE), value) | ||
233 | for key, value in payment_processors.items()] | ||
234 | |||
235 | def do_extract(row: Row) -> None: | ||
236 | if row.entry_type == CARD_TYPE and CARD_REGEX.search(row.comment): | ||
237 | if row.cd == CD_DEBIT: | ||
238 | row.comment = '' | ||
239 | else: | ||
240 | row.comment = 'Refund' | ||
241 | # Most manually add posting for refunded fees. | ||
242 | row.flag = FLAG_WARNING | ||
243 | if row.payee: | ||
244 | for key, value in regexes: | ||
245 | if match := key.search(row.payee): | ||
246 | if value: | ||
247 | row.tags.add(value) | ||
248 | row.payee = row.payee[match.end():].strip() | ||
249 | return do_extract | ||
250 | |||
251 | |||
252 | def extract_add_money(add_money_asset: str, add_money_fees_asset: str) -> Extractor: | ||
253 | def do_extract(row: Row) -> None: | ||
254 | if row.entry_type == MONEY_ADDED_TYPE: | ||
255 | row.payee = 'Transferwise' | ||
256 | row.divert_fees = add_money_fees_asset | ||
257 | row.assign_to_account(add_money_asset) | ||
258 | return do_extract | ||
259 | |||
260 | |||
261 | class Importer(ImporterProtocol): | ||
262 | _log: logging.Logger | ||
263 | profile_id: int | ||
264 | borderless_account_id: int | ||
265 | currencies: List[str] | ||
266 | accounts: Accounts | ||
267 | _extractors: List[Extractor] | ||
268 | |||
269 | def __init__(self, | ||
270 | profile_id: int, | ||
271 | borderless_account_id: int, | ||
272 | currencies: Iterable[str], | ||
273 | accounts: Accounts, | ||
274 | extractors: List[Extractor]): | ||
275 | self._log = logging.getLogger(type(self).__qualname__) | ||
276 | self.profile_id = profile_id | ||
277 | self.borderless_account_id = borderless_account_id | ||
278 | self.currencies = list(currencies) | ||
279 | self.accounts = accounts | ||
280 | self._extractors = extractors | ||
281 | |||
282 | def _parse_file(self, file: FileMemo) -> Any: | ||
283 | def parse_json(path: str) -> Any: | ||
284 | with open(path, 'r') as json_file: | ||
285 | try: | ||
286 | return json.load(json_file) | ||
287 | except json.JSONDecodeError as exc: | ||
288 | self._log.info('Invalid JSON: %s', path, exc_info=exc) | ||
289 | return None | ||
290 | |||
291 | return file.convert(parse_json) | ||
292 | |||
293 | def identify(self, file: FileMemo) -> bool: | ||
294 | _, extension = path.splitext(file.name) | ||
295 | if extension.lower() != '.json': | ||
296 | return False | ||
297 | contents = self._parse_file(file) | ||
298 | try: | ||
299 | query = contents['query'] | ||
300 | return query['accountId'] == self.borderless_account_id and \ | ||
301 | query['currency'] in self.currencies | ||
302 | except (KeyError, TypeError): | ||
303 | return False | ||
304 | |||
305 | def file_name(self, file: FileMemo) -> str: | ||
306 | return 'statement.json' | ||
307 | |||
308 | def file_account(self, file: FileMemo) -> str: | ||
309 | contents = self._parse_file(file) | ||
310 | try: | ||
311 | currency = contents['query']['currency'] | ||
312 | except (KeyError, TypeError) as exc: | ||
313 | raise ValueError(f'Invalid account statement: {file.name}') from exc | ||
314 | if not isinstance(currency, str): | ||
315 | raise ValueError(f'Invalid account statement: {file.name}') | ||
316 | return self.accounts.get_borderless_account(currency) | ||
317 | |||
318 | def file_date(self, file: FileMemo) -> dt.date: | ||
319 | contents = self._parse_file(file) | ||
320 | try: | ||
321 | date_str = contents['query']['intervalEnd'] | ||
322 | except (KeyError, TypeError) as exc: | ||
323 | raise ValueError(f'Invalid account statement: {file.name}') from exc | ||
324 | if not isinstance(date_str, str): | ||
325 | raise ValueError(f'Invalid account statement: {file.name}') | ||
326 | return _parse_date(date_str) | ||
327 | |||
328 | def extract(self, file: FileMemo) -> data.Entries: | ||
329 | contents = self._parse_file(file) | ||
330 | if contents: | ||
331 | return self.extract_objects([contents]) | ||
332 | else: | ||
333 | return [] | ||
334 | |||
335 | def extract_objects(self, | ||
336 | statements: Iterable[Any], | ||
337 | skip_references: Set[str] = set()) -> data.Entries: | ||
338 | transactions: Dict[Reference, List[Any]] = defaultdict(lambda: []) | ||
339 | for statement in statements: | ||
340 | for transaction in statement['transactions']: | ||
341 | reference_number = transaction['referenceNumber'] | ||
342 | if reference_number in skip_references: | ||
343 | continue | ||
344 | reference = Reference(transaction['type'], reference_number) | ||
345 | transactions[reference].append(transaction) | ||
346 | entries: data.Entries = [] | ||
347 | for reference, transaction_list in transactions.items(): | ||
348 | if not transaction_list: | ||
349 | continue | ||
350 | try: | ||
351 | row = Row(reference, transaction_list) | ||
352 | except (TypeError, KeyError, InvalidEntry) as exc: | ||
353 | self._log.warn('Invalid entry: %s', reference, exc_info=exc) | ||
354 | continue | ||
355 | try: | ||
356 | utils.run_row_extractors(row, self._extractors) | ||
357 | except InvalidEntry as exc: | ||
358 | self._log.warn('Invalid entry: %s', reference, exc_info=exc) | ||
359 | continue | ||
360 | entries.append(row._to_transaction(self.accounts)) | ||
361 | entries.sort(key=lambda entry: entry.date) | ||
362 | if entries: | ||
363 | self._extract_closing_balances(statements, entries) | ||
364 | return entries | ||
365 | |||
366 | def _extract_closing_balances(self, | ||
367 | statements: Iterable[Any], | ||
368 | entries: data.Entries) -> None: | ||
369 | for statement in statements: | ||
370 | query = statement['query'] | ||
371 | end_date = _parse_date(query['intervalEnd']) + dt.timedelta(days=1) | ||
372 | currency = query['currency'] | ||
373 | balance = statement['endOfStatementBalance'] | ||
374 | amount = Amount(utils.parse_number(balance['value']), balance['currency']) | ||
375 | meta = data.new_metadata(f'<transferwise:{currency}>', 0) | ||
376 | account = self.accounts.get_borderless_account(currency) | ||
377 | entries.append(data.Balance(meta, end_date, account, amount, None, None)) | ||