''' Importer for Hetzner PDF invoices. '''
__copyright__ = 'Copyright (c) 2020 Kristóf Marussy '
__license__ = 'GNU GPLv2'

import datetime as dt
import logging
import re
from typing import cast, Iterable, Optional

from beancount.core import amount as am, data
from beancount.core.amount import Amount
from beancount.core.flags import FLAG_OKAY, FLAG_WARNING
from beancount.core.number import D
from beancount.ingest.cache import _FileMemo as FileMemo
from beancount.ingest.importer import ImporterProtocol
from pdfminer.high_level import extract_pages
from pdfminer.layout import LTPage, LTTextContainer

from beancount_extras_kris7t.importers.utils import MISSING_AMOUNT

# File names look like ``Hetzner_<YYYY-MM-DD>_<R + digits>.pdf``; the leading
# ``.*`` lets the pattern be applied with ``match`` to a full path.
INVOICE_REGEX = re.compile(
    r'.*Hetzner_(?P<date>\d{4}-\d{2}-\d{2})_(?P<number>R\d+)\.pdf$', re.IGNORECASE)
# Total of the invoice.
AMOUNT_REGEX = re.compile(r'Amount due: € (?P<amount>\d+(\.\d+)?)', re.IGNORECASE)
# The three payment notices recognized by ``Importer.extract``.
BALANCE_REGEX = re.compile(
    'The amount has been charged to the credit balance on your client credit account.',
    re.IGNORECASE)
CARD_REGEX = re.compile(
    'The invoice amount will soon be debited from your credit card.',
    re.IGNORECASE)
MIXED_REGEX = re.compile(
    r'The amount of € (?P<balance>\d+(\.\d+)?) has been charged to the credit balance ' +
    r'on your client credit account. The remaining amount of € (?P<card>\d+(\.\d+)?) ' +
    'will be debited by credit card in the next few days.',
    re.IGNORECASE)


def _extract_match(pages: Iterable[LTPage], pattern: re.Pattern) -> Optional[re.Match]:
    '''
    Return the first match of `pattern` among the text elements of `pages`.

    Each text container is stripped and has its newlines replaced by spaces
    before matching. The pattern is applied with ``match``, so it must match
    from the beginning of the element's text. Returns ``None`` if no text
    element matches.
    '''
    for page in pages:
        for element in page:
            if isinstance(element, LTTextContainer):
                text = element.get_text().strip().replace('\n', ' ')
                if match := pattern.match(text):
                    return match
    return None


class Importer(ImporterProtocol):
    '''
    Importer for Hetzner PDF invoices.

    Invoices are recognized purely by file name (see ``INVOICE_REGEX``); the
    amounts and the payment method are read from the text of the PDF.
    '''

    _log: logging.Logger
    _liability: str
    _credit_balance: str
    _expense: str

    def __init__(self, liability: str, credit_balance: str, expense: str):
        '''
        Create an importer.

        `liability` is the account posted to for credit card payments (and the
        account invoices are filed under), `credit_balance` is the account
        posted to for payments from the client credit balance, and `expense`
        is the account that receives the invoice total.
        '''
        self._log = logging.getLogger(type(self).__qualname__)
        self._liability = liability
        self._credit_balance = credit_balance
        self._expense = expense

    def identify(self, file: FileMemo) -> bool:
        ''' Return whether the name of `file` looks like a Hetzner invoice. '''
        return INVOICE_REGEX.match(file.name) is not None

    def file_name(self, file: FileMemo) -> str:
        '''
        Return ``Hetzner_<invoice number>.pdf`` for `file`.

        Raises ``RuntimeError`` if the file name is not an invoice name.
        '''
        if match := INVOICE_REGEX.match(file.name):
            number = match.group('number')
            return f'Hetzner_{number}.pdf'
        else:
            raise RuntimeError(f'Not an invoice: {file.name}')

    def file_account(self, file: FileMemo) -> str:
        '''
        Return the liability account for `file`.

        Raises ``RuntimeError`` if the file name is not an invoice name.
        '''
        if INVOICE_REGEX.match(file.name) is None:
            raise RuntimeError(f'Not an invoice: {file.name}')
        else:
            return self._liability

    def file_date(self, file: FileMemo) -> dt.date:
        '''
        Return the date embedded in the name of `file`.

        Raises ``RuntimeError`` if the file name is not an invoice name.
        '''
        if match := INVOICE_REGEX.match(file.name):
            date_str = match.group('date')
            return dt.datetime.strptime(date_str, '%Y-%m-%d').date()
        else:
            raise RuntimeError(f'Not an invoice: {file.name}')

    def extract(self, file: FileMemo) -> data.Entries:
        '''
        Build a single transaction from the invoice in `file`.

        Returns an empty list (after logging a warning) if the file name is
        not an invoice name or no "Amount due" line is found. Otherwise
        returns one transaction that is flagged with ``FLAG_WARNING`` when the
        payment method is not recognized or the partial payments do not add up
        to the invoice total, and with ``FLAG_OKAY`` otherwise.
        '''
        if match := INVOICE_REGEX.match(file.name):
            invoice_number = match.group('number')
        else:
            self._log.warning('Not an invoice: %s', file.name)
            return []
        date = self.file_date(file)
        # Materialize the pages, because they are scanned once per pattern.
        pages = list(cast(Iterable[LTPage], extract_pages(file.name)))
        if match := _extract_match(pages, AMOUNT_REGEX):
            number = D(match.group('amount'))
            assert number is not None
            amount = Amount(number, 'EUR')
        else:
            self._log.warning('Not amount found in %s', file.name)
            return []
        postings = []
        flag = FLAG_OKAY
        if _extract_match(pages, BALANCE_REGEX) is not None:
            # Paid entirely from the client credit balance.
            postings.append(
                data.Posting(self._credit_balance, -amount, None, None, None, None))
        elif _extract_match(pages, CARD_REGEX) is not None:
            # Paid entirely by credit card.
            postings.append(
                data.Posting(self._liability, -amount, None, None, None, None))
        elif match := _extract_match(pages, MIXED_REGEX):
            # Paid partly from the credit balance, the rest by credit card.
            balance_number = D(match.group('balance'))
            assert balance_number is not None
            balance_amount = Amount(balance_number, 'EUR')
            postings.append(
                data.Posting(self._credit_balance, -balance_amount, None, None, None, None))
            card_number = D(match.group('card'))
            assert card_number is not None
            card_amount = Amount(card_number, 'EUR')
            postings.append(
                data.Posting(self._liability, -card_amount, None, None, None, None))
            if am.add(balance_amount, card_amount) != amount:
                self._log.warning('Payments do not cover total amount in %s', file.name)
                flag = FLAG_WARNING
        else:
            self._log.warning('Unknown payment method in %s', file.name)
            flag = FLAG_WARNING
        if flag == FLAG_OKAY:
            # The explicit total is kept only on flagged transactions.
            # NOTE(review): MISSING_AMOUNT presumably leaves the expense amount
            # to be interpolated from the payment postings -- confirm in utils.
            amount = MISSING_AMOUNT
        postings.append(
            data.Posting(self._expense, amount, None, None, None, None))
        return [
            data.Transaction(data.new_metadata(file.name, 0), date, flag, 'Hetzner', 'Invoice',
                             set(), {f'hetzner_{invoice_number}'}, postings)
        ]