# coding: utf-8
'''
Created on Jun 6, 2019

@author: deeejas
'''
import os
import re
from contextlib import contextmanager

import fitz


class MyOpSender(object):
    '''
    Pull the amount, date and beneficiary out of the newest PDF in a folder.

    The PDFs found in ``watch_path`` are ordered newest-first by modification
    time.  The text of the first page of the newest one is sliced between a
    pair of marker patterns; which pair is used depends on which layout
    ``name`` keyword occurs in the (lower-cased) file name.
    '''

    # Class-level settings, changed through the two classmethods below.
    # Nothing else in this class reads them.
    w_path = '.'
    file_ind = 0

    @classmethod
    def set_w_path(cls, w_path):
        '''Store *w_path* on the class and return it.'''
        cls.w_path = w_path
        return cls.w_path

    @classmethod
    def set_file_ind(cls, file_ind):
        '''Store *file_ind* on the class and return it.'''
        cls.file_ind = file_ind
        return cls.file_ind

    # NOTE(review): the dot is unescaped, so it matches any character before
    # "pdf".  Kept unchanged because the attribute is publicly reachable.
    bt_chk = re.compile('[0-9]*.pdf')

    def __init__(self, watch_path, bank_dict=None):
        '''
        Index the PDF files in *watch_path* and set up the known layouts.

        :param watch_path: directory scanned (non-recursively) for files
            whose name ends in ``.pdf``
        :param bank_dict: stored as ``self.bank_symbol``; not used anywhere
            else in this class
        '''
        self.bank_symbol = bank_dict
        self.watch_path = watch_path
        # Document opened by the most recent _wrap_pdf() call, if any.
        self.file = None

        self._pdf_files = [f for f in os.listdir(self.watch_path)
                           if f.endswith('.pdf')]
        # Newest file first.
        self._pdf_files.sort(
            key=lambda x: os.stat(os.path.join(self.watch_path, x)).st_mtime,
            reverse=True)

        # Each layout gives the file-name keyword that selects it ('name')
        # and the regex markers surrounding every field in the page text.
        UNI = {'name': 'dok',
               'value': 'Amount',
               'after_value': 'Requested processing',
               'benef': 'Beneficiary Name',
               'after_benef': 'Account No.',
               'after_benef_ron': 'Beneficiary Fiscal code',
               'date': 'Payment',
               'after_date': 'Payer'}
        BRD = {'name': 'print',
               'value': 'Suma',
               'after_value': 'X',
               'benef': "Beneficiary's account",
               'after_benef': 'Beneficiary',
               'date': 'Data emiterii ',
               'after_date': 'Primit,'}
        # The three BT layouts differ only in their file-name keyword.
        bt_markers = {'value': 'Amount',
                      'after_value': 'Currency',
                      'benef': 'Beneficiary name',
                      'after_benef': 'Beneficiary account',
                      'date': 'Transaction date',
                      'after_date': 'Processing date'}
        BT = {'name': 'transfer', **bt_markers}
        BT_EUR = {'name': 'eur', **bt_markers}
        BT_USD = {'name': 'usd', **bt_markers}

        # Order matters: the first layout whose keyword is in the name wins.
        self.BANKS = (UNI, BRD, BT_EUR, BT_USD, BT)

    def get_ops(self):
        '''Return the list of PDF file names, newest first.'''
        return self._pdf_files

    @contextmanager
    def _last_op(self):
        '''
        Context manager yielding the name of the newest PDF file.

        :raises IndexError: if no PDF file was found in ``watch_path``
        '''
        if not self._pdf_files:
            raise IndexError('no PDF files found in %r' % (self.watch_path,))
        yield self._pdf_files[0]

    def _wrap_pdf(self, file=None):
        '''
        Return the stripped text of the first page of a PDF.

        :param file: path of the PDF to read; when falsy, the newest file in
            ``watch_path`` is used
        :returns: first-page text with surrounding whitespace removed

        The opened document stays available as ``self.file``; the document
        left over from a previous call is closed first so that repeated
        calls do not pile up open handles.
        '''
        if file:
            path = file
        else:
            with self._last_op() as last_file:
                path = os.path.join(self.watch_path, last_file)

        previous = getattr(self, 'file', None)
        # close() on an already closed document raises, hence the guard.
        if previous is not None and not previous.is_closed:
            previous.close()

        self.file = fitz.Document(path)
        first_page = self.file.load_page(0)
        return first_page.get_text('text').strip()

    def _bank_for(self, file_name):
        '''Return the first layout whose keyword occurs in *file_name*, else None.'''
        lowered = file_name.lower()
        for bank in self.BANKS:
            if bank['name'] in lowered:
                return bank
        return None

    @staticmethod
    def _between(text, start_pattern, end_pattern):
        '''
        Return the stripped text between two regex markers.

        Both patterns are searched from the beginning of *text*; the slice
        runs from the end of the first match to the start of the second.

        :raises AttributeError: if either marker is missing (the same
            exception type the un-guarded ``None.end()`` call used to raise)
        '''
        start = re.search(start_pattern, text)
        end = re.search(end_pattern, text)
        if start is None or end is None:
            raise AttributeError(
                'marker not found: %r / %r' % (start_pattern, end_pattern))
        return text[start.end():end.start()].strip()

    def _field(self, start_key, end_key):
        '''Slice one field out of the newest PDF; None if no layout matches.'''
        with self._last_op() as last_file:
            bank = self._bank_for(last_file)
        if bank is None:
            return None
        return self._between(self._wrap_pdf(), bank[start_key], bank[end_key])

    def op_value(self):
        '''
        Return the amount text of the newest PDF.

        :returns: the stripped text, or None when no layout keyword occurs
            in the file name
        :raises AttributeError: if a marker is missing from the page text
        :raises IndexError: if there is no PDF file
        '''
        return self._field('value', 'after_value')

    def op_date(self):
        '''
        Return the date text of the newest PDF.

        :returns: the stripped text, or None when no layout keyword occurs
            in the file name
        :raises AttributeError: if a marker is missing from the page text
        :raises IndexError: if there is no PDF file
        '''
        return self._field('date', 'after_date')

    def op_beneficiary(self):
        '''
        Return the beneficiary text of the newest PDF.

        :returns: the stripped text, or None when no layout keyword occurs
            in the file name
        :raises AttributeError: if the markers (including the fallback end
            marker, where the layout defines one) are missing
        :raises IndexError: if there is no PDF file
        '''
        with self._last_op() as last_file:
            bank = self._bank_for(last_file)
        if bank is None:
            return None

        text = self._wrap_pdf()

        if bank['name'] == 'print':
            # In this layout the end marker also occurs elsewhere; only the
            # occurrence directly followed by the identity-code line counts.
            return self._between(
                text,
                bank['benef'],
                bank['after_benef'] + '(?=\nCUI/CNP / Identity Code)')

        try:
            return self._between(text, bank['benef'], bank['after_benef'])
        except AttributeError:
            # Only some layouts define an alternative end marker; without
            # one the original failure is re-raised as is.
            fallback = bank.get('after_benef_ron')
            if fallback is None:
                raise
            return self._between(text, bank['benef'], fallback)


if __name__ == '__main__':
    op_sender = MyOpSender('.')
    print(op_sender.op_date())
    print(op_sender.op_beneficiary())
    print(op_sender.op_value())