123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148 |
- # coding: utf-8
- import os
- import re
- from contextlib import contextmanager
- import fitz
- '''
- Created on Jun 6, 2019
- @author: deeejas
- '''
class MyOpSender:
    '''
    Pull the amount, date and beneficiary out of the newest PDF in a folder.

    The newest ``.pdf`` file (by modification time) in ``watch_path`` is
    matched against ``BANKS`` by looking for each layout's ``name`` inside
    the lower-cased file name.  The first layout that matches supplies the
    regex markers between which the requested field is cut out of the text
    of the PDF's first page.
    '''

    # Class-wide settings kept for backward compatibility with callers that
    # use the setters below; the constructor no longer reads them.
    w_path = '.'
    file_ind = 0
    # NOTE(review): the dot is unescaped, so it matches any character --
    # confirm whether a literal '.pdf' suffix was intended.
    bt_chk = re.compile('[0-9]*.pdf')

    # Layouts are tried in this order and the first hit wins, so the
    # specific 'eur'/'usd' entries must stay ahead of generic 'transfer'.
    BANKS = (
        {'name': 'dok', 'value': 'Amount',
         'after_value': 'Requested processing',
         'benef': 'Beneficiary Name', 'after_benef': 'Account No.',
         'after_benef_ron': 'Beneficiary Fiscal code',
         'date': 'Payment', 'after_date': 'Payer'},
        {'name': 'print', 'value': 'Suma', 'after_value': 'X',
         'benef': "Beneficiary's account", 'after_benef': 'Beneficiary',
         'date': 'Data emiterii ', 'after_date': 'Primit,'},
        {'name': 'eur', 'value': 'Amount', 'after_value': 'Currency',
         'benef': 'Beneficiary name', 'after_benef': 'Beneficiary account',
         'date': 'Transaction date', 'after_date': 'Processing date'},
        {'name': 'usd', 'value': 'Amount', 'after_value': 'Currency',
         'benef': 'Beneficiary name', 'after_benef': 'Beneficiary account',
         'date': 'Transaction date', 'after_date': 'Processing date'},
        {'name': 'transfer', 'value': 'Amount', 'after_value': 'Currency',
         'benef': 'Beneficiary name', 'after_benef': 'Beneficiary account',
         'date': 'Transaction date', 'after_date': 'Processing date'},
    )

    @classmethod
    def set_w_path(cls, w_path):
        '''Store ``w_path`` on the class and return the stored value.'''
        cls.w_path = w_path
        return cls.w_path

    @classmethod
    def set_file_ind(cls, file_ind):
        '''Store ``file_ind`` on the class and return the stored value.'''
        cls.file_ind = file_ind
        return cls.file_ind

    def __init__(self, watch_path, bank_dict=None):
        '''
        List the PDF files of ``watch_path``, newest first.

        :param watch_path: directory that is scanned for ``.pdf`` files
        :param bank_dict: stored unchanged as ``self.bank_symbol``
        :raises OSError: if ``watch_path`` cannot be listed
        '''
        self.bank_symbol = bank_dict
        self.watch_path = watch_path
        # Most recently opened PDF document; see _wrap_pdf().
        self.file = None
        self._pdf_files = sorted(
            (f for f in os.listdir(self.watch_path) if f.endswith('.pdf')),
            key=lambda x: os.stat(os.path.join(self.watch_path, x)).st_mtime,
            reverse=True,
        )

    def get_ops(self):
        '''Return the PDF file names of ``watch_path``, newest first.'''
        return self._pdf_files

    @contextmanager
    def _last_op(self):
        '''
        Yield the name of the newest PDF file.

        :raises IndexError: if no PDF file was found in ``watch_path``
        '''
        if not self._pdf_files:
            raise IndexError(f'no PDF files found in {self.watch_path!r}')
        yield self._pdf_files[0]

    def _close_document(self):
        '''Close the previously opened PDF document, if it is still open.'''
        previous = getattr(self, 'file', None)
        if previous is not None and not previous.is_closed:
            previous.close()

    def _wrap_pdf(self, file=None):
        '''
        Return the stripped text of the first page of a PDF.

        :param file: path of the PDF to read; defaults to the newest PDF
            in ``watch_path``
        :raises IndexError: if ``file`` is not given and the folder holds
            no PDF files
        '''
        if file:
            path = file
        else:
            with self._last_op() as last_file:
                path = os.path.join(self.watch_path, last_file)
        # Release the handle of the previous document before replacing it,
        # otherwise every call would leave one more open document behind.
        self._close_document()
        self.file = fitz.Document(path)
        return self.file.load_page(0).get_text('text').strip()

    def _match_bank(self):
        '''
        Return ``(layout, file_name)`` for the newest PDF, or ``None``.

        :raises IndexError: if the folder holds no PDF files
        '''
        with self._last_op() as last_file:
            lowered = last_file.lower()
            for bank in self.BANKS:
                if bank['name'] in lowered:
                    return bank, last_file
        return None

    @staticmethod
    def _between(text, start_pattern, end_pattern):
        '''
        Return the stripped text between two regex markers.

        Both markers are searched from the beginning of ``text``.

        :raises AttributeError: if either marker is not found
        '''
        start = re.search(start_pattern, text)
        end = re.search(end_pattern, text)
        if start is None or end is None:
            missing = start_pattern if start is None else end_pattern
            raise AttributeError(f'marker {missing!r} not found in PDF text')
        return text[start.end():end.start()].strip()

    def _field(self, start_key, end_key):
        '''Cut the field delimited by two layout keys out of the newest PDF.'''
        matched = self._match_bank()
        if matched is None:
            return None
        bank, _ = matched
        # Parse the PDF once and reuse the text for both marker searches.
        return self._between(self._wrap_pdf(), bank[start_key], bank[end_key])

    def op_value(self):
        '''
        Return the amount found in the newest PDF.

        :return: the text between the layout's ``value`` and
            ``after_value`` markers, or ``None`` if no layout matches the
            file name
        :raises AttributeError: if a marker is missing from the PDF text
        :raises IndexError: if the folder holds no PDF files
        '''
        return self._field('value', 'after_value')

    def op_date(self):
        '''
        Return the date found in the newest PDF.

        :return: the text between the layout's ``date`` and ``after_date``
            markers, or ``None`` if no layout matches the file name
        :raises AttributeError: if a marker is missing from the PDF text
        :raises IndexError: if the folder holds no PDF files
        '''
        return self._field('date', 'after_date')

    def op_beneficiary(self):
        '''
        Return the beneficiary found in the newest PDF.

        Files whose name contains ``print`` end the field at the
        ``after_benef`` marker that is directly followed by the
        ``CUI/CNP / Identity Code`` line.  Other files use ``after_benef``
        and fall back to ``after_benef_ron`` when the layout defines it.

        :return: the beneficiary text, or ``None`` if no layout matches
            the file name
        :raises AttributeError: if the markers are missing from the PDF text
        :raises IndexError: if the folder holds no PDF files
        '''
        matched = self._match_bank()
        if matched is None:
            return None
        bank, last_file = matched
        text = self._wrap_pdf()
        # Layout selection ignores case, so this check has to as well.
        if 'print' in last_file.lower():
            end_pattern = bank['after_benef'] + '(?=\nCUI/CNP / Identity Code)'
            return self._between(text, bank['benef'], end_pattern)
        try:
            return self._between(text, bank['benef'], bank['after_benef'])
        except AttributeError:
            fallback = bank.get('after_benef_ron')
            if fallback is None:
                # No alternative end marker for this layout.
                raise
            return self._between(text, bank['benef'], fallback)
if __name__ == '__main__':
    # Manual smoke run: print date, beneficiary and amount of the newest
    # PDF in the current directory, in that order.
    op_sender = MyOpSender('.')
    for extract in (op_sender.op_date,
                    op_sender.op_beneficiary,
                    op_sender.op_value):
        print(extract())
|