# instant_op.py
  1. # coding: utf-8
  2. import os
  3. import re
  4. from contextlib import contextmanager
  5. import fitz
  6. '''
  7. Created on Jun 6, 2019
  8. @author: deeejas
  9. '''
  10. class MyOpSender(object):
  11. '''
  12. classdocs
  13. '''
  14. w_path = '.'
  15. file_ind = 0
  16. @classmethod
  17. def set_w_path(cls, w_path):
  18. cls.w_path = w_path
  19. return cls.w_path
  20. @classmethod
  21. def set_file_ind(cls, file_ind):
  22. cls.file_ind = file_ind
  23. return cls.file_ind
  24. bt_chk = re.compile('[0-9]*.pdf')
  25. def __init__(self, watch_path, bank_dict=None):
  26. '''
  27. Constructor
  28. '''
  29. self.bank_symbol = bank_dict
  30. self.watch_path = watch_path
  31. self._pdf_files = [f for f in os.listdir(self.watch_path) if f.endswith('.pdf')]
  32. self._pdf_files.sort(key=lambda x: os.stat(os.path.join(self.watch_path, x)).st_mtime, reverse=True)
  33. res = self.bt_chk.search([f for f in os.listdir(self.w_path) if f.endswith('.pdf')][self.file_ind])
  34. UNI = {'name': 'dok', 'value': 'Amount', 'after_value': 'Requested processing', 'benef': 'Beneficiary Name',
  35. 'after_benef': 'Account No.', 'after_benef_ron': 'Beneficiary Fiscal code', 'date': 'Payment', 'after_date': 'Payer'}
  36. BRD = {'name': 'print', 'value': 'Suma', 'after_value': 'X', 'benef': "Beneficiary's account",
  37. 'after_benef': 'Beneficiary', 'date': 'Data emiterii ', 'after_date': 'Primit,'}
  38. BT = {'name': 'transfer', 'value': 'Amount', 'after_value': 'Currency', 'benef': 'Beneficiary name',
  39. 'after_benef': 'Beneficiary account', 'date': 'Transaction date', 'after_date': 'Processing date'}
  40. BT_EUR = {'name': f'eur', 'value': 'Amount', 'after_value': 'Currency', 'benef': 'Beneficiary name',
  41. 'after_benef': 'Beneficiary account', 'date': 'Transaction date', 'after_date': 'Processing date'}
  42. BT_USD = {'name': f'usd', 'value': 'Amount', 'after_value': 'Currency', 'benef': 'Beneficiary name',
  43. 'after_benef': 'Beneficiary account', 'date': 'Transaction date', 'after_date': 'Processing date'}
  44. self.BANKS = (UNI,
  45. BRD,
  46. BT_EUR,
  47. BT_USD,
  48. BT)
  49. def get_ops(self):
  50. return self._pdf_files
  51. @contextmanager
  52. def _last_op(self):
  53. try:
  54. last_op = self._pdf_files[0]
  55. yield last_op
  56. finally:
  57. last_op = None
  58. # return self._pdf_files[0]
  59. def _wrap_pdf(self, file=None):
  60. with self._last_op() as last_file:
  61. if file:
  62. self.file = fitz.Document(file)
  63. else:
  64. self.file = fitz.Document(os.path.join(self.watch_path, last_file))
  65. my_page = self.file.load_page(0)
  66. op_text = my_page.get_text('text')
  67. return op_text.strip()
  68. # if file:
  69. # self.file = fitz.open(file)
  70. # else:
  71. # self.file = fitz.open(os.path.join(self.watch_path, self._last_op()))
  72. # my_page = self.file.load_page(0)
  73. # op_text = my_page.get_text('text')
  74. # return op_text.strip()
  75. def op_value(self):
  76. for bank in self.BANKS:
  77. with self._last_op() as last_file:
  78. # print(last_file, bank['name'])
  79. if bank['name'] in last_file.lower():
  80. amount_ind_start = re.search(bank['value'], self._wrap_pdf())
  81. amount_ind_end = re.search(bank['after_value'], self._wrap_pdf())
  82. return self._wrap_pdf()[amount_ind_start.end():amount_ind_end.start()].strip()
  83. def op_date(self):
  84. for bank in self.BANKS:
  85. with self._last_op() as last_file:
  86. if bank['name'] in last_file.lower():
  87. amount_ind_start = re.search(bank['date'], self._wrap_pdf())
  88. amount_ind_end = re.search(bank['after_date'], self._wrap_pdf())
  89. return self._wrap_pdf()[amount_ind_start.end():amount_ind_end.start()].strip()
  90. def op_beneficiary(self):
  91. for bank in self.BANKS:
  92. with self._last_op() as last_file:
  93. if bank['name'] in last_file.lower():
  94. if 'print' in last_file:
  95. amount_ind_start = re.search(bank['benef'], self._wrap_pdf())
  96. amount_ind_end = re.search(bank['after_benef'] + '(?=\nCUI/CNP / Identity Code)', self._wrap_pdf()) # + '(?=\nCUI/CNP / Identity Code)'
  97. return self._wrap_pdf()[amount_ind_start.end():amount_ind_end.start()].strip()
  98. else:
  99. try:
  100. amount_ind_start = re.search(bank['benef'], self._wrap_pdf())
  101. amount_ind_end = re.search(bank['after_benef'], self._wrap_pdf())
  102. return self._wrap_pdf()[amount_ind_start.end():amount_ind_end.start()].strip()
  103. except AttributeError:
  104. amount_ind_start = re.search(bank['benef'], self._wrap_pdf())
  105. amount_ind_end = re.search(bank['after_benef_ron'], self._wrap_pdf())
  106. if (amount_ind_start and amount_ind_end) is None:
  107. raise AttributeError
  108. return
  109. else:
  110. return self._wrap_pdf()[amount_ind_start.end():amount_ind_end.start()].strip()
  111. if __name__ == '__main__':
  112. op_sender = MyOpSender('.')
  113. # print(op_sender._last_op())
  114. # print(op_sender._wrap_pdf())
  115. # print(re.search('Suma transferului ', op_sender._wrap_pdf()))
  116. print(op_sender.op_date())
  117. print(op_sender.op_beneficiary())
  118. print(op_sender.op_value())
  119. # with op_sender._last_op() as l_file:
  120. # print(op_sender._wrap_pdf(l_file))
  121. # print(re.search("Beneficiary's account", op_sender._wrap_pdf(l_file)))
  122. # print(re.search('Beneficiary' + '(?=\nCUI/CNP / Identity Code)', op_sender._wrap_pdf(l_file)))