123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171 |
- import os
- # from xml.etree import ElementTree as et
- import PyPDF2
- import xlsxwriter as xlsw
- import openpyxl as xl
- def filter_files(_ext, path=None):
- '''lists all pdf's in current directory'''
- if path:
- my_files = filter(lambda file: file.endswith(_ext) or
- file.endswith(_ext.upper()), os.listdir(path))
- else:
- my_files = filter(lambda file: file.endswith(_ext) or
- file.endswith(_ext.upper()), os.listdir())
- return my_files
- def add_dict_item(my_dict, **kwargs):
- for k in kwargs:
- t_dict = {k: kwargs[k]}
- my_dict.update(t_dict)
- return my_dict
- def dict_to_list(s_dict, from_ind=None):
- if from_ind is None:
- n_list = [s_dict[i] for i in s_dict]
- else:
- n_list = [s_dict[i] for i in s_dict]
- n_list.pop(from_ind)
- return n_list
- class Type_Error(Exception):
- d_types = ['100', '710']
- def __init__(self, m='Error, the given declaration type is not valid, valid code`s are:'):
- self.type_list = [i for i in Type_Error.d_types]
- self.mess = m + str(self.type_list)
- def __str__(self):
- return self.mess
- class Declaratii(object):
- def __init__(self, d_type='', _ext='', xl_dict={}):
- self.d_type = d_type
- self._ext = _ext
- self.xl_dict = xl_dict
- if d_type not in Type_Error.d_types:
- raise Type_Error()
- @property
- def code_list(self):
- return Type_Error.d_types
- def add_code(self, _code):
- Type_Error.d_types.append(_code)
- @property
- def get_d_type(self):
- return self.d_type
- def init_headers(self):
- self.headers = []
- if self.d_type not in Type_Error.d_types:
- raise Type_Error()
- if self.d_type == Type_Error.d_types[0]:
- self.headers = ['cod_fiscal', 'denumire', 'luna', 'an', 'cod_impozit_1', 'suma_1', 'cod_impozit2', 'suma2']
- elif self.d_type == Type_Error.d_types[1]:
- self.headers = ['cod_fiscal', 'denumire', 'luna', 'an', 'cod_impozit_1', 'suma_i', 'suma_c', 'cod_impozit2', 'suma_i2', 'suma_c2']
- return self.headers
- def init_dict(self, _inp):
- self.d_dict = {}
- if self.d_type not in Type_Error.d_types:
- raise Type_Error()
- if self.d_type == Type_Error.d_types[0]:
- year = 'an_r'
- month = 'luna_r'
- f_code = 'cif'
- c_name = 'denumire'
- cod_0 = 'cod_oblig_0'
- dat_0 = 'suma_plata_0'
- cod_1 = 'cod_oblig_1'
- dat_1 = 'suma_plata_1'
- cont = PyPDF2.PdfFileReader(_inp).getFormTextFields()
- if cod_1 in cont:
- self.d_dict = {cont[f_code]: [cont[c_name], cont[month], cont[year], cont[cod_0], cont[dat_0], cont[cod_1], cont[dat_1]]}
- else:
- self.d_dict = {cont[f_code]: [cont[c_name], cont[month], cont[year], cont[cod_0], cont[dat_0]]}
- return self.d_dict
- elif self.d_type == Type_Error.d_types[1]:
- year = 'an_r'
- month = 'luna_r'
- f_code = 'cif'
- c_name = 'denumire'
- cod_0 = 'cod_oblig_0'
- dat_i0 = 'suma_plata_I0'
- dat_c0 = 'suma_plata_C0'
- cod_1 = 'cod_oblig_1'
- dat_i1 = 'suma_plata_I1'
- dat_c1 = 'suma_plata_C1'
- cont = PyPDF2.PdfFileReader(_inp).getFormTextFields()
- if cod_1 in cont:
- self.d_dict = {cont[f_code]: [cont[c_name], cont[month], cont[year], cont[cod_0], cont[dat_i0], cont[dat_c0],
- cont[cod_1], cont[dat_i1], cont[dat_c1]]}
- else:
- self.d_dict = {cont[f_code]: [cont[c_name], cont[month], cont[year], cont[cod_0], cont[dat_i0], cont[dat_c0]]}
- return self.d_dict
- def gen_xls(self, xl_name, w_sheet_name, seq, headers=[], **kwargs):
- _out = xlsw.Workbook(xl_name + '.xlsx')
- _sheet = _out.add_worksheet(w_sheet_name)
- _row, _col = 1, 0
- headformat = _out.add_format({'bold': True})
- hcol = 0
- for h in headers:
- _sheet.write(1, hcol, h, headformat)
- hcol += 1
- for s in seq:
- for i in self.init_dict(s):
- _row += 1
- _sheet.write(_row, _col, i)
- print(f'write {_row}', i)
- _sheet.write_row(_row, 1, self.init_dict(s)[i])
- for k, a in kwargs.items():
- if k == 'autofilter':
- _sheet.autofilter(a[0], a[1], 100, 100)
- if k == 'freeze':
- _sheet.freeze_panes(a[0], a[1])
- _out.close()
- wb = xl.load_workbook(xl_name + '.xlsx')
- sh = wb.worksheets[0]
- colC = sh['C3':'D{}'.format(sh.max_row)]
- colF = sh['F3':'F{}'.format(sh.max_row)]
- colI = sh['H3':'H{}'.format(sh.max_row)]
- for valuesC, valuesF, valuesI in zip(colC, colF, colI):
- for value in valuesC:
- if isinstance(value.value, str):
- try:
- num_value = float(value.value)
- value.value = num_value
- except ValueError as ve:
- print(str(ve))
- for value in valuesF:
- if isinstance(value.value, str):
- try:
- num_value = float(value.value)
- value.value = num_value
- except ValueError as ve:
- print(str(ve))
- value.number_format = '#,##0.00'
- for value in valuesI:
- if isinstance(value.value, str):
- try:
- num_value = float(value.value)
- value.value = num_value
- except ValueError as ve:
- print(str(ve))
- value.number_format = '#,##0.00'
- # value.number_format = '#,##0'
- wb.save(xl_name + '.xlsx')
- wb.close()
- _decl = Declaratii('100', '.pdf')
- # for i in recipisa.filter_files('.xml'):
- # print(_decl.init_dict(i))
- _decl.gen_xls('D100', 'data', filter_files('.pdf'), _decl.init_headers(),
- autofilter=[1, 0], freeze=[2, 1])
|