import os # from xml.etree import ElementTree as et import PyPDF2 import xlsxwriter as xlsw import openpyxl as xl def filter_files(_ext, path=None): '''lists all pdf's in current directory''' if path: my_files = filter(lambda file: file.endswith(_ext) or file.endswith(_ext.upper()), os.listdir(path)) else: my_files = filter(lambda file: file.endswith(_ext) or file.endswith(_ext.upper()), os.listdir()) return my_files def add_dict_item(my_dict, **kwargs): for k in kwargs: t_dict = {k: kwargs[k]} my_dict.update(t_dict) return my_dict def dict_to_list(s_dict, from_ind=None): if from_ind is None: n_list = [s_dict[i] for i in s_dict] else: n_list = [s_dict[i] for i in s_dict] n_list.pop(from_ind) return n_list class Type_Error(Exception): d_types = ['100', '710'] def __init__(self, m='Error, the given declaration type is not valid, valid code`s are:'): self.type_list = [i for i in Type_Error.d_types] self.mess = m + str(self.type_list) def __str__(self): return self.mess class Declaratii(object): def __init__(self, d_type='', _ext='', xl_dict={}): self.d_type = d_type self._ext = _ext self.xl_dict = xl_dict if d_type not in Type_Error.d_types: raise Type_Error() @property def code_list(self): return Type_Error.d_types def add_code(self, _code): Type_Error.d_types.append(_code) @property def get_d_type(self): return self.d_type def init_headers(self): self.headers = [] if self.d_type not in Type_Error.d_types: raise Type_Error() if self.d_type == Type_Error.d_types[0]: self.headers = ['cod_fiscal', 'denumire', 'luna', 'an', 'cod_impozit_1', 'suma_1', 'cod_impozit2', 'suma2'] elif self.d_type == Type_Error.d_types[1]: self.headers = ['cod_fiscal', 'denumire', 'luna', 'an', 'cod_impozit_1', 'suma_i', 'suma_c', 'cod_impozit2', 'suma_i2', 'suma_c2'] return self.headers def init_dict(self, _inp): self.d_dict = {} if self.d_type not in Type_Error.d_types: raise Type_Error() if self.d_type == Type_Error.d_types[0]: year = 'an_r' month = 'luna_r' f_code = 'cif' c_name = 'denumire' cod_0 = 'cod_oblig_0' dat_0 = 'suma_plata_0' cod_1 = 'cod_oblig_1' dat_1 = 'suma_plata_1' cont = PyPDF2.PdfFileReader(_inp).getFormTextFields() if cod_1 in cont: self.d_dict = {cont[f_code]: [cont[c_name], cont[month], cont[year], cont[cod_0], cont[dat_0], cont[cod_1], cont[dat_1]]} else: self.d_dict = {cont[f_code]: [cont[c_name], cont[month], cont[year], cont[cod_0], cont[dat_0]]} return self.d_dict elif self.d_type == Type_Error.d_types[1]: year = 'an_r' month = 'luna_r' f_code = 'cif' c_name = 'denumire' cod_0 = 'cod_oblig_0' dat_i0 = 'suma_plata_I0' dat_c0 = 'suma_plata_C0' cod_1 = 'cod_oblig_1' dat_i1 = 'suma_plata_I1' dat_c1 = 'suma_plata_C1' cont = PyPDF2.PdfFileReader(_inp).getFormTextFields() if cod_1 in cont: self.d_dict = {cont[f_code]: [cont[c_name], cont[month], cont[year], cont[cod_0], cont[dat_i0], cont[dat_c0], cont[cod_1], cont[dat_i1], cont[dat_c1]]} else: self.d_dict = {cont[f_code]: [cont[c_name], cont[month], cont[year], cont[cod_0], cont[dat_i0], cont[dat_c0]]} return self.d_dict def gen_xls(self, xl_name, w_sheet_name, seq, headers=[], **kwargs): _out = xlsw.Workbook(xl_name + '.xlsx') _sheet = _out.add_worksheet(w_sheet_name) _row, _col = 1, 0 headformat = _out.add_format({'bold': True}) hcol = 0 for h in headers: _sheet.write(1, hcol, h, headformat) hcol += 1 for s in seq: for i in self.init_dict(s): _row += 1 _sheet.write(_row, _col, i) print(f'write {_row}', i) _sheet.write_row(_row, 1, self.init_dict(s)[i]) for k, a in kwargs.items(): if k == 'autofilter': _sheet.autofilter(a[0], a[1], 100, 100) if k == 'freeze': _sheet.freeze_panes(a[0], a[1]) _out.close() wb = xl.load_workbook(xl_name + '.xlsx') sh = wb.worksheets[0] colC = sh['C3':'D{}'.format(sh.max_row)] colF = sh['F3':'F{}'.format(sh.max_row)] colI = sh['H3':'H{}'.format(sh.max_row)] for valuesC, valuesF, valuesI in zip(colC, colF, colI): for value in valuesC: if isinstance(value.value, str): try: num_value = float(value.value) value.value = num_value except ValueError as ve: print(str(ve)) for value in valuesF: if isinstance(value.value, str): try: num_value = float(value.value) value.value = num_value except ValueError as ve: print(str(ve)) value.number_format = '#,##0.00' for value in valuesI: if isinstance(value.value, str): try: num_value = float(value.value) value.value = num_value except ValueError as ve: print(str(ve)) value.number_format = '#,##0.00' # value.number_format = '#,##0' wb.save(xl_name + '.xlsx') wb.close() _decl = Declaratii('100', '.pdf') # for i in recipisa.filter_files('.xml'): # print(_decl.init_dict(i)) _decl.gen_xls('D100', 'data', filter_files('.pdf'), _decl.init_headers(), autofilter=[1, 0], freeze=[2, 1])