'''
Created on Nov 14, 2019

@author: deeejas
'''

import re

try:
    import fitz
except ImportError:
    # PyMuPDF is only needed to open a PDF. Without it the text-parsing code
    # (itemList) stays importable and the document classes raise on use.
    fitz = None

# Separator between the fields of one item row in the extracted page text.
_FIELD_SEP = ' \n'
# Separator between consecutive item rows in the extracted page text.
_ROW_SEP = '   \n'


class AtlasParseError(AttributeError, ValueError):
    ''' Raised when an expected marker or field is missing from the page text.

    It derives from AttributeError because a failed lookup previously
    surfaced as an AttributeError (a method call on a ``None`` match), so
    handlers written against that behaviour keep working.
    '''


def _field_at(fields, index):
    ''' Return ``fields[index]``, or None when the row has no such field. '''
    try:
        return fields[index]
    except IndexError:
        return None


def _first_match(patterns, text, label):
    ''' Return the text matched by the first pattern that matches *text*.

    Patterns are tried in order; *label* only feeds the error message.
    Raises AtlasParseError when none of them matches.
    '''
    for pattern in patterns:
        found = re.search(pattern, text)
        if found:
            return found.group(0)
    raise AtlasParseError('could not find %s in the document text' % label)


def _call_first(obj, names, *args):
    ''' Call the first method of *obj* that exists among *names*.

    Used to prefer PyMuPDF's snake_case method names while still working
    with releases that only offer the older camelCase ones.
    '''
    for name in names:
        method = getattr(obj, name, None)
        if method is not None:
            return method(*args)
    raise AttributeError(
        '%s object has none of the methods: %s'
        % (type(obj).__name__, ', '.join(names)))


class itemList(object):
    ''' Column-wise view of the item rows of a document.

    Every entry of ``item_list`` is split on a space followed by a newline
    and the resulting fields are collected into parallel lists:

    * ``codes``   - field ``code_ind`` with any ``ROSP-`` prefix removed
    * ``names``   - field ``name_ind`` prefixed with ``'ROLAND '``
    * ``qnt``     - field 3 when it parses as an integer, otherwise field 2
    * ``net_val`` - the field right after the one used for ``qnt``, with
      commas replaced by dots
    * ``disc``    - a ``'0'`` for every entry of ``item_list`` but one

    Empty fields and rows that are too short for a column are skipped for
    that column only, so the lists are not guaranteed to have equal length.
    Attributes can also be read with ``obj['codes']`` style indexing.

    code_ind / name_ind: 1 and 2 for invoices, 0 and 1 for proformas.
    '''

    def __init__(self, item_list, code_ind, name_ind):
        self.item_list = item_list
        self._code_ind = code_ind
        self._name_ind = name_ind
        self.codes = []
        self.names = []
        self.qnt = []
        self.net_val = []
        for row in self.item_list:
            self._add_row(row.split(_FIELD_SEP))
        # One default discount per row, leaving out one chunk of the list
        # (presumably the trailing remainder after the last separator --
        # TODO confirm against real documents).
        self.disc = ['0'] * (len(self.item_list) - 1)

    def _add_row(self, fields):
        ''' Append the usable fields of one split row to the column lists. '''
        code = _field_at(fields, self._code_ind)
        if code:
            self.codes.append(code.replace('ROSP-', ''))

        name = _field_at(fields, self._name_ind)
        if name:
            self.names.append('ROLAND ' + name)

        if len(fields) < 4:
            # Quantity and net value are both located relative to field 3.
            return
        try:
            int(fields[3])
        except ValueError:
            # Field 3 is not an integer quantity: the quantity and the net
            # value are taken one position earlier.
            quantity, net = fields[2], fields[3]
        else:
            quantity, net = fields[3], _field_at(fields, 4)

        if quantity:
            self.qnt.append(quantity)
        if net:
            self.net_val.append(net.replace(',', '.'))

    def __getitem__(self, k):
        ''' Return the instance attribute named *k* (KeyError if absent). '''
        return self.__dict__[k]


class _AtlasDocument(object):
    ''' Shared parsing of the first page of an Atlas PDF document.

    Subclasses only provide the class-level settings below. After
    construction the instance exposes:

    * ``doc``, ``fpage``, ``cnt`` - the opened document, its first page and
      that page's plain text
    * ``items_start``, ``items_end`` - regex matches delimiting the item area
    * ``items_area`` - the row chunks between those two markers
    * ``items_details`` - an itemList built from ``items_area``
    * ``size`` - number of row chunks minus one (also ``len(obj)``)
    * ``inv_nr``, ``data``, ``vat_num`` - document number, date (spaces
      removed, ``/`` replaced by ``.``) and VAT number

    Attributes can also be read with ``obj['inv_nr']`` style indexing.
    '''

    # Marker that precedes the item rows.
    _ITEMS_START = r'Price \(€\)'
    # Marker that follows the item rows (set by subclasses).
    _ITEMS_END = None
    # Characters skipped after the start marker when slicing the item area.
    _ITEMS_OFFSET = 3
    # Characters skipped after the start marker when counting rows.
    # NOTE(review): proformas slice the item area with offset 2 but have
    # always counted rows with offset 3; kept as is -- confirm it is intended.
    _SIZE_OFFSET = 3
    # Field positions of the item code and the item name inside a row.
    _CODE_IND = 1
    _NAME_IND = 2
    # Patterns for the document number, tried in order (set by subclasses).
    _NUMBER_PATTERNS = ()
    _DATE_PATTERNS = (r'(?<=Date: ).+', r'(?<=Date : ).+')
    _VAT_PATTERNS = (r'(?<=VAT No: )[A-Z0-9]+',)

    def __init__(self, invoice):
        ''' Open *invoice* with PyMuPDF and parse the text of its first page.

        Raises ImportError when PyMuPDF is not installed and AtlasParseError
        when the page text lacks an expected marker or field.
        '''
        if fitz is None:
            raise ImportError('PyMuPDF (fitz) is required to read PDF files')
        self.doc = fitz.open(invoice)
        # Newer PyMuPDF releases use snake_case names; fall back to the
        # camelCase names this module was originally written against.
        self.fpage = _call_first(self.doc, ('load_page', 'loadPage'), 0)
        self.cnt = _call_first(self.fpage, ('get_text', 'getText'), 'text')
        self._parse_content()

    def _parse_content(self):
        ''' Fill the parsed attributes from the page text in ``self.cnt``. '''
        text = self.cnt
        self.items_start = re.search(self._ITEMS_START, text)
        self.items_end = re.search(self._ITEMS_END, text)
        if self.items_start is None or self.items_end is None:
            raise AtlasParseError(
                'could not locate the item table in the document text')

        begin = self.items_start.end()
        stop = self.items_end.start()
        self.items_area = text[begin + self._ITEMS_OFFSET:stop].split(_ROW_SEP)
        self.items_details = itemList(
            self.items_area, self._CODE_IND, self._NAME_IND)
        self.size = len(text[begin + self._SIZE_OFFSET:stop].split(_ROW_SEP)) - 1

        self.inv_nr = _first_match(
            self._NUMBER_PATTERNS, text, 'the document number')
        raw_date = _first_match(self._DATE_PATTERNS, text, 'the date')
        self.data = raw_date.replace(' ', '').replace('/', '.')
        self.vat_num = _first_match(self._VAT_PATTERNS, text, 'the VAT number')

    def __getitem__(self, k):
        ''' Return the instance attribute named *k* (KeyError if absent). '''
        return self.__dict__[k]

    def __len__(self):
        ''' Return ``size``: the number of row chunks minus one. '''
        return self.size


class atlasInvoice(_AtlasDocument):
    ''' Invoice: items end at ``Total Net Value (€)``; the number follows
    ``INV`` or, failing that, ``INVDN``.
    '''

    _ITEMS_END = r'Total Net Value \(€\)'
    _ITEMS_OFFSET = 3
    _CODE_IND = 1
    _NAME_IND = 2
    _NUMBER_PATTERNS = (r'(?<=INV)[0-9]+', r'(?<=INVDN)[0-9]+')


class atlasProforma(_AtlasDocument):
    ''' Proforma: items end at ``Total (€)``; the number follows ``INV`` or,
    failing that, ``PAX``.
    '''

    _ITEMS_END = r'Total \(€\)'
    _ITEMS_OFFSET = 2
    _CODE_IND = 0
    _NAME_IND = 1
    _NUMBER_PATTERNS = (r'(?<=INV)[0-9]+', r'(?<=PAX)[0-9]+')