'''
Created on 3 Jun 2024

@author: vnc-console
'''
import unittest
import re
import os
import datetime

from jinja2 import (Environment, PackageLoader)
import pymupdf  # noqa
import pandas as pd

from py_mentor_imports import (convert_date_format, get_last_month,
                               BoltSaleInvoice, Partner)


class Test(unittest.TestCase):
    '''Turn a folder of invoice PDFs into two rendered text files.

    The first page of every ``.pdf`` below ``source_dir`` is scanned with the
    regular expressions declared on this class.  The extracted fields are
    rendered through the ``bolt_sale_invoice.txt`` and ``partners.txt``
    templates and written to ``~/Downloads/mentor/import/RIDEWAVE`` as
    ``facturi.txt`` and ``PARTNER.txt``.
    '''

    templates = Environment(loader=PackageLoader('py_mentor_imports', 'templates'))
    template = templates.get_template('bolt_sale_invoice.txt')
    template2 = templates.get_template('partners.txt')

    # Folder that is walked recursively for invoice PDFs.
    source_dir = r'f:\Gdrive\Misc\Dosar-Contabilitate\Ridewave\2024_04\e-Fact'

    # Buyer identification code; whichever labelled variant occurs first in
    # the page text wins.
    code_string = (
        r'(?<=Cod TVA: )[a-zA-Z0-9]*'
        r'|(?<=VAT no.: )[a-zA-Z0-9]*'
        r'|(?<=Reg. code: )[a-zA-Z0-9]*'
        r'|(?<=Cod unic de înregistrare: )[a-zA-Z0-9]*'
    )
    # Same lookup restricted to the two VAT labels; used as a fallback when
    # code_string picked up a value starting with 'J'.
    vat_string = r'(?<=Cod TVA: )[a-zA-Z0-9]*|(?<=VAT no.: )[a-zA-Z0-9]*'
    reg_string = r'(?<=Reg. code: )[a-zA-Z0-9]*'
    # Buyer name: the text immediately preceding the "Ridewave SRL" line.
    name_string = r'[a-zA-Z0-9. ]*(?=\nRidewave SRL)'
    value_string = r'(?<=otal \(RON\):\n)[a-zA-Z0-9.]*'
    docn_string = r'(?<=Factura nr. )[a-zA-Z\n0-9-]*|(?<=Invoice no. )[a-zA-Z\n0-9-]*'
    note_string = r'(?<=Pornire: )[a-zA-Z \n,.\(\):0-9-]*|(?<=Start: )[a-zA-Z \n,.\(\):0-9-]*'
    date_string = r'(?<=Dată: )[0-9.]*|(?<=Date: )[0-9.]*'

    # Column order of the intermediate DataFrame.
    _columns = ('dates', 'names', 'codes', 'values', 'numbers', 'notes')

    @staticmethod
    def _require(pattern: str, text: str, field: str, path: str) -> str:
        '''Return the text matched by *pattern* in *text*.

        Raises:
            ValueError: if the pattern is not found; the message names the
                PDF and the missing field so the offending file is easy to
                locate.
        '''
        match = re.search(pattern, text)
        if match is None:
            raise ValueError(f'{path}: could not find {field} in page text')
        return match.group()

    def _parse_invoice(self, path: str) -> dict:
        '''Extract the invoice fields from the first page of the PDF at *path*.

        Returns:
            A dict keyed by the names in ``_columns``.

        Raises:
            ValueError: if any of the expected fields is missing.
        '''
        with pymupdf.open(path) as doc:
            txt = doc[0].get_text()

        buyer_name = self._require(self.name_string, txt, 'buyer name', path)
        buyer_code = self._require(self.code_string, txt, 'buyer code', path)
        if buyer_code == '49340170':
            # This code cannot be used as extracted; ask the operator.
            # NOTE(review): the reason is not visible in this file.
            buyer_code = input(f'enter code manually for {buyer_name}')
        elif buyer_code.startswith('J'):
            # A value starting with 'J' is presumably a registration number
            # rather than a VAT code, so retry with the VAT labels only.
            buyer_code = self._require(self.vat_string, txt, 'buyer VAT code', path)
        elif buyer_code == '14532901':
            # Hard-coded remap kept from the original script.
            buyer_code = '42717800'

        total_value = self._require(self.value_string, txt, 'total value', path)
        # Only the last three characters of the document number are kept.
        doc_number = self._require(
            self.docn_string, txt, 'document number', path
        ).strip().replace('\n', '')[-3:]
        doc_notes = self._require(
            self.note_string, txt, 'notes', path
        ).strip().replace('\n', '')
        doc_date = self._require(self.date_string, txt, 'date', path)

        return {
            'dates': doc_date,
            'names': buyer_name,
            'codes': buyer_code,
            'values': total_value,
            'numbers': doc_number,
            'notes': doc_notes,
        }

    def test_bolt_inv(self) -> None:
        '''Parse every PDF under ``source_dir`` and write both output files.

        Raises:
            ValueError: if a PDF lacks one of the expected fields.
        '''
        rows = []
        for root, _dir, files in os.walk(self.source_dir):
            for file in files:
                if file.endswith('.pdf'):
                    rows.append(self._parse_invoice(os.path.join(root, file)))

        # Explicit columns keep the frame well-formed when no PDF was found.
        df = pd.DataFrame(rows, columns=list(self._columns))

        invoices = []
        partners = []
        for date, name, code, value, number, note in zip(
                df['dates'], df['names'], df['codes'],
                df['values'], df['numbers'], df['notes']):
            invoices.append(BoltSaleInvoice(doc_data=date,
                                            buyer_name=name,
                                            buyer_code=code,
                                            total_value=value,
                                            doc_number=number,
                                            doc_notes=note))
            partners.append(Partner(vat_code=code, name=name))

        data = {'AnLucru': datetime.date.today().year,
                'LunaLucru': get_last_month(0),
                'TotalFacturi': len(df),
                'documents': invoices,
                'Code': 'TRANSPMF'
                }
        data2 = {'partners': partners}
        output = self.template.render(data)
        output2 = self.template2.render(data2)
        print(output)

        home = os.path.expanduser('~')
        downloads_dir = os.path.join(home, 'Downloads', 'mentor', 'import', 'RIDEWAVE')
        # Create the target folder on first use instead of failing in open().
        os.makedirs(downloads_dir, exist_ok=True)
        output_file = os.path.join(downloads_dir, 'facturi.txt')
        output_file2 = os.path.join(downloads_dir, 'PARTNER.txt')
        # NOTE(review): both files are written with the platform default
        # encoding, as before; confirm what the importing application
        # expects before pinning an explicit encoding.
        with open(output_file, 'w') as text:
            text.write(output)
        with open(output_file2, 'w') as text:
            text.write(output2)


if __name__ == "__main__":
    # Run the conversion directly instead of going through unittest.main().
    Test().test_bolt_inv()