# pdf2exc.py

'''Print the item table extracted from a PDF with camelot.

Created Dec 28, 2021 Levi
'''
import sys

import camelot
import pandas as pd
  4. tables = camelot.read_pdf("C:/Users/Levi/Downloads/ikea/ROINV22000000202597.pdf", flavor='stream')
  5. df = tables[0].df
  6. # concat = lambda s1, s2: '{} {}'.format(s1, s2)
  7. # r3 = df.iloc[3].combine(df.iloc[4], func=concat)
  8. # df.iloc[3] = r3
  9. df = df[5:]
  10. # print(df)
  11. columns = ['Cod', 'Denumire', 'Cantitate', 'Pret']
  12. ndf = pd.DataFrame(df[[0, 1, 2, 3]], columns=[0, 1, 2, 3])
  13. ndf.columns = columns
  14. ndf['Denumire'] = ndf['Denumire'].str.upper()
  15. print(ndf)
  16. # doc = fitz.Document("C:/Users/Levi/Downloads/ikea/ROINV22000000202597.pdf")
  17. # page = doc.load_page(1)
  18. # pix = page.get_pixmap(matrix=mat) # render page to an image
  19. # print(page)
  20. # print(pix)
  21. # text = page.get_text('text')
  22. # print(pix.save('out.png'))