# invoice_reader.py (555 B)
  1. # coding:utf-8
  2. import os
  3. import re
  4. import fitz
  5. file = fitz.open('MOZAIK CONSULTING INV0001145.PDF')
  6. cnt = file.loadPage(0)
  7. head = cnt.searchFor('Ln')[0]
  8. my_word = [w for w in cnt.getTextWords() if fitz.Rect(w[:4]) in head]
  9. print(my_word[0][4])
  10. text = cnt.getText('text')
  11. # print(text[126:])
  12. def search_text(page, word):
  13. found = 0
  14. wlist = page.getTextWords()
  15. for w in wlist:
  16. if word in w[4]:
  17. head = w
  18. found += 1
  19. return found, wlist.index(head)
  20. # print(search_text(cnt, 'Ln'))