1234567891011121314151617181920212223242526 |
- # coding:utf-8
- import os
- import re
- import fitz
# Open the invoice PDF and load its first page (page index 0).
file = fitz.open('MOZAIK CONSULTING INV0001145.PDF')
cnt = file.loadPage(0)

# First hit that searchFor() reports for the string 'Ln'.
# Raises IndexError when the page contains no such hit.
head = cnt.searchFor('Ln')[0]

# Keep the word entries whose first four items, wrapped in a fitz.Rect,
# test as `in head`; then print item 4 of the first kept entry
# (presumably the word's text -- confirm against the PyMuPDF word format).
my_word = list(
    filter(lambda entry: fitz.Rect(entry[:4]) in head, cnt.getTextWords())
)
print(my_word[0][4])

# Plain-text extraction of the whole page.
text = cnt.getText('text')
# Debug aid: print(text[126:]) shows the text from character offset 126 on.
def search_text(page, word):
    """Count the word entries on *page* that contain *word*.

    Args:
        page: Object exposing ``getTextWords()``, which returns a sequence
            of word entries; item 4 of each entry is the string that is
            searched.
        word: Substring to look for in item 4 of every entry.

    Returns:
        A ``(found, index)`` tuple: ``found`` is the number of matching
        entries and ``index`` is the position of the last matching entry
        in the list returned by ``getTextWords()``. When nothing matches
        the result is ``(0, None)``.
    """
    found = 0
    # Track the position while scanning so that no second pass over the
    # list is needed, and so that "no match" has a well-defined result
    # instead of failing on an unbound local.
    last_index = None
    for position, entry in enumerate(page.getTextWords()):
        if word in entry[4]:
            last_index = position
            found += 1
    return found, last_index
- # print(search_text(cnt, 'Ln'))
|