Committed script

Committed script, requirements file
master
zacc806 2023-08-07 18:27:38 +06:00
parent 5114acdda6
commit 82fd443cb2
2 changed files with 28 additions and 0 deletions

9
requirements.txt Normal file
View File

@@ -0,0 +1,9 @@
cffi==1.15.1
charset-normalizer==3.2.0
cryptography==41.0.3
packaging==23.1
pdf2image==1.16.3
pdfminer.six==20221105
Pillow==10.0.0
pycparser==2.21
pytesseract==0.3.10

19
some.py Normal file
View File

@@ -0,0 +1,19 @@
from pdfminer.high_level import extract_text
import pytesseract
from pdf2image import convert_from_path
# Extract the text of a PDF, falling back to OCR when direct extraction
# produces no usable text.
file_path = 'Исх. № 0145-07-23 от 13.07.2023г. битум ГПК.pdf'

# First attempt: direct text extraction via pdfminer.
text = extract_text(file_path)
print(text)

# Fall back to OCR when extraction yields nothing but whitespace.
# `not text.strip()` is used instead of `text.isspace()` because
# ''.isspace() is False, which would skip OCR for a completely empty result.
if not text.strip():
    # Convert the PDF to a series of images (one per page).
    images = convert_from_path(file_path)
    # Extract text from each image and concatenate the results.
    # 'rus' is for Russian. 'eng' is for English.
    all_text = "".join(
        pytesseract.image_to_string(img, lang='rus+eng') for img in images
    )
    print(all_text)