步骤1:
根据您的操作系统安装 Tesseract。最新的安装程序可以在 https://github.com/UB-Mannheim/tesseract/wiki 找到。
步骤2:使用以下命令安装所需的依赖库:
pip install pytesseract
pip install opencv-python
pip install numpy
步骤3:示例代码
import cv2
import numpy as np
import pytesseract
from PIL import Image
from pytesseract import image_to_string
# Working folder: the input image is read from here and the intermediate
# images produced during preprocessing are written here as well.
src_path = "C:\\Users\\<user>\\PycharmProjects\\ImageToText\\input\\"

# Point pytesseract at the Tesseract executable. This is presumably only
# needed when the executable is not on PATH; include the following:
pytesseract.pytesseract.tesseract_cmd = 'C:/Program Files (x86)/Tesseract-OCR/tesseract'

# NOTE(review): this only binds a Python name; nothing in this file reads it.
# If the intent is to tell Tesseract where its data files live, it probably
# has to be exported as an environment variable instead -- confirm.
TESSDATA_PREFIX = 'C:/Program Files (x86)/Tesseract-OCR'
def get_string(img_path):
    """Run OCR on the image at *img_path* and return the recognized text.

    The image is loaded with OpenCV, converted to grayscale, passed through
    a dilate/erode pair, and written into ``src_path`` as
    ``removed_noise.png`` and ``thres.png``. The latter file is then opened
    with PIL and handed to Tesseract.

    Args:
        img_path: Path of the image file to read.

    Returns:
        The value returned by ``pytesseract.image_to_string``.

    Raises:
        OSError: If OpenCV could not read or decode ``img_path``.
    """
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread reports failure by returning None rather than raising;
        # fail here with a clear message instead of deep inside cvtColor.
        raise OSError("Could not read image: {}".format(img_path))

    # Convert to grayscale.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Dilate then erode with the same kernel. NOTE(review): with a 1x1
    # kernel these two operations should leave the image unchanged; a
    # larger kernel is presumably needed for real noise removal -- confirm.
    kernel = np.ones((1, 1), np.uint8)
    img = cv2.dilate(img, kernel, iterations=1)
    img = cv2.erode(img, kernel, iterations=1)

    # Save the intermediate image for inspection.
    cv2.imwrite(src_path + "removed_noise.png", img)

    # No thresholding is applied between the two writes, so this file has
    # the same content as removed_noise.png.
    cv2.imwrite(src_path + "thres.png", img)

    # Close the PIL file handle as soon as recognition is finished.
    with Image.open(src_path + "thres.png") as ocr_input:
        result = pytesseract.image_to_string(ocr_input)
    return result
# Script driver: announce the run, OCR the sample image found in src_path,
# print whatever text was recognized, then report completion.
print('--- Start recognize text from image ---')
input_image = src_path + "image.png"
recognized_text = get_string(input_image)
print(recognized_text)
print("------ Done -------")