提取文本OpenCV

148

我试图在图像中找到文本的边界框，并且目前正在使用这种方法：

// calculate the local variances of the grayscale image
Mat t_mean, t_mean_2;
Mat grayF;
outImg_gray.convertTo(grayF, CV_32F);
int winSize = 35;
blur(grayF, t_mean, cv::Size(winSize,winSize));
blur(grayF.mul(grayF), t_mean_2, cv::Size(winSize,winSize));
Mat varMat = t_mean_2 - t_mean.mul(t_mean);
varMat.convertTo(varMat, CV_8U);

// threshold the high variance regions
Mat varMatRegions = varMat > 100;

当给出这样的图像时：

在此处输入图片说明

然后当我显示varMatRegions我得到此图像：

在此处输入图片说明

如您所见，它在某种程度上将文本的左侧部分与卡的标题结合在一起，对于大多数卡而言，此方法效果很好，但在繁忙的卡上会引起问题。

这些轮廓连接不好的原因是，它使轮廓的边界框几乎占据了整个卡。

谁能建议我以其他方式找到文本以确保正确检测文本？

凡在这两张卡片上方都能找到文字的人，可获得200分。

在此处输入图片说明

— 夹
source

1

我在这里看到的最简单方法是......让这些区域之前增加对比度

— 帕维尔Stawarz

3

很酷的问题。感谢您发布并托管赏金，以确保获得如此有趣的回复。

— 杰夫2014年

编程新手。是否可以用除梵语等英语以外的其他文字处理相同的内容？

— Vamshi Krishna

127

您可以通过查找边缘元素（来自LPD）来检测文本：

#include "opencv2/opencv.hpp"

std::vector<cv::Rect> detectLetters(cv::Mat img)
{
    std::vector<cv::Rect> boundRect;
    cv::Mat img_gray, img_sobel, img_threshold, element;
    cvtColor(img, img_gray, CV_BGR2GRAY);
    cv::Sobel(img_gray, img_sobel, CV_8U, 1, 0, 3, 1, 0, cv::BORDER_DEFAULT);
    cv::threshold(img_sobel, img_threshold, 0, 255, CV_THRESH_OTSU+CV_THRESH_BINARY);
    element = getStructuringElement(cv::MORPH_RECT, cv::Size(17, 3) );
    cv::morphologyEx(img_threshold, img_threshold, CV_MOP_CLOSE, element); //Does the trick
    std::vector< std::vector< cv::Point> > contours;
    cv::findContours(img_threshold, contours, 0, 1); 
    std::vector<std::vector<cv::Point> > contours_poly( contours.size() );
    for( int i = 0; i < contours.size(); i++ )
        if (contours[i].size()>100)
        { 
            cv::approxPolyDP( cv::Mat(contours[i]), contours_poly[i], 3, true );
            cv::Rect appRect( boundingRect( cv::Mat(contours_poly[i]) ));
            if (appRect.width>appRect.height) 
                boundRect.push_back(appRect);
        }
    return boundRect;
}

用法：

int main(int argc,char** argv)
{
    //Read
    cv::Mat img1=cv::imread("side_1.jpg");
    cv::Mat img2=cv::imread("side_2.jpg");
    //Detect
    std::vector<cv::Rect> letterBBoxes1=detectLetters(img1);
    std::vector<cv::Rect> letterBBoxes2=detectLetters(img2);
    //Display
    for(int i=0; i< letterBBoxes1.size(); i++)
        cv::rectangle(img1,letterBBoxes1[i],cv::Scalar(0,255,0),3,8,0);
    cv::imwrite( "imgOut1.jpg", img1);  
    for(int i=0; i< letterBBoxes2.size(); i++)
        cv::rectangle(img2,letterBBoxes2[i],cv::Scalar(0,255,0),3,8,0);
    cv::imwrite( "imgOut2.jpg", img2);  
    return 0;
}

结果：

一个。element = getStructuringElement（cv :: MORPH_RECT，cv :: Size（17，3））; imgOut1 imgOut2

b。元素= getStructuringElement（cv :: MORPH_RECT，cv :: Size（30，30））; imgOut1 imgOut2

对于提到的其他图像，结果相似。

— 洛娃·比尔
source

6

车牌检测器。

— LovaBill 2014年

2

对于某些卡片，边框不会包含所有文本，例如半个字母被截断。例如这张卡：i.imgur.com/tX3XrwH.jpg 如何扩展每个边界框的高度和宽度n？感谢您的解决方案，效果很好！

— 剪辑

4

说cv::Rect a;。由n放大a.x-=n/2;a.y-=n/2;a.width+=n;a.height+=n;。

— LovaBill 2014年

2

嗨，我如何使用python cv2获得相同的结果？

— dnth 2015年

3

书。代码。

— LovaBill '16

128

我在下面的程序中使用了基于梯度的方法。添加了结果图像。请注意，我正在使用图像的缩小版本进行处理。

C ++版本

The MIT License (MIT)

Copyright (c) 2014 Dhanushka Dangampola

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

#include "stdafx.h"

#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <iostream>

using namespace cv;
using namespace std;

#define INPUT_FILE              "1.jpg"
#define OUTPUT_FOLDER_PATH      string("")

int _tmain(int argc, _TCHAR* argv[])
{
    Mat large = imread(INPUT_FILE);
    Mat rgb;
    // downsample and use it for processing
    pyrDown(large, rgb);
    Mat small;
    cvtColor(rgb, small, CV_BGR2GRAY);
    // morphological gradient
    Mat grad;
    Mat morphKernel = getStructuringElement(MORPH_ELLIPSE, Size(3, 3));
    morphologyEx(small, grad, MORPH_GRADIENT, morphKernel);
    // binarize
    Mat bw;
    threshold(grad, bw, 0.0, 255.0, THRESH_BINARY | THRESH_OTSU);
    // connect horizontally oriented regions
    Mat connected;
    morphKernel = getStructuringElement(MORPH_RECT, Size(9, 1));
    morphologyEx(bw, connected, MORPH_CLOSE, morphKernel);
    // find contours
    Mat mask = Mat::zeros(bw.size(), CV_8UC1);
    vector<vector<Point>> contours;
    vector<Vec4i> hierarchy;
    findContours(connected, contours, hierarchy, CV_RETR_CCOMP, CV_CHAIN_APPROX_SIMPLE, Point(0, 0));
    // filter contours
    for(int idx = 0; idx >= 0; idx = hierarchy[idx][0])
    {
        Rect rect = boundingRect(contours[idx]);
        Mat maskROI(mask, rect);
        maskROI = Scalar(0, 0, 0);
        // fill the contour
        drawContours(mask, contours, idx, Scalar(255, 255, 255), CV_FILLED);
        // ratio of non-zero pixels in the filled region
        double r = (double)countNonZero(maskROI)/(rect.width*rect.height);

        if (r > .45 /* assume at least 45% of the area is filled if it contains text */
            && 
            (rect.height > 8 && rect.width > 8) /* constraints on region size */
            /* these two conditions alone are not very robust. better to use something 
            like the number of significant peaks in a horizontal projection as a third condition */
            )
        {
            rectangle(rgb, rect, Scalar(0, 255, 0), 2);
        }
    }
    imwrite(OUTPUT_FOLDER_PATH + string("rgb.jpg"), rgb);

    return 0;
}

python版本

The MIT License (MIT)

Copyright (c) 2017 Dhanushka Dangampola

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

import cv2
import numpy as np

large = cv2.imread('1.jpg')
rgb = cv2.pyrDown(large)
small = cv2.cvtColor(rgb, cv2.COLOR_BGR2GRAY)

kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
grad = cv2.morphologyEx(small, cv2.MORPH_GRADIENT, kernel)

_, bw = cv2.threshold(grad, 0.0, 255.0, cv2.THRESH_BINARY | cv2.THRESH_OTSU)

kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (9, 1))
connected = cv2.morphologyEx(bw, cv2.MORPH_CLOSE, kernel)
# using RETR_EXTERNAL instead of RETR_CCOMP
contours, hierarchy = cv2.findContours(connected.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
#For opencv 3+ comment the previous line and uncomment the following line
#_, contours, hierarchy = cv2.findContours(connected.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)

mask = np.zeros(bw.shape, dtype=np.uint8)

for idx in range(len(contours)):
    x, y, w, h = cv2.boundingRect(contours[idx])
    mask[y:y+h, x:x+w] = 0
    cv2.drawContours(mask, contours, idx, (255, 255, 255), -1)
    r = float(cv2.countNonZero(mask[y:y+h, x:x+w])) / (w * h)

    if r > 0.45 and w > 8 and h > 8:
        cv2.rectangle(rgb, (x, y), (x+w-1, y+h-1), (0, 255, 0), 2)

cv2.imshow('rects', rgb)

在此处输入图片说明

— 达努什卡
source

3

我只是看看他的方法。我看到的主要区别是他使用的是Sobel滤波器，而我使用的是形态梯度滤波器。我认为形态过滤器和下采样会使大部分不太坚固的边缘变平。索贝尔可能会听到更多的声音。

— dhanushka

1

@ascenator将OTSU与阈值类型结合使用时，它将使用Otsu的阈值而不是指定的阈值。看这里。

— dhanushka '16

1

@VishnuJayanand您只需要对进行缩放即可rect。还有一个pyrdown，所以乘x, y, width, height的rect由4

— dhanushka

1

您能否为我们提供第三个条件：水平投影或至少一些引线中的显着峰数。

— ISlimani

2

@DforTye取填充轮廓的水平投影（cv :: reduce），然后对其设定阈值（例如，使用均值或中值高度）。如果可视化此结果，它将看起来像条形码。我认为当时我正在考虑计算小节的数量，并对其施加阈值。现在，我认为，如果该区域足够干净，我们可以将其馈送到OCR并获得每个检测到的字符的置信度，以确保该区域包含文本，这也可能会有所帮助。

— dhanushka

51

这是我用来检测文本块的另一种方法：

将图像转换为灰度
应用的阈值（简单的二进制阈值，以150的精选值作为阈值）
应用膨胀来加粗图像中的线条，从而使对象更紧凑，空白碎片更少。对迭代次数使用了较高的值，因此扩展非常繁琐（13次迭代，也为获得最佳结果而精心挑选）。
使用opencv findContours函数识别结果图像中对象的轮廓。
绘制一个包围每个轮廓对象的边界框（矩形）-每个框都构成一个文本块。
给定大小，可以选择丢弃不大可能成为您要搜索的对象的区域（例如文本块），因为上述算法还可以找到相交或嵌套的对象（例如第一张卡片的整个顶部区域），其中一些可能是对您的目的无趣。

下面是用pyopencv用python编写的代码，应该很容易移植到C ++。

import cv2

image = cv2.imread("card.png")
gray = cv2.cvtColor(image,cv2.COLOR_BGR2GRAY) # grayscale
_,thresh = cv2.threshold(gray,150,255,cv2.THRESH_BINARY_INV) # threshold
kernel = cv2.getStructuringElement(cv2.MORPH_CROSS,(3,3))
dilated = cv2.dilate(thresh,kernel,iterations = 13) # dilate
_, contours, hierarchy = cv2.findContours(dilated,cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_NONE) # get contours

# for each contour found, draw a rectangle around it on original image
for contour in contours:
    # get rectangle bounding contour
    [x,y,w,h] = cv2.boundingRect(contour)

    # discard areas that are too large
    if h>300 and w>300:
        continue

    # discard areas that are too small
    if h<40 or w<40:
        continue

    # draw rectangle around contour on original image
    cv2.rectangle(image,(x,y),(x+w,y+h),(255,0,255),2)

# write original image with added contours to disk  
cv2.imwrite("contoured.jpg", image)

原始图片是您帖子中的第一张图片。

经过预处理（灰度，阈值和扩张-在步骤3之后），图像如下所示：

图像放大

下面是结果图像（最后一行中的“ contoured.jpg”）；图像中对象的最终边界框如下所示：

在此处输入图片说明

您可以看到左侧的文本块被检测为一个单独的块，与其周围的区域分隔开来。

使用具有相同参数的相同脚本（除了阈值类型已针对第二张图像进行了更改，如下所述），这是其他两张卡的结果：

在此处输入图片说明

调整参数

针对该图像和该任务（查找文本块）优化了参数（阈值，膨胀参数），并且可以根据需要针对其他卡片图像或其他类型的对象进行调整。

对于阈值化（步骤2），我使用了黑色阈值。对于文本比背景浅的图像（例如帖子中的第二张图像），应使用白色阈值，因此将保留类型替换为cv2.THRESH_BINARY）。对于第二张图像，我还使用了稍高的阈值（180）。改变阈值的参数和用于扩张的迭代次数将导致在界定图像中的物体时具有不同程度的灵敏度。

查找其他对象类型：

例如，将第一个图像中的膨胀减少到5次迭代可以使我们对图像中的对象进行更精细的划界，从而大致找到图像中的所有单词（而不是文本块）：

在此处输入图片说明

知道单词的粗略大小后，在这里我丢弃了太小（宽度或高度小于20像素）或太大（宽度或高度大于100像素）而无法忽略不太可能是单词的对象的区域，以得到结果上图。

— 安娜娜
source

2

你真厉害！我会在早上尝试。

— 剪辑

我添加了另一个步骤，丢弃不感兴趣的对象；还添加了用于识别单词或其他类型的对象（而不是文本块）的

— 示例

感谢您提供详细的答案，但是我在中遇到错误cv2.findContours。它说ValueError: too many values to unpack。

— 阿比吉斯（Abhijith）'17

1

问题是该函数cv2.findContours返回3个参数，和原来的代码仅捕获2

— 阿济斯

@Abhijith cv2在第二个版本中返回了两个参数，但是现在，在第三个版本中，它返回了3

— Tomasz Giba，

27

@dhanushka的方法显示出最大的希望，但我想在Python中玩转，所以继续进行翻译以求有趣：

import cv2
import numpy as np
from cv2 import boundingRect, countNonZero, cvtColor, drawContours, findContours, getStructuringElement, imread, morphologyEx, pyrDown, rectangle, threshold

large = imread(image_path)
# downsample and use it for processing
rgb = pyrDown(large)
# apply grayscale
small = cvtColor(rgb, cv2.COLOR_BGR2GRAY)
# morphological gradient
morph_kernel = getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
grad = morphologyEx(small, cv2.MORPH_GRADIENT, morph_kernel)
# binarize
_, bw = threshold(src=grad, thresh=0, maxval=255, type=cv2.THRESH_BINARY+cv2.THRESH_OTSU)
morph_kernel = getStructuringElement(cv2.MORPH_RECT, (9, 1))
# connect horizontally oriented regions
connected = morphologyEx(bw, cv2.MORPH_CLOSE, morph_kernel)
mask = np.zeros(bw.shape, np.uint8)
# find contours
im2, contours, hierarchy = findContours(connected, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)
# filter contours
for idx in range(0, len(hierarchy[0])):
    rect = x, y, rect_width, rect_height = boundingRect(contours[idx])
    # fill the contour
    mask = drawContours(mask, contours, idx, (255, 255, 2555), cv2.FILLED)
    # ratio of non-zero pixels in the filled region
    r = float(countNonZero(mask)) / (rect_width * rect_height)
    if r > 0.45 and rect_height > 8 and rect_width > 8:
        rgb = rectangle(rgb, (x, y+rect_height), (x+rect_width, y), (0,255,0),3)

现在显示图像：

from PIL import Image
Image.fromarray(rgb).show()

不是最Python的脚本，但我试图尽可能地类似于原始C ++代码，以使读者可以遵循。

它的工作原理和原始的差不多。我很高兴阅读有关如何对其进行改进/修复以完全类似于原始结果的建议。

— rtkaleta
source

3

感谢您提供python版本。许多人会发现这很有用。+1

— dhanushka

填充轮廓和绘制轮廓有什么区别？我在这里找到了没有填充阶段的代码：stackoverflow.com/a/23556997/6837132

— SarahData

@SarahM我不知道您是否在询问绘图和填充（我认为是显而易见的）之间的通用区别还是专门针对OpenCV API？如果是后者则见文档对于drawContours该状态“的函数绘制轮廓的轮廓图像中，如果厚度> 0或填充<0.如果厚度由轮廓所包围的区域” 完成后，我们可以检查非零像素的比率，以确定该框是否可能包含文本。

— rtkaleta

15

您可以尝试由Chucai Yi和Tian Yingli开发的这种方法。

他们还共享可以使用的软件（基于Opencv-1.0，应在Windows平台上运行。）（尽管没有可用的源代码）。它将在图像中生成所有文本边界框（以彩色阴影显示）。通过将样本图像应用于图像，您将获得以下结果：

注意：为使结果更可靠，可以将相邻的框进一步合并在一起。

更新：如果您的最终目标是识别图像中的文本，则可以进一步检出gttext，它是OCR免费软件和用于带文本的彩色图像的地面处理工具。源代码也可用。

这样，您可以获得诸如以下的公认文本：

— 英雄胡桃
source

gttext用于Windows。对Mac / Linux用户的任何建议

— Saghir A. Khatri

5

上面的代码JAVA版本：谢谢@William

public static List<Rect> detectLetters(Mat img){    
    List<Rect> boundRect=new ArrayList<>();

    Mat img_gray =new Mat(), img_sobel=new Mat(), img_threshold=new Mat(), element=new Mat();
    Imgproc.cvtColor(img, img_gray, Imgproc.COLOR_RGB2GRAY);
    Imgproc.Sobel(img_gray, img_sobel, CvType.CV_8U, 1, 0, 3, 1, 0, Core.BORDER_DEFAULT);
    //at src, Mat dst, double thresh, double maxval, int type
    Imgproc.threshold(img_sobel, img_threshold, 0, 255, 8);
    element=Imgproc.getStructuringElement(Imgproc.MORPH_RECT, new Size(15,5));
    Imgproc.morphologyEx(img_threshold, img_threshold, Imgproc.MORPH_CLOSE, element);
    List<MatOfPoint> contours = new ArrayList<MatOfPoint>();
    Mat hierarchy = new Mat();
    Imgproc.findContours(img_threshold, contours,hierarchy, 0, 1);

    List<MatOfPoint> contours_poly = new ArrayList<MatOfPoint>(contours.size());

     for( int i = 0; i < contours.size(); i++ ){             

         MatOfPoint2f  mMOP2f1=new MatOfPoint2f();
         MatOfPoint2f  mMOP2f2=new MatOfPoint2f();

         contours.get(i).convertTo(mMOP2f1, CvType.CV_32FC2);
         Imgproc.approxPolyDP(mMOP2f1, mMOP2f2, 2, true); 
         mMOP2f2.convertTo(contours.get(i), CvType.CV_32S);


            Rect appRect = Imgproc.boundingRect(contours.get(i));
            if (appRect.width>appRect.height) {
                boundRect.add(appRect);
            }
     }

    return boundRect;
}

并在实践中使用此代码：

        System.loadLibrary(Core.NATIVE_LIBRARY_NAME);
        Mat img1=Imgcodecs.imread("abc.png");
        List<Rect> letterBBoxes1=Utils.detectLetters(img1);

        for(int i=0; i< letterBBoxes1.size(); i++)
            Imgproc.rectangle(img1,letterBBoxes1.get(i).br(), letterBBoxes1.get(i).tl(),new Scalar(0,255,0),3,8,0);         
        Imgcodecs.imwrite("abc1.png", img1);

— 法里兹·阿加耶夫（Fariz Agayev）
source

2

@dhanushka解决方案的Python实现：

def process_rgb(rgb):
    hasText = False
    gray = cv2.cvtColor(rgb, cv2.COLOR_BGR2GRAY)
    morphKernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3,3))
    grad = cv2.morphologyEx(gray, cv2.MORPH_GRADIENT, morphKernel)
    # binarize
    _, bw = cv2.threshold(grad, 0.0, 255.0, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
    # connect horizontally oriented regions
    morphKernel = cv2.getStructuringElement(cv2.MORPH_RECT, (9, 1))
    connected = cv2.morphologyEx(bw, cv2.MORPH_CLOSE, morphKernel)
    # find contours
    mask = np.zeros(bw.shape[:2], dtype="uint8")
    _,contours, hierarchy = cv2.findContours(connected, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)
    # filter contours
    idx = 0
    while idx >= 0:
        x,y,w,h = cv2.boundingRect(contours[idx])
        # fill the contour
        cv2.drawContours(mask, contours, idx, (255, 255, 255), cv2.FILLED)
        # ratio of non-zero pixels in the filled region
        r = cv2.contourArea(contours[idx])/(w*h)
        if(r > 0.45 and h > 5 and w > 5 and w > h):
            cv2.rectangle(rgb, (x,y), (x+w,y+h), (0, 255, 0), 2)
            hasText = True
        idx = hierarchy[0][idx][0]
    return hasText, rgb

— 阿卡什（Akash Budhia）
source

为什么要用口罩？

— SarahData'9

1

重复的答案。如果您对stackoverflow.com/a/43283990/6809909上的对话做出了贡献，它会更有用。

— rtkaleta

2

这是dhanushka使用OpenCVSharp的答案的C＃版本

        Mat large = new Mat(INPUT_FILE);
        Mat rgb = new Mat(), small = new Mat(), grad = new Mat(), bw = new Mat(), connected = new Mat();

        // downsample and use it for processing
        Cv2.PyrDown(large, rgb);
        Cv2.CvtColor(rgb, small, ColorConversionCodes.BGR2GRAY);

        // morphological gradient
        var morphKernel = Cv2.GetStructuringElement(MorphShapes.Ellipse, new OpenCvSharp.Size(3, 3));
        Cv2.MorphologyEx(small, grad, MorphTypes.Gradient, morphKernel);

        // binarize
        Cv2.Threshold(grad, bw, 0, 255, ThresholdTypes.Binary | ThresholdTypes.Otsu);

        // connect horizontally oriented regions
        morphKernel = Cv2.GetStructuringElement(MorphShapes.Rect, new OpenCvSharp.Size(9, 1));
        Cv2.MorphologyEx(bw, connected, MorphTypes.Close, morphKernel);

        // find contours
        var mask = new Mat(Mat.Zeros(bw.Size(), MatType.CV_8UC1), Range.All);
        Cv2.FindContours(connected, out OpenCvSharp.Point[][] contours, out HierarchyIndex[] hierarchy, RetrievalModes.CComp, ContourApproximationModes.ApproxSimple, new OpenCvSharp.Point(0, 0));

        // filter contours
        var idx = 0;
        foreach (var hierarchyItem in hierarchy)
        {
            idx = hierarchyItem.Next;
            if (idx < 0)
                break;
            OpenCvSharp.Rect rect = Cv2.BoundingRect(contours[idx]);
            var maskROI = new Mat(mask, rect);
            maskROI.SetTo(new Scalar(0, 0, 0));

            // fill the contour
            Cv2.DrawContours(mask, contours, idx, Scalar.White, -1);

            // ratio of non-zero pixels in the filled region
            double r = (double)Cv2.CountNonZero(maskROI) / (rect.Width * rect.Height);
            if (r > .45 /* assume at least 45% of the area is filled if it contains text */
                 &&
            (rect.Height > 8 && rect.Width > 8) /* constraints on region size */
            /* these two conditions alone are not very robust. better to use something 
            like the number of significant peaks in a horizontal projection as a third condition */
            )
            {
                Cv2.Rectangle(rgb, rect, new Scalar(0, 255, 0), 2);
            }
        }

        rgb.SaveImage(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "rgb.jpg"));

— DiomedesDomínguez
source

0

这是dhanushka使用EmguCV的答案的VB.NET版本。

与带有OpenCVSharp的C＃版本相比，EmguCV中的一些功能和结构需要不同的考虑

Imports Emgu.CV
Imports Emgu.CV.Structure
Imports Emgu.CV.CvEnum
Imports Emgu.CV.Util

        Dim input_file As String = "C:\your_input_image.png"
        Dim large As Mat = New Mat(input_file)
        Dim rgb As New Mat
        Dim small As New Mat
        Dim grad As New Mat
        Dim bw As New Mat
        Dim connected As New Mat
        Dim morphanchor As New Point(0, 0)

        '//downsample and use it for processing
        CvInvoke.PyrDown(large, rgb)
        CvInvoke.CvtColor(rgb, small, ColorConversion.Bgr2Gray)

        '//morphological gradient
        Dim morphKernel As Mat = CvInvoke.GetStructuringElement(ElementShape.Ellipse, New Size(3, 3), morphanchor)
        CvInvoke.MorphologyEx(small, grad, MorphOp.Gradient, morphKernel, New Point(0, 0), 1, BorderType.Isolated, New MCvScalar(0))

        '// binarize
        CvInvoke.Threshold(grad, bw, 0, 255, ThresholdType.Binary Or ThresholdType.Otsu)

        '// connect horizontally oriented regions
        morphKernel = CvInvoke.GetStructuringElement(ElementShape.Rectangle, New Size(9, 1), morphanchor)
        CvInvoke.MorphologyEx(bw, connected, MorphOp.Close, morphKernel, morphanchor, 1, BorderType.Isolated, New MCvScalar(0))

        '// find contours
        Dim mask As Mat = Mat.Zeros(bw.Size.Height, bw.Size.Width, DepthType.Cv8U, 1)  '' MatType.CV_8UC1
        Dim contours As New VectorOfVectorOfPoint
        Dim hierarchy As New Mat

        CvInvoke.FindContours(connected, contours, hierarchy, RetrType.Ccomp, ChainApproxMethod.ChainApproxSimple, Nothing)

        '// filter contours
        Dim idx As Integer
        Dim rect As Rectangle
        Dim maskROI As Mat
        Dim r As Double
        For Each hierarchyItem In hierarchy.GetData
            rect = CvInvoke.BoundingRectangle(contours(idx))
            maskROI = New Mat(mask, rect)
            maskROI.SetTo(New MCvScalar(0, 0, 0))

            '// fill the contour
            CvInvoke.DrawContours(mask, contours, idx, New MCvScalar(255), -1)

            '// ratio of non-zero pixels in the filled region
            r = CvInvoke.CountNonZero(maskROI) / (rect.Width * rect.Height)

            '/* assume at least 45% of the area Is filled if it contains text */
            '/* constraints on region size */
            '/* these two conditions alone are Not very robust. better to use something 
            'Like the number of significant peaks in a horizontal projection as a third condition */
            If r > 0.45 AndAlso rect.Height > 8 AndAlso rect.Width > 8 Then
                'draw green rectangle
                CvInvoke.Rectangle(rgb, rect, New MCvScalar(0, 255, 0), 2)
            End If
            idx += 1
        Next
        rgb.Save(IO.Path.Combine(Application.StartupPath, "rgb.jpg"))

— 哈坎·乌萨克利（Hakan Usakli）
source