python sklearn 垂直分割投影法分割svm训练文字识别

流程：颜色反转（白色字体需要）–>灰度–>二值化–>去干扰线及噪点–>切割成单个字符–>标注–>识别学习并得到模型–>使用模型识别

|--image_after_split                        // 切割后的图片
|--image_raw                                // 原图片
|--model_data                               // 训练后的模型存放
|  `--letter.pkl
|--test_img                                 // 验证图片
|--train_img                                // 用于训练的图片存放
|  |--capital                               // 大写字母
|  |--lowercase                             // 小写字母
|  |--num                                   // 数字
|--utils
|  `--deal_image.py                         // 图片处理程序
|-- requirements.txt                        // 项目依赖
|-- split_image.py                          // 图片预处理及切割
|-- test_model.py                           // 测试模型的识别效果
|-- train.py                                // 开始训练

1.1图片分割

待分割的图片都保存在 image_raw 文件夹中，程序使用垂直投影分割法将其自动切分为单个字符，并保存到 image_after_split 文件夹。

通过颜色反转，将白色字体变为黑色字体。
将图像转换为灰度图。
对灰度图进行二值化处理，将图像变为黑白。
使用噪点移除函数去除图像中的孤立噪点。
使用垂直分割投影法对图像进行分割，将单个字符分割成单独的图像。

verification_code_txsp/split_image.py


import os
import cv2
from matplotlib import pyplot as plt, cm
import numpy as np
from utils.deal_image import noise_remove_cv2, cut_vertical

"""图片的预处理及切割"""

# Show an image with a grayscale colormap
def show_gray_img(img):
    """Render ``img`` with matplotlib's gray colormap and call ``plt.show()``."""
    gray_cmap = cm.gray
    plt.imshow(img, cmap=gray_cmap)
    plt.show()

if __name__ == '__main__':
    # Use the directory that contains this script as the project root
    source_root = os.path.dirname(os.path.abspath(__file__))

    # Folder with the raw captcha images and folder for the single-character crops
    image_path = os.path.join(source_root, "image_raw")
    new_image_path = os.path.join(source_root, "image_after_split")
    # Make sure the output folder exists before anything is written into it
    os.makedirs(new_image_path, exist_ok=True)

    for file in os.listdir(image_path):
        path = os.path.join(image_path, file)

        # Sub-directories and other non-file entries cannot be decoded as images
        if not os.path.isfile(path):
            continue

        # Read the raw bytes and decode them in memory (the usual workaround for
        # cv2.imread's trouble with non-ASCII paths on Windows). An empty file
        # is treated like an undecodable one.
        raw = np.fromfile(path, dtype=np.uint8)
        img = cv2.imdecode(raw, 1) if raw.size else None
        if img is None:
            print(path, "Failed to read image.")
            continue

        # Grayscale -> binary image (pixels brighter than 140 become 255, others 0)
        im_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        _, im_inv = cv2.threshold(im_gray, 140, 255, cv2.THRESH_BINARY)

        # Remove isolated pixels / noise
        img_clear = noise_remove_cv2(im_inv, 1)

        # Uncomment to inspect the pre-processed image
        # show_gray_img(img_clear)

        # Cut the image into single characters with the vertical projection method
        img_list = cut_vertical(img_clear)

        # splitext only strips the extension, so "192.168.31.41.png" keeps its
        # full stem; split('.')[0] would reduce it to "192" and make different
        # source images overwrite each other's crops.
        base_name = os.path.splitext(file)[0]

        for t, char_img in enumerate(img_list, start=1):
            # Normalize every character crop to 15x30 pixels
            resize_img = cv2.resize(char_img, (15, 30))

            output_file = os.path.join(new_image_path, f"{base_name}_{t}.jpg")
            # Encode in memory and write the bytes ourselves, mirroring the
            # imdecode/np.fromfile read above so non-ASCII paths also work.
            ok, encoded = cv2.imencode(".jpg", resize_img)
            if not ok:
                print(output_file, "Failed to encode image.")
                continue
            encoded.tofile(output_file)

\verification_code_txsp\utils\deal_image.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-
import numpy as np


# Remove noise pixels
def noise_remove_cv2(image, k):
    """
    Whiten non-white pixels that have fewer than ``k`` dark neighbours.

    The image is scanned row by row and modified in place, so a pixel that was
    whitened earlier in the scan already counts as white for pixels visited
    later. Every pixel in the first row and the first column is set to 255.

    :param image: 2-D array of pixel values; modified in place
    :param k: minimum number of neighbours with a value below 230 that a
              non-white pixel needs in order to be kept
    :return: the same ``image`` object after cleaning
    """

    def dark_neighbours(img_obj, row, col):
        """
        Count the neighbours of (row, col) whose value is below 230.
        """
        # Slicing clips the 3x3 window at the bottom/right edges. The caller
        # never passes row == 0 or col == 0, so the start index is never negative.
        window = img_obj[row - 1:row + 2, col - 1:col + 2]
        dark = int(np.count_nonzero(window < 230))
        # The window includes the centre pixel, which is not a neighbour.
        if img_obj[row, col] < 230:
            dark -= 1
        return dark

    n_rows, n_cols = image.shape
    for r in range(n_rows):
        for c in range(n_cols):
            if r == 0 or c == 0:
                # First row / first column are always forced to white
                image[r, c] = 255
            elif image[r, c] != 255 and dark_neighbours(image, r, c) < k:
                # Too few dark neighbours: treat the pixel as noise
                image[r, c] = 255
    return image


def count_number(num_list, num):
    """
    Count how many elements of a one-dimensional sequence equal ``num``.

    :param num_list: iterable of values to scan
    :param num: value to look for
    :return: number of elements that compare equal to ``num``
    """
    return sum(1 for item in num_list if item == num)


def _as_image_sequence(img_list):
    """Normalize the accepted input forms into an iterable of 2-D arrays."""
    if isinstance(img_list, np.ndarray):
        # A single binarized image is promoted to a one-element stack
        return img_list[None] if img_list.ndim == 2 else img_list
    images = [np.asarray(img) for img in img_list]
    if images and all(img.ndim == 1 for img in images):
        # Nested Python lists describing ONE image: the elements are its rows
        return [np.asarray(img_list)]
    # A sequence of images; they may differ in size, so they are not stacked
    return images


# Cut images into characters
def cut_vertical(img_list, c_value=255):
    """
    Cut images vertically with the projection method.

    A column counts as blank when every pixel in it equals ``c_value``. Each
    image is cut at its blank columns; a piece runs from one blank column
    (or the left edge) up to, but not including, the next blank column, and is
    kept only when it is more than one column wide. A piece that reaches the
    right edge without a closing blank column is emitted too, so a character
    touching the border is not lost.

    :param img_list: a single 2-D image, a 3-D stack of images, or a sequence
                     of 2-D images (which may have different sizes)
    :param c_value: pixel value that marks background (blank) pixels
    :return: list of 2-D slices, in left-to-right order per image
    """
    r_list = []
    for img_i in _as_image_sequence(img_list):
        width = img_i.shape[1]
        # Indices of the columns made up entirely of c_value
        blank_cols = np.flatnonzero((img_i == c_value).all(axis=0)).tolist()
        star = 0
        # The image width acts as a closing boundary for the last piece
        for end in blank_cols + [width]:
            if end - star > 1:
                r_list.append(img_i[:, star:end])
            star = end
    return r_list

1.2.模型训练

标注时把字符标签用作了文件夹的名称，因此文件夹内的文件是训练数据，文件夹名就是标签。这里使用 sklearn.svm（支持向量机）算法对数据进行训练（关于 SVM 的讲解，可以参考知乎上的回答 https://www.zhihu.com/question/21094489 ）。通过 fit 进行训练后，将训练的结果保存到 pkl 文件里，其实里面都是0和1的特征值。训练结果保存到 F:\svm\verification_code_txsp\model_data 文件夹下。

遍历训练图像文件夹，将图像预处理为28x28像素的灰度图。
将处理后的图像展平为一维数组，作为训练数据。
使用支持向量机（SVM）算法进行训练，将模型保存为文件。

verification_code_txsp\train.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import cv2
import joblib
from sklearn.svm import SVC


train_set_x = []
train_set_y = []

path = "./train_img"

# Expected layout: <path>/<category>/<label>/<image file>.
# The name of the <label> folder is used as the class label of every image in it.
for category in os.listdir(path):
    category_dir = os.path.join(path, category)
    if not os.path.isdir(category_dir):
        continue  # stray files next to the category folders are ignored
    for dir_name in os.listdir(category_dir):
        label_dir = os.path.join(category_dir, dir_name)
        if not os.path.isdir(label_dir):
            continue
        for file_name in os.listdir(label_dir):
            file_path = os.path.join(label_dir, file_name)
            img1 = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE)
            if img1 is None:
                # cv2.imread signals an unreadable file by returning None;
                # passing that on to cv2.resize would abort the whole run
                print(file_path, "Failed to read image.")
                continue
            res1 = cv2.resize(img1, (28, 28))
            # Flatten the 28x28 image into a plain list of 784 pixel values
            train_set_x.append(res1.reshape(784).tolist())
            train_set_y.append(dir_name)

if not train_set_x:
    raise SystemExit(f"No training images found under {path}")

letterSVM = SVC(kernel="linear", C=1).fit(train_set_x, train_set_y)
# Persist the trained model, creating the target folder if it is missing
os.makedirs('./model_data', exist_ok=True)
joblib.dump(letterSVM, './model_data/letter.pkl')

1.3.使用训练好的模型进行识别

读取待识别的验证码图像。
进行颜色反转、灰度、二值化等预处理步骤。
使用之前训练好的模型进行识别，对每个切割后的字符进行分类。
将识别的字符拼接为验证码字符串，并打印出来。

\verification_code_txsp\test_model.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import cv2
import joblib
from matplotlib import pyplot as plt, cm
from split_image import noise_remove_cv2, cut_vertical
import numpy as np

def ocr_img(file_name, invert=True):
    """
    Recognize the characters of a captcha image with the trained SVM model.

    Pipeline: optional colour inversion -> grayscale -> binarization ->
    noise removal -> vertical projection split -> per-character prediction.
    The recognized text is printed, the pre-processed colour image is shown
    with matplotlib, and the text is returned.

    :param file_name: path of the captcha image
    :param invert: invert the colours before processing; needed for white
                   text, set to False for images that already have dark text
    :return: the recognized captcha as a string
    :raises FileNotFoundError: if the image cannot be read
    """
    # NOTE: joblib.load unpickles the file, which can execute arbitrary code.
    # Only load model files that come from a trusted source.
    clf = joblib.load('model_data/letter.pkl')

    img = cv2.imread(file_name)
    if img is None:
        # cv2.imread returns None instead of raising for a missing/unreadable file
        raise FileNotFoundError(f"Cannot read image: {file_name}")

    if invert:
        # Vectorized equivalent of visiting every pixel and storing 255 - value
        img = 255 - img

    # Grayscale, then binarize (pixels brighter than 80 become 255, others 0)
    im_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    _, im_inv = cv2.threshold(im_gray, 80, 255, cv2.THRESH_BINARY)
    # Remove isolated pixels / noise
    img_clear = noise_remove_cv2(im_inv, 1)
    # Split into single characters with the vertical projection method
    img_list = cut_vertical(img_clear)

    captcha = []
    for char_img in img_list:
        # One 28x28 character flattened into a single sample row of 784 values
        data = cv2.resize(char_img, (28, 28)).reshape(1, -1)
        captcha.append(str(clf.predict(data)[0]))

    text = "".join(captcha)
    print("the captcha is :{}".format(text))
    plt.imshow(img, cmap=cm.gray)
    plt.show()
    return text


if __name__ == '__main__':
    # Sample captcha image to recognize when the script is run directly
    sample_image = r"F:\cnn\verification_code\image_raw\192.168.31.41.png"
    ocr_img(sample_image)

参考https://blog.csdn.net/weixin_41798704/article/details/108382491

https://gitee.com/yang_yongsheng/

1.4 扩展：黑色像素转白色

import numpy as np

# Select the pixels whose three channels are all <= 100 (treated as black).
# `img` is expected to be an image array with at least three channels.
black_pixels = np.where(
    (img[:, :, 0] <= 100)
    & (img[:, :, 1] <= 100)
    & (img[:, :, 2] <= 100)
)
# Paint the selected pixels white
img[black_pixels] = [255, 255, 255]

2.水平垂直分割图片

#-*- coding: UTF-8 -*-     
import cv2
import numpy as np 
from matplotlib import pyplot as plt 
from PIL import Image,ImageDraw
import numpy as np
def first():
    """
    Find the A4 sheet in ``test.jpg`` and write the cropped sheet to ``output.jpg``.

    The image is binarized at 127 and its external contours are extracted.
    The bounding box of the contour with the largest area is taken as the
    sheet (assumption: the sheet is the largest bright region in the photo)
    and is cropped with fixed margins of 5 px top/bottom, 40 px left and
    30 px right.

    :raises FileNotFoundError: if ``test.jpg`` cannot be read
    :raises ValueError: if no contour is found or the margins leave nothing to crop
    """
    np.set_printoptions(threshold=np.inf)
    img = cv2.imread('test.jpg')
    if img is None:
        # cv2.imread returns None instead of raising for a missing/unreadable file
        raise FileNotFoundError("test.jpg could not be read")

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    ret, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
    # OpenCV 3.x returns (image, contours, hierarchy), 4.x returns
    # (contours, hierarchy); the last two items are the same in both.
    contours, hierarchy = cv2.findContours(
        binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2:]
    print(len(contours))
    if len(contours) == 0:
        raise ValueError("no contours found in test.jpg")

    # Pick the largest contour explicitly instead of whichever one happens to
    # be last in the list returned by findContours.
    x, y, w, h = cv2.boundingRect(max(contours, key=cv2.contourArea))

    newimage = img[y + 5:y + h - 5, x + 40:x + w - 30]
    if newimage.size == 0:
        raise ValueError("detected region is too small to crop with the fixed margins")
    cv2.imwrite("output.jpg", newimage)

def shuiping(image_data):
    """Split a grayscale PIL image into horizontal bands via its horizontal projection.

    The image is binarized (gray levels below 130 count as black) and the
    number of black pixels in every row is counted. A cut is made after each
    row where a run of non-empty rows is followed by an empty row, so a band
    spans from the previous cut (or the top edge) down to and including that
    empty row. A final run of non-empty rows that reaches the bottom edge is
    emitted as a band as well.

    Every band is cropped from the original ``image_data``, saved as
    ``shuiping/<n>.jpg`` (numbered from 1) and returned. As a visual aid the
    projection histogram is opened with ``Image.show()``.

    :param image_data: PIL image to split; ``main`` passes one converted to mode 'L'
    :return: list of cropped PIL images, ordered top to bottom
    """
    import os  # local import: this script's import block does not import os

    limit = 130
    # Lookup table for Image.point: gray levels below `limit` -> 0 (black), others -> 1
    table = [0 if level < limit else 1 for level in range(256)]
    image = np.array(image_data.point(table, '1'))
    (h, w) = image.shape

    # Horizontal projection: a[j] is the number of black pixels in row j
    a = (image == 0).sum(axis=1).tolist()

    # Redraw the array as a histogram: clear every black pixel, then paint
    # a[j] black pixels from the left edge of row j
    image[image == 0] = 255
    for j, black in enumerate(a):
        image[j, :black] = 0

    image = Image.fromarray(image)
    image.show()

    # Collect crop boxes (left, upper, right, lower). Starting at row 1 avoids
    # comparing row 0 with a[-1], which would wrap around to the last row.
    x1 = 0
    x2 = w
    list_pic = []
    top = 0
    for j in range(1, h):
        if a[j - 1] != 0 and a[j] == 0:
            list_pic.append([x1, top, x2, j + 1])
            top = j + 1
    # Content that runs to the bottom edge has no closing empty row
    if any(a[top:]):
        list_pic.append([x1, top, x2, h])

    os.makedirs("shuiping", exist_ok=True)
    pic_data = []
    for num, box in enumerate(list_pic, start=1):
        croping = image_data.crop(box)
        croping.save("shuiping/" + str(num) + '.jpg')
        pic_data.append(croping)

    print("分割完成，已保存！")
    return pic_data

def chuizhi(image_data):
    """Split each PIL image into vertical slices via its vertical projection.

    Every image is binarized (gray levels below 200 count as black) and the
    number of black pixels in every column is counted. A cut is made after
    each column where a run of non-empty columns is followed by an empty
    column; a final run of non-empty columns that reaches the right edge is
    emitted as well.

    Each slice is saved as ``chuizhi/<n>.jpg`` (numbered continuously across
    all input images). Afterwards the slice boxes are drawn onto the input
    image itself, which is saved as ``rec<k>.jpg`` for the k-th input image.

    :param image_data: iterable of PIL images, e.g. the list returned by ``shuiping``
    """
    import os  # local import: this script's import block does not import os

    os.makedirs("chuizhi", exist_ok=True)

    limit = 200
    # Lookup table for Image.point: gray levels below `limit` -> 0 (black), others -> 1
    table = [0 if level < limit else 1 for level in range(256)]

    num = 1
    for count, pic in enumerate(image_data, start=1):
        image = np.array(pic.point(table, '1'))
        (h, w) = image.shape
        print(h, w)

        # Vertical projection: b[i] is the number of black pixels in column i
        b = (image == 0).sum(axis=0).tolist()

        # Collect boxes (left, upper, right, lower). Starting at column 1
        # avoids comparing column 0 with b[-1], which would wrap around to
        # the last column.
        # NOTE(review): lower is h - 1 as in the original code, so the crop
        # leaves out the last pixel row -- confirm whether that is intended.
        boxes = []
        left = 0
        for i in range(1, w):
            if b[i - 1] != 0 and b[i] == 0:
                boxes.append([left, 0, i + 1, h - 1])
                left = i + 1
        # Content that runs to the right edge has no closing empty column
        if any(b[left:]):
            boxes.append([left, 0, w, h - 1])

        # Save the crops first so the rectangles drawn below are not part of them
        for box in boxes:
            croping = pic.crop(box)
            croping.save("chuizhi/" + str(num) + '.jpg')
            num += 1

        if boxes:
            draw = ImageDraw.Draw(pic)
            for box in boxes:
                draw.rectangle(box)

        # pic.show()
        pic.save("rec" + str(count) + ".jpg")

    print("分割完成，已保存！")


def main(img_path=r"G:\hv\scree2022-03-27 105946.png"):
    """Split the image at ``img_path`` into horizontal bands, then into vertical slices.

    :param img_path: path of the image to process; defaults to the sample
                     screenshot path used by the original script
    """
    # first() can be called here beforehand to crop the sheet out of test.jpg
    image_data = Image.open(img_path).convert('L')
    # image_data.show()
    pic = shuiping(image_data)
    chuizhi(pic)


# Only run when executed as a script, so importing this module has no side effects
if __name__ == "__main__":
    main()

https://www.pythonheidong.com/blog/article/635486/2e268e976e8a319c9cc1/

https://gitee.com/yang_yongsheng/image_raw.git

python sklearn 垂直分割投影法分割svm训练文字识别

评论内容 *

评论列表 (0 条评论)

文章目录