admin管理员组

文章数量:1794759

项目

项目

首先生成训练集:用 PIL 库生成 150 张验证码图片,每张图片包含 5 位随机数字,并以这 5 位数字作为文件名(即标签)保存。

from PIL import Image
from PIL import ImageDraw
from PIL import ImageFont
import os
import random


def getRandomColor():
    """Return a random colour as an ``(r, g, b)`` tuple.

    Each channel is drawn uniformly from 0..255 and a value of 255 is
    remapped to 0, so every channel of the result lies in 0..254.

    :return: tuple of three ints
    """
    channels = (random.randint(0, 255) for _ in range(3))
    return tuple(0 if value == 255 else value for value in channels)


def getRandomStr():
    """Return one random decimal digit (0-9) as a one-character string.

    :return: str of length 1
    """
    return str(random.randint(0, 9))


def generate_captcha(output_dir='captcha_images/', font_path="Lohit-Bengali.ttf"):
    """Render one 5-digit captcha and save it as ``<output_dir>/<digits>.png``.

    The image is 150x50 RGB on a white background.  Each digit gets its own
    random colour; three black lines, five random-coloured points and five
    small black arcs are drawn on top as noise.

    The file name is the label, so two captchas that happen to draw the same
    five digits overwrite each other.

    :param output_dir: directory that receives the PNG (created if missing);
        pass e.g. ``'captcha_predict/'`` to build a prediction set instead.
    :param font_path: TrueType font file handed to ``ImageFont.truetype``.
    :return: None
    """
    # White 150x50 canvas and a drawing context bound to it.
    image = Image.new('RGB', (150, 50), (255, 255, 255))
    draw = ImageDraw.Draw(image)
    font = ImageFont.truetype(font_path, size=32)

    label = ""
    for i in range(5):
        random_char = getRandomStr()
        label += random_char
        # Arguments: position, text, colour, font.  Digits are 30 px apart.
        draw.text((10 + i * 30, 0), random_char, getRandomColor(), font=font)

    # Area in which noise is scattered.
    # NOTE(review): the canvas is 50 px high but noise is limited to the top
    # 30 rows -- confirm this is intended.
    width = 150
    height = 30

    # Noise lines.
    for _ in range(3):
        x1 = random.randint(0, width)
        x2 = random.randint(0, width)
        y1 = random.randint(0, height)
        y2 = random.randint(0, height)
        draw.line((x1, y1, x2, y2), fill=(0, 0, 0))

    # Noise points, each followed by a small quarter-circle arc.
    for _ in range(5):
        draw.point(
            [random.randint(0, width), random.randint(0, height)],
            fill=getRandomColor(),
        )
        x = random.randint(0, width)
        y = random.randint(0, height)
        draw.arc((x, y, x + 4, y + 4), 0, 90, fill=(0, 0, 0))

    # Make sure the target directory exists, and close the file handle
    # deterministically instead of leaking it.
    os.makedirs(output_dir, exist_ok=True)
    target = os.path.join(output_dir, label + '.png')
    with open(target, 'wb') as fh:
        image.save(fh, 'png')


if __name__ == '__main__':
    for _ in range(150):
        generate_captcha()

对验证码进行处理:首先将图片的 RGB 值(每个通道 0~255)转为灰度值,再对灰度值二值化(灰度 ≤ 220 记为 0,即黑色;否则记为 1,即白色),得到像素只有黑白两种取值(0、1)的图片;然后对该图片降噪,并按数字位置切分成单个数字的小图,得到训练集。

from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import os


def binarizaion(path, show=True):
    """Load an image, convert it to greyscale and binarise it.

    Pixels with grey level <= 220 become 0 (black); all others become 1
    (white).

    :param path: path of the image file to read.
    :param show: when true (the default), display the binarised image with
        matplotlib; pass ``False`` for unattended batch runs.
    :return: 2-D numpy array of 0/1 values, indexed ``[row, column]``.
    """
    with Image.open(path) as img:
        img_gray = np.array(img.convert('L'))

    # Vectorised threshold; keeps the array's original integer dtype.
    img_gray = (img_gray > 220).astype(img_gray.dtype)

    if show:
        plt.figure('')
        plt.imshow(img_gray, cmap='gray')
        plt.axis('off')
        plt.show()
    return img_gray


def count_black_neighbors(img_gray, x, y):
    """Count the black (0) pixels among the 8 neighbours of ``[x, y]``.

    Neighbours that fall outside the array are ignored.  Explicit bounds
    checks are required because a numpy index of -1 does not raise -- it
    silently wraps around to the opposite edge.

    :param img_gray: 2-D array of 0/1 values.
    :param x: row index.
    :param y: column index.
    :return: number of in-bounds neighbours equal to 0.
    """
    height, width = img_gray.shape
    cnt = 0
    for dx in (-1, 0, 1):
        for dy in (-1, 0, 1):
            if dx == 0 and dy == 0:
                continue
            nx, ny = x + dx, y + dy
            if 0 <= nx < height and 0 <= ny < width and img_gray[nx, ny] == 0:
                cnt += 1
    return cnt


def noiseReduction(img_gray, label):
    """Remove isolated black pixels and save the result as a figure.

    A black pixel with fewer than 4 black neighbours is treated as noise
    and turned white.  The array is modified in place while it is being
    scanned, so pixels cleared earlier in the scan influence later counts.
    The cleaned image is rendered with matplotlib and written to
    ``clean_captcha_img/<label>.png``.

    :param img_gray: 2-D array of 0/1 values; mutated in place.
    :param label: captcha label, used as the output file name.
    :return: None
    """
    height, width = img_gray.shape
    for x in range(height):
        for y in range(width):
            # White pixels need no treatment.
            if img_gray[x, y] == 1:
                continue
            if count_black_neighbors(img_gray, x, y) < 4:
                img_gray[x, y] = 1

    fig = plt.figure('')
    plt.imshow(img_gray, cmap='gray')
    plt.axis('off')
    plt.savefig(''.join(['clean_captcha_img/', label, '.png']))
    # Release the figure so images do not pile up across a batch.
    plt.close(fig)


def cutImg(label):
    """Cut the cleaned captcha figure for ``label`` into 5 digit images.

    Each crop is saved to ``cut_number/<digit>/<seq>.png`` where ``seq`` is
    the next free sequence number in that digit's directory.

    :param label: 5-character captcha label (one digit per character).
    :return: None
    """
    with Image.open(''.join(['clean_captcha_img/', label, '.png'])) as img:
        for i in range(5):
            # The crop box is expressed in pixels of the saved matplotlib
            # figure, not of the original 150x50 captcha.
            left = 100 * (1 + i)
            pic = img.crop((left, 170, left + 100, 280))
            seq = get_save_seq(label[i])
            pic.save(''.join(['cut_number/', str(label[i]), '/', str(seq), '.png']))


def get_save_seq(num):
    """Return the next free sequence number in ``cut_number/<num>/``.

    Files are expected to be named ``<int>.<ext>``; entries whose stem is
    not a plain decimal number (hidden files, stray artefacts) are ignored.

    :param num: digit directory name, e.g. ``'3'``.
    :return: 0 when no numbered file exists, otherwise the highest number + 1.
    """
    highest = -1
    for name in os.listdir(''.join(['cut_number/', num, '/'])):
        stem = name.split('.')[0]
        if stem.isdigit():
            highest = max(highest, int(stem))
    return highest + 1


def create_dir():
    """Create ``cut_number/0`` .. ``cut_number/9``.

    Missing parents are created, and directories that already exist are
    left alone so the pipeline can be re-run.
    """
    for i in range(10):
        os.makedirs(''.join(['cut_number/', str(i)]), exist_ok=True)


def img_2_clean():
    """Binarise and denoise every file in ``captcha_images/``."""
    for captcha in os.listdir('captcha_images/'):
        label = captcha.split('.')[0]
        img_path = ''.join(['captcha_images/', captcha])
        # Binarise, then denoise (which also writes the cleaned figure).
        im = binarizaion(img_path)
        noiseReduction(im, label)


def clean_to_cut():
    """Cut every cleaned figure in ``clean_captcha_img/`` into digits."""
    for captcha in os.listdir('clean_captcha_img/'):
        label = captcha.split('.')[0]
        cutImg(label)


if __name__ == '__main__':
    img_2_clean()
    create_dir()
    clean_to_cut()

本文标签: 项目