各位用户为了找寻关于python入门教程之识别验证码的资料费尽了周折。这里教程网为您整理了关于python入门教程之识别验证码的相关资料,仅供查阅。以下为您介绍关于python入门教程之识别验证码的详细内容。
前言
验证码?我也能破解?
关于验证码的介绍就不多说了,各种各样的验证码在人们生活中时不时就会冒出来,身为学生日常接触最多的就是教务处系统的验证码了,比如如下的验证码:
识别办法
模拟登陆有着复杂的步骤,在这里咱们不管其他操作,只负责根据输入的一张验证码图片返回一个答案字符串。
我们知道验证码为了制作干扰,会把图片弄成五颜六色的样子,而我们首先就是要去除这些干扰,这一步就需要不断试验了,增强图片色彩,加大对比度等等都可以产生帮助。
在经过各种对图片的操作之后,终于找到了比较完美的去除干扰方案。可以看到在去除干扰之后,最优情况下,我们将得到一张十分纯净的黑白字符图片。一张图片上有四个字符,没办法一下子就把四个字符全部识别,需要把图片进行裁剪,裁剪成每张小图只有一个字符的样子,再对每张图片分别进行识别。
接下来就是识别文字了,我们首先把得到的小图转换成01表示的矩阵,每个矩阵代表一个字符。
比如数字六的矩阵
# Template bitmap for the digit 6: 20 rows of 13 values, flattened row by row
# into one list of 260 entries. 1 marks a stroke pixel, 0 marks background.
num_6 = [
    0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,1,1,0,0,0,0,0,0,
    0,0,0,0,1,1,1,0,0,0,0,0,0,
    0,0,0,1,1,1,0,0,0,0,0,0,0,
    0,0,0,1,1,0,0,0,0,0,0,0,0,
    0,0,1,1,0,0,0,0,0,0,0,0,0,
    0,0,1,1,0,0,0,0,0,0,0,0,0,
    0,1,1,1,1,1,1,1,0,0,0,0,0,
    0,1,1,1,1,1,1,1,1,0,0,0,0,
    0,1,1,0,0,0,0,1,1,1,0,0,0,
    0,1,1,0,0,0,0,0,1,1,0,0,0,
    0,1,1,0,0,0,0,0,1,1,0,0,0,
    0,1,1,1,0,0,0,1,1,1,0,0,0,
    0,0,1,1,1,1,1,1,1,0,0,0,0,
    0,0,0,1,1,1,1,1,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,
]
远远望过去,眯着眼睛还是能分辨出来的。
因为验证码十分规整,每个数字所在的位置都是固定的,所以并不需要涉及什么机器学习的算法,只需简单地进行矩阵比对:在所有事先做好的矩阵中找到相似度最高的矩阵即可。这里的比对方法多种多样,反正数据简单,能正确识别出来就好。
至此,咱们的验证码识别工作就结束了。
这次进行的验证码识别主要采用python的PIL进行图片操作,模拟登陆自动填写验证码的全部代码请看这里:
示例代码
# -*- coding: utf-8 -*-
import sys

# Python 2-only idiom: switch the process-wide default str<->unicode codec to
# UTF-8 so that byte-string literals containing Chinese text can be mixed with
# unicode text returned by requests/bs4. `reload` is not a builtin on Python 3.
reload(sys)
sys.setdefaultencoding("utf-8")

import io
import json
import os
import re

import requests
from bs4 import BeautifulSoup
from PIL import Image
from PIL import ImageEnhance

# Project-local module; the code below reads its `data` attribute as the
# captcha template table.
import mdata
class Student:
    """Session-backed client for the academic-affairs site at 202.118.31.197.

    One instance wraps one ``requests.Session``. ``login`` solves the
    four-character captcha by comparing each cropped glyph against the
    templates in ``mdata.data``; the remaining methods fetch pages that need
    the logged-in session.
    """

    _BASE_URL = 'http://202.118.31.197/'
    _LOGIN_URL = "http://202.118.31.197/ACTIONLOGON.APPPROCESS?mode=4"
    _INFO_URL = 'http://202.118.31.197/ACTIONQUERYBASESTUDENTINFO.APPPROCESS?mode=3'
    _PHOTO_URL = 'http://202.118.31.197/ACTIONDSPUSERPHOTO.APPPROCESS'
    _SCORE_URL = 'http://202.118.31.197/ACTIONQUERYSTUDENTSCORE.APPPROCESS'
    _LOGOUT_URL = 'http://202.118.31.197/ACTIONLOGOUT.APPPROCESS'

    # Captcha geometry: left edge of each of the four glyphs, and glyph size.
    _GLYPH_LEFT_EDGES = (6, 19, 32, 45)
    _GLYPH_WIDTH = 13
    _GLYPH_HEIGHT = 20

    def __init__(self, user, password):
        """Store the credentials as strings and open a fresh HTTP session.

        Args:
            user: account number; converted with ``str``.
            password: account password; converted with ``str``.
        """
        self.user = str(user)
        self.password = str(password)
        self.s = requests.Session()

    @staticmethod
    def _strip_noise(image):
        """Paint every pixel that is not exactly (0, 0, 0) white, in place."""
        width, height = image.size
        for row in range(height):
            for col in range(width):
                if image.getpixel((col, row)) != (0, 0, 0):
                    image.putpixel((col, row), (255, 255, 255))

    @staticmethod
    def _glyph_bits(glyph):
        """Flatten a glyph row by row into 1 (pixel == (0, 0, 0)) or 0."""
        width, height = glyph.size
        return [
            1 if glyph.getpixel((col, row)) == (0, 0, 0) else 0
            for row in range(height)
            for col in range(width)
        ]

    @staticmethod
    def _best_match(bits, templates):
        """Return the key of the template that agrees with ``bits`` most often.

        Args:
            bits: flat sequence of 0/1 values for one glyph.
            templates: mapping of key -> flat 0/1 sequence.

        Returns:
            The first key (in the mapping's iteration order) with the highest
            count of position-wise equal entries, or ``""`` when no template
            agrees in even one position. Positions beyond the shorter of the
            two sequences are ignored.
        """
        best_key = ""
        best_score = 0
        for key in templates:
            score = sum(1 for got, want in zip(bits, templates[key]) if got == want)
            # Strict '>' keeps the earliest key on ties.
            if score > best_score:
                best_key = key
                best_score = score
        return best_key

    @staticmethod
    def _cell_texts(table):
        """Collect ``.text`` of every tag nested two levels below ``table``.

        Children that are not of the same type as ``table`` itself (i.e. not
        tags) are skipped at both levels.
        """
        tag_type = type(table)
        texts = []
        for row in table:
            if not isinstance(row, tag_type):
                continue
            for cell in row:
                if isinstance(cell, tag_type):
                    texts.append(cell.text)
        return texts

    def _read_captcha(self, page):
        """Download the captcha referenced by ``page`` and return its text."""
        captcha_src = re.findall('<img src="(.+?)" width="55"', page)[0]
        raw = self.s.get(self._BASE_URL + captcha_src).content
        captcha = Image.open(io.BytesIO(raw))
        # Heavy contrast boost first, then keep only pure-black pixels.
        captcha = ImageEnhance.Contrast(captcha).enhance(7)
        self._strip_noise(captcha)

        # NOTE(review): keys of mdata.data are joined into the answer string,
        # so they are presumably single-character strings - confirm in mdata.
        characters = []
        for left in self._GLYPH_LEFT_EDGES:
            glyph = captcha.crop(
                (left, 0, left + self._GLYPH_WIDTH, self._GLYPH_HEIGHT)
            )
            characters.append(self._best_match(self._glyph_bits(glyph), mdata.data))
        return "".join(characters)

    def login(self):
        """Fetch the login page, solve its captcha and submit the credentials.

        Returns:
            A two-item list ``[ok, message]``. ``ok`` is ``True`` when the
            response contains ``images/Logout2``; ``message`` is a status line
            that also contains the user name and password.

        Raises:
            IndexError: if the captcha ``<img>`` tag, the second ``alert``
                call (failure path) or the greeting text (success path) is
                not found in the server's response.
        """
        page = self.s.get(self._LOGIN_URL).text
        verifyCode = self._read_captcha(page)

        form = {
            'WebUserNO': self.user,
            'Password': self.password,
            'Agnomen': verifyCode,
        }
        reply = self.s.post(self._LOGIN_URL, data=form).text

        # NOTE(review): both messages end with a literal "n"; this looks like
        # a "\n" whose backslash was lost - confirm against the upstream code.
        if "images/Logout2" not in reply:
            # The pattern has two capturing groups, so findall yields tuples;
            # take the second alert, its inner group, and trim two characters
            # from each end.
            reason = re.findall('alert((.+?));', reply)[1][1][2:-2]
            message = (
                '[0,"' + reason + '"]'
                + " " + self.user + " " + self.password + "n"
            )
            return [False, message]

        greeting = re.findall('! (.+?) ', reply)[0]
        message = (
            '登录成功 ' + greeting
            + " " + self.user + " " + self.password + "n"
        )
        return [True, message]

    def getInfo(self):
        """Return the student-record page as a JSON object string.

        The first two left-aligned tables on the page are flattened into a
        list of cell texts, which is then read as alternating key, value
        entries; an unpaired trailing entry is dropped.

        Raises:
            IndexError: if the page has fewer than two left-aligned tables.
        """
        page = BeautifulSoup(self.s.get(self._INFO_URL).text, "lxml")
        tables = page.find_all("table", attrs={'align': "left"})
        texts = self._cell_texts(tables[0]) + self._cell_texts(tables[1])
        return json.dumps(dict(zip(texts[0::2], texts[1::2])))

    def getPic(self):
        """Download the user photo and return it as a PIL image."""
        raw = self.s.get(self._PHOTO_URL).content
        return Image.open(io.BytesIO(raw))

    def getScore(self):
        """Return the transcript rows as a JSON array string.

        Side effect: prints the text of the first element with
        ``height="36"``, starting at '平均学分绩点'.

        The first row of the first left-aligned table supplies the column
        names; the header row and the last row are left out of the result.

        Raises:
            IndexError: if an expected element is missing, or a data row has
                more cells than the header row.
        """
        page = BeautifulSoup(self.s.get(self._SCORE_URL).text, "lxml")

        summary = page.find_all(attrs={'height': "36"})[0].text
        print(summary[summary.find('平均学分绩点'):])

        outer = page.html.body.table
        rows = outer.find_all('table', attrs={'align': 'left'})[0].find_all('tr')
        tag_type = type(rows[0])
        columns = [cell.string for cell in rows[0] if isinstance(cell, tag_type)]

        lessons = []
        for row in rows[1:-1]:
            cells = [cell for cell in row if isinstance(cell, tag_type)]
            lessons.append(
                {columns[index]: cell.string for index, cell in enumerate(cells)}
            )
        return json.dumps(lessons)

    def logoff(self):
        """Request the logout page and return the response text."""
        return self.s.get(self._LOGOUT_URL).text
if __name__ == "__main__":
    # Demo run: log in, dump the transcript and the student record, show the
    # photo, then log off.
    student = Student(20150000, 20150000)
    ok, message = student.login()
    print(message)
    if ok:
        try:
            # One transcript row per output line, values separated by spaces.
            for lesson in json.loads(student.getScore()):
                print(" ".join("%s" % lesson[field] for field in lesson))
            info = json.loads(student.getInfo())
            for field in info:
                print("%s %s" % (field, info[field]))
            student.getPic().show()
        finally:
            # Log off even when one of the steps above raises.
            student.logoff()
总结
以上就是这篇文章的全部内容了,希望本文的内容对大家的学习或者使用python能带来一定的帮助,如果有疑问大家可以留言交流,谢谢大家对服务器之家的支持。
原文链接:http://www.cnblogs.com/xfangs/p/6500611.html