各位用户为了找寻关于Python3中使用urllib的方法详解(header,代理,超时,认证,异常处理)的资料费了很多周折。这里教程网为您整理了关于Python3中使用urllib的方法详解(header,代理,超时,认证,异常处理)的相关资料,仅供查阅,以下为您介绍关于Python3中使用urllib的方法详解(header,代理,超时,认证,异常处理)的详细内容。
我们可以利用 urllib 抓取远程数据并保存。以下是 Python3 抓取网页资源的多种方法,有需要的可以参考借鉴。
1、最简单
import urllib.request

# Fetch the page. The context manager closes the HTTP response (and its
# underlying socket) as soon as the body has been read.
with urllib.request.urlopen('http://python.org/') as response:
    # read() returns the raw response body as bytes.
    html = response.read()
2、使用 Request
import urllib.request

# Build an explicit Request object; urlopen() accepts it in place of a URL
# string, which allows headers and POST data to be attached beforehand.
req = urllib.request.Request('http://python.org/')

# The context manager closes the HTTP response once the body has been read.
with urllib.request.urlopen(req) as response:
    the_page = response.read()
3、发送数据
#! /usr/bin/env python3
"""Send form data to a login page with an HTTP POST request."""
import urllib.parse
import urllib.request

url = 'http://localhost/login.php'
values = {
    'act': 'login',
    'login[email]': 'yzhang@i9i8.com',
    'login[password]': '123456',
}

# urlencode() returns a str, but in Python 3 the POST body passed to
# Request must be bytes; passing a str raises TypeError in urlopen().
# The percent-encoded form data is pure ASCII, so encode it as such.
data = urllib.parse.urlencode(values).encode('ascii')

# Supplying data turns the request into a POST.
req = urllib.request.Request(url, data)
req.add_header('Referer', 'http://www.python.org/')

# The context manager closes the HTTP response once the body has been read.
with urllib.request.urlopen(req) as response:
    the_page = response.read()

print(the_page.decode("utf8"))
4、发送数据和header
#! /usr/bin/env python3
"""Send form data with a custom User-Agent header in an HTTP POST request."""
import urllib.parse
import urllib.request

url = 'http://localhost/login.php'
user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
values = {
    'act': 'login',
    'login[email]': 'yzhang@i9i8.com',
    'login[password]': '123456',
}
headers = {'User-Agent': user_agent}

# urlencode() returns a str, but in Python 3 the POST body passed to
# Request must be bytes; passing a str raises TypeError in urlopen().
# The percent-encoded form data is pure ASCII, so encode it as such.
data = urllib.parse.urlencode(values).encode('ascii')

# The third positional argument is the dict of request headers.
req = urllib.request.Request(url, data, headers)

# The context manager closes the HTTP response once the body has been read.
with urllib.request.urlopen(req) as response:
    the_page = response.read()

print(the_page.decode("utf8"))
5、HTTP 错误
#! /usr/bin/env python3
"""Print the status code and body when the server answers with an HTTP error."""
# Import urllib.error explicitly instead of relying on urllib.request
# happening to import it as a side effect.
import urllib.error
import urllib.request

req = urllib.request.Request('http://www.zzvips.com')
try:
    # Only the error path is of interest here, so on success the response
    # is closed immediately instead of being left open.
    urllib.request.urlopen(req).close()
except urllib.error.HTTPError as e:
    # HTTPError carries the status code and is also a file-like response,
    # so the error page body can be read from it.
    print(e.code)
    print(e.read().decode("utf8"))
6、异常处理1
#! /usr/bin/env python3
"""Handle HTTP and connection errors with separate except clauses."""
from urllib.request import Request, urlopen
from urllib.error import URLError, HTTPError

# The URL must not contain whitespace: a space inside the host part makes
# http.client raise InvalidURL, which neither handler below would catch.
req = Request("http://www.zzvips.com/")
try:
    response = urlopen(req)
except HTTPError as e:
    # HTTPError is a subclass of URLError, so it has to be caught first;
    # otherwise the URLError clause would swallow it.
    print("The server couldn't fulfill the request.")
    print('Error code: ', e.code)
except URLError as e:
    print('We failed to reach a server.')
    print('Reason: ', e.reason)
else:
    # No exception: the request succeeded.
    print("good!")
    print(response.read().decode("utf8"))
    response.close()
7、异常处理2
#! /usr/bin/env python3
"""Handle HTTP and connection errors in a single except clause."""
from urllib.request import Request, urlopen
from urllib.error import URLError

# The URL must not contain whitespace: a space inside the host part makes
# http.client raise InvalidURL, which the handler below would not catch.
req = Request("http://www.zzvips.com/")
try:
    response = urlopen(req)
except URLError as e:
    # HTTPError (a URLError subclass) has BOTH 'code' and 'reason' in
    # Python 3, while a plain URLError only has 'reason'. 'code' must
    # therefore be tested first, or every HTTP error would be reported
    # as a connection failure.
    if hasattr(e, 'code'):
        print("The server couldn't fulfill the request.")
        print('Error code: ', e.code)
    elif hasattr(e, 'reason'):
        print('We failed to reach a server.')
        print('Reason: ', e.reason)
else:
    # No exception: the request succeeded.
    print("good!")
    print(response.read().decode("utf8"))
    response.close()
8、HTTP 认证
#! /usr/bin/env python3
"""Fetch a URL protected by HTTP basic authentication."""
import urllib.request

# Create a password manager.
password_mgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()

# Add the username and password.
# If we knew the realm, we could use it instead of None.
# The URL must not contain whitespace, or it will neither match the
# requested URL nor be accepted as a valid host.
top_level_url = "http://www.zzvips.com/"
password_mgr.add_password(None, top_level_url, 'rekfan', 'xxxxxx')

handler = urllib.request.HTTPBasicAuthHandler(password_mgr)

# Create "opener" (OpenerDirector instance).
opener = urllib.request.build_opener(handler)

# Use the opener to fetch a URL; the context manager closes the response.
a_url = "http://www.zzvips.com/"
with opener.open(a_url) as x:
    print(x.read())

# Install the opener.
# Now all calls to urllib.request.urlopen use our opener.
urllib.request.install_opener(opener)
with urllib.request.urlopen(a_url) as response:
    a = response.read().decode('utf8')
print(a)
9、使用代理
#! /usr/bin/env python3
"""Route urllib requests through an HTTP proxy."""
import urllib.request

# ProxyHandler maps URL schemes ('http', 'https', ...) to proxy URLs.
# A key that is not the scheme of the requested URL is never consulted,
# so the mapping must name the schemes that should be proxied.
# NOTE: urllib.request only speaks to HTTP(S) proxies; it has no SOCKS
# support, so a SOCKS5 proxy cannot be configured this way.
proxy_support = urllib.request.ProxyHandler({
    'http': 'http://localhost:1080',
    'https': 'http://localhost:1080',
})
opener = urllib.request.build_opener(proxy_support)

# Install the opener so that plain urlopen() calls use the proxy too.
urllib.request.install_opener(opener)

with urllib.request.urlopen("http://www.zzvips.com") as response:
    a = response.read().decode("utf8")
print(a)
10、超时
#! /usr/bin/env python3
"""Apply a timeout to urllib requests through the socket module default."""
import socket
import urllib.request

# timeout in seconds
timeout = 2
socket.setdefaulttimeout(timeout)

# this call to urllib.request.urlopen now uses the default timeout
# we have set in the socket module
# (a per-call alternative is urlopen(req, timeout=timeout))
# The URL must not contain whitespace: a space inside the host part makes
# http.client reject it with InvalidURL.
req = urllib.request.Request('http://www.zzvips.com/')
with urllib.request.urlopen(req) as response:
    a = response.read()
print(a)
总结
以上就是这篇文章的全部内容,希望本文的内容对大家学习或使用 Python 能有所帮助。如果有疑问,大家可以留言交流。