各位用户为了找寻关于Python实现从订阅源下载图片的方法的资料费了很多周折。这里教程网为您整理了关于Python实现从订阅源下载图片的方法的相关资料,仅供查阅,以下为您介绍关于Python实现从订阅源下载图片的方法的详细内容。

本文实例讲述了Python实现从订阅源下载图片的方法。分享给大家供大家参考。具体如下:

这段代码是基于 Python 3.4 实现的,和 Python 2.x 相比有不少差别。这是一个练习,数据源来自网易订阅。代码如下:

"""Download the images referenced by articles of a set of subscription feeds.

For every feed URL in ``Down.collect_links`` the script fetches a JSON article
list, opens each article page, extracts the ``<img src=...>`` addresses and
stores the images in a per-feed directory below ``Down.img_path``.
"""

__author__ = 'Saint'

import json
import os
import urllib.error
import urllib.parse
import urllib.request
from html.parser import HTMLParser


class MyHtmlParser(HTMLParser):
    """Collect the ``src`` attribute of every ``<img>`` tag fed to the parser."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Per-instance list. A class-level list would be shared by all
        # parsers, so every new page would also "contain" the images of all
        # previously parsed pages and download them again.
        self.links = []

    def handle_starttag(self, tag, attrs):
        """Record the ``src`` value of ``<img>`` tags; ignore all other tags."""
        if tag != "img":
            return
        for name, value in attrs:
            # ``<img src>`` yields value None; there is nothing to download.
            if name == "src" and value:
                self.links.append(value)


class Down(object):
    """Interactive image downloader for the feeds in ``collect_links``."""

    # Root directory under which one sub-directory per feed is created.
    img_path = "E:/saint"
    # Directory of the feed currently being downloaded (set by isDirExists).
    dir = ''
    # Feed (article list) addresses to collect from.
    collect_links = ["http://dy.163.com/v2/media/articlelist/T1374483113516-1", "http://dy.163.com/v2/media/articlelist/T1420776257254-1", "http://dy.163.com/v2/media/articlelist/T1376641060407-1"]
    # Base address of an article page; "/<tid>/<docid>" is appended to it.
    img_links = "http://dy.163.com/v2/article"

    @staticmethod
    def _clear_screen():
        """Clear the terminal with the command matching the current OS."""
        os.system("cls" if os.name == "nt" else "clear")

    @staticmethod
    def _file_name_from_url(url):
        """Return the last path segment of *url*, without query or fragment.

        An empty string is returned when the path ends with ``/``.
        """
        return urllib.parse.urlsplit(url).path.rsplit("/", 1)[-1]

    def handleCollect(self):
        """Walk through all feeds and download their article images.

        After each article the user is asked whether to continue; answering
        ``n`` stops the current feed and the loop moves on to the next one.
        When a feed's article list cannot be fetched the user chooses between
        continuing with the next feed (``y``) and stopping; any other answer
        also stops.
        """
        for collect_link in self.collect_links:
            print("开始从[" + collect_link + "]采集图片")
            # The last URL segment names the download directory of this feed.
            self.isDirExists(collect_link.split("/")[-1])
            pages = self.getListFromSubscribe(collect_link)
            if pages is False:
                print("数据采集失败,是否继续(y/n)")
                op = input()
                if op == "y":
                    self._clear_screen()
                    continue
                if op == "n":
                    print("停止采集")
                else:
                    self._clear_screen()
                    print("非法输入")
                break
            for page in pages:
                page_uri = self.img_links + "/" + page["tid"] + "/" + page["docid"]
                self.getImgFromUri(page_uri)
                print("是否继续(y/n)")
                if input() == "n":
                    self._clear_screen()
                    print("采集完毕")
                    break
        print("OK")

    def getListFromSubscribe(self, uri):
        """Fetch the article list of the feed at *uri*.

        Returns the value stored under ``"data"`` in the JSON response, or
        ``False`` when the request fails, the status is not 2xx, the body is
        not valid JSON, or the response's ``"code"`` field is not ``1``.
        """
        try:
            with urllib.request.urlopen(uri) as res:
                status = res.code
                # Non-HTTP responses (e.g. file://) carry no status code.
                if status is not None and not 200 <= status < 300:
                    self._clear_screen()
                    return False
                raw = res.read()
        except OSError as err:  # URLError and HTTPError derive from OSError
            print("请求失败: " + str(err))
            return False
        try:
            # read() returns bytes; decode with the encoding used by the site.
            payload = json.loads(raw.decode("gbk"))
        except ValueError as err:  # bad JSON or undecodable bytes
            print("数据解析失败: " + str(err))
            return False
        if payload['code'] != 1:
            print(payload['msg'])
            return False
        return payload['data']

    def getImgFromUri(self, uri):
        """Download every image referenced by the article page at *uri*.

        A page that cannot be fetched or decoded is reported and skipped.
        """
        try:
            with urllib.request.urlopen(uri) as res:
                html_code = res.read().decode("gbk")
        except (OSError, ValueError) as err:
            print("页面获取失败: " + str(err))
            return
        hp = MyHtmlParser()
        hp.feed(html_code)
        hp.close()
        for link in hp.links:  # hp.links holds the image addresses
            # Resolve relative and protocol-relative addresses against the
            # page; absolute addresses pass through unchanged.
            self.writeToDisk(urllib.parse.urljoin(uri, link))

    def isDirExists(self, dir_name):
        """Point ``self.dir`` at ``img_path/dir_name``, creating it if needed.

        Always returns ``True``.
        """
        self.dir = os.path.join(self.img_path, dir_name)
        os.makedirs(self.dir, exist_ok=True)
        return True

    def writeToDisk(self, url):
        """Download *url* into ``self.dir``.

        The file is named after the last path segment of *url*. Returns
        ``True`` when the file was written and ``False`` when no file name
        could be derived or the download failed.
        """
        file_name = self._file_name_from_url(url)
        if not file_name:
            print("无法确定文件名,跳过: " + url)
            return False
        try:
            with urllib.request.urlopen(url) as res:
                data = res.read()
        except (OSError, ValueError) as err:
            # One broken image must not abort the whole collection run.
            print("下载失败: " + url + " (" + str(err) + ")")
            return False
        # Write via an explicit path instead of os.chdir() so the process
        # working directory is left untouched and relative img_path keeps
        # working across calls.
        with open(os.path.join(self.dir, file_name), "wb") as fh:
            fh.write(data)
        return True


if __name__ == "__main__":
    down = Down()
    down.handleCollect()

 

希望本文所述对大家的Python程序设计有所帮助。