"""Download images listed in a JSON file after rewriting their URLs.

Each URL read from the JSON file is rewritten by ``makeNewUrl`` to point at
``r.photo.store.qq.com`` and then fetched with ``urllib.request.urlretrieve``.
"""
import json
import os
import re
import urllib
import urllib.error
import urllib.request

# JSON file that is parsed and iterated to obtain the source image URLs.
JSON_PATH = './imgs1.json'
# Prefix placed in front of "<index>.jpg" to form each output file name.
OUTPUT_PREFIX = 'D:\\Code\\imgCrawler\\mapDepot1\\'
# Maximum number of images downloaded in one run.
MAX_IMAGES = 10


def makeNewUrl(url):
    """Build the ``r.photo.store.qq.com`` download URL for *url*.

    The text between ``com/`` and ``/m`` and the text between ``/m/`` and
    ``null`` are extracted from *url* and recombined as
    ``http://r.photo.store.qq.com/<first>/r/<second>``.

    Args:
        url: Source image URL.

    Returns:
        The rewritten URL string, or ``False`` when *url* is not a string or
        either part cannot be found in it.
    """
    # Entries parsed from JSON may be None/numbers; report them as a failed
    # extraction instead of letting ``re`` raise TypeError.
    if not isinstance(url, str):
        return False
    head = re.search('com/(.*)/m', url)
    tail = re.search('/m/(.*)null', url)
    if head is None or tail is None:
        return False
    return "http://r.photo.store.qq.com/" + head.group(1) + "/r/" + tail.group(1)


def fun(blocknum, blocksize, totalsize):
    """Print download progress; used as the ``urlretrieve`` report hook.

    Args:
        blocknum: Number of blocks transferred so far.
        blocksize: Size of each transferred block, in bytes.
        totalsize: Total size of the remote file, in bytes. ``urlretrieve``
            passes ``-1`` when the server does not report a size.
    """
    received = blocknum * blocksize
    if totalsize <= 0:
        # Unknown (-1) or empty (0) size: a percentage would be negative or
        # raise ZeroDivisionError, so report the byte count instead.
        print("download : %d bytes" % received)
        return
    # The last block is usually partial, so cap the ratio at 100%.
    percent = min(received / totalsize, 1.0) * 100
    print("download : %.2f%%" % (percent))


def main(json_path=JSON_PATH, output_prefix=OUTPUT_PREFIX, max_images=MAX_IMAGES):
    """Download up to *max_images* images listed in *json_path*.

    Files are written to ``<output_prefix><index>.jpg`` where ``index`` starts
    at 1 and only advances after a successful download, so URLs that fail to
    convert or download do not consume a file number.

    Args:
        json_path: Path of the JSON file whose parsed value is iterated for
            image URLs.
        output_prefix: String prepended to ``"<index>.jpg"``.
        max_images: Stop once this many images have been downloaded.
    """
    # Read the JSON file; the context manager closes the handle promptly.
    with open(json_path, encoding='utf-8') as fp:
        data = json.load(fp)

    # Counter for the next output file number.
    index = 1
    for img_url in data:
        print('第%s个开始' % (index))
        if index > max_images:
            print("长度终止")
            break
        new_url = makeNewUrl(img_url)
        if not new_url:
            print("地址提取失败")
            continue
        print('开始请求')
        target = '{0}{1}.jpg'.format(output_prefix, index)
        try:
            urllib.request.urlretrieve(new_url, target, fun)
        except urllib.error.URLError as exc:
            # Skip this image rather than aborting the whole run, matching
            # how unparseable URLs are skipped above.
            print("下载失败: %s" % exc)
            continue
        index += 1


if __name__ == "__main__":
    main()