48 lines
1.1 KiB
Python
48 lines
1.1 KiB
Python
import urllib.request
|
|
import re
|
|
import os
|
|
import urllib
|
|
import json
|
|
|
|
# Build the rewritten image URL
|
|
def makeNewUrl(url):
    """Rewrite an image URL onto the ``r.photo.store.qq.com`` host.

    Two pieces are extracted from ``url``: the text between ``com/`` and
    ``/m`` and the text between ``/m/`` and ``null`` (both matched greedily).
    They are re-joined as ``http://r.photo.store.qq.com/<first>/r/<second>``.

    Args:
        url: Source URL, expected to contain ``com/<first>/m/<second>null``.

    Returns:
        The rewritten URL string, or ``False`` when ``url`` is not a string
        or does not contain both pieces.
    """
    # Entries loaded from JSON may be null or numeric; report them as a failed
    # extraction instead of letting ``re`` raise TypeError.
    if not isinstance(url, str):
        return False

    path_match = re.search(r'com/(.*)/m', url)
    token_match = re.search(r'/m/(.*)null', url)
    # Compare against None explicitly: an empty captured group is still a
    # successful match and must not be treated as a failure.
    if path_match is None or token_match is None:
        return False

    return "http://r.photo.store.qq.com/{0}/r/{1}".format(
        path_match.group(1), token_match.group(1))
|
|
|
|
def fun(blocknum, blocksize, totalsize):
    """Print download progress; passed as the ``urlretrieve`` report hook.

    Args:
        blocknum: Number of the current block.
        blocksize: Size of each transferred block.
        totalsize: Total size of the remote file. May be ``-1`` (or ``0``)
            when the size is not known.
    """
    received = blocknum * blocksize

    # Without a known total a percentage is meaningless (negative for -1,
    # ZeroDivisionError for 0), so report the raw byte count instead.
    if totalsize <= 0:
        print("download : %d bytes" % received)
        return

    # The final block usually overshoots the real size; cap at 100%.
    percent = min(received / totalsize, 1.0) * 100
    print("download : %.2f%%" % percent)
|
|
|
|
# Read the JSON file; the context manager closes the handle once parsed.
with open('./imgs1.json') as fp:
    data = json.load(fp)

# Counter used both for the progress messages and the output file name.
index = 1

for img_url in data:
    # Check the limit before announcing the next item, so that an item which
    # is never downloaded is not reported as "starting".
    if index > 10:
        print("长度终止")
        break
    print('第%s个开始' % (index))

    new_url = makeNewUrl(img_url)
    if not new_url:
        # Extraction failed: skip this entry without consuming a file number.
        print("地址提取失败")
        continue

    print('开始请求')
    urllib.request.urlretrieve(
        new_url,
        '{0}{1}.jpg'.format('D:\\Code\\imgCrawler\\mapDepot1\\', index),
        fun,
    )
    index += 1