img_crawler/cache.py
2020-12-02 22:31:54 +08:00

48 lines
1.1 KiB
Python

import urllib.request
import re
import os
import urllib
import json
# Build the new URL
def makeNewUrl(url):
    """Build an ``http://r.photo.store.qq.com/`` URL from parts of *url*.

    Two pieces are extracted from *url*:

    * the text between the first ``com/`` and the last ``/m`` that follows it;
    * the text between the first ``/m/`` and the last ``null`` that follows it.

    Both captures are greedy, so the *last* possible terminator on the line
    is the one that ends each piece.

    Args:
        url: The source URL string.

    Returns:
        ``"http://r.photo.store.qq.com/<piece1>/r/<piece2>"`` on success, or
        ``False`` when either piece cannot be found in *url*.
    """
    head = re.search(r'com/(.*)/m', url)
    tail = re.search(r'/m/(.*)null', url)
    # Test the match objects, not the captured text: an empty capture is
    # still a successful extraction.
    if head is None or tail is None:
        return False
    return "http://r.photo.store.qq.com/" + head.group(1) + "/r/" + tail.group(1)
def fun(blocknum, blocksize, totalsize):
    """Print download progress; passed as the report hook to ``urlretrieve``.

    Args:
        blocknum: Number of blocks transferred so far.
        blocksize: Size of each transferred block, in bytes.
        totalsize: Total size of the file, in bytes. May be zero or negative
            when the size is not known.
    """
    if totalsize <= 0:
        # Without a usable total a percentage is meaningless (and a total of
        # zero would divide by zero), so report the byte count instead.
        print("download : %d bytes" % (blocknum * blocksize))
        return
    # The last block usually overshoots the real size; cap at 100%.
    percent = min(blocknum * blocksize / totalsize, 1.0) * 100
    print("download : %.2f%%" % percent)
# Load the list of image URLs from the JSON file. The context manager closes
# the handle even if parsing fails; JSON text is read as UTF-8.
with open('./imgs1.json', encoding='utf-8') as fp:
    data = json.load(fp)
# Number used in the next saved file name; it only advances after a
# successful download, so saved files are numbered without gaps.
index = 1
for img_url in data:
    print('%s个开始'%(index))
    # Stop once ten images have been saved.
    if index > 10:
        print("长度终止")
        break
    new_url = makeNewUrl(img_url)
    if not new_url:
        # The URL did not contain the expected pieces; skip it.
        print("地址提取失败")
        continue
    print('开始请求')
    target = '{0}{1}.jpg'.format('D:\\Code\\imgCrawler\\mapDepot1\\', index)
    try:
        urllib.request.urlretrieve(new_url, target, fun)
    except OSError as err:
        # Network and HTTP errors (URLError derives from OSError) and local
        # write errors should not abort the whole batch; report and move on.
        print("下载失败: %s" % err)
        continue
    index += 1