qqzone_crawler/jsonManage.py
2020-12-02 22:45:37 +08:00

49 lines
1.0 KiB
Python

import json
import re
# Accumulates the converted URLs from every processed file; manageJson
# extends it and composeAllData serializes it.
all_data = []
# Build the rewritten URL
def makeNewUrl(url):
    """Rewrite a photo URL onto the ``r.photo.store.qq.com`` host.

    Two pieces are captured from *url*: the text between ``com/`` and
    ``/m``, and the text between ``/m/`` and ``null``. Both captures are
    greedy. They are re-joined as
    ``http://r.photo.store.qq.com/<first>/r/<second>``.

    Returns:
        The rebuilt URL string, or ``False`` when either piece cannot be
        found in *url*.
    """
    head = re.search('com/(.*)/m', url)
    tail = re.search('/m/(.*)null', url)
    if head is None or tail is None:
        return False
    return "http://r.photo.store.qq.com/" + head.group(1) + "/r/" + tail.group(1)
# Process one input file
def manageJson(i):
    """Convert the URLs stored in ``./imgs<i>.json`` and save the usable ones.

    The JSON value loaded from ``./imgs<i>.json`` is iterated (presumably a
    list of URL strings -- verify against whatever produces these files).
    Falsy entries are skipped; every other entry is passed through
    ``makeNewUrl`` and kept only when the result is truthy. The kept URLs
    are written as a JSON array to ``./row/rowImgs<i>.json`` and appended to
    the module-level ``all_data`` list. Two progress lines are printed once
    the file has been handled.

    Args:
        i: Number used to build the input and output file names.
    """
    map_url = './imgs' + str(i) + '.json'
    row_map_url = './row/rowImgs' + str(i) + '.json'

    # ``with`` guarantees the handle is closed even if json.load raises on
    # malformed input.
    with open(map_url) as fp:
        data_row = json.load(fp)

    data = []
    for url in data_row:
        if not url:
            continue
        new_url = makeNewUrl(url)
        if new_url:
            data.append(new_url)

    with open(row_map_url, mode='w') as fp:
        fp.write(json.dumps(data))

    all_data.extend(data)
    print('%s个文件处理完成' % (i))
    print('%s个文件有效长度为%s' % (i, len(data)))
# Write out the combined data
def composeAllData():
    """Serialize the module-level ``all_data`` list to ``./row/allData.json``.

    The file is opened in write mode, so any existing content is replaced.
    """
    # ``with`` closes the file even if serialization or the write fails.
    with open('./row/allData.json', mode='w') as fp:
        fp.write(json.dumps(all_data))
# Script driver: handle input files numbered 1 through 32 in order, then
# write the merged result once all of them have been processed.
for i in range(1, 33):
    manageJson(i)

composeAllData()