From 90c292f35f0a87dcf49e3a6dca371fbcda603e83 Mon Sep 17 00:00:00 2001 From: RainSun Date: Fri, 17 Apr 2020 11:46:18 +0800 Subject: [PATCH] rewrite crawler --- README.md | 11 +++++----- cherry.py | 4 ++-- lib/allFunction.py | 4 ++-- lib/crawler.py | 40 ++++++++++++++--------------------- {test => lib}/crawler_test.py | 2 +- 5 files changed, 27 insertions(+), 34 deletions(-) rename {test => lib}/crawler_test.py (94%) diff --git a/README.md b/README.md index b58d965..2097c8f 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,7 @@ export FLASK_ENV=development // 启动临时服务 flask run --host=0.0.0.0 -p 8001 // beta -flask run --host=127.0.0.1 -p 5005 +flask run --host=127.0.0.1 -p 6000 // 启动永久服务 gunicorn coc:cherry -c gunicorn.conf.py // 查看已启动服务 @@ -37,10 +37,11 @@ deactivate ## 错误代码一览 /api/login -100:数据校验失败 -101:账户错误 -102:教务挂了 -200:ok +400:数据校验失败 +510:账号或密码错误 +511:请填写手机号 +512:教务挂了 +200:OK /api/game/schedule/upload 400:数据校验失败 diff --git a/cherry.py b/cherry.py index c0d0608..a12643b 100644 --- a/cherry.py +++ b/cherry.py @@ -9,9 +9,9 @@ from flask import Flask, request, session, redirect app = Flask(__name__) # 引入跨域访问处理模块 -# from flask_cors import CORS +from flask_cors import CORS # 取消跨域访问限制,方便本地测试 注册CORS, "/*" 允许访问所有api -# CORS(app, resources=r'/*') +CORS(app, resources=r'/*') # 测试用根路由 @app.route('/api/') diff --git a/lib/allFunction.py b/lib/allFunction.py index 353319a..5fbd8c2 100644 --- a/lib/allFunction.py +++ b/lib/allFunction.py @@ -25,9 +25,9 @@ def manageLogin(request): c.getSchedule() return c.getData() else: - return {'errcode': res[-1], 'errmsg': res[0]} + return res else: - return {'errcode': 100, 'errmsg':'数据校验失败'} + return '数据校验失败', 400 # 处理更新课表游戏排名信息 def manageScheduleUpload(request): diff --git a/lib/crawler.py b/lib/crawler.py index 7b3e91b..f633362 100644 --- a/lib/crawler.py +++ b/lib/crawler.py @@ -16,8 +16,8 @@ class Crawler(object): self.__ip = None self.__student_id = None self.__student_name = None - self.__grade_data = None - self.__schedule_data = None + self.__grade_data = '' + self.__schedule_data = '' # 链接教务 ----------------------------------------------------------------------------- def connection(self): @@ -41,7 +41,7 @@ class Crawler(object): flag = soup.find(name='title') if(flag.text == "手机号设置"): if self.__phone == '': - return ('账号或者密码错误', 401) + return ('请填写手机号', 511) execution = soup.find_all(name='input')[1]['value'] formdata = { 'phone': self.__phone, @@ -57,7 +57,7 @@ class Crawler(object): try: self.__ip = soup.findAll(name='a')[7]['href'][7:].split("-") except: - return ('账号或者密码错误', 401) + return ('账号或者密码错误', 510) r = self.__session.get(url='http://mysso-cust-edu-cn-s.webvpn.cust.edu.cn:8118/cas/login?service=http://' + self.__ip[0] + '.' + self.__ip[1] + '.' + self.__ip[2] + '.' + self.__ip[3] + ':8080/welcome', allow_redirects=False) ticket = r.headers['Location'][68:] @@ -75,7 +75,7 @@ class Crawler(object): return ('ok', 200) except Exception as e: print(e) - return ('教务挂了', 502) + return ('教务挂了', 512) # 获取课表 ----------------------------------------------------------------------------- def getGrade(self): @@ -89,7 +89,7 @@ class Crawler(object): ) data = json.loads(r.content.decode('utf-8')) if data['state'] != 0: - return ('教务挂了', 502) + return ('教务挂了', 512) # 分解数据并重命名 total = data['data']['GradeStatistics'] split = data['data']['GradeList'] @@ -250,7 +250,7 @@ class Crawler(object): ) data = json.loads(r.content.decode('utf-8')) if data['state'] != 0: - return ('教务挂了', 502) + return ('教务挂了', 512) time = ['AM__TimePieces', 'PM__TimePieces', 'EV__TimePieces'] data = data['data']['AdjustDays'] days_per_week = [0] * 23 @@ -326,20 +326,12 @@ class Crawler(object): # 获取信息 ----------------------------------------------------------------------------- def getData(self): - # return ( - # { - # 'student_id': self.__student_id, - # 'student_name': self.__student_name, - # 'grade': self.__grade_data, - # 'schedule': self.__schedule_data - # }, - # 200 - # ) - return { - 'errcode': '200', - 'errmsg': 'ok', - 'student_name': self.__student_name, - 'student_id': self.__student_id, - 'grade': self.__grade_data, - 'schedule': self.__schedule_data - } + return ( + { + 'student_id': self.__student_id, + 'student_name': self.__student_name, + 'grade': self.__grade_data, + 'schedule': self.__schedule_data + }, + 200 + ) diff --git a/test/crawler_test.py b/lib/crawler_test.py similarity index 94% rename from test/crawler_test.py rename to lib/crawler_test.py index e25d486..cdf6c43 100644 --- a/test/crawler_test.py +++ b/lib/crawler_test.py @@ -1,5 +1,5 @@ import unittest -from ..lib.crawler import Crawler +from crawler import Crawler c = Crawler('2017002372', '623910ert&', '15143211127')