From 4306439f5d28bd24cc189344beb2a841d8ec9fc5 Mon Sep 17 00:00:00 2001 From: RainSun Date: Wed, 15 Apr 2020 17:21:54 +0800 Subject: [PATCH] to class --- README.md | 2 + lib/__init__.py | 0 lib/allFunction.py | 40 +-- lib/crawler.py | 587 ++++++++++++++++++++++++------------------- logs/error.log | 204 ++++++++++++++- test/__init__.py | 0 test/crawler_test.py | 25 ++ 7 files changed, 570 insertions(+), 288 deletions(-) create mode 100644 lib/__init__.py create mode 100644 test/__init__.py create mode 100644 test/crawler_test.py diff --git a/README.md b/README.md index 6b9b000..b58d965 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,8 @@ pip install bs4 pip install gunicorn pip install pymongo pip install flask_cors +// 根据依赖文件安装环境 +pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple // 设置全局变量 export FLASK_APP=coc.py export FLASK_ENV=development diff --git a/lib/__init__.py b/lib/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/lib/allFunction.py b/lib/allFunction.py index b65b87f..353319a 100644 --- a/lib/allFunction.py +++ b/lib/allFunction.py @@ -1,5 +1,5 @@ # coding=utf-8 -from crawler import connection, getGrade, getSchedule +from crawler import Crawler import json from hashlib import md5 from urllib.parse import urlencode, unquote_plus @@ -10,20 +10,22 @@ from db import addRank, getRank # 这里三个接口公用一个session所以合并成一个接口,一个session走到底,一次性返回所有数据 def manageLogin(request): # json化,应该能当dict用 - data_cache = json.loads(request.form['data']) + data = json.loads(request.form['data']) # MD5校验 - checked = checkData(data_cache) - data_cache.pop('sign') + checked = checkData(data) if checked: # 创建会话 - res = connect(data_cache) - if res['errcode'] == 200: - # 登录成功并进行查询 - grade = getGrade(res['ip'], res['s']) - schedule = getSchedule(res['ip'], res['s']) - return {'errcode': '200', 'errmsg': 'ok', 'student_name':res['student_name'], 'student_id':res['student_id'], 'grade': grade, 'schedule': schedule} + phone = '' + if data.get('phone'): + phone = data['phone'] + c = Crawler(data['cid'], data['pwd'], phone) + res = c.connection() + if res[-1] == 200: + c.getGrade() + c.getSchedule() + return c.getData() else: - return res + return {'errcode': res[-1], 'errmsg': res[0]} else: return {'errcode': 100, 'errmsg':'数据校验失败'} @@ -70,19 +72,3 @@ def checkData(data): md.update(d.encode('utf-8')) r = md.hexdigest().upper() return r == data['sign'] - -# 创建会话 -def connect(data): - cid = data['cid'] - pwd = data['pwd'] - # 进行登录 - phone = '' - if data.get('phone'): - phone = data['phone'] - try: - # 这里教务没问题,账户没问题就是200,密码错了就是101 - res = connection(cid,pwd,phone) - return res - except: - # 这了就是教务挂了 - return {'errcode': 102, 'errmsg':'教务挂了'} diff --git a/lib/crawler.py b/lib/crawler.py index 51bd93e..7b3e91b 100644 --- a/lib/crawler.py +++ b/lib/crawler.py @@ -4,275 +4,342 @@ from urllib.parse import quote import base64 from bs4 import BeautifulSoup import random +import sys -def connection(username,password,phone): - s = requests.Session() - # 获取统一身份系统的网页 - r = s.get(url='http://mysso-cust-edu-cn-s.webvpn.cust.edu.cn:8118/cas/login?service=https%3A%2F%2Fwebvpn.cust.edu.cn%2Fauth%2Fcas_validate%3Fentry_id%3D1') - # soup = BeautifulSoup(r.text,'lxml') - soup = BeautifulSoup(r.text,'html.parser') - execution=soup.find_all(name='input')[6]['value'] - formdata={ - 'username':username, - 'password':password, - 'execution':execution, - '_eventId':'submit', - 'geolocation':'' - } - r = s.post(url='http://mysso-cust-edu-cn-s.webvpn.cust.edu.cn:8118/cas/login?service=https%3A%2F%2Fwebvpn.cust.edu.cn%2Fauth%2Fcas_validate%3Fentry_id%3D1',data=formdata) - soup=BeautifulSoup(r.text,'html.parser') - flag = soup.find(name='title') - if(flag.text=="手机号设置"): - execution=soup.find_all(name='input')[1]['value'] - formdata = { - 'phone': phone, - 'execution': execution, - '_eventId': 'submit', - 'submit': '提交' + +class Crawler(object): + def __init__(self, username, password, phone): + self.__username = username + self.__password = password + self.__phone = phone + self.__session = None + self.__ip = None + self.__student_id = None + self.__student_name = None + self.__grade_data = None + self.__schedule_data = None + + # 链接教务 ----------------------------------------------------------------------------- + def connection(self): + try: + self.__session = requests.Session() + # 获取统一身份系统的网页 + r = self.__session.get( + url='http://mysso-cust-edu-cn-s.webvpn.cust.edu.cn:8118/cas/login?service=https%3A%2F%2Fwebvpn.cust.edu.cn%2Fauth%2Fcas_validate%3Fentry_id%3D1') + soup = BeautifulSoup(r.text, 'html.parser') + execution = soup.find_all(name='input')[6]['value'] + formdata = { + 'username': self.__username, + 'password': self.__password, + 'execution': execution, + '_eventId': 'submit', + 'geolocation': '' + } + r = self.__session.post( + url='http://mysso-cust-edu-cn-s.webvpn.cust.edu.cn:8118/cas/login?service=https%3A%2F%2Fwebvpn.cust.edu.cn%2Fauth%2Fcas_validate%3Fentry_id%3D1', data=formdata) + soup = BeautifulSoup(r.text, 'html.parser') + flag = soup.find(name='title') + if(flag.text == "手机号设置"): + if self.__phone == '': + return ('账号或者密码错误', 401) + execution = soup.find_all(name='input')[1]['value'] + formdata = { + 'phone': self.__phone, + 'execution': execution, + '_eventId': 'submit', + 'submit': '提交' + } + r = self.__session.post( + url="http://mysso-cust-edu-cn-s.webvpn.cust.edu.cn:8118/cas/login?service=https%3A%2F%2Fwebvpn.cust.edu.cn%2Fauth%2Fcas_validate%3Fentry_id%3D1", data=formdata) + r = self.__session.get( + url='http://portal-cust-edu-cn-s.webvpn.cust.edu.cn:8118/custp/index') + soup = BeautifulSoup(r.text, 'html.parser') + try: + self.__ip = soup.findAll(name='a')[7]['href'][7:].split("-") + except: + return ('账号或者密码错误', 401) + r = self.__session.get(url='http://mysso-cust-edu-cn-s.webvpn.cust.edu.cn:8118/cas/login?service=http://' + + self.__ip[0] + '.' + self.__ip[1] + '.' + self.__ip[2] + '.' + self.__ip[3] + ':8080/welcome', allow_redirects=False) + ticket = r.headers['Location'][68:] + asp_net_sessionid_param = {'Ticket': ticket, 'Url': 'http://' + + self.__ip[0] + '.' + self.__ip[1] + '.' + self.__ip[2] + '.' + self.__ip[3] + ':8080/welcome'} + asp_net_sessionid_param = base64.b64encode( + quote(json.dumps(asp_net_sessionid_param)).encode('utf-8')).decode('utf-8') + asp_net_sessionid_param = {'param': asp_net_sessionid_param} + headers = {'Content-Type': 'application/json'} + r = self.__session.post(url='http://' + self.__ip[0] + '-' + self.__ip[1] + '-' + self.__ip[2] + '-' + self.__ip[3] + + '-8080-p.webvpn.cust.edu.cn:8118/api/LoginApi/LGSSOLocalLogin?sf_request_type=ajax', data=json.dumps(asp_net_sessionid_param), headers=headers) + data = json.loads(r.content.decode('utf-8')) + self.__student_name = data['data']['StudentDto']['XM'] + self.__student_id = data['data']['StudentDto']['XH'] + return ('ok', 200) + except Exception as e: + print(e) + return ('教务挂了', 502) + + # 获取课表 ----------------------------------------------------------------------------- + def getGrade(self): + headers = {'Content-Type': 'application/json'} + r = self.__session.post( + url='http://' + self.__ip[0] + '-' + self.__ip[1] + '-' + self.__ip[2] + '-' + self.__ip[3] + + '-8080-p.webvpn.cust.edu.cn:8118/api/ClientStudent/QueryService/GradeQueryApi/GetDataByStudent?sf_request_type=ajax', + data=json.dumps({"param": "JTdCJTIyU2hvd0dyYWRlVHlwZSUyMiUzQTElN0Q=", "__permission": {"MenuID": "4443798E-EB6E-4D88-BFBD-BB0A76FF6BD5", + "Operation": 0}, "__log": {"MenuID": "4443798E-EB6E-4D88-BFBD-BB0A76FF6BD5", "Logtype": 6, "Context": "查询"}}), + headers=headers + ) + data = json.loads(r.content.decode('utf-8')) + if data['state'] != 0: + return ('教务挂了', 502) + # 分解数据并重命名 + total = data['data']['GradeStatistics'] + split = data['data']['GradeList'] + # 成绩总览 + total_grade = { + 'total_GPA': total['PJJD'], + 'total_credit': total['SDXF'], + 'total_kill': total['TGMS'], + 'total_dead': total['WTGMS'] } - r = s.post(url="http://mysso-cust-edu-cn-s.webvpn.cust.edu.cn:8118/cas/login?service=https%3A%2F%2Fwebvpn.cust.edu.cn%2Fauth%2Fcas_validate%3Fentry_id%3D1",data=formdata) - r = s.get(url='http://portal-cust-edu-cn-s.webvpn.cust.edu.cn:8118/custp/index') - soup=BeautifulSoup(r.text,'html.parser') - try: - ip = soup.findAll(name='a')[7]['href'][7:].split("-") - except: - return {'errcode': 101, 'errmsg':'账号或者密码错误'} - r = s.get(url='http://mysso-cust-edu-cn-s.webvpn.cust.edu.cn:8118/cas/login?service=http://'+ip[0]+'.'+ip[1]+'.'+ip[2]+'.'+ip[3]+':8080/welcome',allow_redirects=False) - ticket = r.headers['Location'][68:] - asp_net_sessionid_param = {'Ticket':ticket,'Url':'http://'+ip[0]+'.'+ip[1]+'.'+ip[2]+'.'+ip[3]+':8080/welcome'} - asp_net_sessionid_param = base64.b64encode(quote(json.dumps(asp_net_sessionid_param)).encode('utf-8')).decode('utf-8') - asp_net_sessionid_param = {'param':asp_net_sessionid_param} - headers = {'Content-Type': 'application/json'} - r = s.post(url='http://'+ip[0]+'-'+ip[1]+'-'+ip[2]+'-'+ip[3]+'-8080-p.webvpn.cust.edu.cn:8118/api/LoginApi/LGSSOLocalLogin?sf_request_type=ajax',data=json.dumps(asp_net_sessionid_param),headers=headers) - data = json.loads(r.content.decode('utf-8')) - student_name = data['data']['StudentDto']['XM'] - student_id = data['data']['StudentDto']['XH'] - return {'errcode': 200, 'errmsg': 'ok', 'ip': ip, 's': s,'student_name':student_name,'student_id':student_id} - -def getGrade(Ip, S): - headers = {'Content-Type': 'application/json'} - r = S.post( - url='http://'+Ip[0]+'-'+Ip[1]+'-'+Ip[2]+'-'+Ip[3]+'-8080-p.webvpn.cust.edu.cn:8118/api/ClientStudent/QueryService/GradeQueryApi/GetDataByStudent?sf_request_type=ajax', - data=json.dumps({"param":"JTdCJTIyU2hvd0dyYWRlVHlwZSUyMiUzQTElN0Q=","__permission":{"MenuID":"4443798E-EB6E-4D88-BFBD-BB0A76FF6BD5","Operation":0},"__log":{"MenuID":"4443798E-EB6E-4D88-BFBD-BB0A76FF6BD5","Logtype":6,"Context":"查询"}}), - headers=headers - ) - data = json.loads(r.content.decode('utf-8')) - if data['state'] != 0: - return {'errcode': 102, 'errmsg': '教务挂了'} - #分解数据并重命名 - total = data['data']['GradeStatistics'] - split = data['data']['GradeList'] - #成绩总览 - GradeStatistics = { - 'total_GPA': total['PJJD'], - 'total_credit': total['SDXF'], - 'total_kill': total['TGMS'], - 'total_dead': total['WTGMS'] - } - #提取第一和最后一学期 - first_term = split[0]['KSXNXQ'] - last_term = split[len(split)-1]['KSXNXQ'] - #转换成int元组 - first_term = (int(first_term[0:4]),int(first_term[4:5])) - last_term = (int(last_term[0:4]),int(last_term[4:5])) - print(first_term, last_term) - #生成中间学期 - total_term = []; - for i in range(last_term[0],first_term[0] + 1): - for j in range(1,3): - total_term.append(str(i) + str(j)) - if i == first_term[0] and j == first_term[1]: - break - total_term.reverse() - grade_list = [] - #当前学期索引/上学期总通过/上学期总挂科/上学期总学分/上学期总学分绩点/上一课程通过次数 - this_term, last_term_kill, last_term_dead, last_term_credit, last_term_c_x_g, last_lesson_kill = 0, 0, 0, 0, 0, 0 - #上学期课程列表 - last_term_grade_list = [] - #上一课程名称 - last_lesson_name = '' - #flag,将是否通过延后一个循环 - flag = True - #总必修学分 - total_bixiu_credit = 0; - #总必修学分绩点 - total_bixiu_c_x_g = 0; - #遍历课程 - for item in split: - if not item['YXCJ']: - continue - #如果和上一个课程重名 - if item['LessonInfo']['KCMC'] == last_lesson_name: - #判断是否通过 - if item['YXCJ'] >= 60: - #如果通过贡献1通过 - last_lesson_kill += 1 - #贡献总学分绩点 - last_term_c_x_g += item['XF'] * (item['YXCJ'] - 50) / 10 - if item['KCXZ'] == '必修': - total_bixiu_c_x_g += item['XF'] * (item['YXCJ'] - 50) / 10 - total_bixiu_credit += item['XF'] - else:#如果不重名 - if flag:#将else中的判断延后一个循环 - flag = False - else: - #如果上一课程通过 - if last_lesson_kill > 0: - #贡献上学期通过数 - last_term_kill += 1 - else: - #贡献上学期挂科数 - last_term_dead += 1 - last_lesson_kill = 0 - #更新上一课程名称 - last_lesson_name = item['LessonInfo']['KCMC'] - #如果不是当前学期 - if item['KSXNXQ'] != total_term[this_term]: - #成绩列表添加上学期数据 - grade_list.append({ - 'term_time': total_term[this_term], - 'term_GPA': last_term_c_x_g / last_term_credit, - 'term_kill': last_term_kill, - 'term_dead': last_term_dead, - 'term_credit': last_term_credit, - 'term_grade': last_term_grade_list - }) - #当前学期索引+1 - while item['KSXNXQ'] != total_term[this_term]: - this_term += 1 - #初始化所有值 - last_term_kill, last_term_dead, last_term_credit = 0, 0, item['XF'] - last_term_grade_list = [] - #如果通过 + # 提取第一和最后一学期 + first_term = split[0]['KSXNXQ'] + last_term = split[len(split)-1]['KSXNXQ'] + # 转换成int元组 + first_term = (int(first_term[0:4]), int(first_term[4:5])) + last_term = (int(last_term[0:4]), int(last_term[4:5])) + # 生成中间学期 + total_term = [] + for i in range(last_term[0], first_term[0] + 1): + for j in range(1, 3): + total_term.append(str(i) + str(j)) + if i == first_term[0] and j == first_term[1]: + break + total_term.reverse() + grade_list = [] + # 当前学期索引/上学期总通过/上学期总挂科/上学期总学分/上学期总学分绩点/上一课程通过次数 + this_term, last_term_kill, last_term_dead, last_term_credit, last_term_c_x_g, last_lesson_kill = 0, 0, 0, 0, 0, 0 + # 上学期课程列表 + last_term_grade_list = [] + # 上一课程名称 + last_lesson_name = '' + # flag,将是否通过延后一个循环 + flag = True + # 总必修学分 + total_bixiu_credit = 0 + # 总必修学分绩点 + total_bixiu_c_x_g = 0 + # 遍历课程 + for item in split: + if not item['YXCJ']: + continue + # 如果和上一个课程重名 + if item['LessonInfo']['KCMC'] == last_lesson_name: + # 判断是否通过 if item['YXCJ'] >= 60: - #贡献总学分绩点 - last_term_c_x_g = item['XF'] * (item['YXCJ'] - 50) / 10 - #贡献通过次数 + # 如果通过贡献1通过 last_lesson_kill += 1 - if item['KCXZ'] == '必修': - total_bixiu_c_x_g += item['XF'] * (item['YXCJ'] - 50) / 10 - total_bixiu_credit += item['XF'] - else: - last_term_c_x_g = 0 - else:#如果是当前学期 - #贡献总学分 - last_term_credit += item['XF'] - #如果通过 - if item['YXCJ'] >= 60: - #贡献通过数 - last_lesson_kill += 1 - #贡献学分绩点 + # 贡献总学分绩点 last_term_c_x_g += item['XF'] * (item['YXCJ'] - 50) / 10 if item['KCXZ'] == '必修': - total_bixiu_c_x_g += item['XF'] * (item['YXCJ'] - 50) / 10 + total_bixiu_c_x_g += item['XF'] * \ + (item['YXCJ'] - 50) / 10 total_bixiu_credit += item['XF'] - #加入学期成绩列表 - last_term_grade_list.append({ - 'title': item['LessonInfo']['KCMC'], - 'credit': item['XF'], - 'grade': item['ShowYXCJ'], - 'kill': 'yes' if (item['YXCJ'] >= 60) else 'no', - 'class': item['KSXZ'] + else: # 如果不重名 + if flag: # 将else中的判断延后一个循环 + flag = False + else: + # 如果上一课程通过 + if last_lesson_kill > 0: + # 贡献上学期通过数 + last_term_kill += 1 + else: + # 贡献上学期挂科数 + last_term_dead += 1 + last_lesson_kill = 0 + # 更新上一课程名称 + last_lesson_name = item['LessonInfo']['KCMC'] + # 如果不是当前学期 + if item['KSXNXQ'] != total_term[this_term]: + # 成绩列表添加上学期数据 + grade_list.append({ + 'term_time': total_term[this_term], + 'term_GPA': last_term_c_x_g / last_term_credit, + 'term_kill': last_term_kill, + 'term_dead': last_term_dead, + 'term_credit': last_term_credit, + 'term_grade': last_term_grade_list + }) + # 当前学期索引+1 + while item['KSXNXQ'] != total_term[this_term]: + this_term += 1 + # 初始化所有值 + last_term_kill, last_term_dead, last_term_credit = 0, 0, item['XF'] + last_term_grade_list = [] + # 如果通过 + if item['YXCJ'] >= 60: + # 贡献总学分绩点 + last_term_c_x_g = item['XF'] * (item['YXCJ'] - 50) / 10 + # 贡献通过次数 + last_lesson_kill += 1 + if item['KCXZ'] == '必修': + total_bixiu_c_x_g += item['XF'] * \ + (item['YXCJ'] - 50) / 10 + total_bixiu_credit += item['XF'] + else: + last_term_c_x_g = 0 + else: # 如果是当前学期 + # 贡献总学分 + last_term_credit += item['XF'] + # 如果通过 + if item['YXCJ'] >= 60: + # 贡献通过数 + last_lesson_kill += 1 + # 贡献学分绩点 + last_term_c_x_g += item['XF'] * \ + (item['YXCJ'] - 50) / 10 + if item['KCXZ'] == '必修': + total_bixiu_c_x_g += item['XF'] * \ + (item['YXCJ'] - 50) / 10 + total_bixiu_credit += item['XF'] + # 加入学期成绩列表 + last_term_grade_list.append({ + 'title': item['LessonInfo']['KCMC'], + 'credit': item['XF'], + 'grade': item['ShowYXCJ'], + 'kill': 'yes' if (item['YXCJ'] >= 60) else 'no', + 'class': item['KSXZ'] + }) + # 补充最后一次遍历的数据 + if last_lesson_kill > 0: + last_term_kill += 1 + else: + last_term_dead += 1 + grade_list.append({ + 'term_time': total_term[this_term], + 'term_GPA': last_term_c_x_g / last_term_credit, + 'term_kill': last_term_kill, + 'term_dead': last_term_dead, + 'term_credit': last_term_credit, + 'term_grade': last_term_grade_list }) - #补充最后一次遍历的数据 - if last_lesson_kill > 0: - last_term_kill += 1 - else: - last_term_dead += 1 - grade_list.append({ - 'term_time': total_term[this_term], - 'term_GPA': last_term_c_x_g / last_term_credit, - 'term_kill': last_term_kill, - 'term_dead': last_term_dead, - 'term_credit': last_term_credit, - 'term_grade': last_term_grade_list - }) - GradeStatistics['total_bixiu_GPA'] = total_bixiu_c_x_g / total_bixiu_credit; - #合并数据 - data_cache = { - 'total': GradeStatistics, - 'split': grade_list - } - return {'errcode': 200, 'errmsg': 'ok', 'data': data_cache} + total_grade['total_bixiu_GPA'] = total_bixiu_c_x_g / \ + total_bixiu_credit + # 合并数据 + self.__grade_data = { + 'total': total_grade, + 'split': grade_list + } + return ('ok', 200) -def getSchedule(Ip, S): - headers = {'Content-Type': 'application/json'} - r = S.post( - url='http://'+Ip[0]+'-'+Ip[1]+'-'+Ip[2]+'-'+Ip[3]+'-8080-p.webvpn.cust.edu.cn:8118/api/ClientStudent/Home/StudentHomeApi/GetHomeCurWeekTime?sf_request_type=ajax', - data=json.dumps({"param":"JTdCJTdE","__permission":{"MenuID":"F71C97D5-D3E2-4FDA-9209-D7FA8626390E","Operation":0},"__log":{"MenuID":"F71C97D5-D3E2-4FDA-9209-D7FA8626390E","Logtype":6,"Context":"查询"}}), - headers=headers - ) - CurWeek = json.loads(r.content.decode('utf-8'))['data']['CurWeek'] - r = S.post( - url='http://'+Ip[0]+'-'+Ip[1]+'-'+Ip[2]+'-'+Ip[3]+'-8080-p.webvpn.cust.edu.cn:8118/api/ClientStudent/Home/StudentHomeApi/QueryStudentScheduleData?sf_request_type=ajax', - data=json.dumps({"param":"JTdCJTdE","__permission":{"MenuID":"F71C97D5-D3E2-4FDA-9209-D7FA8626390E","Operation":0},"__log":{"MenuID":"F71C97D5-D3E2-4FDA-9209-D7FA8626390E","Logtype":6,"Context":"查询"}}), - headers=headers - ) - data = json.loads(r.content.decode('utf-8')) - if data['state'] != 0: - return {'errcode': 102, 'errmsg':'教务挂了'} - time = ['AM__TimePieces','PM__TimePieces','EV__TimePieces'] - data = data['data']['AdjustDays'] - days_per_week = [0] * 23 - lesson = [[0] * 6 for _ in range(7)] - lesson_set = {} - color_set = [0] * 9 - color_used = 9 - for i in range(7): - for j in range(3): - for k in range(2): - if( data[i][time[j]][k]['Dtos'] ): - lesson[i][j*2+k] = [] - for l in data[i][time[j]][k]['Dtos']: - temp_lesson = {} - Time = [0] * 23 - mod = '' - for m in l['Content']: - if temp_lesson.get(m['Key']): - temp_lesson[m['Key']] += ','+m['Name'] + # 获取成绩 ----------------------------------------------------------------------------- + def getSchedule(self): + headers = {'Content-Type': 'application/json'} + r = self.__session.post( + url='http://'+self.__ip[0]+'-'+self.__ip[1]+'-'+self.__ip[2]+'-'+self.__ip[3] + + '-8080-p.webvpn.cust.edu.cn:8118/api/ClientStudent/Home/StudentHomeApi/GetHomeCurWeekTime?sf_request_type=ajax', + data=json.dumps({"param": "JTdCJTdE", "__permission": {"MenuID": "F71C97D5-D3E2-4FDA-9209-D7FA8626390E", + "Operation": 0}, "__log": {"MenuID": "F71C97D5-D3E2-4FDA-9209-D7FA8626390E", "Logtype": 6, "Context": "查询"}}), + headers=headers + ) + CurWeek = json.loads(r.content.decode('utf-8'))['data']['CurWeek'] + r = self.__session.post( + url='http://'+self.__ip[0]+'-'+self.__ip[1]+'-'+self.__ip[2]+'-'+self.__ip[3] + + '-8080-p.webvpn.cust.edu.cn:8118/api/ClientStudent/Home/StudentHomeApi/QueryStudentScheduleData?sf_request_type=ajax', + data=json.dumps({"param": "JTdCJTdE", "__permission": {"MenuID": "F71C97D5-D3E2-4FDA-9209-D7FA8626390E", + "Operation": 0}, "__log": {"MenuID": "F71C97D5-D3E2-4FDA-9209-D7FA8626390E", "Logtype": 6, "Context": "查询"}}), + headers=headers + ) + data = json.loads(r.content.decode('utf-8')) + if data['state'] != 0: + return ('教务挂了', 502) + time = ['AM__TimePieces', 'PM__TimePieces', 'EV__TimePieces'] + data = data['data']['AdjustDays'] + days_per_week = [0] * 23 + lesson = [[0] * 6 for _ in range(7)] + lesson_set = {} + color_set = [0] * 9 + color_used = 9 + for i in range(7): + for j in range(3): + for k in range(2): + if(data[i][time[j]][k]['Dtos']): + lesson[i][j*2+k] = [] + for l in data[i][time[j]][k]['Dtos']: + temp_lesson = {} + Time = [0] * 23 + mod = '' + for m in l['Content']: + if temp_lesson.get(m['Key']): + temp_lesson[m['Key']] += ','+m['Name'] + else: + temp_lesson[m['Key']] = m['Name'] + if lesson_set.get(l['Content'][0]['Name']): + temp_lesson['color'] = lesson_set[l['Content'][0]['Name']] else: - temp_lesson[m['Key']] = m['Name'] - if lesson_set.get(l['Content'][0]['Name']): - temp_lesson['color'] = lesson_set[l['Content'][0]['Name']] - else: - color = random.randint(0, 8) - while color_set[color]: - if color_used <= 0: - break; color = random.randint(0, 8) - temp_lesson['color'] = color - lesson_set[l['Content'][0]['Name']] = color - color_used -= 1 - color_set[color] = 1 - temp_Time = temp_lesson['Time'] - if '单周' in temp_Time: - mod = 'single' - temp_Time = temp_Time[0:len(temp_Time)-5] - elif '双周' in temp_Time: - mod = 'double' - temp_Time = temp_Time[0:len(temp_Time)-5] - else: - mod = 'all' - temp_Time = temp_Time[0:-1] - temp_Time = temp_Time.split(',') - index = 0 - for n in temp_Time: - temp_Time[index] = n.split('-') - index += 1 - index = 0 - for n in temp_Time: - if len(n) > 1 : - for o in range( int(n[0]) , int(n[1]) + 1): - if ( o%2==0 and mod is 'double' ) or ( o%2==1 and mod is 'single' ) or ( mod is 'all' ): - days_per_week[o] = max(days_per_week[o] , i+1) - Time[o] = 1 - else: - Time[o] = 0 + while color_set[color]: + if color_used <= 0: + break + color = random.randint(0, 8) + temp_lesson['color'] = color + lesson_set[l['Content'][0]['Name']] = color + color_used -= 1 + color_set[color] = 1 + temp_Time = temp_lesson['Time'] + temp_lesson['Time'] = temp_Time[0:int( + temp_Time.find('周') + 1)] + if '单周' in temp_Time: + mod = 'single' + # temp_Time = temp_Time[0:len(temp_Time)-5] + elif '双周' in temp_Time: + mod = 'double' + # temp_Time = temp_Time[0:len(temp_Time)-5] else: - days_per_week[int(n[0])] = max(days_per_week[int(n[0])] , i+1) - Time[int(n[0])] = 1 - index += 1 - temp_lesson['Time_split'] = Time - lesson[i][j*2+k].append(temp_lesson) - data_cache = {'lesson':lesson,'days_per_week':days_per_week,'cur_week':CurWeek} - return {'errcode': 200, 'errmsg': 'ok', 'data': data_cache} \ No newline at end of file + mod = 'all' + # temp_Time = temp_Time[0:-1] + zhou_pos = temp_Time.find('周') + temp_Time = temp_Time[0:zhou_pos] + temp_Time = temp_Time.split(',') + index = 0 + for n in temp_Time: + temp_Time[index] = n.split('-') + index += 1 + index = 0 + for n in temp_Time: + if len(n) > 1: + for o in range(int(n[0]), int(n[1]) + 1): + if (o % 2 == 0 and mod == 'double') or (o % 2 == 1 and mod == 'single') or (mod == 'all'): + days_per_week[o] = max( + days_per_week[o], i+1) + Time[o] = 1 + else: + Time[o] = 0 + else: + days_per_week[int(n[0])] = max( + days_per_week[int(n[0])], i+1) + Time[int(n[0])] = 1 + index += 1 + temp_lesson['Time_split'] = Time + lesson[i][j*2+k].append(temp_lesson) + self.__schedule_data = {'lesson': lesson, + 'days_per_week': days_per_week, 'cur_week': CurWeek} + return ('ok', 200) + + # 获取信息 ----------------------------------------------------------------------------- + def getData(self): + # return ( + # { + # 'student_id': self.__student_id, + # 'student_name': self.__student_name, + # 'grade': self.__grade_data, + # 'schedule': self.__schedule_data + # }, + # 200 + # ) + return { + 'errcode': '200', + 'errmsg': 'ok', + 'student_name': self.__student_name, + 'student_id': self.__student_id, + 'grade': self.__grade_data, + 'schedule': self.__schedule_data + } diff --git a/logs/error.log b/logs/error.log index 8b13789..c62aba3 100644 --- a/logs/error.log +++ b/logs/error.log @@ -1 +1,203 @@ - +[2020-04-10 10:47:35 +0000] [9] [INFO] Starting gunicorn 20.0.4 +[2020-04-10 10:47:35 +0000] [9] [INFO] Listening at: http://0.0.0.0:80 (9) +[2020-04-10 10:47:35 +0000] [9] [INFO] Using worker: sync +[2020-04-10 10:47:35 +0000] [11] [INFO] Booting worker with pid: 11 +[2020-04-10 10:47:35 +0000] [12] [INFO] Booting worker with pid: 12 +[2020-04-10 10:47:35 +0000] [13] [INFO] Booting worker with pid: 13 +[2020-04-10 10:47:35 +0000] [14] [INFO] Booting worker with pid: 14 +[2020-04-10 11:58:56 +0000] [10] [INFO] Starting gunicorn 20.0.4 +[2020-04-10 11:58:56 +0000] [10] [INFO] Listening at: http://0.0.0.0:80 (10) +[2020-04-10 11:58:56 +0000] [10] [INFO] Using worker: sync +[2020-04-10 11:58:56 +0000] [12] [INFO] Booting worker with pid: 12 +[2020-04-10 11:58:56 +0000] [13] [INFO] Booting worker with pid: 13 +[2020-04-10 11:58:56 +0000] [14] [INFO] Booting worker with pid: 14 +[2020-04-10 11:58:56 +0000] [15] [INFO] Booting worker with pid: 15 +[2020-04-10 11:58:56 +0000] [12] [ERROR] Exception in worker process +Traceback (most recent call last): + File "/usr/local/lib/python3.7/site-packages/gunicorn/arbiter.py", line 583, in spawn_worker + worker.init_process() + File "/usr/local/lib/python3.7/site-packages/gunicorn/workers/base.py", line 119, in init_process + self.load_wsgi() + File "/usr/local/lib/python3.7/site-packages/gunicorn/workers/base.py", line 144, in load_wsgi + self.wsgi = self.app.wsgi() + File "/usr/local/lib/python3.7/site-packages/gunicorn/app/base.py", line 67, in wsgi + self.callable = self.load() + File "/usr/local/lib/python3.7/site-packages/gunicorn/app/wsgiapp.py", line 49, in load + return self.load_wsgiapp() + File "/usr/local/lib/python3.7/site-packages/gunicorn/app/wsgiapp.py", line 39, in load_wsgiapp + return util.import_app(self.app_uri) + File "/usr/local/lib/python3.7/site-packages/gunicorn/util.py", line 358, in import_app + mod = importlib.import_module(module) + File "/usr/local/lib/python3.7/importlib/__init__.py", line 127, in import_module + return _bootstrap._gcd_import(name[level:], package, level) + File "", line 1006, in _gcd_import + File "", line 983, in _find_and_load + File "", line 967, in _find_and_load_unlocked + File "", line 677, in _load_unlocked + File "", line 728, in exec_module + File "", line 219, in _call_with_frames_removed + File "/app/cherry.py", line 5, in + from allFunction import manageLogin, managePhoto, manageAdd, manageDel, manageGet, manageDetail, manageComment, manageScheduleUpload, manageScheduleGet +ImportError: cannot import name 'managePhoto' from 'allFunction' (./lib/allFunction.py) +[2020-04-10 11:58:56 +0000] [12] [INFO] Worker exiting (pid: 12) +[2020-04-10 11:58:56 +0000] [13] [ERROR] Exception in worker process +Traceback (most recent call last): + File "/usr/local/lib/python3.7/site-packages/gunicorn/arbiter.py", line 583, in spawn_worker + worker.init_process() + File "/usr/local/lib/python3.7/site-packages/gunicorn/workers/base.py", line 119, in init_process + self.load_wsgi() + File "/usr/local/lib/python3.7/site-packages/gunicorn/workers/base.py", line 144, in load_wsgi + self.wsgi = self.app.wsgi() + File "/usr/local/lib/python3.7/site-packages/gunicorn/app/base.py", line 67, in wsgi + self.callable = self.load() + File "/usr/local/lib/python3.7/site-packages/gunicorn/app/wsgiapp.py", line 49, in load + return self.load_wsgiapp() + File "/usr/local/lib/python3.7/site-packages/gunicorn/app/wsgiapp.py", line 39, in load_wsgiapp + return util.import_app(self.app_uri) + File "/usr/local/lib/python3.7/site-packages/gunicorn/util.py", line 358, in import_app + mod = importlib.import_module(module) + File "/usr/local/lib/python3.7/importlib/__init__.py", line 127, in import_module + return _bootstrap._gcd_import(name[level:], package, level) + File "", line 1006, in _gcd_import + File "", line 983, in _find_and_load + File "", line 967, in _find_and_load_unlocked + File "", line 677, in _load_unlocked + File "", line 728, in exec_module + File "", line 219, in _call_with_frames_removed + File "/app/cherry.py", line 5, in + from allFunction import manageLogin, managePhoto, manageAdd, manageDel, manageGet, manageDetail, manageComment, manageScheduleUpload, manageScheduleGet +ImportError: cannot import name 'managePhoto' from 'allFunction' (./lib/allFunction.py) +[2020-04-10 11:58:56 +0000] [13] [INFO] Worker exiting (pid: 13) +[2020-04-10 11:58:56 +0000] [14] [ERROR] Exception in worker process +Traceback (most recent call last): + File "/usr/local/lib/python3.7/site-packages/gunicorn/arbiter.py", line 583, in spawn_worker + worker.init_process() + File "/usr/local/lib/python3.7/site-packages/gunicorn/workers/base.py", line 119, in init_process + self.load_wsgi() + File "/usr/local/lib/python3.7/site-packages/gunicorn/workers/base.py", line 144, in load_wsgi + self.wsgi = self.app.wsgi() + File "/usr/local/lib/python3.7/site-packages/gunicorn/app/base.py", line 67, in wsgi + self.callable = self.load() + File "/usr/local/lib/python3.7/site-packages/gunicorn/app/wsgiapp.py", line 49, in load + return self.load_wsgiapp() + File "/usr/local/lib/python3.7/site-packages/gunicorn/app/wsgiapp.py", line 39, in load_wsgiapp + return util.import_app(self.app_uri) + File "/usr/local/lib/python3.7/site-packages/gunicorn/util.py", line 358, in import_app + mod = importlib.import_module(module) + File "/usr/local/lib/python3.7/importlib/__init__.py", line 127, in import_module + return _bootstrap._gcd_import(name[level:], package, level) + File "", line 1006, in _gcd_import + File "", line 983, in _find_and_load + File "", line 967, in _find_and_load_unlocked + File "", line 677, in _load_unlocked + File "", line 728, in exec_module + File "", line 219, in _call_with_frames_removed + File "/app/cherry.py", line 5, in + from allFunction import manageLogin, managePhoto, manageAdd, manageDel, manageGet, manageDetail, manageComment, manageScheduleUpload, manageScheduleGet +ImportError: cannot import name 'managePhoto' from 'allFunction' (./lib/allFunction.py) +[2020-04-10 11:58:56 +0000] [14] [INFO] Worker exiting (pid: 14) +[2020-04-10 11:58:56 +0000] [15] [ERROR] Exception in worker process +Traceback (most recent call last): + File "/usr/local/lib/python3.7/site-packages/gunicorn/arbiter.py", line 583, in spawn_worker + worker.init_process() + File "/usr/local/lib/python3.7/site-packages/gunicorn/workers/base.py", line 119, in init_process + self.load_wsgi() + File "/usr/local/lib/python3.7/site-packages/gunicorn/workers/base.py", line 144, in load_wsgi + self.wsgi = self.app.wsgi() + File "/usr/local/lib/python3.7/site-packages/gunicorn/app/base.py", line 67, in wsgi + self.callable = self.load() + File "/usr/local/lib/python3.7/site-packages/gunicorn/app/wsgiapp.py", line 49, in load + return self.load_wsgiapp() + File "/usr/local/lib/python3.7/site-packages/gunicorn/app/wsgiapp.py", line 39, in load_wsgiapp + return util.import_app(self.app_uri) + File "/usr/local/lib/python3.7/site-packages/gunicorn/util.py", line 358, in import_app + mod = importlib.import_module(module) + File "/usr/local/lib/python3.7/importlib/__init__.py", line 127, in import_module + return _bootstrap._gcd_import(name[level:], package, level) + File "", line 1006, in _gcd_import + File "", line 983, in _find_and_load + File "", line 967, in _find_and_load_unlocked + File "", line 677, in _load_unlocked + File "", line 728, in exec_module + File "", line 219, in _call_with_frames_removed + File "/app/cherry.py", line 5, in + from allFunction import manageLogin, managePhoto, manageAdd, manageDel, manageGet, manageDetail, manageComment, manageScheduleUpload, manageScheduleGet +ImportError: cannot import name 'managePhoto' from 'allFunction' (./lib/allFunction.py) +[2020-04-10 11:58:56 +0000] [15] [INFO] Worker exiting (pid: 15) +[2020-04-10 12:01:52 +0000] [9] [INFO] Starting gunicorn 20.0.4 +[2020-04-10 12:01:52 +0000] [9] [INFO] Listening at: http://0.0.0.0:80 (9) +[2020-04-10 12:01:52 +0000] [9] [INFO] Using worker: sync +[2020-04-10 12:01:52 +0000] [11] [INFO] Booting worker with pid: 11 +[2020-04-10 12:01:52 +0000] [12] [INFO] Booting worker with pid: 12 +[2020-04-10 12:01:52 +0000] [13] [INFO] Booting worker with pid: 13 +[2020-04-10 12:01:53 +0000] [14] [INFO] Booting worker with pid: 14 +[2020-04-15 08:34:13,823] ERROR in app: Exception on /api/login [POST] +Traceback (most recent call last): + File "/usr/local/lib/python3.7/site-packages/flask/app.py", line 2446, in wsgi_app + response = self.full_dispatch_request() + File "/usr/local/lib/python3.7/site-packages/flask/app.py", line 1951, in full_dispatch_request + rv = self.handle_user_exception(e) + File "/usr/local/lib/python3.7/site-packages/flask/app.py", line 1820, in handle_user_exception + reraise(exc_type, exc_value, tb) + File "/usr/local/lib/python3.7/site-packages/flask/_compat.py", line 39, in reraise + raise value + File "/usr/local/lib/python3.7/site-packages/flask/app.py", line 1949, in full_dispatch_request + rv = self.dispatch_request() + File "/usr/local/lib/python3.7/site-packages/flask/app.py", line 1935, in dispatch_request + return self.view_functions[rule.endpoint](**req.view_args) + File "/app/cherry.py", line 24, in login + res = manageLogin(request) + File "./lib/allFunction.py", line 23, in manageLogin + schedule = getSchedule(res['ip'], res['s']) + File "./lib/crawler.py", line 265, in getSchedule + for o in range( int(n[0]) , int(n[1]) + 1): +ValueError: invalid literal for int() with base 10: '16周[3' +[2020-04-15 08:39:41 +0000] [9] [INFO] Starting gunicorn 20.0.4 +[2020-04-15 08:39:41 +0000] [9] [INFO] Listening at: http://0.0.0.0:80 (9) +[2020-04-15 08:39:41 +0000] [9] [INFO] Using worker: sync +[2020-04-15 08:39:41 +0000] [11] [INFO] Booting worker with pid: 11 +[2020-04-15 08:39:41 +0000] [12] [INFO] Booting worker with pid: 12 +[2020-04-15 08:39:41 +0000] [13] [INFO] Booting worker with pid: 13 +[2020-04-15 08:39:41 +0000] [14] [INFO] Booting worker with pid: 14 +[2020-04-15 08:42:08,965] ERROR in app: Exception on /api/login [POST] +Traceback (most recent call last): + File "/usr/local/lib/python3.7/site-packages/flask/app.py", line 2446, in wsgi_app + response = self.full_dispatch_request() + File "/usr/local/lib/python3.7/site-packages/flask/app.py", line 1951, in full_dispatch_request + rv = self.handle_user_exception(e) + File "/usr/local/lib/python3.7/site-packages/flask/app.py", line 1820, in handle_user_exception + reraise(exc_type, exc_value, tb) + File "/usr/local/lib/python3.7/site-packages/flask/_compat.py", line 39, in reraise + raise value + File "/usr/local/lib/python3.7/site-packages/flask/app.py", line 1949, in full_dispatch_request + rv = self.dispatch_request() + File "/usr/local/lib/python3.7/site-packages/flask/app.py", line 1935, in dispatch_request + return self.view_functions[rule.endpoint](**req.view_args) + File "/app/cherry.py", line 24, in login + res = manageLogin(request) + File "./lib/allFunction.py", line 23, in manageLogin + schedule = getSchedule(res['ip'], res['s']) + File "./lib/crawler.py", line 265, in getSchedule + for o in range( int(n[0]) , int(n[1]) + 1): +ValueError: invalid literal for int() with base 10: '4周[3' +[2020-04-15 08:42:29,909] ERROR in app: Exception on /api/login [POST] +Traceback (most recent call last): + File "/usr/local/lib/python3.7/site-packages/flask/app.py", line 2446, in wsgi_app + response = self.full_dispatch_request() + File "/usr/local/lib/python3.7/site-packages/flask/app.py", line 1951, in full_dispatch_request + rv = self.handle_user_exception(e) + File "/usr/local/lib/python3.7/site-packages/flask/app.py", line 1820, in handle_user_exception + reraise(exc_type, exc_value, tb) + File "/usr/local/lib/python3.7/site-packages/flask/_compat.py", line 39, in reraise + raise value + File "/usr/local/lib/python3.7/site-packages/flask/app.py", line 1949, in full_dispatch_request + rv = self.dispatch_request() + File "/usr/local/lib/python3.7/site-packages/flask/app.py", line 1935, in dispatch_request + return self.view_functions[rule.endpoint](**req.view_args) + File "/app/cherry.py", line 24, in login + res = manageLogin(request) + File "./lib/allFunction.py", line 23, in manageLogin + schedule = getSchedule(res['ip'], res['s']) + File "./lib/crawler.py", line 265, in getSchedule + for o in range( int(n[0]) , int(n[1]) + 1): +ValueError: invalid literal for int() with base 10: '4周[3' diff --git a/test/__init__.py b/test/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/test/crawler_test.py b/test/crawler_test.py new file mode 100644 index 0000000..e25d486 --- /dev/null +++ b/test/crawler_test.py @@ -0,0 +1,25 @@ +import unittest +from ..lib.crawler import Crawler + +c = Crawler('2017002372', '623910ert&', '15143211127') + +class TestCrawler(unittest.TestCase): + # 测试链接 + def test_connection(self): + self.assertEqual(c.connection(), ('ok', 200)) + + # 测试获取成绩 + def test_grade(self): + self.assertEqual(c.getGrade(), ('ok', 200)) + + # 测试获取课表 + def test_schedule(self): + self.assertEqual(c.getSchedule(), ('ok', 200)) + + # 测试返回信息 + def test_getData(self): + get_res = c.getData() + self.assertEqual(get_res['errcode'], '200') + +if __name__ == '__main__': + unittest.main()