From b660dd033c843f820b3c7326776d85291ddf0b74 Mon Sep 17 00:00:00 2001 From: ElonLi <531347509@qq.com> Date: Sat, 4 Sep 2021 10:50:46 +0800 Subject: [PATCH] cherry reborn!!! --- .gitignore | 2 +- README.md | 7 --- cherry.py | 9 ++- lib/allFunction.py | 37 +++++++---- lib/process/login.py | 2 +- lib/public/crawler.py | 138 +++++++++++++++++++++++++----------------- lib/public/db.py | 6 +- lib/public/ocr.py | 12 ++++ 8 files changed, 133 insertions(+), 80 deletions(-) create mode 100644 lib/public/ocr.py diff --git a/.gitignore b/.gitignore index 3c84a8c..575828a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,3 @@ __pycache__ .vscode/settings.json -venv \ No newline at end of file +venv diff --git a/README.md b/README.md index 070b2a3..bb33952 100644 --- a/README.md +++ b/README.md @@ -8,13 +8,6 @@ python3 -m venv venv . venv/bin/activate // 升级pip pip install --upgrade pip -// 安装flask... -pip install Flask -pip install requests -pip install bs4 -pip install gunicorn -pip install pymongo -pip install flask_cors // 根据依赖文件安装环境 pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple // 设置全局变量 diff --git a/cherry.py b/cherry.py index a12643b..4a8ac75 100644 --- a/cherry.py +++ b/cherry.py @@ -2,7 +2,7 @@ import sys sys.path.append('./lib') # 引入处理函数 -from allFunction import manageLogin, manageScheduleUpload, manageScheduleGet +from allFunction import manageLogin, manageScheduleUpload, manageScheduleGet, manageSubmitVerificationCode # 引入flask from flask import Flask, request, session, redirect # 初始化app @@ -24,6 +24,11 @@ def login(): res = manageLogin(request) return res +@app.route('/api/submitVC', methods=['POST']) +def submitVC(): + res = manageSubmitVerificationCode(request) + return res + # 更新课表游戏排名信息 @app.route('/api/game/schedule/upload',methods=['POST']) def schedule_upload(): @@ -43,4 +48,4 @@ def miss(e): # 本地运行启动 if __name__ == '__main__': - app.run(host="0.0.0.0", debug=True, port="80") \ No newline at end of file + app.run(host="127.0.0.1", debug=True, port="7980") \ No newline at end of file diff --git a/lib/allFunction.py b/lib/allFunction.py index 4db5c6b..611df39 100644 --- a/lib/allFunction.py +++ b/lib/allFunction.py @@ -1,11 +1,16 @@ # coding=utf-8 +import sys + +from requests.sessions import session +sys.path.append('./lib/public') from crawler import Crawler import json from hashlib import md5 from urllib.parse import urlencode, unquote_plus -from db import addRank, getRank +from db import insertRank, findRank # 主函数 +crawlerCache = {} # 处理登录操作 data:{cid,pwd,sign} # 这里三个接口公用一个session所以合并成一个接口,一个session走到底,一次性返回所有数据 def manageLogin(request): @@ -14,16 +19,26 @@ def manageLogin(request): # MD5校验 checked = checkData(data) if checked: - # 创建会话 - phone = '' - if data.get('phone'): - phone = data['phone'] - c = Crawler(data['cid'], data['pwd'], phone) + c = Crawler() + c.defaultInit(data) res = c.connection() + crawlerCache[data['cid']] = c + return res + else: + return '数据校验失败', 400 + +def manageSubmitVerificationCode(request): + data = json.loads(request.form['data']) + + checked = checkData(data) + if checked: + c = crawlerCache[data['cid']] + res = c.submitVerificationCode(data) if res[-1] == 200: - c.getGrade() - c.getSchedule() - return c.getData() + grade = c.getGrade() + ownSchedule = c.getOwnSchedule() + crawlerCache[data['cid']] = None + return json.dumps({'grade': grade, 'ownSchedule': ownSchedule}), 200 else: return res else: @@ -37,7 +52,7 @@ def manageScheduleUpload(request): checked = checkData(data_cache) data_cache.pop('sign') if checked: - add_res = addRank( data_cache['nick'], data_cache['count'], data_cache['time']) + add_res = insertRank( data_cache['cid'], data_cache['nick'], data_cache['count'], data_cache['time']) return add_res else: return '数据校验失败', 400 @@ -51,7 +66,7 @@ def manageScheduleGet(request): data_cache.pop('sign') if checked: # 获取排名表 - get_res = getRank() + get_res = findRank() return get_res else: return '数据校验失败', 400 diff --git a/lib/process/login.py b/lib/process/login.py index 45ddf57..59ec960 100644 --- a/lib/process/login.py +++ b/lib/process/login.py @@ -78,7 +78,7 @@ def manageCrawler(cid, pwd, phone): return get_res grade = get_res[0] return { - 'user_info': init_res[0] + 'user_info': init_res[0], 'grade': grade, 'schedule': schedule, }, 200 diff --git a/lib/public/crawler.py b/lib/public/crawler.py index 694988e..d20c67e 100644 --- a/lib/public/crawler.py +++ b/lib/public/crawler.py @@ -1,21 +1,27 @@ import json +from json.encoder import JSONEncoder import requests from urllib.parse import quote import base64 from bs4 import BeautifulSoup import random import sys + +from werkzeug.utils import redirect from utils import btoa, signCode +from ocr import getCaptcha class Crawler(object): def __init__(self): - self.__session = None + self.__session = requests.Session() + self.__response = None self.__pwd = None self.__phone = None self.cid = None self.sid = None self.uid = None self.real_name = None + self.baseUrl = None # 获取用户基本信息 def getUserInfo(self): @@ -32,70 +38,82 @@ class Crawler(object): try: # 获取统一身份系统的网页 r = self.__session.get( - url='https://mysso.cust.edu.cn/cas/login?service=https://jwgls1.cust.edu.cn/welcome') + url='https://mysso.cust.edu.cn/cas/login?service=https://jwgl.cust.edu.cn/welcome') soup = BeautifulSoup(r.text, 'html.parser') - execution = soup.find_all(name='input')[6]['value'] + execution = soup.find_all(name='input')[3]['value'] + r = self.__session.get('https://mysso.cust.edu.cn/cas/captcha') formdata = { 'username': self.cid, 'password': self.__pwd, 'execution': execution, + 'captcha': getCaptcha(r.content), '_eventId': 'submit', 'geolocation': '' } r = self.__session.post( - url='https://mysso.cust.edu.cn/cas/login?service=https://jwgls1.cust.edu.cn/welcome', data=formdata) + url='https://mysso.cust.edu.cn/cas/login?service=https://jwgl.cust.edu.cn/welcome', data = formdata) soup = BeautifulSoup(r.text, 'html.parser') - flag = soup.find(name='title') - if(flag.text == "手机号设置"): - if self.__phone == '': - return '请填写手机号', 513 - execution = soup.find_all(name='input')[1]['value'] - formdata = { - 'phone': self.__phone, - 'execution': execution, - '_eventId': 'submit', - 'submit': '提交' - } - r = self.__session.post( - url="https://mysso.cust.edu.cn/cas/login?service=https://jwgls1.cust.edu.cn/welcome", data=formdata) - r = self.__session.get( - url='https://portal.cust.edu.cn/custp/index') - soup = BeautifulSoup(r.text, 'html.parser') - try: - if soup.findAll(name='a')[4]['href'] != 'logout': - raise('账号或密码错误') - except: + loginSuccess = (soup.find(name='title').text != "统一身份认证系统") + self.__response = r + if not loginSuccess: return '账号或者密码错误', 511 - r = self.__session.get( - url='https://mysso.cust.edu.cn/cas/login?service=https://jwgls1.cust.edu.cn/welcome', allow_redirects=False) - ticket = r.headers['Location'][42:] - asp_net_sessionid_param = { - 'Ticket': ticket, 'Url': 'https://jwgls1.cust.edu.cn/welcome'} - asp_net_sessionid_param = base64.b64encode( - quote(json.dumps(asp_net_sessionid_param)).encode('utf-8')).decode('utf-8') - asp_net_sessionid_param = {'param': asp_net_sessionid_param} - headers = {'Content-Type': 'application/json'} - r = self.__session.post(url='https://jwgls1.cust.edu.cn/api/LoginApi/LGSSOLocalLogin?sf_request_type=ajax', - data=json.dumps(asp_net_sessionid_param), headers=headers) - data = json.loads(r.content.decode('utf-8')) - # 提示未建立教务信息 - if data['state'] == 1: - return data['message'], 514 - self.real_name = data['data']['StudentDto']['XM'] - self.sid = data['data']['StudentDto']['XH'] - self.uid = data['data']['StudentDto']['SMXSJBXXID'] - return self.getUserInfo(), 200 + return '登录成功', 200 + except Exception as e: + print(e) + return '教务挂了', 515 + + def submitVerificationCode(self, data): + try: + r = self.__response + soup = BeautifulSoup(r.text, 'html.parser') + execution = soup.find_all(name='input')[2]['value'] + formdata = { + 'yzm': data['vc'], + 'msgType': '', + 'execution': execution, + '_eventId': 'submit' + } + r = self.__session.post( + url = "https://mysso.cust.edu.cn/cas/login?service=https://jwgl.cust.edu.cn/welcome", data = formdata, allow_redirects = True) + self.__response = r + return 'gotcha!', 200 except Exception as e: print(e) return '教务挂了', 515 # 获取成绩 ----------------------------------------------------------------------------- def getGrade(self): + r = self.__response + urlBase = r.url.split('.')[0] + ticket = r.url.split('?')[-1] + param = base64.b64encode(quote(json.dumps({ + "Ticket":ticket.split('=')[-1], + "Url":"https://jwgl.cust.edu.cn/welcome" + })).encode('utf-8')) + self.baseUrl = urlBase + r = self.__session.post( + url = urlBase + '.cust.edu.cn/api/LoginApi/LGSSOLocalLogin', + data = { + "param": param, + "__log": {}, + "__permission": {} + }) headers = {'Content-Type': 'application/json'} r = self.__session.post( - url='https://jwgls1.cust.edu.cn/api/ClientStudent/QueryService/GradeQueryApi/GetDataByStudent?sf_request_type=ajax', - data=json.dumps({"param": "JTdCJTIyU2hvd0dyYWRlVHlwZSUyMiUzQTElN0Q=", "__permission": {"MenuID": "4443798E-EB6E-4D88-BFBD-BB0A76FF6BD5", - "Operation": 0}, "__log": {"MenuID": "4443798E-EB6E-4D88-BFBD-BB0A76FF6BD5", "Logtype": 6, "Context": "查询"}}), + url=urlBase + '.cust.edu.cn/api/ClientStudent/QueryService/GradeQueryApi/GetDataByStudent', + data=json.dumps({ + "param": "JTdCJTIyU2hvd0dyYWRlVHlwZSUyMiUzQTAlN0Q=", + "__permission": { + "MenuID": "4443798E-EB6E-4D88-BFBD-BB0A76FF6BD5", + "Operate": "select", + "Operation": 0 + }, + "__log": { + "MenuID": "4443798E-EB6E-4D88-BFBD-BB0A76FF6BD5", + "Logtype": 6, + "Context": "查询" + } + }), headers=headers ) data = json.loads(r.content.decode('utf-8')) @@ -320,9 +338,20 @@ class Crawler(object): def getOwnSchedule(self): headers = {'Content-Type': 'application/json'} r = self.__session.post( - url='https://jwgls1.cust.edu.cn/api/ClientStudent/Home/StudentHomeApi/QueryStudentScheduleData?sf_request_type=ajax', - data=json.dumps({"param": "JTdCJTdE", "__permission": {"MenuID": "F71C97D5-D3E2-4FDA-9209-D7FA8626390E", - "Operation": 0}, "__log": {"MenuID": "F71C97D5-D3E2-4FDA-9209-D7FA8626390E", "Logtype": 6, "Context": "查询"}}), + url=self.baseUrl + '.cust.edu.cn/api/ClientStudent/Home/StudentHomeApi/QueryStudentScheduleData', + data=json.dumps({ + "param": "JTdCJTdE", + "__permission": { + "MenuID": "00000000-0000-0000-0000-000000000000", + "Operate": "select", + "Operation": "0" + }, + "__log": { + "MenuID": "00000000-0000-0000-0000-000000000000", + "Logtype": 6, + "Context": "查询" + } + }), headers=headers ) data = json.loads(r.content.decode('utf-8')) @@ -336,7 +365,7 @@ class Crawler(object): params = {"KBLX":"2","CXLX":"0","XNXQ":"20202","CXID":self.uid,"CXZC":"0","JXBLX":""} params = str(btoa(json.dumps(params)))[2:-1] r = self.__session.post( - url='https://jwgls1.cust.edu.cn/api/ClientStudent/QueryService/OccupyQueryApi/QueryScheduleData?sf_request_type=ajax', + url=self.baseUrl + '.cust.edu.cn/api/ClientStudent/QueryService/OccupyQueryApi/QueryScheduleData?sf_request_type=ajax', data=json.dumps({"param": params, "__permission": {"MenuID": "F71C97D5-D3E2-4FDA-9209-D7FA8626390E", "Operation": 0}, "__log": {"MenuID": "F71C97D5-D3E2-4FDA-9209-D7FA8626390E", "Logtype": 6, "Context": "查询"}}), headers=headers @@ -357,12 +386,11 @@ class Crawler(object): return 'OK', 200 # 默认初始化 - def defaultInit(self, cid, pwd, phone): - self.cid = cid - self.__pwd = pwd - self.__phone = phone + def defaultInit(self, data): + self.cid = data['cid'] + self.__pwd = data['pwd'] + self.__phone = data['phone'] self.__session = requests.Session() - return self.connection() # 使用我的cookie初始化,用于快速刷新课表 def cookieInit(self, cookies, uid, cid, sid, real_name): diff --git a/lib/public/db.py b/lib/public/db.py index 7b4b52d..ba385e8 100644 --- a/lib/public/db.py +++ b/lib/public/db.py @@ -154,7 +154,7 @@ def updateBg(cid, img_id): 用户更新背景图片 """ try: - col('user').update('cid': cid, {'$set': {'setting.bg': img_id}}) + col('user').update({'cid': cid}, {'$set': {'setting.bg': img_id}}) return 'OK', 200 except Exception as e: print(e) @@ -305,7 +305,7 @@ def userInsertAllCrouse(crouses): print(e) return '用户所有课程数据库插入失败', 108 -def insertRank(nick, count, time): +def insertRank(cid, nick, count, time): """ 向排名表里增加或者覆写数据 """ @@ -345,7 +345,7 @@ def findUserCrouse(sid): return crouse_list, 200 except Exception as e: print(e) - reutrn '用户课程数据库查询失败', 304 + return '用户课程数据库查询失败', 304 def groupInsertCrouse(crouse): """ diff --git a/lib/public/ocr.py b/lib/public/ocr.py new file mode 100644 index 0000000..06a4557 --- /dev/null +++ b/lib/public/ocr.py @@ -0,0 +1,12 @@ +from aip import AipOcr + +APP_ID = '24797034' +API_KEY = 'ykebbHq8GaK2cD1sIPy7PEPu' +SECRET_KEY = '4rU7QPC1oGFbZgIbsDvMGcqPky4kv7kV' +import base64 +from urllib.parse import quote +import json +client = AipOcr(APP_ID, API_KEY, SECRET_KEY) +def getCaptcha(image): + return client.numbers(image)['words_result'][0]['words'] +