cherry_be/lib/crawler.py
2020-10-09 16:09:02 +08:00

336 lines
16 KiB
Python

import json
import requests
from urllib.parse import quote
import base64
from bs4 import BeautifulSoup
import random
import sys
class Crawler(object):
    """Fetch a student's grades and timetable from the cust.edu.cn
    academic-affairs API, reached through the school's WebVPN/CAS login.

    Call ``connection()`` first; on ``('ok', 200)`` call ``getGrade()``
    and/or ``getSchedule()``, then read everything back with ``getData()``.

    Every public method returns a ``(payload, status)`` tuple. Statuses used:
    200 success, 510 wrong credentials, 511 phone number required,
    512 upstream failure, 513 error message reported by the API.
    """

    # CAS login form (GET to obtain the form, POST to submit it).
    _CAS_LOGIN_URL = (
        'http://mysso-cust-edu-cn-s.webvpn.cust.edu.cn:8118/cas/login'
        '?service=https%3A%2F%2Fwebvpn.cust.edu.cn%2Fauth%2Fcas_validate%3Fentry_id%3D1'
    )
    # Portal landing page, used to check whether the login succeeded.
    _PORTAL_URL = 'http://portal-cust-edu-cn-s.webvpn.cust.edu.cn:8118/custp/index'
    # Service URL of the academic-affairs system and the CAS URL issuing its ticket.
    _JW_SERVICE_URL = 'http://jwgls1.cust.edu.cn:8080/welcome'
    _JW_TICKET_URL = (
        'http://mysso-cust-edu-cn-s.webvpn.cust.edu.cn:8118/cas/login'
        '?service=http://jwgls1.cust.edu.cn:8080/welcome'
    )
    # Academic-affairs JSON API, proxied through WebVPN.
    _JW_API = 'http://jwgls1-cust-edu-cn-8080-p.webvpn.cust.edu.cn:8118/api/'
    _SSO_LOGIN_URL = _JW_API + 'LoginApi/LGSSOLocalLogin?sf_request_type=ajax'
    _GRADE_URL = _JW_API + (
        'ClientStudent/QueryService/GradeQueryApi/GetDataByStudent'
        '?sf_request_type=ajax'
    )
    _CUR_WEEK_URL = _JW_API + (
        'ClientStudent/Home/StudentHomeApi/GetHomeCurWeekTime'
        '?sf_request_type=ajax'
    )
    _SCHEDULE_URL = _JW_API + (
        'ClientStudent/Home/StudentHomeApi/QueryStudentScheduleData'
        '?sf_request_type=ajax'
    )

    _GRADE_PARAM = 'JTdCJTIyU2hvd0dyYWRlVHlwZSUyMiUzQTElN0Q='
    _GRADE_MENU_ID = '4443798E-EB6E-4D88-BFBD-BB0A76FF6BD5'
    _SCHEDULE_PARAM = 'JTdCJTdE'
    _SCHEDULE_MENU_ID = 'F71C97D5-D3E2-4FDA-9209-D7FA8626390E'

    # Keys of the three day parts in each entry of the schedule response.
    _DAY_SLOTS = ('AM__TimePieces', 'PM__TimePieces', 'EV__TimePieces')
    # Length of the per-week arrays; a week index of 23 or more raises IndexError.
    _WEEK_SLOTS = 23
    # Number of colour indices handed out to lessons (0..8).
    _COLOR_COUNT = 9

    def __init__(self, username, password, phone):
        """Store the credentials; no network traffic happens here.

        Args:
            username: account name posted to the CAS login form.
            password: password posted to the CAS login form.
            phone: phone number submitted if the portal asks for one; may be
                empty, in which case ``connection()`` returns status 511 when
                the portal requires it.
        """
        self.__username = username
        self.__password = password
        self.__phone = phone
        self.__session = None
        self.__student_id = None
        self.__student_name = None
        self.__grade_data = ''
        self.__schedule_data = ''

    @staticmethod
    def _query_body(param, menu_id):
        """Build the JSON body shared by the grade and schedule queries."""
        return {
            "param": param,
            "__permission": {"MenuID": menu_id, "Operation": 0},
            "__log": {"MenuID": menu_id, "Logtype": 6, "Context": "查询"},
        }

    @staticmethod
    def _ratio(numerator, denominator):
        """Return ``numerator / denominator``, or 0 when the denominator is 0."""
        return numerator / denominator if denominator else 0

    @classmethod
    def _term_entry(cls, term_time, points, credit, passed, failed, records):
        """Assemble the summary dict stored for one term."""
        return {
            'term_time': term_time,
            'term_GPA': cls._ratio(points, credit),
            'term_kill': passed,
            'term_dead': failed,
            'term_credit': credit,
            'term_grade': records,
        }

    # Connect to the academic-affairs system ----------------------------------
    def connection(self):
        """Log in through CAS and open a session on the academic-affairs API.

        On success the student's name and id are cached for ``getData()``.

        Returns:
            ``('ok', 200)`` on success;
            ``('请填写手机号', 511)`` when the portal asks for a phone number
            and none was supplied;
            ``('账号或者密码错误', 510)`` when the portal page has no logout link;
            ``(message, 513)`` when the API answers with ``state == 1``;
            ``('教务挂了', 512)`` when any exception is raised on the way.
        """
        try:
            self.__session = requests.Session()
            # Fetch the unified-auth login page to obtain the form token.
            r = self.__session.get(url=self._CAS_LOGIN_URL)
            soup = BeautifulSoup(r.text, 'html.parser')
            execution = soup.find_all(name='input')[6]['value']
            formdata = {
                'username': self.__username,
                'password': self.__password,
                'execution': execution,
                '_eventId': 'submit',
                'geolocation': ''
            }
            r = self.__session.post(url=self._CAS_LOGIN_URL, data=formdata)
            soup = BeautifulSoup(r.text, 'html.parser')
            title = soup.find(name='title')
            # The portal may interpose a "set your phone number" page.
            if title.text == "手机号设置":
                if not self.__phone:
                    return ('请填写手机号', 511)
                execution = soup.find_all(name='input')[1]['value']
                formdata = {
                    'phone': self.__phone,
                    'execution': execution,
                    '_eventId': 'submit',
                    'submit': '提交'
                }
                r = self.__session.post(url=self._CAS_LOGIN_URL, data=formdata)
            r = self.__session.get(url=self._PORTAL_URL)
            soup = BeautifulSoup(r.text, 'html.parser')
            # A logged-in portal page has a logout link as its second-to-last <a>.
            if soup.find_all(name='a')[-2]['href'] != 'logout':
                return ('账号或者密码错误', 510)
            r = self.__session.get(url=self._JW_TICKET_URL, allow_redirects=False)
            # NOTE(review): assumes the ticket starts at character 72 of the
            # redirect URL -- confirm if the proxy host name ever changes.
            ticket = r.headers['Location'][72:]
            sso_param = {'Ticket': ticket, 'Url': self._JW_SERVICE_URL}
            # The API expects base64(url-quote(json)) in the "param" field.
            sso_param = base64.b64encode(
                quote(json.dumps(sso_param)).encode('utf-8')).decode('utf-8')
            r = self.__session.post(
                url=self._SSO_LOGIN_URL,
                data=json.dumps({'param': sso_param}),
                headers={'Content-Type': 'application/json'})
            data = json.loads(r.content.decode('utf-8'))
            # state == 1: the API reports a problem (e.g. no student record).
            if data['state'] == 1:
                return (data['message'], 513)
            self.__student_name = data['data']['StudentDto']['XM']
            self.__student_id = data['data']['StudentDto']['XH']
            return ('ok', 200)
        except Exception as e:
            # Top-level boundary: any scraping or network failure maps to 512.
            print(e)
            return ('教务挂了', 512)

    # Fetch grades -------------------------------------------------------------
    def getGrade(self):
        """Download the grade list and aggregate it per term.

        Stores ``{'total': ..., 'split': [...]}`` for ``getData()``: ``total``
        holds the overall statistics reported by the API plus
        ``total_bixiu_GPA`` (grade points of passed compulsory courses divided
        by their credits); ``split`` holds one summary per term in the order
        the records arrive. Records whose score is empty or 0 are ignored.
        Ratios with a zero denominator are reported as 0.

        Requires a session opened by ``connection()``.

        Returns:
            ``('ok', 200)`` on success, ``('教务挂了', 512)`` when the API
            reports a non-zero ``state``.
        """
        r = self.__session.post(
            url=self._GRADE_URL,
            data=json.dumps(
                self._query_body(self._GRADE_PARAM, self._GRADE_MENU_ID)),
            headers={'Content-Type': 'application/json'}
        )
        data = json.loads(r.content.decode('utf-8'))
        if data['state'] != 0:
            return ('教务挂了', 512)
        total = data['data']['GradeStatistics']
        split = data['data']['GradeList']
        # Overall statistics, renamed.
        total_grade = {
            'total_GPA': total['PJJD'],
            'total_credit': total['SDXF'],
            'total_kill': total['TGMS'],
            'total_dead': total['WTGMS']
        }
        # No records at all: nothing to aggregate.
        if not split:
            total_grade['total_bixiu_GPA'] = 0
            self.__grade_data = {'total': total_grade, 'split': []}
            return ('ok', 200)
        # Term codes are "<year><1|2>"; records arrive newest term first.
        first_term = split[0]['KSXNXQ']
        last_term = split[len(split) - 1]['KSXNXQ']
        first_term = (int(first_term[0:4]), int(first_term[4:5]))
        last_term = (int(last_term[0:4]), int(last_term[4:5]))
        # Every term code from the oldest year up to the newest term,
        # then reversed so index 0 is the newest term.
        total_term = []
        for year in range(last_term[0], first_term[0] + 1):
            for half in range(1, 3):
                total_term.append(str(year) + str(half))
                if year == first_term[0] and half == first_term[1]:
                    break
        total_term.reverse()

        grade_list = []
        this_term = 0          # index into total_term of the term being filled
        term_passed = 0        # lessons passed in the current term
        term_failed = 0        # lessons failed in the current term
        term_credit = 0        # credits of distinct lessons in the current term
        term_points = 0        # sum of credit * grade point in the current term
        term_records = []      # per-record details of the current term
        lesson_passes = 0      # passing records seen for the current lesson
        lesson_name = ''       # name of the lesson currently being counted
        first_record = True    # no graded record processed yet
        bixiu_credit = 0       # credits of passed compulsory records
        bixiu_points = 0       # credit * grade point of passed compulsory records

        for item in split:
            if not item['YXCJ']:
                continue
            passed = item['YXCJ'] >= 60
            if item['LessonInfo']['KCMC'] != lesson_name:
                # A new lesson starts: settle pass/fail of the previous one.
                # The settlement is deferred by one lesson because consecutive
                # records with the same name count as a single lesson.
                if first_record:
                    first_record = False
                elif lesson_passes > 0:
                    term_passed += 1
                else:
                    term_failed += 1
                lesson_passes = 0
                lesson_name = item['LessonInfo']['KCMC']
                if item['KSXNXQ'] != total_term[this_term]:
                    # Term changed: flush the finished term. A term with no
                    # graded record is skipped instead of dividing by zero.
                    if term_records:
                        grade_list.append(self._term_entry(
                            total_term[this_term], term_points, term_credit,
                            term_passed, term_failed, term_records))
                    while item['KSXNXQ'] != total_term[this_term]:
                        this_term += 1
                    term_passed, term_failed = 0, 0
                    term_credit, term_points = 0, 0
                    term_records = []
                # Credits are counted once per distinct lesson.
                term_credit += item['XF']
            if passed:
                points = item['XF'] * (item['YXCJ'] - 50) / 10
                lesson_passes += 1
                term_points += points
                if item['KCXZ'] == '必修':
                    bixiu_points += points
                    bixiu_credit += item['XF']
            # NOTE(review): the source lost its indentation; every graded
            # record (including repeats of the same lesson) is listed here --
            # confirm this matches the intended output.
            term_records.append({
                'title': item['LessonInfo']['KCMC'],
                'credit': item['XF'],
                'grade': item['YXCJ'],
                'kill': 'yes' if passed else 'no',
                'class': item['KSXZ']
            })

        # Settle the last lesson and flush the last term, if anything was graded.
        if not first_record:
            if lesson_passes > 0:
                term_passed += 1
            else:
                term_failed += 1
            grade_list.append(self._term_entry(
                total_term[this_term], term_points, term_credit,
                term_passed, term_failed, term_records))
        total_grade['total_bixiu_GPA'] = self._ratio(bixiu_points, bixiu_credit)
        self.__grade_data = {
            'total': total_grade,
            'split': grade_list
        }
        return ('ok', 200)

    # Fetch the timetable ------------------------------------------------------
    def getSchedule(self):
        """Download the timetable and reshape it into a 7 x 6 grid.

        Stores for ``getData()`` a dict with:
          * ``lesson``: 7 rows (days) x 6 cells (3 day parts x 2 pieces); a
            cell is 0 when empty, otherwise a list of lesson dicts built from
            the record's ``Content`` key/name pairs plus ``color`` (0..8, the
            same for every occurrence of a lesson), ``Time`` (the week text up
            to and including the first '周') and ``Time_split`` (a 23-element
            0/1 list indexed by week number);
          * ``days_per_week``: for each week number, the highest 1-based day
            index that has a lesson;
          * ``cur_week``: the current week reported by the API.

        Requires a session opened by ``connection()``.

        Returns:
            ``('ok', 200)`` on success, ``('教务挂了', 512)`` when the schedule
            query reports a non-zero ``state``.
        """
        headers = {'Content-Type': 'application/json'}
        body = json.dumps(
            self._query_body(self._SCHEDULE_PARAM, self._SCHEDULE_MENU_ID))
        r = self.__session.post(
            url=self._CUR_WEEK_URL, data=body, headers=headers)
        cur_week = json.loads(r.content.decode('utf-8'))['data']['CurWeek']
        r = self.__session.post(
            url=self._SCHEDULE_URL, data=body, headers=headers)
        data = json.loads(r.content.decode('utf-8'))
        if data['state'] != 0:
            return ('教务挂了', 512)
        days = data['data']['AdjustDays']
        days_per_week = [0] * self._WEEK_SLOTS
        lesson = [[0] * 6 for _ in range(7)]
        lesson_color = {}
        color_taken = [False] * self._COLOR_COUNT
        for day_index in range(7):
            for slot_index, slot_key in enumerate(self._DAY_SLOTS):
                for piece_index in range(2):
                    dtos = days[day_index][slot_key][piece_index]['Dtos']
                    if not dtos:
                        continue
                    cell = []
                    lesson[day_index][slot_index * 2 + piece_index] = cell
                    for dto in dtos:
                        entry = {}
                        # Repeated keys are merged into a comma-separated value.
                        for field in dto['Content']:
                            if entry.get(field['Key']):
                                entry[field['Key']] += ',' + field['Name']
                            else:
                                entry[field['Key']] = field['Name']
                        # The first Content item identifies the lesson. Use a
                        # membership test: colour 0 is a valid assignment.
                        name = dto['Content'][0]['Name']
                        if name in lesson_color:
                            color = lesson_color[name]
                        else:
                            free = [c for c in range(self._COLOR_COUNT)
                                    if not color_taken[c]]
                            # Prefer an unused colour; reuse one at random
                            # once all of them are taken.
                            if free:
                                color = random.choice(free)
                            else:
                                color = random.randint(
                                    0, self._COLOR_COUNT - 1)
                            lesson_color[name] = color
                            color_taken[color] = True
                        entry['color'] = color

                        raw_time = entry['Time']
                        if '单周' in raw_time:
                            mode = 'single'   # odd weeks only
                        elif '双周' in raw_time:
                            mode = 'double'   # even weeks only
                        else:
                            mode = 'all'
                        # NOTE(review): the delimiter literal was empty in the
                        # previous revision (find('') is always 0, which broke
                        # the parsing below); '周' is inferred from the
                        # '单周'/'双周' checks -- confirm against live data.
                        zhou_pos = raw_time.find('周')
                        if zhou_pos == -1:
                            zhou_pos = len(raw_time)
                        entry['Time'] = raw_time[0:zhou_pos + 1]
                        spans = [part.split('-')
                                 for part in raw_time[0:zhou_pos].split(',')]

                        week_flags = [0] * self._WEEK_SLOTS
                        for span in spans:
                            if len(span) > 1:
                                # "a-b": every week in the range that matches
                                # the odd/even mode.
                                for week in range(int(span[0]),
                                                  int(span[1]) + 1):
                                    if ((week % 2 == 0 and mode == 'double')
                                            or (week % 2 == 1
                                                and mode == 'single')
                                            or mode == 'all'):
                                        days_per_week[week] = max(
                                            days_per_week[week],
                                            day_index + 1)
                                        week_flags[week] = 1
                                    else:
                                        week_flags[week] = 0
                            else:
                                # A single week number is taken as is.
                                week = int(span[0])
                                days_per_week[week] = max(
                                    days_per_week[week], day_index + 1)
                                week_flags[week] = 1
                        entry['Time_split'] = week_flags
                        cell.append(entry)
        self.__schedule_data = {'lesson': lesson,
                                'days_per_week': days_per_week,
                                'cur_week': cur_week}
        return ('ok', 200)

    # Return everything collected so far ---------------------------------------
    def getData(self):
        """Return the cached student info, grades and schedule.

        Returns:
            ``(data, 200)`` where ``data`` has the keys ``student_id``,
            ``student_name``, ``grade`` and ``schedule``. ``grade`` and
            ``schedule`` are empty strings until the matching fetch method has
            succeeded; the student fields are ``None`` until ``connection()``
            has succeeded.
        """
        return (
            {
                'student_id': self.__student_id,
                'student_name': self.__student_name,
                'grade': self.__grade_data,
                'schedule': self.__schedule_data
            },
            200
        )