From 7ec91c5073fc31684be4994d6968a92be07d3082 Mon Sep 17 00:00:00 2001
From: RainSun
Date: Fri, 9 Oct 2020 16:09:02 +0800
Subject: [PATCH 1/2] Fix incorrect detection of bad account credentials
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 lib/crawler.py      | 29 ++++++++++++-----------------
 lib/crawler_test.py | 15 ++++++++-------
 2 files changed, 20 insertions(+), 24 deletions(-)

diff --git a/lib/crawler.py b/lib/crawler.py
index 72f7cd3..6a80965 100644
--- a/lib/crawler.py
+++ b/lib/crawler.py
@@ -13,7 +13,6 @@ class Crawler(object):
         self.__password = password
         self.__phone = phone
         self.__session = None
-        self.__ip = None
         self.__student_id = None
         self.__student_name = None
         self.__grade_data = ''
@@ -54,21 +53,19 @@ class Crawler(object):
         r = self.__session.get(
             url='http://portal-cust-edu-cn-s.webvpn.cust.edu.cn:8118/custp/index')
         soup = BeautifulSoup(r.text, 'html.parser')
-        try:
-            self.__ip = soup.findAll(name='a')[7]['href'][7:].split("-")
-        except:
+        if soup.findAll(name='a')[-2]['href'] != 'logout':
             return ('账号或者密码错误', 510)
-        r = self.__session.get(url='http://mysso-cust-edu-cn-s.webvpn.cust.edu.cn:8118/cas/login?service=http://' +
-                               self.__ip[0] + '.' + self.__ip[1] + '.' + self.__ip[2] + '.' + self.__ip[3] + ':8080/welcome', allow_redirects=False)
+        r = self.__session.get(
+            url='http://mysso-cust-edu-cn-s.webvpn.cust.edu.cn:8118/cas/login?service=http://jwgls1.cust.edu.cn:8080/welcome', allow_redirects=False)
         ticket = r.headers['Location'][72:]
-        asp_net_sessionid_param = {'Ticket': ticket, 'Url': 'http://' +
-                                   self.__ip[0] + '.' + self.__ip[1] + '.' + self.__ip[2] + '.' + self.__ip[3] + ':8080/welcome'}
+        asp_net_sessionid_param = {
+            'Ticket': ticket, 'Url': 'http://jwgls1.cust.edu.cn:8080/welcome'}
         asp_net_sessionid_param = base64.b64encode(
             quote(json.dumps(asp_net_sessionid_param)).encode('utf-8')).decode('utf-8')
         asp_net_sessionid_param = {'param': asp_net_sessionid_param}
         headers = {'Content-Type': 'application/json'}
-        r = self.__session.post(url='http://' + self.__ip[0] + '-' + self.__ip[1] + '-' + self.__ip[2] + '-' + self.__ip[3] +
-                                '-8080-p.webvpn.cust.edu.cn:8118/api/LoginApi/LGSSOLocalLogin?sf_request_type=ajax', data=json.dumps(asp_net_sessionid_param), headers=headers)
+        r = self.__session.post(url='http://jwgls1-cust-edu-cn-8080-p.webvpn.cust.edu.cn:8118/api/LoginApi/LGSSOLocalLogin?sf_request_type=ajax',
+                                data=json.dumps(asp_net_sessionid_param), headers=headers)
         data = json.loads(r.content.decode('utf-8'))
         # 提示未建立教务信息
         if data['state'] == 1:
@@ -84,13 +81,13 @@ class Crawler(object):
     def getGrade(self):
         headers = {'Content-Type': 'application/json'}
         r = self.__session.post(
-            url='http://' + self.__ip[0] + '-' + self.__ip[1] + '-' + self.__ip[2] + '-' + self.__ip[3] +
-            '-8080-p.webvpn.cust.edu.cn:8118/api/ClientStudent/QueryService/GradeQueryApi/GetDataByStudent?sf_request_type=ajax',
+            url='http://jwgls1-cust-edu-cn-8080-p.webvpn.cust.edu.cn:8118/api/ClientStudent/QueryService/GradeQueryApi/GetDataByStudent?sf_request_type=ajax',
             data=json.dumps({"param": "JTdCJTIyU2hvd0dyYWRlVHlwZSUyMiUzQTElN0Q=", "__permission": {"MenuID": "4443798E-EB6E-4D88-BFBD-BB0A76FF6BD5", "Operation": 0}, "__log": {"MenuID": "4443798E-EB6E-4D88-BFBD-BB0A76FF6BD5", "Logtype": 6, "Context": "查询"}}),
             headers=headers
         )
         data = json.loads(r.content.decode('utf-8'))
+        print(data)
         if data['state'] != 0:
             return ('教务挂了', 512)
         # 分解数据并重命名
@@ -237,16 +234,14 @@ class Crawler(object):
     def getSchedule(self):
         headers = {'Content-Type': 'application/json'}
         r = self.__session.post(
-            url='http://'+self.__ip[0]+'-'+self.__ip[1]+'-'+self.__ip[2]+'-'+self.__ip[3] +
-            '-8080-p.webvpn.cust.edu.cn:8118/api/ClientStudent/Home/StudentHomeApi/GetHomeCurWeekTime?sf_request_type=ajax',
+            url='http://jwgls1-cust-edu-cn-8080-p.webvpn.cust.edu.cn:8118/api/ClientStudent/Home/StudentHomeApi/GetHomeCurWeekTime?sf_request_type=ajax',
             data=json.dumps({"param": "JTdCJTdE", "__permission": {"MenuID": "F71C97D5-D3E2-4FDA-9209-D7FA8626390E", "Operation": 0}, "__log": {"MenuID": "F71C97D5-D3E2-4FDA-9209-D7FA8626390E", "Logtype": 6, "Context": "查询"}}),
             headers=headers
         )
 
         CurWeek = json.loads(r.content.decode('utf-8'))['data']['CurWeek']
         r = self.__session.post(
-            url='http://'+self.__ip[0]+'-'+self.__ip[1]+'-'+self.__ip[2]+'-'+self.__ip[3] +
-            '-8080-p.webvpn.cust.edu.cn:8118/api/ClientStudent/Home/StudentHomeApi/QueryStudentScheduleData?sf_request_type=ajax',
+            url='http://jwgls1-cust-edu-cn-8080-p.webvpn.cust.edu.cn:8118/api/ClientStudent/Home/StudentHomeApi/QueryStudentScheduleData?sf_request_type=ajax',
             data=json.dumps({"param": "JTdCJTdE", "__permission": {"MenuID": "F71C97D5-D3E2-4FDA-9209-D7FA8626390E", "Operation": 0}, "__log": {"MenuID": "F71C97D5-D3E2-4FDA-9209-D7FA8626390E", "Logtype": 6, "Context": "查询"}}),
             headers=headers
         )
@@ -336,5 +331,5 @@ class Crawler(object):
             'grade': self.__grade_data,
             'schedule': self.__schedule_data
         },
-        200
+            200
     )
diff --git a/lib/crawler_test.py b/lib/crawler_test.py
index d4ff8c0..3cbc6eb 100644
--- a/lib/crawler_test.py
+++ b/lib/crawler_test.py
@@ -9,17 +9,18 @@ class TestCrawler(unittest.TestCase):
         self.assertEqual(c.connection(), ('ok', 200))
 
     # 测试获取成绩
-    # def test_grade(self):
-    #     self.assertEqual(c.getGrade(), ('ok', 200))
+    def test_grade(self):
+        self.assertEqual(c.getGrade(), ('ok', 200))
 
     # 测试获取课表
-    # def test_schedule(self):
-    #     self.assertEqual(c.getSchedule(), ('ok', 200))
+    def test_schedule(self):
+        self.assertEqual(c.getSchedule(), ('ok', 200))
 
     # 测试返回信息
-    # def test_getData(self):
-    #     get_res = c.getData()
-    #     self.assertEqual(get_res['errcode'], '200')
+    def test_getData(self):
+        get_res = c.getData()
+        print(get_res)
+        self.assertEqual(get_res[-1], 200)
 
 if __name__ == '__main__':
     unittest.main()

From 716c8c0f7199927166c644b7de4e00cd87dcb5c2 Mon Sep 17 00:00:00 2001
From: RainSun
Date: Fri, 9 Oct 2020 16:14:50 +0800
Subject: [PATCH 2/2] Fix wrong password being reported as the backend being
 down & remove leftover print
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 lib/crawler.py      | 7 ++++---
 lib/crawler_test.py | 1 -
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/lib/crawler.py b/lib/crawler.py
index 6a80965..24b6f3e 100644
--- a/lib/crawler.py
+++ b/lib/crawler.py
@@ -53,8 +53,10 @@ class Crawler(object):
         r = self.__session.get(
             url='http://portal-cust-edu-cn-s.webvpn.cust.edu.cn:8118/custp/index')
         soup = BeautifulSoup(r.text, 'html.parser')
-        if soup.findAll(name='a')[-2]['href'] != 'logout':
-            return ('账号或者密码错误', 510)
+        try:
+            soup.findAll(name='a')[-2]['href']
+        except Exception as e:
+            return ('账号或者密码错误', 510)
         r = self.__session.get(
             url='http://mysso-cust-edu-cn-s.webvpn.cust.edu.cn:8118/cas/login?service=http://jwgls1.cust.edu.cn:8080/welcome', allow_redirects=False)
         ticket = r.headers['Location'][72:]
@@ -87,7 +89,6 @@ class Crawler(object):
             headers=headers
         )
         data = json.loads(r.content.decode('utf-8'))
-        print(data)
         if data['state'] != 0:
             return ('教务挂了', 512)
         # 分解数据并重命名
diff --git a/lib/crawler_test.py b/lib/crawler_test.py
index 3cbc6eb..428b718 100644
--- a/lib/crawler_test.py
+++ b/lib/crawler_test.py
@@ -19,7 +19,6 @@ class TestCrawler(unittest.TestCase):
     # 测试返回信息
     def test_getData(self):
         get_res = c.getData()
-        print(get_res)
         self.assertEqual(get_res[-1], 200)
 
 if __name__ == '__main__':
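
A note on the two fixes: patch 1 stops deriving the backend address from the portal page's anchors (the old soup.findAll(name='a')[7] parsing) and hard-codes the jwgls1.cust.edu.cn host, keeping the anchor list only as a login probe ([-2]['href'] != 'logout' meant bad credentials). Per the second commit message, that probe misfired on a wrong password: the [-2]['href'] lookup can itself raise IndexError or KeyError before the comparison runs, so the failure surfaced as the misleading ('教务挂了', 512) ("the academic system is down") instead of ('账号或者密码错误', 510) ("wrong account or password"). Patch 2 therefore makes "the lookup raises" the failure signal and returns 510 from the except branch. Below is a minimal standalone sketch of the final probe; the helper name login_succeeded and the sample markup are invented for illustration, not taken from the repo:

    from bs4 import BeautifulSoup

    def login_succeeded(index_html):
        # After a successful login the portal index page carries a logout
        # link near the end of its anchor list, so the second-to-last <a>
        # has an href; after a failed login the page has fewer anchors and
        # the [-2] lookup raises IndexError (or KeyError without an href).
        # Patch 2 maps any such exception to the 510 "bad credentials" path.
        soup = BeautifulSoup(index_html, 'html.parser')
        try:
            soup.findAll(name='a')[-2]['href']
            return True
        except Exception:
            return False

    # Invented markup, for illustration only:
    print(login_succeeded('<a href="/">home</a><a href="logout">exit</a><a href="#">help</a>'))  # True
    print(login_succeeded('<p>bad credentials</p>'))  # False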