Bootstrap

强智教务系统模拟登陆经验【附源码】

强智教务系统模拟登陆经验

1:为什么要爬虫?

2013年的时候,校园内的一些校园公众号很受欢迎,它们提供了一些很便捷的功能,如便捷地查询成绩和课表等相关信息:学生一次登录,即可从教务系统中爬取数据并解析展示。大家都知道大学的教务系统一般都很丑,而且在移动端的适配效果非常差,导致了各种不方便。那个时候我也对这块非常感兴趣,不过折腾来折腾去,一直没有办法自己写出来,还是技术太菜了。然而未完成的事,总是让人印象深刻。大概是国庆前的一个周末,又心血来潮,打算再折腾一下。先是查看各种博客。没有动手之前始终是一知半解的,网上总结的大致流程是:先模拟登陆,然后解析数据。可还是不知道该怎么写。把各种demo下载下来去跑,然而并没有什么用,因为每个教务系统都是不一样的。

2:用到了哪些技术点?

主要是用HttpClient模拟登陆,Jsoup解析HTML。其他的是SSM+Maven的一个架子,前端登陆页用了jQuery WEUI(挺漂亮),用Bootstrap的table显示成绩数据。

3:过程中遇到什么问题?

知易行难。整个过程大致完成了模拟登陆和解析显示成绩,大概花了一个周末和一个国庆。过程中遇到的最大问题就是cookie的处理。我明白是要把验证码请求的cookie放到登陆请求的cookie中。然而中途虽然都是那样做的,但是一直登陆不了。坑爹的是,登陆失败后解析返回结果,根本不知道是什么地方错了。这个也纠结了很久。后来又尝试让验证码的请求和登陆的请求在一个连接池内,仍然是失败的。可能还是没有写对。

4:目前已经实现的

目前已经实现输入学号和密码,登陆教务系统,解析数据。实现了绕过教务系统的验证码,这个很值得一提。纠结验证码这块还是挺久的,某天问一同事,有没有可能破解验证码。了解到原来验证码有两种生成方式。一种是后端生成图片直接返回,还有一种是后端生成验证码字符,前端生成为图片。刚好教务系统就是第二种方式实现的。这个就有意思了,调试一下发现果然验证码在response中。这就简单了,解析response的header,在代码中提交验证码,前台就不用输入验证码了。这给后面的拓展解决了很大的问题。目前是在login页面输入能返回数据,然而刷新页面又得登陆。这个坑过一段时间补上。


5:模拟登陆代码解析

说明:每个学校教务系统都不同,完全照搬可能没有用。但是原理都差不多。有验证码的教务系统,必须保证验证码请求的cookie和登陆请求的cookie一致,否则会登陆失败。没有验证码的就不用了。源代码片段也是在CSDN上找的,改了很多才能适应我要爬取的教务系统。

package com.chenlipeng.utils;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.http.Header;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.NameValuePair;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.methods.HttpUriRequest;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.message.BasicHeader;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.protocol.BasicHttpContext;
import org.apache.http.protocol.HttpContext;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import net.sf.json.JSONArray;


/**
 * Simulated login and grade scraping against the academic-affairs web site
 * configured in {@code HttpClientUtils.BASE_URL} (request headers below are
 * hard-coded for {@code jwgl.nchu.edu.cn}).
 *
 * <p>Typical flow, as far as this class shows: {@link #getBaseValue()} loads the
 * login page and collects its hidden form fields; the caller adds credentials,
 * the captcha text and a cookie holder to that map and passes it to
 * {@link #isLogin(Map)}, which posts the login form and scrapes the user
 * profile and the grade table.
 *
 * <p>NOTE(review): {@code urlcode} is static mutable state shared by every
 * caller. {@link #isLogin(Map)} prefers the per-call {@code "urlcode"} map entry
 * and only falls back to the static value when the map has none.
 *
 * @author    ChenLiPeng
 * @date      2017-10-01
 */
public class NhkyLogin2 {

	/** HTTP status codes this class reacts to. */
	private static final int SC_OK = 200;
	private static final int SC_FOUND = 302;

	/** Charset used to URL-encode the login form (the form contains non-ASCII values). */
	private static final String FORM_CHARSET = "UTF-8";

	/** Cookie name sent to the grade page. */
	private static final String SESSION_COOKIE_NAME = "ASP.NET_SessionId";

	/** Id prefix of the grade-table rows that carry data. */
	private static final String DATA_ROW_ID_PREFIX = "grid_DXDataRow";

	/**
	 * URL segment cut from the final (post-redirect) URL of the entry page,
	 * from the first "(" up to one character past the first ")".
	 * Presumably an ASP.NET cookieless-session token such as "(S(xxxx))" - TODO confirm.
	 */
	private static String urlcode;

	/**
	 * Requests the base URL, extracts the URL segment from the final request URI,
	 * then loads {@code login.aspx} under that segment and collects the hidden
	 * form fields needed for the login post.
	 *
	 * @return a map that may contain {@code "urlcode"}, {@code "viewstate"},
	 *         {@code "__EVENTVALIDATION"}, {@code "cobRole_VI"}, {@code "cobRole"},
	 *         {@code "cobRole_DDDWS"}, {@code "cobRole$DDD$L"} and
	 *         {@code "gridNew$CallbackState"}; on a connect failure or connect
	 *         timeout it contains {@code "exception"} instead. Never {@code null};
	 *         the form-field entries are absent when a request did not return 200
	 *         or no URL segment could be extracted.
	 */
	public static Map<String, Object> getBaseValue() {

		Map<String, Object> map = new HashMap<String, Object>();
		CloseableHttpClient hc = HttpClientUtils.getHttpClient();
		HttpPost entryRequest = new HttpPost(HttpClientUtils.BASE_URL);
		HttpPost loginPageRequest = null;
		try {
			HttpContext httpContext = new BasicHttpContext();
			entryRequest.setConfig(RequestConfig.custom().setSocketTimeout(5000).setConnectTimeout(5000).build());
			HttpResponse entryResponse = hc.execute(entryRequest, httpContext);

			// The context holds the request that was actually sent last, i.e. after redirects.
			HttpUriRequest realRequest = (HttpUriRequest) httpContext.getAttribute("http.request");
			String code = realRequest == null ? null : extractUrlCode(realRequest.getURI().toString());
			entryRequest.releaseConnection();
			if (code == null) {
				// Previously this case crashed with StringIndexOutOfBoundsException / NullPointerException.
				System.out.println("No \"(...)\" segment found in the final request URL");
				return map;
			}
			urlcode = code;

			// Compare the numeric status code; the textual status line varies with
			// protocol version and reason phrase.
			if (entryResponse.getStatusLine().getStatusCode() != SC_OK) {
				return map;
			}
			System.out.println("GET请求===>"+HttpClientUtils.BASE_URL);
			map.put("urlcode", code);

			String loginUrl = HttpClientUtils.BASE_URL + code + "/login.aspx";
			loginPageRequest = new HttpPost(loginUrl);
			loginPageRequest.setConfig(RequestConfig.custom().setSocketTimeout(200000).setConnectTimeout(200000).build());
			HttpResponse loginPageResponse = hc.execute(loginPageRequest);
			if (loginPageResponse.getStatusLine().getStatusCode() != SC_OK) {
				return map;
			}
			System.out.println("GET请求===>"+loginUrl);

			HttpEntity entity = loginPageResponse.getEntity();
			if (entity != null) {
				Document doc = Jsoup.parse(EntityUtils.toString(entity, "utf-8"));
				putInputValue(map, doc, "viewstate", "__VIEWSTATE");
				putInputValue(map, doc, "__EVENTVALIDATION", "__EVENTVALIDATION");
				putInputValue(map, doc, "cobRole_VI", "cobRole_VI");
				putInputValue(map, doc, "cobRole", "cobRole");
				putInputValue(map, doc, "cobRole_DDDWS", "cobRole_DDDWS");
				putInputValue(map, doc, "cobRole$DDD$L", "cobRole$DDD$L");
				putInputValue(map, doc, "gridNew$CallbackState", "gridNew$CallbackState");
			}
		} catch (ClientProtocolException e) {
			e.printStackTrace();
		} catch (IOException e) {
			String exceptionName = e.getClass().getName();
			// Matched by class name so that no additional imports are required.
			if ("org.apache.http.conn.HttpHostConnectException".equals(exceptionName)
					|| "org.apache.http.conn.ConnectTimeoutException".equals(exceptionName)) {
				map.put("exception", "教务系统挂了");
			}
			System.out.println(exceptionName);
			e.printStackTrace();
		} finally {
			if (loginPageRequest != null) {
				loginPageRequest.abort();
			}
			entryRequest.abort();
		}
		return map;
	}

	/**
	 * Posts the login form and, when the server answers with a 302 redirect,
	 * loads the redirect target to read the user profile and then fetches the
	 * grade table.
	 *
	 * @param map request data. Read keys: {@code "Cookie"} (a {@code CookieStroeUtil},
	 *            required), {@code "viewstate"}, {@code "__EVENTVALIDATION"},
	 *            {@code "cobRole_VI"}, {@code "cobRole_DDDWS"}, {@code "cobRole$DDD$L"},
	 *            {@code "gridNew$CallbackState"}, {@code "username"}, {@code "password"},
	 *            {@code "txtVolidate"} (captcha text) and, optionally, {@code "urlcode"}
	 *            as produced by {@link #getBaseValue()}.
	 * @return a map with {@code "userInfo"} (a {@code UserInfoBean}, left unpopulated
	 *         when the login was not redirected) and {@code "userScore"} (a list of
	 *         {@code SocreResultBean}, empty when no session cookie was obtained);
	 *         {@code null} when an I/O error occurred.
	 * @throws IllegalArgumentException if the map has no {@code "Cookie"} entry
	 */
	public static Map<String, Object> isLogin(Map<String, Object> map) {

		CookieStroeUtil cookieHolder = (CookieStroeUtil) map.get("Cookie");
		if (cookieHolder == null) {
			throw new IllegalArgumentException("map must contain a CookieStroeUtil under key \"Cookie\"");
		}

		// Prefer the per-call value so concurrent logins do not read each other's URL segment.
		Object codeFromMap = map.get("urlcode");
		String code = codeFromMap instanceof String ? (String) codeFromMap : urlcode;

		HttpPost hp = new HttpPost(HttpClientUtils.BASE_URL + code + "/login.aspx");
		UserInfoBean userInfoBean = new UserInfoBean();
		List<SocreResultBean> resultScore = new ArrayList<SocreResultBean>();

		// try-with-resources: the per-login client (and its connections) is always closed.
		try (CloseableHttpClient hc = HttpClients.custom()
				.setDefaultCookieStore(cookieHolder.getCookieStore()).build()) {

			// Explicit UTF-8: the single-argument constructor encodes as ISO-8859-1,
			// which cannot represent the non-ASCII role value.
			hp.setEntity(new UrlEncodedFormEntity(buildLoginForm(map), FORM_CHARSET));
			addBrowserHeaders(hp, code);

			HttpResponse responsePost = hc.execute(hp);
			// Only the status line is printed; the full response would include Set-Cookie values.
			System.out.println("responsePost:  " + responsePost.getStatusLine());

			Header locationHeader = responsePost.getFirstHeader("Location");
			boolean redirected = responsePost.getStatusLine().getStatusCode() == SC_FOUND
					&& locationHeader != null;

			String sessionId = null;
			if (redirected) {
				String location = locationHeader.getValue();
				if (location.indexOf("ischool.aspx") != -1) {
					sessionId = extractSessionId(responsePost.getAllHeaders());
				}
				// Release the connection of the 302 response before issuing the next request.
				EntityUtils.consumeQuietly(responsePost.getEntity());
				fillUserInfo(hc, location, userInfoBean);
			}

			// Never query grades with a session that does not belong to this login.
			if (sessionId != null) {
				resultScore = getScore(code, sessionId);
			}

			Map<String, Object> mapResult = new HashMap<>();
			mapResult.put("userInfo", userInfoBean);
			mapResult.put("userScore", resultScore);
			return mapResult;
		} catch (IOException e) {
			// Also covers ClientProtocolException and UnsupportedEncodingException.
			e.printStackTrace();
		} finally {
			hp.abort();
		}
		return null;
	}

	/**
	 * Cuts the segment from the first "(" up to one character past the first ")".
	 *
	 * @return the segment, or {@code null} when the URI has no such segment
	 */
	private static String extractUrlCode(String uri) {
		int start = uri.indexOf("(");
		int firstClose = uri.indexOf(")");
		int end = firstClose + 2;
		if (start < 0 || firstClose < start || end > uri.length()) {
			return null;
		}
		return uri.substring(start, end);
	}

	/** Stores the value of the named {@code <input>} of the page under {@code key}. */
	private static void putInputValue(Map<String, Object> map, Document doc, String key, String inputName) {
		map.put(key, doc.select("input[name=" + inputName + "]").val());
	}

	/** Builds the login form fields in the order they are posted. */
	private static List<NameValuePair> buildLoginForm(Map<String, Object> map) {
		List<NameValuePair> form = new ArrayList<NameValuePair>();
		form.add(new BasicNameValuePair("__LASTFOCUS", ""));
		// __EVENTTARGET used to be added twice; a form submits each hidden field once.
		form.add(new BasicNameValuePair("__EVENTTARGET", ""));
		form.add(new BasicNameValuePair("__EVENTARGUMENT", ""));
		form.add(new BasicNameValuePair("__VIEWSTATE", (String) map.get("viewstate")));
		form.add(new BasicNameValuePair("__VIEWSTATEGENERATOR", "C2EE9ABB"));
		form.add(new BasicNameValuePair("__EVENTVALIDATION", (String) map.get("__EVENTVALIDATION")));
		form.add(new BasicNameValuePair("cobRole_VI", (String) map.get("cobRole_VI")));
		form.add(new BasicNameValuePair("cobRole", "学生"));
		form.add(new BasicNameValuePair("cobRole_DDDWS", (String) map.get("cobRole_DDDWS")));
		form.add(new BasicNameValuePair("cobRole_DDD_LDeletedItems", ""));
		form.add(new BasicNameValuePair("cobRole_DDD_LInsertedItems", ""));
		form.add(new BasicNameValuePair("cobRole_DDD_LCustomCallback", ""));
		form.add(new BasicNameValuePair("cobRole$DDD$L", (String) map.get("cobRole$DDD$L")));
		form.add(new BasicNameValuePair("cobRole$DDD$L$CVS", ""));
		form.add(new BasicNameValuePair("cobRole$CVS", ""));
		form.add(new BasicNameValuePair("User_ID", (String) map.get("username")));       // student number
		form.add(new BasicNameValuePair("User_Pass", (String) map.get("password")));     // password
		form.add(new BasicNameValuePair("txtVolidate", (String) map.get("txtVolidate"))); // captcha text
		form.add(new BasicNameValuePair("Button1", ""));
		form.add(new BasicNameValuePair("gridNew$DXSelInput", ""));
		form.add(new BasicNameValuePair("gridNew$CallbackState", (String) map.get("gridNew$CallbackState")));
		return form;
	}

	/** Adds the browser-like request headers sent with the login post. */
	private static void addBrowserHeaders(HttpPost request, String code) {
		request.addHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8");
		request.addHeader("Accept-Encoding", "gzip, deflate");
		request.addHeader("Accept-Language", "zh-CN,zh;q=0.8");
		request.addHeader("Cache-Control", "max-age=0");
		request.addHeader("Connection", "keep-alive");
		request.addHeader("Host", "jwgl.nchu.edu.cn");
		request.addHeader("Referer", "http://jwgl.nchu.edu.cn/"+code+"/login.aspx");
		request.addHeader("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.79 Safari/537.36");
		request.addHeader("Content-Type", "application/x-www-form-urlencoded");
		request.addHeader(new BasicHeader("Cookie","TZ=0;"));
		request.addHeader("Origin", "http://jwgl.nchu.edu.cn");
	}

	/**
	 * Reads the session id from the {@code Set-Cookie} headers of the login response.
	 * A cookie named {@code ASP.NET_SessionId} wins; otherwise the value of the last
	 * {@code Set-Cookie} header is used, which is what the previous implementation did.
	 *
	 * @return the cookie value, or {@code null} when no usable {@code Set-Cookie} header exists
	 */
	private static String extractSessionId(Header[] headers) {
		String fallback = null;
		for (Header header : headers) {
			String cookie = header.getValue();
			// Header names are case-insensitive.
			if (cookie == null || !"Set-Cookie".equalsIgnoreCase(header.getName())) {
				continue;
			}
			int eq = cookie.indexOf('=');
			if (eq < 0) {
				continue;
			}
			// A cookie without attributes has no ';' - take the rest of the header then.
			int semi = cookie.indexOf(';', eq);
			String value = semi < 0 ? cookie.substring(eq + 1) : cookie.substring(eq + 1, semi);
			if (SESSION_COOKIE_NAME.equalsIgnoreCase(cookie.substring(0, eq).trim())) {
				return value;
			}
			fallback = value;
		}
		return fallback;
	}

	/**
	 * Loads the page the login redirected to and copies the user name
	 * (first element with class {@code uname}) and the first three elements with
	 * class {@code keyvalue} (student number, class name, major - in that order)
	 * into the bean. Fields whose element is missing are left untouched.
	 */
	private static void fillUserInfo(CloseableHttpClient hc, String location, UserInfoBean userInfoBean)
			throws IOException {
		// An absolute Location must not be appended to the base URL.
		String landingUrl = location.startsWith("http") ? location : HttpClientUtils.BASE_URL + location;
		HttpGet httpget = new HttpGet(landingUrl);
		try {
			HttpResponse landing = hc.execute(httpget);
			HttpEntity entity = landing.getEntity();
			if (entity == null) {
				return;
			}
			Document doc = Jsoup.parse(EntityUtils.toString(entity, "utf-8"));
			Elements uname = doc.getElementsByClass("uname");
			Elements users = doc.getElementsByClass("keyvalue");
			if (!uname.isEmpty()) {
				userInfoBean.setUserName(uname.get(0).text());
			}
			if (users.size() > 0) {
				userInfoBean.setUserNO(users.get(0).text());
			}
			if (users.size() > 1) {
				userInfoBean.setClassName(users.get(1).text());
			}
			if (users.size() > 2) {
				userInfoBean.setMajor(users.get(2).text());
			}
		} finally {
			httpget.releaseConnection();
		}
	}

	/**
	 * Fetches the grade page with the given session cookie and converts every
	 * data row of table {@code grid_DXMainTable} into a {@code SocreResultBean}.
	 * Best effort: on any failure the rows collected so far are returned.
	 *
	 * @param code      URL segment placed between the base URL and the page path
	 * @param sessionId value sent as the {@code ASP.NET_SessionId} cookie
	 * @return the parsed rows, possibly empty, never {@code null}
	 */
	private static List<SocreResultBean> getScore(String code, String sessionId) {

		List<SocreResultBean> scores = new ArrayList<SocreResultBean>();
		try {
			String url = HttpClientUtils.BASE_URL + code + "/Grade/SStudentGradeSelect.aspx";
			Map<String, String> cookie = new HashMap<String, String>();
			cookie.put(SESSION_COOKIE_NAME, sessionId);
			Document page = Jsoup.connect(url).cookies(cookie).get();
			Element table = page.getElementById("grid_DXMainTable");
			if (table == null) {
				return scores;
			}
			// Select data rows by id prefix instead of counting every <tr>: the old
			// index loop ran past the last data row and stopped on a NullPointerException.
			for (Element row : table.select("tr[id^=" + DATA_ROW_ID_PREFIX + "]")) {
				SocreResultBean score = toScore(row.getElementsByTag("td"));
				if (score != null) {
					scores.add(score);
				}
			}
		} catch (IOException | RuntimeException e) {
			e.printStackTrace();
		}
		return scores;
	}

	/**
	 * Maps the cells of one data row to a bean. Rows come in three widths:
	 * 17 cells (per the original author's note: a row that starts an academic year),
	 * 15 cells (a row that starts a term) and 13 cells. The three layouts are the
	 * same columns shifted by two, so all indexes are derived from {@code size - 15}.
	 *
	 * @return the bean, or {@code null} for any other cell count
	 */
	private static SocreResultBean toScore(Elements cells) {
		int size = cells.size();
		if (size != 17 && size != 15 && size != 13) {
			return null;
		}
		int offset = size - 15;
		SocreResultBean score = new SocreResultBean();
		if (offset >= 0) {
			// 13-cell rows have no cell that was mapped to "years".
			score.setYears(cells.get(offset).text());
		}
		score.setCourse(cells.get(offset + 3).text());
		score.setCredit(cells.get(offset + 8).text());
		score.setScore(cells.get(offset + 10).text());
		score.setState(cells.get(offset + 14).text());
		return score;
	}
}

项目源码:https://gitee.com/chenlipeng8/QiangZhiJiaoWuPaChong



6:后续计划完善

如有不对之处,欢迎email([email protected])指正。

;