[转载]C# 微信 生活助手 空气质量 天气预报等 效果展示 数据抓取 (一) – 开心生活助手 – 博客园.
第一次在博客园写博客,写得不好,大家见谅。最近工作辞了,然后感冒发烧输了一个星期的液,感觉很烦躁,心情不是很好,在帝都感觉压力大。废话不说了,开始正题吧!
还没有完全完成,后续考虑开源!
可以关注微信公众帐号体验一下,先看下效果吧。
先介绍下工具:我用的有 HttpWatch 和 Fiddler。
国家环保部的数据链接 http://datacenter.mep.gov.cn/report/air_daily/airDairyCityHour.jsp
原以为直接 GET 请求就可以了,试了一下,发现获取不到数据,然后看了下页面代码,发现查询是通过表单以 POST 方式提交的:
<script type="text/javascript">
// <![CDATA[
function submitForm(){ document.citydayform.submit(); }
// ]]>
</script>
<form style="margin: 0px auto 0px auto;" action="http://datacenter.mep.gov.cn:80/report/air_daily/airDairyCityHour.jsp" method="post" name="citydayform">
  <table class="font" border="1" width="95%" cellspacing="0" cellpadding="0" align="center">
    <tbody>
      <tr>
        <td class="STYLE1" align="right" bgcolor="#FFFFEF" width="5%" height="30">城市:</td>
        <td bgcolor="#FFFFFF" width="8%"><input name="city" type="text" value="" /></td>
        <td class="STYLE1 STYLE1" align="right" bgcolor="#FFFFEF" width="5%" height="30">时间:</td>
        <td bgcolor="#FFFFFF" width="30%">从: <input id="startdate" class="Wdate" style="text-align: center; cursor: pointer; width: 160px; height: 21px; margin-top: 0px;" name="startdate" readonly="readonly" type="text" value="2015-04-16 22:00" /> 到: <input id="enddate" class="Wdate" style="text-align: center; cursor: pointer; width: 160px; height: 21px; margin-top: 0px;" name="enddate" readonly="readonly" type="text" value="2015-04-16 22:00" /></td>
        <td bgcolor="#FFFFFF" width="8%"><div align="center"><input height="19" src="/report/main/images/search.gif" type="image" width="83" /></div></td>
      </tr>
    </tbody>
  </table>
</form>
模型
/// <summary>
/// One air-quality reading for a city at a given time.
/// Declared <c>partial</c>, so other parts of the type may exist elsewhere.
/// </summary>
public partial class AirQuality
{
    /// <summary>Numeric identifier of the record (presumably the storage key; confirm against the data layer).</summary>
    public int Id { get; set; }

    /// <summary>Timestamp of the reading.</summary>
    public System.DateTime Time { get; set; }

    /// <summary>AQI value of the reading.</summary>
    public int AQI { get; set; }

    /// <summary>Numeric code of the city the reading belongs to.</summary>
    public int CityCode { get; set; }

    /// <summary>Air-quality level, stored as text.</summary>
    public string Level { get; set; }

    /// <summary>Primary pollutant, stored as text.</summary>
    public string PrimaryPollutant { get; set; }
}
抓取代码,通过正则匹配数据
// Page-by-page crawl of the hourly air-quality report.
// Relies on variables declared outside this fragment: lastGrabHBTime, currentTime,
// pageIndex, res, regex, cityConfigs and airQualities.
// The loop ends when a page yields no regex matches.
while (true)
{
    // Form body for the query: empty city filter, a time window starting one hour
    // after the last grabbed time, and the current page number.
    string param = string.Format("city=&startdate={0}&enddate={1}&page={2}", lastGrabHBTime.AddHours(1).ToString("yyyy-MM-dd HH:mm"), currentTime.ToString("yyyy-MM-dd HH:mm"), pageIndex);

    #region Data fetching
    // Retry until the request succeeds, waiting one second between attempts.
    // NOTE(review): any exception is retried forever, including non-transient ones
    // (e.g. a malformed URL); consider a retry cap or narrower exception types.
    while (true)
    {
        try
        {
            res = HttpHelper.PostRequest(Constant.HBUrl, param, Encoding.GetEncoding("GB2312"));
            break;
        }
        catch (Exception)
        {
            Thread.Sleep(1000);
        }
    }
    #endregion

    // Strip line breaks, tabs and '|' characters (the '|' inside the character class
    // is a literal) before matching.
    // NOTE(review): the trailing Replace appears to swap a space for a space;
    // presumably the first argument was originally an HTML entity or a
    // non-breaking space lost in publishing - confirm against the real source.
    res = Regex.Replace(res, "[\r\n|\t]", "").Replace(" ", " ");

    var matches = regex.Matches(res);
    if (matches.Count == 0)
    {
        break;
    }

    foreach (Match match in matches)
    {
        var group = match.Groups;

        // Only matches with exactly 10 groups are consumed; anything else is ignored.
        if (group.Count != 10)
        {
            continue;
        }

        // Resolve the city first: FirstOrDefault yields null when the scraped city
        // name has no configuration entry, and dereferencing that result used to
        // abort the whole crawl with a NullReferenceException.
        string cityName = group[1].Value;
        var cityConfig = cityConfigs.FirstOrDefault(p => p.City == cityName);
        if (cityConfig == null)
        {
            System.Diagnostics.Trace.TraceWarning("Air quality record skipped: unknown city '{0}'.", cityName);
            continue;
        }

        var air = new AirQuality()
        {
            Time = DateTime.Parse(group[3].Value),
            AQI = int.Parse(group[5].Value),
            Level = group[7].Value,
            PrimaryPollutant = group[9].Value,
            CityCode = cityConfig.Code
        };
        airQualities.Add(air);
    }

    pageIndex++;

    // Short pause between pages.
    Thread.Sleep(100);
}
POST请求代码
/// <summary>
/// Sends a form-urlencoded POST request using UTF-8 for both the request body
/// and the response text.
/// </summary>
/// <param name="url">Absolute URL to post to.</param>
/// <param name="param">Already-formatted request body (e.g. <c>a=1&amp;b=2</c>).</param>
/// <param name="cookie">Optional Cookie header value; not sent when null or empty.</param>
/// <returns>The response body as text.</returns>
public static string PostRequest(string url, string param, string cookie = "")
{
    return PostRequest(url, param, Encoding.UTF8, cookie);
}

/// <summary>
/// Sends a form-urlencoded POST request and returns the response body as text.
/// The same <paramref name="encoding"/> is used to encode the request body and
/// to decode the response.
/// </summary>
/// <param name="url">Absolute URL to post to.</param>
/// <param name="param">Already-formatted request body; null is sent as an empty body.</param>
/// <param name="encoding">Encoding for the request body and the response text.</param>
/// <param name="cookie">Optional Cookie header value; not sent when null or empty.</param>
/// <returns>The response body as text.</returns>
/// <exception cref="ArgumentNullException"><paramref name="encoding"/> is null.</exception>
public static string PostRequest(string url, string param, Encoding encoding, string cookie = "")
{
    if (encoding == null)
    {
        throw new ArgumentNullException("encoding");
    }

    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(new Uri(url));
    request.ContentType = "application/x-www-form-urlencoded";
    request.UserAgent = "Mozilla/5.0 (MSIE 9.0; Windows NT 6.3; WOW64; Trident/7.0; rv:11.0) like Gecko";
    // HTTP method tokens are case-sensitive on the wire; use the canonical upper-case verb.
    request.Method = "POST";
    request.Timeout = 30000;
    request.KeepAlive = false;

    if (!string.IsNullOrEmpty(cookie))
    {
        request.Headers[HttpRequestHeader.Cookie] = cookie;
    }

    byte[] bytes = encoding.GetBytes(param ?? string.Empty);
    request.ContentLength = bytes.Length;
    using (Stream stream = request.GetRequestStream())
    {
        stream.Write(bytes, 0, bytes.Length);
    }

    // Dispose the response and the reader as well as the stream, so the underlying
    // connection is released even when reading fails.
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    using (Stream stream = response.GetResponseStream())
    using (StreamReader reader = new StreamReader(stream, encoding))
    {
        return reader.ReadToEnd();
    }
}