using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;

namespace DevicesService.Commen
{
    /// <summary>
    /// Static helpers for extracting values from HTML text with regular expressions
    /// and for downloading page content over HTTP(S) with <see cref="HttpWebRequest"/>.
    /// </summary>
    public class HtmlHelper
    {
        // Value sent in the User-Agent header. It must not contain the header name itself.
        private const string BrowserUserAgent =
            "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; .NET CLR 1.0.3705)";

        // Request timeout, in milliseconds, applied to every request made by this class.
        private const int RequestTimeoutMilliseconds = 5000;

        // Value written to ServicePointManager.DefaultConnectionLimit on every request.
        private const int ConnectionLimit = 30;

        /// <summary>
        /// Gets the text between the opening and closing tag of the first matching element.
        /// </summary>
        /// <param name="str">The text to search.</param>
        /// <param name="title">
        /// The tag name. It is inserted into the regular expression as-is (not escaped).
        /// </param>
        /// <returns>
        /// The element's text content, or an empty string when no match is found.
        /// Only content that contains no '&lt;' character (no nested tags) is matched.
        /// </returns>
        public static string GetTitleContent(string str, string title)
        {
            // <tag ...>(text)</tag>; the named group "Text" captures the inner text.
            string pattern = string.Format("<{0}[^>]*?>(?<Text>[^<]*)</{1}>", title, title);
            Match match = Regex.Match(str, pattern, RegexOptions.Multiline);

            // A failed match yields an empty group value, so no explicit Success check is needed.
            return match.Groups["Text"].Value;
        }

        /// <summary>
        /// Gets the value of an attribute on the first matching tag.
        /// </summary>
        /// <param name="str">The text to search.</param>
        /// <param name="title">The tag name (matched case-insensitively, not escaped).</param>
        /// <param name="attrib">The attribute name (matched case-insensitively, not escaped).</param>
        /// <returns>
        /// The attribute value without surrounding quotes, or an empty string when no match is found.
        /// Values containing whitespace, quotes or '&gt;' are not matched.
        /// </returns>
        public static string GetTitleContent(string str, string title, string attrib)
        {
            // <tag ... attrib=(optional quote)(value)(same quote) ...>
            string pattern = string.Format("<{0}[^>]*?{1}=(['\"\"]?)(?<url>[^'\"\"\\s>]+)\\1[^>]*>", title, attrib);
            Match match = Regex.Match(str, pattern, RegexOptions.IgnoreCase);
            return match.Groups["url"].Value;
        }

        /// <summary>
        /// Compacts page markup: strips the whitespace at the start of every line and then
        /// removes every CR LF sequence.
        /// </summary>
        /// <param name="html">The markup to compact.</param>
        /// <returns>The compacted markup.</returns>
        public static string ReplaceEmpty(string html)
        {
            // Remove whitespace at the beginning of each line.
            html = Regex.Replace(html, "^\\s*", string.Empty, RegexOptions.Compiled | RegexOptions.Multiline);

            // Remove CR LF line breaks (a lone "\n" is not matched by this pattern).
            html = Regex.Replace(html, "\\r\\n", string.Empty, RegexOptions.Compiled | RegexOptions.Multiline);
            return html;
        }

        #region Fetch HTML page content (GET)

        /// <summary>
        /// Downloads a page with an HTTP GET request and decodes the body as GBK.
        /// </summary>
        /// <param name="url">The address to request.</param>
        /// <returns>
        /// The response body, or an empty string when <paramref name="url"/> is null or empty
        /// or when any exception occurs (failures are swallowed on purpose: best effort).
        /// </returns>
        public static string GetHtmlContent(string url)
        {
            if (string.IsNullOrEmpty(url))
            {
                return "";
            }

            try
            {
                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);

                // NOTE(review): the original comment said "do not use a persistent connection",
                // but the code enables keep-alive. The value is kept as it was; confirm intent.
                request.KeepAlive = true;
                request.Method = "GET";
                ApplyCommonSettings(request);

                string content = ReadResponse(request, "gbk");

                // Cancel the request, as the original implementation did after reading the body.
                request.Abort();
                return content;
            }
            catch (Exception)
            {
                // Best effort: callers receive an empty string on any failure.
                return "";
            }
        }

        #endregion

        #region Fetch HTML page content (POST)

        /// <summary>
        /// Sends form data with an HTTP POST request and returns the decoded response body.
        /// </summary>
        /// <param name="URL">The address to request.</param>
        /// <param name="strPostdata">
        /// The data to send as the request body (content type application/x-www-form-urlencoded).
        /// The body bytes are produced with <see cref="Encoding.Default"/>, independently of
        /// <paramref name="strEncoding"/>.
        /// </param>
        /// <param name="strEncoding">The name of the encoding used to decode the response body.</param>
        /// <returns>
        /// The response body, or an empty string when <paramref name="URL"/> is null or empty
        /// or when any exception occurs (failures are swallowed on purpose: best effort).
        /// </returns>
        public static string PostHtmlContent(string URL, string strPostdata, string strEncoding = "gbk")
        {
            if (string.IsNullOrEmpty(URL))
            {
                return "";
            }

            try
            {
                Encoding requestEncoding = Encoding.Default;

                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(URL);
                request.Method = "POST";
                request.ContentType = "application/x-www-form-urlencoded";
                ApplyCommonSettings(request);

                byte[] payload = requestEncoding.GetBytes(strPostdata);
                request.ContentLength = payload.Length;

                // The request stream must be closed before the response is requested.
                using (Stream requestStream = request.GetRequestStream())
                {
                    requestStream.Write(payload, 0, payload.Length);
                }

                string content = ReadResponse(request, strEncoding);

                // Cancel the request, as the original implementation did after reading the body.
                request.Abort();
                return content;
            }
            catch (Exception)
            {
                // Best effort: callers receive an empty string on any failure.
                return "";
            }
        }

        #endregion

        /// <summary>
        /// Applies the headers and connection settings shared by the GET and POST helpers.
        /// </summary>
        private static void ApplyCommonSettings(HttpWebRequest request)
        {
            request.UserAgent = BrowserUserAgent;
            request.Accept = "*/*";
            request.Headers.Add("Accept-Language", "zh-cn,en-us;q=0.5");
            request.ServicePoint.Expect100Continue = false;
            request.Timeout = RequestTimeoutMilliseconds;

            // Follow redirects (e.g. HTTP 302).
            request.AllowAutoRedirect = true;

            // Process-wide setting; it is re-assigned on every request, as in the original code.
            ServicePointManager.DefaultConnectionLimit = ConnectionLimit;
        }

        /// <summary>
        /// Executes the request and reads the whole response body using the named encoding.
        /// The response, its stream and the reader are disposed even when reading fails.
        /// </summary>
        private static string ReadResponse(HttpWebRequest request, string encodingName)
        {
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (Stream stream = response.GetResponseStream())
            using (StreamReader reader = new StreamReader(stream, Encoding.GetEncoding(encodingName)))
            {
                return reader.ReadToEnd();
            }
        }
    }
}