You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
162 lines
6.2 KiB
162 lines
6.2 KiB
using System; |
|
using System.Collections.Generic; |
|
using System.IO; |
|
using System.Linq; |
|
using System.Net; |
|
using System.Text; |
|
using System.Text.RegularExpressions; |
|
using System.Threading.Tasks; |
|
|
|
namespace DevicesService.Commen |
|
{ |
|
/// <summary>
/// Static helpers for extracting values from HTML text with regular expressions
/// and for downloading page content over HTTP (GET and form POST).
/// </summary>
public class HtmlHelper
{
    // Value of the User-Agent header sent with every request. Only the header
    // VALUE belongs here: HttpWebRequest adds the "User-Agent:" name itself.
    private const string DefaultUserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; .NET CLR 1.0.3705)";

    // Encoding name used to decode responses when the caller does not pick one.
    private const string DefaultEncodingName = "gbk";

    // Value assigned to HttpWebRequest.Timeout, in milliseconds.
    private const int RequestTimeoutMilliseconds = 5000;

    // Value assigned to ServicePointManager.DefaultConnectionLimit.
    private const int ConnectionLimit = 30;

    // Leading whitespace of every line (Multiline makes ^ match at each line start).
    private static readonly Regex LeadingWhitespacePattern = new Regex("^\\s*", RegexOptions.Compiled | RegexOptions.Multiline);

    // A line break in either CRLF or LF form.
    private static readonly Regex LineBreakPattern = new Regex("\\r?\\n", RegexOptions.Compiled);

    /// <summary>
    /// Returns the text between the first opening/closing pair of the given tag.
    /// Tag names are matched case-insensitively, and the text must not contain
    /// a nested tag (it stops at the first '&lt;').
    /// </summary>
    /// <param name="str">HTML text to search. Null or empty yields an empty string.</param>
    /// <param name="title">Tag name, e.g. "title". It is inserted into the regular
    /// expression verbatim, so regex metacharacters in it are interpreted as pattern syntax.</param>
    /// <returns>The inner text of the first match, or an empty string when nothing matches.</returns>
    public static string GetTitleContent(string str, string title)
    {
        if (string.IsNullOrEmpty(str))
        {
            return string.Empty;
        }

        string pattern = string.Format("<{0}[^>]*?>(?<Text>[^<]*)</{0}>", title);

        // HTML tag names are case-insensitive, so <TITLE> must match "title" too;
        // this also keeps the overload consistent with the attribute overload below.
        Match match = Regex.Match(str, pattern, RegexOptions.IgnoreCase);

        // Groups["Text"].Value is an empty string when the match failed.
        return match.Groups["Text"].Value;
    }

    /// <summary>
    /// Returns the value of an attribute on the first occurrence of the given tag.
    /// The value may be wrapped in single quotes, double quotes, or left unquoted,
    /// and must not contain whitespace.
    /// </summary>
    /// <param name="str">HTML text to search. Null or empty yields an empty string.</param>
    /// <param name="title">Tag name, e.g. "a". Inserted into the pattern verbatim.</param>
    /// <param name="attrib">Attribute name, e.g. "href". Inserted into the pattern verbatim.</param>
    /// <returns>The attribute value of the first match, or an empty string when nothing matches.</returns>
    public static string GetTitleContent(string str, string title, string attrib)
    {
        if (string.IsNullOrEmpty(str))
        {
            return string.Empty;
        }

        // Group 1 captures the optional opening quote; \1 requires the same
        // character (or nothing) to close the value.
        string pattern = string.Format("<{0}[^>]*?{1}=(['\"]?)(?<url>[^'\"\\s>]+)\\1[^>]*>", title, attrib);

        Match match = Regex.Match(str, pattern, RegexOptions.IgnoreCase);

        return match.Groups["url"].Value;
    }

    /// <summary>
    /// Collapses HTML text onto one line: strips the leading whitespace of every
    /// line (which also removes blank lines) and then removes the remaining
    /// line breaks, both CRLF and LF.
    /// </summary>
    /// <param name="html">Text to compact. Null or empty yields an empty string.</param>
    /// <returns>The compacted text.</returns>
    public static string ReplaceEmpty(string html)
    {
        if (string.IsNullOrEmpty(html))
        {
            return string.Empty;
        }

        string withoutIndentation = LeadingWhitespacePattern.Replace(html, string.Empty);

        return LineBreakPattern.Replace(withoutIndentation, string.Empty);
    }

    #region Fetch page content with GET

    /// <summary>
    /// Downloads a page with an HTTP GET request and decodes the body as "gbk".
    /// </summary>
    /// <param name="url">Address to request. Null or empty yields an empty string.</param>
    /// <returns>The response body, or an empty string when the request fails for any reason.</returns>
    public static string GetHtmlContent(string url)
    {
        return GetHtmlContent(url, DefaultEncodingName);
    }

    /// <summary>
    /// Downloads a page with an HTTP GET request and decodes the body with the
    /// named encoding.
    /// </summary>
    /// <param name="url">Address to request. Null or empty yields an empty string.</param>
    /// <param name="strEncoding">Name of the encoding used to decode the response, e.g. "gbk" or "utf-8".</param>
    /// <returns>The response body, or an empty string when the request fails for any reason
    /// (including an encoding name the runtime does not recognise).</returns>
    public static string GetHtmlContent(string url, string strEncoding)
    {
        if (string.IsNullOrEmpty(url))
        {
            return "";
        }

        try
        {
            // Resolve the encoding first so a bad name fails before any network traffic.
            Encoding responseEncoding = Encoding.GetEncoding(strEncoding);

            HttpWebRequest request = CreateRequest(url, "GET");

            // Keep the connection persistent (the code has always set this to true).
            request.KeepAlive = true;

            return ReadResponseText(request, responseEncoding);
        }
        catch (Exception ex)
        {
            // Best-effort contract: callers get an empty string on failure, but
            // the reason is no longer lost entirely.
            System.Diagnostics.Trace.TraceWarning("HtmlHelper.GetHtmlContent failed for {0}: {1}", url, ex.Message);

            return "";
        }
    }

    #endregion

    #region Fetch page content with POST

    /// <summary>
    /// Sends form data with an HTTP POST request
    /// (Content-Type "application/x-www-form-urlencoded") and returns the response body.
    /// </summary>
    /// <param name="URL">Address to request. Null or empty yields an empty string.</param>
    /// <param name="strPostdata">Request body to send. A null value makes the call fail and return an empty string.</param>
    /// <param name="strEncoding">Name of the encoding used to decode the response; defaults to "gbk".</param>
    /// <returns>The response body, or an empty string when the request fails for any reason.</returns>
    public static string PostHtmlContent(string URL, string strPostdata, string strEncoding = "gbk")
    {
        if (string.IsNullOrEmpty(URL))
        {
            return "";
        }

        try
        {
            // Resolve the encoding first so a bad name fails before the POST is sent.
            Encoding responseEncoding = Encoding.GetEncoding(strEncoding);

            // NOTE(review): the body is encoded with Encoding.Default, which depends
            // on the platform and ignores strEncoding. Kept as-is to preserve
            // behaviour; confirm whether the body should use strEncoding instead.
            byte[] body = Encoding.Default.GetBytes(strPostdata);

            HttpWebRequest request = CreateRequest(URL, "POST");
            request.ContentType = "application/x-www-form-urlencoded";
            request.ContentLength = body.Length;

            // The request stream must be closed before the response is requested,
            // otherwise the underlying connection is never released.
            using (Stream requestStream = request.GetRequestStream())
            {
                requestStream.Write(body, 0, body.Length);
            }

            return ReadResponseText(request, responseEncoding);
        }
        catch (Exception ex)
        {
            // Best-effort contract: callers get an empty string on failure.
            System.Diagnostics.Trace.TraceWarning("HtmlHelper.PostHtmlContent failed for {0}: {1}", URL, ex.Message);

            return "";
        }
    }

    #endregion

    // Builds a request with the headers and limits shared by GET and POST.
    private static HttpWebRequest CreateRequest(string url, string method)
    {
        // Assigned before the request exists: the default only applies to
        // ServicePoint objects created after the change.
        ServicePointManager.DefaultConnectionLimit = ConnectionLimit;

        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);

        request.Method = method;
        request.UserAgent = DefaultUserAgent;
        request.Accept = "*/*";
        request.Headers.Add("Accept-Language", "zh-cn,en-us;q=0.5");
        request.ServicePoint.Expect100Continue = false;
        request.Timeout = RequestTimeoutMilliseconds;
        request.AllowAutoRedirect = true; // follow redirects such as 302

        return request;
    }

    // Executes the request and reads the whole response body as text. Disposing
    // the response releases the connection, replacing the former Abort() call.
    private static string ReadResponseText(HttpWebRequest request, Encoding encoding)
    {
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using (Stream stream = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(stream, encoding))
        {
            return reader.ReadToEnd();
        }
    }
}
|
|
|
|
|
}
|
|
|