You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
163 lines
6.2 KiB
163 lines
6.2 KiB
1 year ago
|
using System;
|
||
|
using System.Collections.Generic;
|
||
|
using System.IO;
|
||
|
using System.Linq;
|
||
|
using System.Net;
|
||
|
using System.Text;
|
||
|
using System.Text.RegularExpressions;
|
||
|
using System.Threading.Tasks;
|
||
|
|
||
|
namespace DevicesService.Commen
|
||
|
{
|
||
|
/// <summary>
/// Static helpers for downloading HTML pages with <see cref="HttpWebRequest"/> and for
/// pulling simple values out of HTML text with regular expressions.
/// </summary>
/// <remarks>
/// The HTTP helpers are best effort: any failure is traced and reported to the caller
/// as an empty string instead of an exception.
/// </remarks>
public class HtmlHelper
{
    /// <summary>Encoding name used to decode response bodies when the caller does not supply one.</summary>
    private const string DefaultEncodingName = "gbk";

    /// <summary>
    /// Value sent in the User-Agent header. It holds only the header value: the
    /// <c>UserAgent</c> property adds the header name itself.
    /// </summary>
    private const string BrowserUserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; .NET CLR 1.0.3705)";

    /// <summary>Value assigned to <see cref="HttpWebRequest.Timeout"/>, in milliseconds.</summary>
    private const int RequestTimeoutMilliseconds = 5000;

    /// <summary>Value assigned to <see cref="ServicePointManager.DefaultConnectionLimit"/>.</summary>
    private const int ConnectionLimit = 30;

    /// <summary>Matches the run of whitespace (line breaks included) at the start of every line.</summary>
    private static readonly Regex LeadingWhitespace =
        new Regex("^\\s*", RegexOptions.Compiled | RegexOptions.Multiline);

    /// <summary>
    /// Returns the text between the first <c>&lt;title ...&gt;</c> and <c>&lt;/title&gt;</c> pair
    /// found in <paramref name="str"/>, where <c>title</c> is the supplied tag name.
    /// </summary>
    /// <param name="str">Text to search.</param>
    /// <param name="title">
    /// Tag name. It is inserted into the regular expression as-is (not escaped) and is
    /// matched case-insensitively.
    /// </param>
    /// <returns>
    /// The captured text, or an empty string when <paramref name="str"/> is null or empty or
    /// nothing matches. The captured text cannot contain '&lt;', so an element with nested
    /// tags does not match.
    /// </returns>
    public static string GetTitleContent(string str, string title)
    {
        if (string.IsNullOrEmpty(str))
        {
            return string.Empty;
        }

        string pattern = string.Format("<{0}[^>]*?>(?<Text>[^<]*)</{1}>", title, title);

        // HTML tag names are case-insensitive; this also keeps both overloads consistent.
        Match match = Regex.Match(str, pattern, RegexOptions.IgnoreCase);
        return match.Groups["Text"].Value;
    }

    /// <summary>
    /// Returns the value of attribute <paramref name="attrib"/> on the first
    /// <paramref name="title"/> tag found in <paramref name="str"/>.
    /// </summary>
    /// <param name="str">Text to search.</param>
    /// <param name="title">Tag name, inserted into the regular expression as-is (not escaped).</param>
    /// <param name="attrib">Attribute name, inserted into the regular expression as-is (not escaped).</param>
    /// <returns>
    /// The attribute value without its surrounding quotes, or an empty string when
    /// <paramref name="str"/> is null or empty or nothing matches. The value may be single-quoted,
    /// double-quoted or unquoted, and cannot contain whitespace, quotes or '&gt;'.
    /// </returns>
    public static string GetTitleContent(string str, string title, string attrib)
    {
        if (string.IsNullOrEmpty(str))
        {
            return string.Empty;
        }

        string pattern = string.Format("<{0}[^>]*?{1}=(['\"\"]?)(?<url>[^'\"\"\\s>]+)\\1[^>]*>", title, attrib);
        Match match = Regex.Match(str, pattern, RegexOptions.IgnoreCase);
        return match.Groups["url"].Value;
    }

    /// <summary>
    /// Compacts page markup: strips the whitespace at the start of every line (which also
    /// drops blank lines) and then removes every CR LF sequence.
    /// </summary>
    /// <param name="html">Markup to compact.</param>
    /// <returns>
    /// The compacted markup, or an empty string when <paramref name="html"/> is null or empty.
    /// A line feed that is not preceded by a carriage return is left in place.
    /// </returns>
    public static string ReplaceEmpty(string html)
    {
        if (string.IsNullOrEmpty(html))
        {
            return string.Empty;
        }

        string withoutIndent = LeadingWhitespace.Replace(html, string.Empty);
        return withoutIndent.Replace("\r\n", string.Empty);
    }

    #region Fetch HTML page content (GET)
    /// <summary>
    /// Downloads <paramref name="url"/> with an HTTP GET and decodes the body as "gbk".
    /// </summary>
    /// <param name="url">Address to request.</param>
    /// <returns>
    /// The response body, or an empty string when <paramref name="url"/> is null or empty or
    /// the request fails for any reason.
    /// </returns>
    public static string GetHtmlContent(string url)
    {
        return GetHtmlContent(url, DefaultEncodingName);
    }

    /// <summary>
    /// Downloads <paramref name="url"/> with an HTTP GET and decodes the body with the
    /// named encoding.
    /// </summary>
    /// <param name="url">Address to request.</param>
    /// <param name="strEncoding">Name passed to <see cref="Encoding.GetEncoding(string)"/> to decode the body.</param>
    /// <returns>
    /// The response body, or an empty string when <paramref name="url"/> is null or empty or
    /// the request fails for any reason (including an unknown encoding name).
    /// </returns>
    public static string GetHtmlContent(string url, string strEncoding)
    {
        if (string.IsNullOrEmpty(url))
        {
            return string.Empty;
        }

        try
        {
            HttpWebRequest request = CreateRequest(url, "GET");

            // Ask for a persistent (keep-alive) connection.
            request.KeepAlive = true;

            return ReadResponse(request, strEncoding);
        }
        catch (Exception ex)
        {
            // Best effort: callers rely on an empty string instead of an exception.
            System.Diagnostics.Trace.TraceWarning("HtmlHelper.GetHtmlContent failed: {0}", ex.Message);
            return string.Empty;
        }
    }
    #endregion

    #region Fetch HTML page content (POST)
    /// <summary>
    /// Sends <paramref name="strPostdata"/> to <paramref name="URL"/> as an
    /// <c>application/x-www-form-urlencoded</c> HTTP POST and returns the decoded response body.
    /// </summary>
    /// <param name="URL">Address to request.</param>
    /// <param name="strPostdata">Request body. A null value makes the call fail and return an empty string.</param>
    /// <param name="strEncoding">Name passed to <see cref="Encoding.GetEncoding(string)"/> to decode the response body.</param>
    /// <returns>
    /// The response body, or an empty string when <paramref name="URL"/> is null or empty or
    /// the request fails for any reason.
    /// </returns>
    public static string PostHtmlContent(string URL, string strPostdata, string strEncoding = DefaultEncodingName)
    {
        if (string.IsNullOrEmpty(URL))
        {
            return string.Empty;
        }

        try
        {
            HttpWebRequest request = CreateRequest(URL, "POST");
            request.ContentType = "application/x-www-form-urlencoded";

            // NOTE(review): the body is encoded with the platform default encoding while the
            // response is decoded with strEncoding - confirm whether both should use strEncoding.
            byte[] body = Encoding.Default.GetBytes(strPostdata);
            request.ContentLength = body.Length;

            // Close the request stream before asking for the response.
            using (Stream requestStream = request.GetRequestStream())
            {
                requestStream.Write(body, 0, body.Length);
            }

            return ReadResponse(request, strEncoding);
        }
        catch (Exception ex)
        {
            // Best effort: callers rely on an empty string instead of an exception.
            System.Diagnostics.Trace.TraceWarning("HtmlHelper.PostHtmlContent failed: {0}", ex.Message);
            return string.Empty;
        }
    }
    #endregion

    /// <summary>Creates a request for <paramref name="url"/> carrying the headers and limits shared by GET and POST.</summary>
    private static HttpWebRequest CreateRequest(string url, string method)
    {
        // Set before request.ServicePoint is touched: the default limit only applies to
        // service points created after the assignment.
        ServicePointManager.DefaultConnectionLimit = ConnectionLimit;

        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
        request.Method = method;
        request.UserAgent = BrowserUserAgent;
        request.Accept = "*/*";
        request.Headers.Add("Accept-Language", "zh-cn,en-us;q=0.5");
        request.ServicePoint.Expect100Continue = false;
        request.Timeout = RequestTimeoutMilliseconds;
        request.AllowAutoRedirect = true; // follow redirects such as 302
        return request;
    }

    /// <summary>Executes <paramref name="request"/>, reads the whole body as text and releases the response.</summary>
    private static string ReadResponse(HttpWebRequest request, string encodingName)
    {
        // Disposing the response releases its connection even when decoding or reading throws.
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using (Stream stream = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(stream, Encoding.GetEncoding(encodingName)))
        {
            return reader.ReadToEnd();
        }
    }
}
|
||
|
|
||
|
|
||
|
}
|