/// <summary>
/// General-purpose HTML helpers: page download, markup stripping, entity
/// encoding/decoding, link and image extraction, query-string handling and
/// multipart form posting.
/// </summary>
public class HtmlHelper
{
    /// <summary>
    /// Matches an img tag and captures the value of its src attribute in the
    /// named group "imgUrl". Shared by all image helpers so the pattern is
    /// built only once.
    /// </summary>
    private static readonly Regex ImgTagRegex = new Regex(
        @"<img\b[^<>]*?\bsrc[\s\t\r\n]*=[\s\t\r\n]*[""']?[\s\t\r\n]*(?<imgUrl>[^\s\t\r\n""'<>]*)[^<>]*?/?[\s\t\r\n]*>",
        RegexOptions.IgnoreCase);

    #region Private helpers

    /// <summary>
    /// Executes the request and returns the whole response body decoded with
    /// <paramref name="encoding"/>. The response, stream and reader are
    /// disposed even when reading fails.
    /// </summary>
    private static string ReadResponseText(WebRequest request, Encoding encoding)
    {
        using (WebResponse response = request.GetResponse())
        using (Stream stream = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(stream, encoding))
        {
            return reader.ReadToEnd();
        }
    }

    /// <summary>
    /// Returns true when the text starts with "http://" or "https://"
    /// (case-insensitive, ordinal comparison).
    /// </summary>
    private static bool HasHttpScheme(string url)
    {
        return url != null
            && (url.StartsWith("http://", StringComparison.OrdinalIgnoreCase)
                || url.StartsWith("https://", StringComparison.OrdinalIgnoreCase));
    }

    /// <summary>
    /// Records a failure that a best-effort method is about to swallow, so it
    /// is at least visible to trace listeners.
    /// </summary>
    private static void ReportFailure(string operation, Exception ex)
    {
        System.Diagnostics.Trace.TraceWarning("HtmlHelper.{0} failed: {1}", operation, ex.Message);
    }

    #endregion

    #region Download page source

    /// <summary>
    /// Downloads a page with <c>WebClient.DownloadData</c> and decodes it.
    /// </summary>
    /// <param name="url">Address of the page.</param>
    /// <param name="encoding">Name of the encoding used to decode the bytes.</param>
    /// <returns>The decoded page source.</returns>
    public static string GetHTML(string url, string encoding)
    {
        using (WebClient web = new WebClient())
        {
            byte[] buffer = web.DownloadData(url);
            return Encoding.GetEncoding(encoding).GetString(buffer);
        }
    }

    /// <summary>
    /// Downloads a page by reading the stream returned by <c>WebClient.OpenRead</c>.
    /// </summary>
    /// <param name="url">Address of the page.</param>
    /// <param name="encoding">Name of the encoding used to decode the stream.</param>
    /// <returns>The decoded page source.</returns>
    public static string GetWebClient(string url, string encoding)
    {
        using (WebClient client = new WebClient())
        using (Stream stream = client.OpenRead(url))
        using (StreamReader reader = new StreamReader(stream, Encoding.GetEncoding(encoding)))
        {
            return reader.ReadToEnd();
        }
    }

    /// <summary>
    /// Downloads a page through a plain <c>WebRequest</c>.
    /// </summary>
    /// <param name="url">Address of the page.</param>
    /// <param name="encoding">Name of the encoding used to decode the response.</param>
    /// <returns>The decoded page source.</returns>
    public static string GetWebRequest(string url, string encoding)
    {
        WebRequest request = WebRequest.Create(new Uri(url));
        return ReadResponseText(request, Encoding.GetEncoding(encoding));
    }

    /// <summary>
    /// Downloads a page through an <c>HttpWebRequest</c> that sends browser-like
    /// User-Agent, Accept and Accept-Language headers.
    /// </summary>
    /// <param name="url">Address of the page.</param>
    /// <param name="encoding">Name of the encoding used to decode the response.</param>
    /// <returns>The decoded page source.</returns>
    public static string GetHttpWebRequest(string url, string encoding)
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(new Uri(url));
        // The value must not repeat the header name; the previous literal began
        // with "User-Agent:" and had an unbalanced parenthesis.
        request.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; .NET CLR 1.0.3705)";
        request.Accept = "*/*";
        request.KeepAlive = true;
        request.Headers.Add("Accept-Language", "zh-cn,en-us;q=0.5");
        return ReadResponseText(request, Encoding.GetEncoding(encoding));
    }

    /// <summary>
    /// Best-effort download that reads the response line by line using the
    /// system default encoding. Every line is terminated with "\r\n" in the
    /// result.
    /// </summary>
    /// <param name="url">Address of the page.</param>
    /// <returns>
    /// The page source, or whatever was read before a failure occurred
    /// (an empty string when nothing could be read). Never throws.
    /// </returns>
    public static string GetHtmlCode(string url)
    {
        StringBuilder html = new StringBuilder();
        try
        {
            WebRequest request = WebRequest.Create(new Uri(url));
            using (WebResponse response = request.GetResponse())
            using (Stream stream = response.GetResponseStream())
            using (StreamReader reader = new StreamReader(stream, Encoding.Default))
            {
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    html.Append(line).Append("\r\n");
                }
            }
        }
        catch (Exception ex)
        {
            // Deliberately swallowed: callers rely on getting a string back.
            ReportFailure("GetHtmlCode", ex);
        }

        return html.ToString();
    }

    #endregion

    #region Strip and format HTML markup

    /// <summary>
    /// Removes scripts and tags from HTML source, decodes a few common
    /// entities, drops remaining numeric entities and finally removes any
    /// angle brackets and line breaks that are left.
    /// </summary>
    /// <param name="Html">HTML source.</param>
    /// <returns>The text without markup; null or empty input is returned unchanged.</returns>
    public static string RemoveHtml(string Html)
    {
        if (string.IsNullOrEmpty(Html))
        {
            return Html;
        }

        // Singleline lets ".*?" span line breaks so multi-line scripts are removed too.
        Html = Regex.Replace(Html, @"<script[^>]*?>.*?</script>", "", RegexOptions.IgnoreCase | RegexOptions.Singleline);

        // Remove tags.
        Html = Regex.Replace(Html, "<.+?>", "", RegexOptions.IgnoreCase);
        Html = Regex.Replace(Html, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);
        Html = Regex.Replace(Html, @"([\r\n])[\s]+", "", RegexOptions.IgnoreCase);
        Html = Regex.Replace(Html, @"-->", "", RegexOptions.IgnoreCase);
        Html = Regex.Replace(Html, @"<!--.*", "", RegexOptions.IgnoreCase);

        // Decode common entities.
        Html = Regex.Replace(Html, @"&(quot|#34);", "\"", RegexOptions.IgnoreCase);
        Html = Regex.Replace(Html, @"&(amp|#38);", "&", RegexOptions.IgnoreCase);
        Html = Regex.Replace(Html, @"&(lt|#60);", "<", RegexOptions.IgnoreCase);
        Html = Regex.Replace(Html, @"&(gt|#62);", ">", RegexOptions.IgnoreCase);
        Html = Regex.Replace(Html, @"&(nbsp|#160);", " ", RegexOptions.IgnoreCase);
        Html = Regex.Replace(Html, @"&(iexcl|#161);", "\xa1", RegexOptions.IgnoreCase);
        Html = Regex.Replace(Html, @"&(cent|#162);", "\xa2", RegexOptions.IgnoreCase);
        Html = Regex.Replace(Html, @"&(pound|#163);", "\xa3", RegexOptions.IgnoreCase);
        Html = Regex.Replace(Html, @"&(copy|#169);", "\xa9", RegexOptions.IgnoreCase);
        Html = Regex.Replace(Html, @"&#(\d+);", "", RegexOptions.IgnoreCase);

        // String.Replace returns a new string; the results used to be discarded,
        // which made these three statements no-ops. Note that this also removes
        // brackets produced by the &lt;/&gt; decoding above.
        Html = Html.Replace("<", "");
        Html = Html.Replace(">", "");
        Html = Html.Replace("\r\n", "");
        return Html;
    }

    /// <summary>
    /// Compacts HTML by removing whitespace between tags and line breaks with
    /// the whitespace that follows them. The body element's tag name is
    /// rewritten in lower case.
    /// </summary>
    /// <param name="Html">HTML source.</param>
    /// <returns>The compacted HTML; null or empty input is returned unchanged.</returns>
    public static string ZipHtml(string Html)
    {
        if (string.IsNullOrEmpty(Html))
        {
            return Html;
        }

        Html = Regex.Replace(Html, @">\s+?<", "><"); // whitespace between tags
        Html = Regex.Replace(Html, @"\r\n\s*", "");
        Html = Regex.Replace(Html, @"<body([\s|\S]*?)>([\s|\S]*?)</body>", @"<body$1>$2</body>", RegexOptions.IgnoreCase);
        return Html;
    }

    /// <summary>
    /// Escapes text so that it is displayed literally in an HTML page: spaces,
    /// quotes and angle brackets become entities and line breaks become
    /// <c>&lt;BR&gt;</c>. Ampersands are not escaped.
    /// </summary>
    /// <param name="Html">Text to escape.</param>
    /// <returns>The escaped text; null or empty input is returned unchanged.</returns>
    public static string FormatHtml(string Html)
    {
        if (string.IsNullOrEmpty(Html))
        {
            return Html;
        }

        // The entity literals below had been entity-decoded in the stored
        // source (which made them no-ops or syntax errors); they are restored.
        Html = Html.Replace(" ", "&nbsp;");
        Html = Html.Replace("'", "’");
        Html = Html.Replace("\"", "&quot;");
        Html = Html.Replace("<", "&lt;");
        Html = Html.Replace(">", "&gt;");

        // Lines that start with indentation get a <BR> followed by two
        // full-width spaces instead of their original indent.
        // NOTE(review): the full-width space (U+3000) is reconstructed from the
        // original comment ("add two full-width spaces before each new line");
        // confirm against the upstream source.
        Regex indentedLine = new Regex(@"(\r\n((&nbsp;)|\u3000)+)(?<正文>\S+)", RegexOptions.IgnoreCase);
        for (Match m = indentedLine.Match(Html); m.Success; m = m.NextMatch())
        {
            Html = Html.Replace(m.Groups[0].ToString(), "<BR>\u3000\u3000" + m.Groups["正文"].ToString());
        }

        // Remaining line breaks.
        Html = Html.Replace("\r\n", "<BR>");
        return Html;
    }

    /// <summary>
    /// Removes everything that looks like a tag (text between "&lt;" and "&gt;").
    /// </summary>
    /// <param name="strhtml">HTML source.</param>
    /// <returns>The text without tags; null or empty input is returned unchanged.</returns>
    public static string StripHtml(string strhtml)
    {
        if (string.IsNullOrEmpty(strhtml))
        {
            return strhtml;
        }

        return Regex.Replace(strhtml, @"<[^>]+>|</[^>]+>", "");
    }

    #endregion

    #region Text / HTML conversion

    /// <summary>
    /// Converts plain text to HTML: escapes &amp;, &lt; and &gt;, turns line
    /// breaks into <c>&lt;br&gt;</c> and tabs/spaces into <c>&amp;nbsp;</c>.
    /// </summary>
    /// <param name="str">Text to convert.</param>
    /// <returns>The converted text; null or empty input is returned unchanged.</returns>
    public static String ToHtml(string str)
    {
        if (string.IsNullOrEmpty(str))
        {
            return str;
        }

        StringBuilder sb = new StringBuilder(str);
        // "&" must be escaped first so the entities added below stay intact.
        sb.Replace("&", "&amp;");
        sb.Replace("<", "&lt;");
        sb.Replace(">", "&gt;");
        sb.Replace("\r\n", "<br>");
        sb.Replace("\n", "<br>");
        sb.Replace("\t", " ");
        sb.Replace(" ", "&nbsp;");
        return sb.ToString();
    }

    /// <summary>
    /// Reverses <see cref="ToHtml"/>: turns <c>&amp;nbsp;</c> into spaces,
    /// <c>&lt;br&gt;</c> into line breaks and unescapes &lt;, &gt; and &amp;.
    /// </summary>
    /// <param name="str">HTML fragment to convert.</param>
    /// <returns>The plain text; null or empty input is returned unchanged.</returns>
    public static String ToTxt(String str)
    {
        if (string.IsNullOrEmpty(str))
        {
            return str;
        }

        StringBuilder sb = new StringBuilder(str);
        sb.Replace("&nbsp;", " ");
        sb.Replace("<br>", "\r\n");
        sb.Replace("&lt;", "<");
        sb.Replace("&gt;", ">");
        // "&amp;" is unescaped last so "&amp;lt;" becomes "&lt;", not "<".
        sb.Replace("&amp;", "&");
        return sb.ToString();
    }

    #endregion

    #region HTML special characters

    /// <summary>
    /// Replaces angle brackets, spaces, quotes and line breaks with their HTML
    /// representation. Ampersands are left untouched so that
    /// <see cref="HtmlDecode"/> restores the original text.
    /// </summary>
    /// <param name="theString">Text to encode.</param>
    /// <returns>The encoded text; null or empty input is returned unchanged.</returns>
    public static string HtmlEncode(string theString)
    {
        if (string.IsNullOrEmpty(theString))
        {
            return theString;
        }

        theString = theString.Replace(">", "&gt;");
        theString = theString.Replace("<", "&lt;");
        theString = theString.Replace(" ", "&nbsp;");
        theString = theString.Replace("\"", "&quot;");
        theString = theString.Replace("'", "&#39;");
        theString = theString.Replace("\r\n", "<br/> ");
        return theString;
    }

    /// <summary>
    /// Reverses <see cref="HtmlEncode"/> and additionally decodes
    /// <c>&amp;mdash;</c>.
    /// </summary>
    /// <param name="theString">Text to decode.</param>
    /// <returns>The decoded text; null or empty input is returned unchanged.</returns>
    public static string HtmlDecode(string theString)
    {
        if (string.IsNullOrEmpty(theString))
        {
            return theString;
        }

        theString = theString.Replace("&gt;", ">");
        theString = theString.Replace("&lt;", "<");
        theString = theString.Replace("&nbsp;", " ");
        theString = theString.Replace("&quot;", "\"");
        theString = theString.Replace("&#39;", "'");
        theString = theString.Replace("<br/> ", "\r\n");
        theString = theString.Replace("&mdash;", "—");
        return theString;
    }

    #endregion

    #region Read href values of a tags

    /// <summary>
    /// Extracts hyperlinks and their inner content from HTML.
    /// </summary>
    /// <param name="content">HTML source.</param>
    /// <returns>
    /// A dictionary that maps each distinct href to the content of the first
    /// a element that uses it. Empty when the input is null or empty.
    /// </returns>
    public static Dictionary<string, string> GetUrl(string content)
    {
        Dictionary<string, string> links = new Dictionary<string, string>();
        if (string.IsNullOrEmpty(content))
        {
            return links;
        }

        string pattern = @"(?is)<a[^>]*?href=(['""]?)(?<url>[^'""\s>]+)\1[^>]*>(?<text>(?:(?!</?a\b).)*)</a>";
        foreach (Match m in Regex.Matches(content, pattern))
        {
            string href = m.Groups["url"].Value;
            // First occurrence wins; duplicates are skipped instead of being
            // detected through an ArgumentException.
            if (!links.ContainsKey(href))
            {
                links.Add(href, m.Groups["text"].Value);
            }
        }

        return links;
    }

    #endregion

    #region Images in HTML

    /// <summary>
    /// Returns the src of the first img tag that has a non-empty src.
    /// </summary>
    /// <param name="sHtmlText">HTML source.</param>
    /// <returns>The image address, or an empty string when there is none.</returns>
    public static string getHtmlFirstImage(string sHtmlText)
    {
        if (string.IsNullOrEmpty(sHtmlText))
        {
            return "";
        }

        foreach (Match match in ImgTagRegex.Matches(sHtmlText))
        {
            string src = match.Groups["imgUrl"].Value;
            if (src != "")
            {
                return src;
            }
        }

        return "";
    }

    /// <summary>
    /// Returns the src values of all img tags joined with "|". Tags with an
    /// empty src are skipped so the result never contains empty entries.
    /// </summary>
    /// <param name="sHtmlText">HTML source.</param>
    /// <returns>The "|"-separated image addresses, or an empty string.</returns>
    public static string getHtmlImageList(string sHtmlText)
    {
        if (string.IsNullOrEmpty(sHtmlText))
        {
            return "";
        }

        List<string> sources = new List<string>();
        foreach (Match match in ImgTagRegex.Matches(sHtmlText))
        {
            string src = match.Groups["imgUrl"].Value;
            if (src != "")
            {
                sources.Add(src);
            }
        }

        return string.Join("|", sources.ToArray());
    }

    /// <summary>
    /// Returns the distinct src attribute values found in the HTML, in order
    /// of first appearance.
    /// </summary>
    /// <param name="sHtmlText">HTML source.</param>
    /// <returns>An <c>ArrayList</c> of strings; empty for null or empty input.</returns>
    public static ArrayList GetHtmlSrcUrlList(string sHtmlText)
    {
        ArrayList sources = new ArrayList();
        if (string.IsNullOrEmpty(sHtmlText))
        {
            return sources;
        }

        // NOTE(review): the pattern starts and ends with a literal space where
        // a tag opener/closer would be expected; it is kept exactly as found
        // because the intended tag name cannot be determined from this file.
        Regex srcRegex = new Regex(@" \b[^<>]*?\bsrc[\s\t\r\n]*=[\s\t\r\n]*[""']?[\s\t\r\n]*(?<imgUrl>[^\s\t\r\n""'<>]*)[^<>]*?/?[\s\t\r\n]* ");
        foreach (Match match in srcRegex.Matches(sHtmlText))
        {
            string src = match.Groups["imgUrl"].Value;
            if (!sources.Contains(src))
            {
                sources.Add(src);
            }
        }

        return sources;
    }

    /// <summary>
    /// Rewrites every img tag: removes its style, width and height attributes
    /// (attribute names are matched case-insensitively) and appends
    /// <paramref name="styleStr"/> before a self-closing " /&gt;".
    /// </summary>
    /// <param name="sHtmlText">HTML source.</param>
    /// <param name="styleStr">Attribute text appended to each img tag.</param>
    /// <returns>The rewritten HTML; null or empty input is returned unchanged.</returns>
    public static string ClearHtmlImageHW(string sHtmlText, string styleStr)
    {
        if (string.IsNullOrEmpty(sHtmlText))
        {
            return sHtmlText;
        }

        return ImgTagRegex.Replace(sHtmlText, delegate(Match m) { return RestyleImgTag(m.Value, styleStr); });
    }

    /// <summary>
    /// Strips style/width/height from a single img tag and re-closes it with
    /// the supplied attribute text.
    /// </summary>
    private static string RestyleImgTag(string tag, string styleStr)
    {
        string stripped = tag;
        stripped = Regex.Replace(stripped, @"(?<=<img[\s\S]*?)style=((['""])[^'""]*\2|\S+)(?=[^>]*>)", "", RegexOptions.IgnoreCase);
        stripped = Regex.Replace(stripped, @"(?<=<img[\s\S]*?)width=((['""])[^'""]*\2|\S+)(?=[^>]*>)", "", RegexOptions.IgnoreCase);
        stripped = Regex.Replace(stripped, @"(?<=<img[\s\S]*?)height=((['""])[^'""]*\2|\S+)(?=[^>]*>)", "", RegexOptions.IgnoreCase);

        // Drop the existing closer, whichever form it has.
        string open;
        if (stripped.IndexOf(" />", StringComparison.Ordinal) != -1)
        {
            open = stripped.Replace(" />", "");
        }
        else if (stripped.IndexOf("/>", StringComparison.Ordinal) != -1)
        {
            open = stripped.Replace("/>", "");
        }
        else
        {
            open = stripped.Replace(">", "");
        }

        return open + " " + styleStr + " />";
    }

    #endregion

    #region Page information

    /// <summary>
    /// Returns the host name of an address. "http://" is assumed when the
    /// address has no http/https scheme.
    /// </summary>
    /// <param name="url">Address, with or without scheme.</param>
    /// <returns>The host name, or an empty string when the address is not a valid URI.</returns>
    public static string Host(string url)
    {
        if (string.IsNullOrEmpty(url))
        {
            return string.Empty;
        }

        string candidate = url.Trim();
        // A prefix test is required here: a scheme-less address may still
        // contain "http://" inside its query string.
        if (!HasHttpScheme(candidate))
        {
            candidate = "http://" + candidate;
        }

        Uri uri;
        return Uri.TryCreate(candidate, UriKind.Absolute, out uri) ? uri.Host : string.Empty;
    }

    /// <summary>
    /// Returns the content of the title element.
    /// </summary>
    /// <param name="html">HTML source.</param>
    /// <returns>Whatever <c>MatchHelper.MatchScalar</c> returns for the title pattern.</returns>
    public static string Title(string html)
    {
        string titleReg = "<title>([^<]+)</title>";
        return MatchHelper.MatchScalar(html, titleReg);
    }

    /// <summary>
    /// Resolves a host name and returns the first address reported by DNS.
    /// </summary>
    /// <param name="host">Host name to resolve.</param>
    /// <returns>The address as text, or an empty string when resolution fails.</returns>
    public static string Ip(string host)
    {
        try
        {
            IPHostEntry hostInfo = Dns.GetHostEntry(host);
            return hostInfo.AddressList.Length > 0 ? hostInfo.AddressList[0].ToString() : string.Empty;
        }
        catch (Exception)
        {
            // Resolution failures of any kind are reported as "no address".
            return string.Empty;
        }
    }

    /// <summary>
    /// Parses the query parameters of an address. Either a full address or a
    /// bare query string may be passed; everything up to the first "?" is
    /// ignored.
    /// </summary>
    /// <param name="url">Address or query string.</param>
    /// <returns>The parsed parameters, or null when <paramref name="url"/> is null.</returns>
    public static NameValueCollection UrlParseQuery(string url)
    {
        if (url == null)
        {
            return null;
        }

        // Without this, "http://host/page?a=1" produced the key "http://host/page?a".
        int mark = url.IndexOf('?');
        string query = mark >= 0 ? url.Substring(mark + 1) : url;
        return HttpUtility.ParseQueryString(query);
    }

    /// <summary>
    /// URL-decodes a string.
    /// </summary>
    /// <param name="url">Encoded text.</param>
    /// <returns>The decoded text.</returns>
    public static string UrlDecode(string url)
    {
        return HttpUtility.UrlDecode(url);
    }

    /// <summary>
    /// URL-encodes a string.
    /// </summary>
    /// <param name="url">Text to encode.</param>
    /// <returns>The encoded text.</returns>
    public static string UrlEncode(string url)
    {
        return HttpUtility.UrlEncode(url);
    }

    #endregion

    #region Client IP

    /// <summary>
    /// Returns the IPv4 address of the current web client. The first entry of
    /// the forwarded-for server variable is preferred over REMOTE_ADDR.
    /// </summary>
    /// <remarks>
    /// The forwarded-for header is supplied by the client or by proxies and
    /// can be forged; do not use the result for security decisions.
    /// </remarks>
    /// <returns>
    /// The dotted IPv4 address, or "1.1.1.1" when there is no current request
    /// or the value is not a valid IPv4 address.
    /// </returns>
    public static string GetUserIp()
    {
        const string fallback = "1.1.1.1";

        System.Web.HttpContext context = System.Web.HttpContext.Current;
        if (context == null)
        {
            return fallback;
        }

        NameValueCollection variables = context.Request.ServerVariables;
        string ip = variables["HTTP_X_ForWARDED_For"];
        if (string.IsNullOrEmpty(ip))
        {
            ip = variables["REMOTE_ADDR"];
        }
        else
        {
            // The header may list several hops ("client, proxy1, proxy2").
            int comma = ip.IndexOf(',');
            if (comma >= 0)
            {
                ip = ip.Substring(0, comma);
            }
        }

        if (ip == null)
        {
            return fallback;
        }

        ip = ip.Trim();
        return IsIPv4(ip) ? ip : fallback;
    }

    /// <summary>
    /// Returns true when the text consists of four dot-separated numbers in
    /// the range 0-255.
    /// </summary>
    private static bool IsIPv4(string text)
    {
        string[] parts = text.Split('.');
        if (parts.Length != 4)
        {
            return false;
        }

        foreach (string part in parts)
        {
            byte octet;
            if (!byte.TryParse(part, System.Globalization.NumberStyles.None, System.Globalization.CultureInfo.InvariantCulture, out octet))
            {
                return false;
            }
        }

        return true;
    }

    #endregion

    #region Proxy IPs from the network

    private readonly string url = "http://www.proxy360.cn/default.aspx";

    // Alternative source; only referenced by the commented-out pattern in ProcessHtml.
    private readonly string url1 = "http://www.kuaidaili.com/";

    /// <summary>
    /// Downloads the proxy list page and returns the proxies found on it as
    /// "ip:port" strings. Null when the page could not be parsed.
    /// </summary>
    public List<string> ProxyIP
    {
        get { return ProcessHtml(HtmlHelper.GetHtmlCode(url)); }
    }

    /// <summary>
    /// Extracts "ip:port" entries from the proxy list page.
    /// </summary>
    /// <returns>The entries found, or null when parsing throws.</returns>
    private List<string> ProcessHtml(string html)
    {
        try
        {
            List<string> list = new List<string>();
            // Dots are escaped so only real dotted addresses match.
            string regIP = "(\\d+\\.\\d+\\.\\d+\\.\\d+)\\s*</span>\\s*<span\\s*class=\"tbBottomLine\"\\s*style=\"width:50px;\">\\s*(\\d+)";
            // Pattern for url1: @"<td>(\d+\.\d+\.\d+\.\d+)</td>\s*<td>(\d+)</td>"
            DataTable dt = MatchHelper.MatchDt(html, regIP);
            if (dt != null)
            {
                for (int i = 0; i < dt.Rows.Count; i++)
                {
                    list.Add(dt.Rows[i][0].ToString() + ":" + dt.Rows[i][1].ToString());
                }
            }

            return list;
        }
        catch (Exception ex)
        {
            ReportFailure("ProcessHtml", ex);
            return null;
        }
    }

    #endregion

    #region Links in a page

    /// <summary>
    /// Collects information about every img tag in the page.
    /// </summary>
    /// <param name="html">HTML source.</param>
    /// <param name="host">Host of the page, used to complete relative addresses.</param>
    /// <returns>One entry per usable img tag, or null when extraction throws.</returns>
    public static List<ItemImg> ItemImg(string html, string host = "")
    {
        try
        {
            string imgReg = "(<img\\s*[^>]*\\s*>)";
            List<ItemImg> images = new List<ItemImg>();
            List<string> tags = MatchHelper.MatchLists(html, imgReg);
            if (tags == null || tags.Count == 0)
            {
                return images;
            }

            string srcReg = "src=\"(\\S+)\"|src=\'(\\S+)\'|data-original=\"(\\S+)\"|data-original='(\\S+)'";
            string altReg = "alt=\"(\\S+)\"|alt=\'(\\S+)\'";
            foreach (string tag in tags)
            {
                string src = MatchHelper.MatchScalar(tag, srcReg);
                if (!FilterUrl(src))
                {
                    continue;
                }

                ItemImg item = new ItemImg();
                string alt = MatchHelper.MatchScalar(tag, altReg);
                src = FilterSrcUrl(src, host);

                // TryCreate keeps one malformed address from discarding the whole list.
                Uri uri;
                if (HasHttpScheme(src) && Uri.TryCreate(src, UriKind.Absolute, out uri))
                {
                    item.ImgHost = uri.Host;
                }

                item.ImgSrc = src;
                item.ImgAlt = alt;
                item.ImgLable = tag;
                images.Add(item);
            }

            return images;
        }
        catch (Exception ex)
        {
            ReportFailure("ItemImg", ex);
            return null;
        }
    }

    /// <summary>
    /// Collects information about every a element in the page.
    /// </summary>
    /// <param name="html">HTML source.</param>
    /// <param name="host">Host of the page, used to complete relative addresses.</param>
    /// <returns>One entry per usable a element, or null when extraction throws.</returns>
    internal static List<ItemA> ItemA(string html, string host = "")
    {
        try
        {
            List<ItemA> links = new List<ItemA>();
            string aReg = "(<a\\s*[^<]*\\s*>\\s*[^<]*\\s*<\\s*/\\s*a\\s*>)";
            List<string> tags = MatchHelper.MatchLists(html, aReg);
            if (tags == null || tags.Count == 0)
            {
                return links;
            }

            string hrefReg = "href=\"(\\S+)\"|href='(\\S+)'";
            string titleReg = "title=\"(\\S+)\"|title=\'(\\S+)\'";
            string contentReg = ">([^<]+)<";
            foreach (string tag in tags)
            {
                string rawHref = MatchHelper.MatchScalar(tag, hrefReg);
                string href = rawHref == null ? "" : rawHref.Replace("\"", "").Replace("'", "");
                if (!FilterUrl(href))
                {
                    continue;
                }

                ItemA item = new ItemA();
                string title = MatchHelper.MatchScalar(tag, titleReg);
                string content = MatchHelper.MatchScalar(tag, contentReg);
                href = FilterSrcUrl(href, host);

                // TryCreate keeps one malformed address from discarding the whole list.
                Uri uri;
                if (HasHttpScheme(href) && Uri.TryCreate(href, UriKind.Absolute, out uri))
                {
                    item.AHost = uri.Host;
                }

                item.Ahref = href;
                item.ATitle = title;
                item.AContent = content;
                item.ALable = tag;
                links.Add(item);
            }

            return links;
        }
        catch (Exception ex)
        {
            ReportFailure("ItemA", ex);
            return null;
        }
    }

    /// <summary>
    /// Returns false for addresses that are not worth collecting: null or
    /// empty, "javascript:" pseudo-links and in-page anchors.
    /// </summary>
    /// <param name="url">Address to check.</param>
    /// <returns>True when the address should be kept.</returns>
    static bool FilterUrl(string url)
    {
        if (string.IsNullOrEmpty(url))
        {
            return false;
        }

        return !url.StartsWith("javascript:", StringComparison.OrdinalIgnoreCase)
            && !url.StartsWith("#", StringComparison.Ordinal);
    }

    /// <summary>
    /// Turns a relative address into an absolute one using the page host.
    /// Absolute http/https addresses are returned unchanged, protocol-relative
    /// addresses ("//host/path") get "http:" prepended, and a relative address
    /// is returned unchanged when no host is available.
    /// </summary>
    /// <param name="srcUrl">Address found in the page.</param>
    /// <param name="host">Host of the page, with or without scheme.</param>
    /// <returns>The normalized address.</returns>
    static string FilterSrcUrl(string srcUrl, string host)
    {
        if (string.IsNullOrEmpty(srcUrl) || HasHttpScheme(srcUrl))
        {
            return srcUrl;
        }

        if (srcUrl.StartsWith("//", StringComparison.Ordinal))
        {
            return "http:" + srcUrl;
        }

        if (string.IsNullOrEmpty(host))
        {
            // Nothing to resolve against; "http:///path" would not be a valid address.
            return srcUrl;
        }

        // Exactly one "/" between host and path, whatever the inputs carry.
        string combined = host.TrimEnd('/') + "/" + srcUrl.TrimStart('/');
        return HasHttpScheme(host) ? combined : "http://" + combined;
    }

    /// <summary>
    /// Finds quoted image addresses anywhere in the page (not only in img tags).
    /// </summary>
    /// <param name="html">HTML source.</param>
    /// <param name="host">
    /// Page address including scheme; when it starts with http:// or https://
    /// it is prepended to addresses that have no scheme.
    /// </param>
    /// <param name="type">
    /// File extension to look for, e.g. jpg, png, bmp or gif. The value is
    /// inserted into the regular expression as-is and is matched case-sensitively.
    /// </param>
    /// <returns>The addresses found, or null when extraction throws.</returns>
    public static List<string> ListImg(string html, string host = "", string type = "jpg")
    {
        try
        {
            // The dot before the extension is escaped so "xjpg" no longer matches.
            string picReg = "[\"|']([-a-zA-Z0-9@:%_\\+.~#?&//=]+\\." + type + ")[\"|']";
            List<string> pictures = MatchHelper.MatchLists(html, picReg);
            if (pictures != null && HasHttpScheme(host))
            {
                for (int i = 0; i < pictures.Count; i++)
                {
                    if (!HasHttpScheme(pictures[i]))
                    {
                        pictures[i] = host + pictures[i];
                    }
                }
            }

            return pictures;
        }
        catch (Exception ex)
        {
            ReportFailure("ListImg", ex);
            return null;
        }
    }

    #endregion

    #region Read page source

    /// <summary>
    /// Reads an address with GET, a 2 second timeout and UTF-8 decoding.
    /// </summary>
    /// <param name="strUrl">Address to read.</param>
    /// <returns>The response body, or an empty string on any failure.</returns>
    public static string GetModel(string strUrl)
    {
        try
        {
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(strUrl);
            request.Timeout = 2000;
            return ReadResponseText(request, Encoding.UTF8);
        }
        catch (Exception ex)
        {
            ReportFailure("GetModel", ex);
            return "";
        }
    }

    /// <summary>
    /// Posts form data (application/x-www-form-urlencoded) and returns the
    /// response body.
    /// </summary>
    /// <param name="urlString">Address to post to.</param>
    /// <param name="encoding">Encoding of the posted data and of the response.</param>
    /// <param name="postDataString">Form data; null is treated as empty.</param>
    /// <returns>The response body, or an empty string when the request fails.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="encoding"/> is null.</exception>
    public static string GetHtmlFromPost(string urlString, Encoding encoding, string postDataString)
    {
        if (encoding == null)
        {
            throw new ArgumentNullException("encoding");
        }

        byte[] postData = encoding.GetBytes(postDataString ?? string.Empty);
        try
        {
            HttpWebRequest request = WebRequest.Create(urlString) as HttpWebRequest;
            if (request == null)
            {
                // Not an http/https address.
                return string.Empty;
            }

            request.Method = "POST";
            request.KeepAlive = false;
            request.ContentType = "application/x-www-form-urlencoded";
            request.CookieContainer = new CookieContainer();
            request.ContentLength = postData.Length;

            using (Stream requestStream = request.GetRequestStream())
            {
                requestStream.Write(postData, 0, postData.Length);
            }

            return ReadResponseText(request, encoding);
        }
        catch (Exception ex)
        {
            // Best-effort by design: failures yield an empty page.
            ReportFailure("GetHtmlFromPost", ex);
            return string.Empty;
        }
    }

    /// <summary>
    /// Reads an address with GET using a browser-like User-Agent.
    /// </summary>
    /// <param name="urlString">Address to read.</param>
    /// <param name="encoding">Encoding of the response.</param>
    /// <returns>The response body, or an empty string when the request fails.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="encoding"/> is null.</exception>
    public static string GetHtmlFromGet(string urlString, Encoding encoding)
    {
        if (encoding == null)
        {
            throw new ArgumentNullException("encoding");
        }

        try
        {
            HttpWebRequest request = WebRequest.Create(urlString) as HttpWebRequest;
            if (request == null)
            {
                // Not an http/https address.
                return string.Empty;
            }

            request.UserAgent = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 2.0.50727; Maxthon 2.0)";
            return ReadResponseText(request, encoding);
        }
        catch (Exception ex)
        {
            // Best-effort by design: failures yield an empty page.
            ReportFailure("GetHtmlFromGet", ex);
            return string.Empty;
        }
    }

    #endregion

    #region Read a parameter from a query string

    /// <summary>
    /// Returns the URL-decoded value of one parameter of a query string.
    /// "%26" and "?" inside the text are treated as parameter separators.
    /// </summary>
    /// <param name="s">Query string, with or without the leading "?".</param>
    /// <param name="para">Name of the parameter (case-sensitive).</param>
    /// <returns>The value of the first matching parameter, or "" when it is absent.</returns>
    public static string QueryString(string s, string para)
    {
        if (string.IsNullOrEmpty(s))
        {
            return "";
        }

        s = s.Trim('?').Replace("%26", "&").Replace('?', '&');
        foreach (string pair in s.Split('&'))
        {
            int equals = pair.IndexOf('=');
            if (equals >= 0 && pair.Substring(0, equals) == para)
            {
                return System.Web.HttpUtility.UrlDecode(pair.Substring(equals + 1));
            }
        }

        return "";
    }

    #endregion

    #region Multipart form posting (file upload)

    /// <summary>
    /// Synchronously posts form fields and files as multipart/form-data.
    /// </summary>
    /// <param name="url">Address to post to.</param>
    /// <param name="queryString">Form fields as "a=1&amp;b=2".</param>
    /// <param name="files">
    /// Files to upload: <c>Name</c> is the form field name and <c>Value</c>
    /// the path of the file. May be null.
    /// </param>
    /// <returns>The response body.</returns>
    /// <exception cref="ArgumentException"><paramref name="url"/> is not an http/https address.</exception>
    public static string HttpPostWithFile(string url, string queryString, List<QueryParameter> files)
    {
        string boundary = DateTime.Now.Ticks.ToString("x");

        HttpWebRequest webRequest = WebRequest.Create(url) as HttpWebRequest;
        if (webRequest == null)
        {
            throw new ArgumentException("The address must use http or https.", "url");
        }

        webRequest.ServicePoint.Expect100Continue = false;
        webRequest.Timeout = 20000;
        webRequest.ContentType = "multipart/form-data;charset=utf-8;boundary=" + boundary;
        webRequest.Method = "POST";
        webRequest.KeepAlive = false;
        webRequest.Credentials = CredentialCache.DefaultCredentials;

        byte[] beginBoundary = Encoding.UTF8.GetBytes("\r\n--" + boundary + "\r\n");
        byte[] endBoundary = Encoding.UTF8.GetBytes("\r\n--" + boundary + "--\r\n");
        string fieldTemplate = "Content-Disposition: form-data; name=\"{0}\"\r\n\r\n{1}";
        // The media type must not be quoted.
        string fileTemplate = "Content-Disposition: form-data; name=\"{0}\"; filename=\"{1}\"\r\nContent-Type: {2}\r\n\r\n";

        // The body is buffered first because ContentLength must be known
        // before the request stream is opened.
        using (MemoryStream body = new MemoryStream())
        {
            foreach (QueryParameter field in GetQueryParameters(queryString))
            {
                body.Write(beginBoundary, 0, beginBoundary.Length);
                byte[] fieldBytes = Encoding.UTF8.GetBytes(string.Format(fieldTemplate, field.Name, field.Value));
                body.Write(fieldBytes, 0, fieldBytes.Length);
            }

            if (files != null)
            {
                byte[] buffer = new byte[4096];
                foreach (QueryParameter file in files)
                {
                    string filePath = file.Value;
                    string fileName = Path.GetFileName(filePath);

                    body.Write(beginBoundary, 0, beginBoundary.Length);
                    byte[] headerBytes = Encoding.UTF8.GetBytes(
                        string.Format(fileTemplate, file.Name, fileName, GetContentType(fileName)));
                    body.Write(headerBytes, 0, headerBytes.Length);

                    using (FileStream fileStream = new FileStream(filePath, FileMode.Open, FileAccess.Read))
                    {
                        int bytesRead;
                        while ((bytesRead = fileStream.Read(buffer, 0, buffer.Length)) != 0)
                        {
                            body.Write(buffer, 0, bytesRead);
                        }
                    }
                }
            }

            // The closing boundary terminates the whole body exactly once;
            // it used to follow every file and was missing without files.
            body.Write(endBoundary, 0, endBoundary.Length);

            webRequest.ContentLength = body.Length;
            using (Stream requestStream = webRequest.GetRequestStream())
            {
                body.WriteTo(requestStream);
            }
        }

        return WebResponseGet(webRequest);
    }

    /// <summary>
    /// Executes a prepared request and returns the response body.
    /// </summary>
    /// <param name="webRequest">Request to execute.</param>
    /// <returns>The response body.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="webRequest"/> is null.</exception>
    public static string WebResponseGet(HttpWebRequest webRequest)
    {
        if (webRequest == null)
        {
            throw new ArgumentNullException("webRequest");
        }

        using (WebResponse response = webRequest.GetResponse())
        using (Stream stream = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(stream))
        {
            return reader.ReadToEnd();
        }
    }

    /// <summary>
    /// Splits "a=1&amp;b=2" into name/value pairs. Leading and trailing
    /// spaces, "?" and "&amp;" are ignored; a pair without "=" gets an empty
    /// value; only the first "=" of a pair separates name and value.
    /// </summary>
    /// <param name="strValue">Query string.</param>
    /// <returns>The parameters in order; empty for null or empty input.</returns>
    public static List<QueryParameter> GetQueryParameters(string strValue)
    {
        List<QueryParameter> list = new List<QueryParameter>();
        if (string.IsNullOrEmpty(strValue))
        {
            return list;
        }

        foreach (string item in strValue.Trim(' ', '?', '&').Split('&'))
        {
            int equals = item.IndexOf('=');
            if (equals > -1)
            {
                // Splitting on every "=" used to truncate values such as base64 text.
                list.Add(new QueryParameter(item.Substring(0, equals), item.Substring(equals + 1)));
            }
            else
            {
                list.Add(new QueryParameter(item, string.Empty));
            }
        }

        return list;
    }

    /// <summary>
    /// Joins parameters into "a=1&amp;b=2". Names and values are written
    /// as they are, without URL encoding.
    /// </summary>
    /// <param name="paras">Parameters to join.</param>
    /// <returns>The query string; "" for null or empty input.</returns>
    public static string GetQueryFromParas(List<QueryParameter> paras)
    {
        if (paras == null || paras.Count == 0)
        {
            return "";
        }

        StringBuilder query = new StringBuilder();
        foreach (QueryParameter para in paras)
        {
            if (query.Length > 0)
            {
                query.Append("&");
            }

            query.AppendFormat("{0}={1}", para.Name, para.Value);
        }

        return query.ToString();
    }

    /// <summary>
    /// Looks up the MIME type registered for a file extension under
    /// HKEY_CLASSES_ROOT ("Content Type" value).
    /// </summary>
    /// <param name="fileName">File name or path.</param>
    /// <returns>The registered type, or "application/octet-stream" when none is found.</returns>
    public static string GetContentType(string fileName)
    {
        string contentType = "application/octet-stream";

        string ext = Path.GetExtension(fileName);
        if (string.IsNullOrEmpty(ext))
        {
            // OpenSubKey("") would return the root key itself.
            return contentType;
        }

        using (RegistryKey registryKey = Registry.ClassesRoot.OpenSubKey(ext.ToLowerInvariant()))
        {
            if (registryKey != null)
            {
                object registered = registryKey.GetValue("Content Type");
                if (registered != null)
                {
                    contentType = registered.ToString();
                }
            }
        }

        return contentType;
    }

    /// <summary>
    /// Reorders a timestamp such as "Wed Nov 17 15:07:48 +0800 2010" into
    /// "Wed Nov 17 2010 15:07:48". The fields are only rearranged: the offset
    /// is dropped and no time-zone conversion takes place.
    /// </summary>
    /// <param name="strValue">Timestamp with six space-separated fields.</param>
    /// <returns>The reordered text; "" for null or empty input.</returns>
    /// <exception cref="FormatException">The input has fewer than six fields.</exception>
    public static string UtcToDateTime(string strValue)
    {
        if (string.IsNullOrEmpty(strValue))
        {
            return "";
        }

        // Repeated spaces (e.g. a padded day of month) must not shift the fields.
        string[] parts = strValue.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
        if (parts.Length < 6)
        {
            throw new FormatException("Expected a value like \"Wed Nov 17 15:07:48 +0800 2010\".");
        }

        return parts[0] + " " + parts[1] + " " + parts[2] + " " + parts[5] + " " + parts[3];
    }

    #endregion
}

/// <summary>
/// Information about one a element.
/// </summary>
public class ItemA
{
    /// <summary>
    /// Address the link points to.
    /// </summary>
    public string Ahref { get; set; }

    /// <summary>
    /// Value of the title attribute.
    /// </summary>
    public string ATitle { get; set; }

    /// <summary>
    /// Text between the opening and closing tag.
    /// </summary>
    public string AContent { get; set; }

    /// <summary>
    /// Host name of the link address.
    /// </summary>
    public string AHost { get; set; }

    /// <summary>
    /// The complete a element as found in the page.
    /// </summary>
    public string ALable { get; set; }
}

/// <summary>
/// Information about one img tag.
/// </summary>
public class ItemImg
{
    /// <summary>
    /// Address of the image.
    /// </summary>
    public string ImgSrc { get; set; }

    /// <summary>
    /// Alternative text of the image.
    /// </summary>
    public string ImgAlt { get; set; }

    /// <summary>
    /// Host name of the image address.
    /// </summary>
    public string ImgHost { get; set; }

    /// <summary>
    /// The complete img tag as found in the page.
    /// </summary>
    public string ImgLable { get; set; }
}

/// <summary>
/// A name/value pair of a query string or form post.
/// </summary>
public class QueryParameter
{
    private readonly string _name;
    private readonly string _value;

    /// <summary>
    /// Creates a parameter from a name and a text value.
    /// </summary>
    public QueryParameter(string name, string value)
    {
        _name = name;
        _value = value;
    }

    /// <summary>
    /// Creates a parameter from a name and an arbitrary value, which is
    /// converted with <c>ToString()</c>. A null value is stored as empty text.
    /// </summary>
    public QueryParameter(string name, object value)
    {
        _name = name;
        _value = value == null ? string.Empty : value.ToString();
    }

    /// <summary>
    /// The trimmed name; empty when no name was given.
    /// </summary>
    public string Name
    {
        get { return _name == null ? string.Empty : _name.Trim(); }
    }

    /// <summary>
    /// The trimmed value; empty when no value was given.
    /// </summary>
    public string Value
    {
        get { return _value == null ? string.Empty : _value.Trim(); }
    }
}
11
2015
07
【C#、Asp.Net 工具类大全】Html常用帮助类
发布:郑德才博客 | 分类:项目源码 | 评论:0 | 浏览:
相关文章:
C#、Asp.Net 对比两个实体信息前后是否有所改变(结构相同和不相同) (2016-9-2 20:24:29)
C#、Asp.Net 将一个实体对象转换为另一个实体对象(结构可以不一样) (2016-9-2 19:14:5)
Asp.Net 常用时间计算 (2016-3-16 23:29:20)
【C#、Asp.Net 工具类大全】图片通用操作类 (2015-12-23 13:27:30)
【C#、Asp.Net 工具类大全】Request请求工具类 (2015-12-23 13:15:56)
【C#、Asp.Net 工具类大全】弹出提示操作类 (2015-12-23 13:6:1)
为什么通过JS或JQuery加载出来的HTML,无法用选择器绑定事件? (2015-10-30 16:24:8)
【C#、Asp.Net 工具类大全】正则匹配工具类 (2015-7-26 9:36:56)
【C#、Asp.Net 工具类大全】Js常用操作类 (2015-7-15 14:23:33)
C#字符串编码帮助类 (2015-7-11 23:39:38)
发表评论:
◎欢迎参与讨论,请在这里发表您的看法、交流您的观点。