在 ASP.NET 网站和 WinForm 开发过程中,常常需要抓取某个网站或地址的网页源代码,因此有必要编写一个读取网页源代码的工具类:
/// <summary>
/// Web page helper: four alternative ways to download the source text of a URL
/// and decode it with a caller-supplied character encoding.
/// </summary>
public class HTML
{
    /// <summary>
    /// Gets the page source by downloading the raw bytes with <c>WebClient.DownloadData</c>
    /// and decoding them.
    /// </summary>
    /// <param name="url">Address of the resource to download.</param>
    /// <param name="encoding">Encoding name passed to <c>Encoding.GetEncoding</c> (for example "utf-8" or "gb2312").</param>
    /// <returns>The downloaded bytes decoded as text.</returns>
    public string GetHTML(string url, string encoding)
    {
        // Resolve the encoding first so an invalid name fails before any request is made.
        Encoding textEncoding = Encoding.GetEncoding(encoding);

        // WebClient is IDisposable; the original never released it.
        using (WebClient web = new WebClient())
        {
            byte[] buffer = web.DownloadData(url);
            return textEncoding.GetString(buffer);
        }
    }

    /// <summary>
    /// Gets the page source by reading the stream returned from <c>WebClient.OpenRead</c>.
    /// </summary>
    /// <param name="url">Address of the resource to read.</param>
    /// <param name="encoding">Encoding name passed to <c>Encoding.GetEncoding</c>.</param>
    /// <returns>The full content of the stream decoded as text.</returns>
    public string GetWebClient(string url, string encoding)
    {
        Encoding textEncoding = Encoding.GetEncoding(encoding);

        // The using blocks release the client, stream and reader even when ReadToEnd throws.
        using (WebClient client = new WebClient())
        using (Stream stream = client.OpenRead(url))
        using (StreamReader reader = new StreamReader(stream, textEncoding))
        {
            return reader.ReadToEnd();
        }
    }

    /// <summary>
    /// Gets the page source through a request created by <c>WebRequest.Create</c>.
    /// </summary>
    /// <param name="url">Absolute URI of the resource; parsed with <c>new Uri(url)</c>.</param>
    /// <param name="encoding">Encoding name passed to <c>Encoding.GetEncoding</c>.</param>
    /// <returns>The full response body decoded as text.</returns>
    public string GetWebRequest(string url, string encoding)
    {
        Uri uri = new Uri(url);
        WebRequest request = WebRequest.Create(uri);

        // The encoding argument is evaluated before the helper sends the request.
        return ReadResponseText(request, Encoding.GetEncoding(encoding));
    }

    /// <summary>
    /// Gets the page source through an <c>HttpWebRequest</c> that sends browser-like
    /// User-Agent, Accept and Accept-Language headers with keep-alive enabled.
    /// </summary>
    /// <param name="url">Absolute URI of the resource; the created request is cast to <c>HttpWebRequest</c>.</param>
    /// <param name="encoding">Encoding name passed to <c>Encoding.GetEncoding</c>.</param>
    /// <returns>The full response body decoded as text.</returns>
    public string GetHttpWebRequest(string url, string encoding)
    {
        Uri uri = new Uri(url);
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(uri);

        // Fix: the original value began with the header name ("User-Agent:") and was
        // missing its closing parenthesis, producing a malformed User-Agent header.
        request.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; .NET CLR 1.0.3705)";
        request.Accept = "*/*";
        request.KeepAlive = true;
        request.Headers.Add("Accept-Language", "zh-cn,en-us;q=0.5");

        return ReadResponseText(request, Encoding.GetEncoding(encoding));
    }

    /// <summary>
    /// Sends <paramref name="request"/>, reads the whole response stream as text and
    /// releases the response, stream and reader on both success and failure.
    /// </summary>
    private static string ReadResponseText(WebRequest request, Encoding textEncoding)
    {
        using (WebResponse response = request.GetResponse())
        using (Stream responseStream = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(responseStream, textEncoding))
        {
            return reader.ReadToEnd();
        }
    }
}
评论列表: