当前位置:首页 > 利用WebClient和WebRequest类获得网页源代码

利用WebClient和WebRequest类获得网页源代码

点击次数:1155  更新日期:2010-12-29
\n

关键是在 Page 指令中设置 ValidateRequest="false",否则提交包含 HTML 源代码的表单时,ASP.NET 会因 Request.Form 中存在潜在危险的值而报错。


\n

<%@ Page Language="C#" AutoEventWireup="true" CodeFile="Default3.aspx.cs" Inherits="test_Default3" ValidateRequest="false" %>


\n

<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">


\n

<html xmlns="http://www.w3.org/1999/xhtml" >
<head runat="server">
<title>无标题页</title>
</head>
<body>
<form id="form1" runat="server">
<div align="center" style="FONT-WEIGHT: bold">得到任意网页源代码</div>
<asp:TextBox id="UrlText" runat="server" Width="400px">http://www.dwww.cn
</asp:TextBox>
<asp:Button id="WebClientButton" Runat="server" Text="用WebClient得到" onClick="WebClientButton_Click"></asp:Button>
<asp:Button id="WebRequestButton" runat="server" Text="用WebRequest得到" onClick="WebRequestButton_Click"></asp:Button>
<asp:Button ID="Button1" runat="server" onClick="GetText_Click" Text="Button" /><br>
<asp:TextBox id="ContentHtml" runat="server" Width="100%" Height="360px" TextMode="MultiLine">
</asp:TextBox>


\n

</form>
</body>
</html>
/// <summary>
/// URL most recently read from the <c>UrlText</c> box by one of the click handlers.
/// </summary>
private string PageUrl = "";


\n

/// <summary>
/// Downloads the page at the URL entered in <c>UrlText</c> using
/// <see cref="WebClient"/> and shows its source in <c>ContentHtml</c>.
/// </summary>
/// <param name="sender">The button that raised the click event.</param>
/// <param name="e">Event data (unused).</param>
protected void WebClientButton_Click(object sender, System.EventArgs e)
{
    PageUrl = UrlText.Text;

    // The using block disposes the client even when the download throws.
    using (WebClient wc = new WebClient())
    {
        wc.Credentials = CredentialCache.DefaultCredentials;

        // Approach 1: fetch the raw bytes, then decode them.
        // NOTE(review): Encoding.Default ignores the charset declared by the
        // page, so pages in a different encoding may display garbled.
        byte[] pageData = wc.DownloadData(PageUrl);
        ContentHtml.Text = Encoding.Default.GetString(pageData);

        // Approach 2 (alternative shown in the article): read through a stream.
        //
        //   using (Stream resStream = wc.OpenRead(PageUrl))
        //   using (StreamReader sr = new StreamReader(resStream, System.Text.Encoding.Default))
        //   {
        //       ContentHtml.Text = sr.ReadToEnd();
        //   }
    }
}


\n

/// <summary>
/// Downloads the page at the URL entered in <c>UrlText</c> using
/// <see cref="WebRequest"/> and shows its source in <c>ContentHtml</c>.
/// </summary>
/// <param name="sender">The button that raised the click event.</param>
/// <param name="e">Event data (unused).</param>
protected void WebRequestButton_Click(object sender, System.EventArgs e)
{
    PageUrl = UrlText.Text;
    WebRequest request = WebRequest.Create(PageUrl);

    // Stacked using blocks release the response, its stream and the reader
    // in reverse order, including when reading fails part-way through.
    using (WebResponse response = request.GetResponse())
    using (Stream resStream = response.GetResponseStream())
    using (StreamReader sr = new StreamReader(resStream, System.Text.Encoding.Default))
    {
        ContentHtml.Text = sr.ReadToEnd();
    }
}


\n

/// <summary>
/// Downloads the page at the URL entered in <c>UrlText</c>, removes the HTML
/// tags, collapses whitespace runs to single spaces, and shows the resulting
/// text in <c>ContentHtml</c>.
/// </summary>
/// <param name="sender">The button that raised the click event.</param>
/// <param name="e">Event data (unused).</param>
protected void GetText_Click(object sender, System.EventArgs e)
{
    PageUrl = UrlText.Text;
    WebRequest request = WebRequest.Create(PageUrl);

    string html;

    // Stacked using blocks release the response, its stream and the reader
    // even when reading fails part-way through.
    using (WebResponse response = request.GetResponse())
    using (Stream resStream = response.GetResponseStream())
    using (StreamReader sr = new StreamReader(resStream, System.Text.Encoding.Default))
    {
        html = sr.ReadToEnd();
    }

    // Drop everything that looks like a tag: "<", any non-">" characters, ">".
    string text = Regex.Replace(html, "<[^>]*>", "");

    // Collapse each run of whitespace into one space. A verbatim string keeps
    // the pattern as \s+; the double-escaped form matched a literal backslash
    // followed by 's' characters instead of whitespace.
    ContentHtml.Text = Regex.Replace(text, @"\s+", " ");
}

\n