1
我有一个爬虫程序,用来下载页面并对其进行处理。运行大约 1 小时后,对源站的每个请求都需要 1 分钟甚至更长时间才能完成,而在程序刚启动时,每个地址都能在 1 秒内下载完。我曾怀疑是目标网站限制了我的请求或流量,但当我关闭程序并重新运行后,性能又恢复正常,所以问题应该出在我的代码里。任何帮助都将不胜感激。(标题:页面抓取器中的 HttpWebRequest 变慢)
public class PageFetcher
{
/// <summary>
/// Creates a fetcher without a target address; <see cref="URLAddress"/> is left unset.
/// </summary>
public PageFetcher() { }
/// <summary>
/// Creates a fetcher for the given address.
/// </summary>
/// <param name="urlAddress">Value stored in <see cref="URLAddress"/>.</param>
public PageFetcher(string urlAddress) { URLAddress = urlAddress; }
// NOTE(review): never read or written in the visible code; presumably a
// redirect/relay counter used elsewhere in the class — confirm before removing.
private int relayPageCount = 0;
/// <summary>
/// Address to download. Fetch overwrites this with the response's final URI
/// after a successful download.
/// </summary>
public string URLAddress { get; set; }
/// <summary>
/// Optional encoding name used to decode the page text. When null or empty,
/// Fetch falls back to the character set reported by the response.
/// </summary>
public string FetchingEncoding { get; set; }
/// <summary>
/// Downloads the page at <see cref="URLAddress"/>, decompresses it if the server
/// sent a gzip/deflate body, and decodes it to text.
/// </summary>
/// <returns>
/// A result with <c>IsOK</c> set to true and <c>ResultHTML</c> holding the page text on
/// success; on any failure <c>IsOK</c> is false and <c>ErrorMessage</c> holds the
/// exception message. This method does not throw.
/// </returns>
/// <remarks>
/// On success <see cref="URLAddress"/> is replaced with the final response URI.
/// Every response object, including the one attached to a <see cref="WebException"/>,
/// is closed before returning so that its connection is released.
/// </remarks>
public PageFetchResult Fetch()
{
    PageFetchResult fetchResult = new PageFetchResult();
    HttpWebRequest req = null;
    HttpWebResponse resp = null;
    try
    {
        req = (HttpWebRequest)WebRequest.Create(URLAddress);
        req.UserAgent = "Mozilla/4.0";
        req.Headers.Add(HttpRequestHeader.AcceptEncoding, "gzip, deflate");
        resp = (HttpWebResponse)req.GetResponse();

        // Raw (possibly compressed) body bytes; ResponseAsBytes is defined elsewhere in the class.
        byte[] reqHTML = ResponseAsBytes(resp);

        // Resolve the text encoding before any stream is created so a bad
        // encoding name cannot leave a stream undisposed.
        Encoding pageEncoding = null;
        if (!string.IsNullOrEmpty(FetchingEncoding))
        {
            pageEncoding = Encoding.GetEncoding(FetchingEncoding);
        }
        else if (!string.IsNullOrEmpty(resp.CharacterSet))
        {
            pageEncoding = Encoding.GetEncoding(resp.CharacterSet);
        }

        // Ordinal, case-insensitive matching: header tokens are not linguistic text.
        string contentEncoding = resp.ContentEncoding ?? string.Empty;
        bool isGzip = contentEncoding.IndexOf("gzip", StringComparison.OrdinalIgnoreCase) >= 0;
        bool isDeflate = contentEncoding.IndexOf("deflate", StringComparison.OrdinalIgnoreCase) >= 0;

        Stream resultStream = new MemoryStream(reqHTML);
        if (isGzip)
        {
            resultStream = new GZipStream(resultStream, CompressionMode.Decompress);
        }
        if (isDeflate)
        {
            resultStream = new DeflateStream(resultStream, CompressionMode.Decompress);
        }

        string resultHTML;
        // With no explicit or declared charset, use StreamReader's default
        // (UTF-8 with byte-order-mark detection) instead of failing on a null reader.
        // Disposing the reader also disposes the wrapped stream chain.
        using (StreamReader readerStream = pageEncoding != null
            ? new StreamReader(resultStream, pageEncoding)
            : new StreamReader(resultStream))
        {
            resultHTML = readerStream.ReadToEnd();
        }

        URLAddress = resp.ResponseUri.AbsoluteUri;
        fetchResult.ResultHTML = resultHTML;
        fetchResult.IsOK = true;
    }
    catch (Exception ex)
    {
        // A WebException raised for an HTTP error status carries its own response
        // object; resp is still null in that case, so close it here explicitly.
        WebException webEx = ex as WebException;
        if (webEx != null && webEx.Response != null)
        {
            webEx.Response.Close();
        }
        // Abort only on failure, to cancel whatever part of the request is still pending.
        if (req != null)
        {
            req.Abort();
        }
        fetchResult.IsOK = false;
        fetchResult.ErrorMessage = ex.Message;
    }
    finally
    {
        if (resp != null)
        {
            resp.Close();
        }
    }
    return fetchResult;
}
是的,变慢的正是抓取这一步。我排查后发现,HttpWebRequest.GetResponse() 就是主要的罪魁祸首!你认为这与目标网站或另一端有关吗? – Ehsan 2010-08-01 10:15:22