-
-
Save enkelmedia/52d6900786bf3c5cd45e to your computer and use it in GitHub Desktop.
This is one way to download a web page.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary>
/// Downloads the resource at the given web address and returns its content as a string.
/// </summary>
/// <remarks>
/// The response body is fetched once and buffered in memory. It is first decoded with the
/// charset announced in the HTTP response header. If the text itself contains a
/// <c>charset=</c> declaration (for example in an HTML meta tag) that names a different,
/// recognised encoding, the buffered bytes are decoded again with that encoding.
/// </remarks>
/// <param name="Url">URL of the web page.</param>
/// <returns>The decoded content of the web page.</returns>
/// <exception cref="System.Net.WebException">
/// Thrown when the page cannot be downloaded or decoded. The original exception is
/// available through <see cref="System.Exception.InnerException"/>.
/// </exception>
public static string DownloadWebPage(string Url)
{
    try
    {
        byte[] content = new byte[0];
        string headerCharset;

        System.Net.WebRequest request = System.Net.WebRequest.Create(Url);

        // Dispose the response so the underlying connection is released.
        using (System.Net.HttpWebResponse response = (System.Net.HttpWebResponse)request.GetResponse())
        {
            headerCharset = response.CharacterSet;

            using (Stream body = response.GetResponseStream())
            {
                if (body != null)
                {
                    // Buffer the raw bytes so they can be re-decoded without a second request.
                    using (MemoryStream buffer = new MemoryStream())
                    {
                        body.CopyTo(buffer);
                        content = buffer.ToArray();
                    }
                }
            }
        }

        // The header charset may be missing or unknown; fall back to UTF-8 in that case.
        Encoding headerEncoding = GetEncodingOrNull(headerCharset);
        if (headerEncoding == null)
        {
            headerEncoding = Encoding.UTF8;
        }

        string page = DecodeWebPageBytes(content, headerEncoding);

        // A charset declared inside the document takes precedence over the server's header,
        // but only when it differs (ignoring case) and names an encoding that is available.
        string declaredCharset = FindDeclaredCharset(page);
        if (declaredCharset != null
            && !string.Equals(declaredCharset, headerCharset, StringComparison.OrdinalIgnoreCase))
        {
            Encoding declaredEncoding = GetEncodingOrNull(declaredCharset);
            if (declaredEncoding != null)
            {
                page = DecodeWebPageBytes(content, declaredEncoding);
            }
        }

        return page;
    }
    catch (Exception ex)
    {
        // Report the exception itself: InnerException is usually null and hides the real cause.
        Log.Add(LogTypes.Error, 0, "Newsletter Studio error in DownloadWebPage()" + ": " + ex);
        throw new WebException("Could not load url: " + Url + ". Inner exception: " + ex.Message, ex);
    }
}

/// <summary>
/// Decodes the buffered bytes with the given encoding. A byte order mark at the start of
/// the data overrides the supplied encoding, as it does when reading the response stream
/// directly through a <see cref="StreamReader"/>.
/// </summary>
/// <param name="content">Raw bytes of the response body.</param>
/// <param name="encoding">Encoding to use when no byte order mark is present.</param>
/// <returns>The decoded text.</returns>
private static string DecodeWebPageBytes(byte[] content, Encoding encoding)
{
    using (MemoryStream stream = new MemoryStream(content))
    using (StreamReader reader = new StreamReader(stream, encoding))
    {
        return reader.ReadToEnd();
    }
}

/// <summary>
/// Looks up an encoding by charset name without throwing.
/// </summary>
/// <param name="charsetName">Charset name; may be null, empty or quoted.</param>
/// <returns>The matching encoding, or <c>null</c> when the name is missing or not recognised.</returns>
private static Encoding GetEncodingOrNull(string charsetName)
{
    if (string.IsNullOrEmpty(charsetName))
    {
        return null;
    }

    try
    {
        return Encoding.GetEncoding(charsetName.Trim(' ', '"', '\''));
    }
    catch (ArgumentException)
    {
        // Encoding.GetEncoding rejects empty or unknown names with ArgumentException.
        return null;
    }
}

/// <summary>
/// Extracts the value of the first <c>charset=</c> declaration found in the text.
/// </summary>
/// <param name="markup">Decoded page text to search.</param>
/// <returns>
/// The declared charset name, or <c>null</c> when there is no declaration, the value is
/// empty, or no terminating character follows it.
/// </returns>
private static string FindDeclaredCharset(string markup)
{
    const string Marker = "charset=";

    int start = markup.IndexOf(Marker, StringComparison.OrdinalIgnoreCase);
    if (start < 0)
    {
        return null;
    }

    start += Marker.Length;

    // Skip an opening quote so that <meta charset="utf-8"> yields "utf-8" rather than "".
    if (start < markup.Length && (markup[start] == '"' || markup[start] == '\''))
    {
        start++;
    }

    int end = markup.IndexOfAny(new[] { ' ', '"', '\'', ';', '>', '/', '\t', '\r', '\n' }, start);
    if (end <= start)
    {
        // No terminator (-1) or an empty value: treat as "no usable declaration".
        return null;
    }

    return markup.Substring(start, end - start);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment