Created
February 11, 2014 21:08
-
-
Save yemrekeskin/8944186 to your computer and use it in GitHub Desktop.
Sample for WebCrawler
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Collections.Generic; | |
using System.IO; | |
using System.Linq; | |
using System.Net; | |
using System.Text; | |
using System.Text.RegularExpressions; | |
using System.Threading.Tasks; | |
namespace LinkCrawler | |
{ | |
/// <summary>
/// Console entry point: crawls one hard-coded page and prints every link returned.
/// </summary>
class Program
{
    /// <summary>
    /// Runs the crawler against the sample blog address, writes each result on its own
    /// line, then blocks on a console read so the output stays visible.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    static void Main(string[] args)
    {
        var crawler = new LinkCrawler();
        List<string> links = crawler.Catch("http://blog.yemrekeskin.com/en/");

        // One console line per collected link, in the order the crawler returned them.
        links.ForEach(Console.WriteLine);

        Console.ReadLine();
    }
}
/// <summary>
/// Contract for a component that gathers links from the resource at a given address.
/// </summary>
public interface ILinkCrawler
{
    /// <summary>
    /// Collects links for the supplied address.
    /// </summary>
    /// <param name="link">Address of the resource to inspect.</param>
    /// <returns>
    /// The collected links as strings. The implementation in this file returns the part
    /// of each anchor target that follows the scheme separator.
    /// </returns>
    List<string> Catch(string link);
}
/// <summary>
/// Downloads a resource and extracts the targets of its absolute http/https anchors.
/// </summary>
public class LinkCrawler
    : ILinkCrawler
{
    // Built once and reused. The pattern is bounded by [^>] and [^"] so every anchor is
    // matched individually (several per line included), and "https?" accepts both plain
    // http and https targets. Group 1 is the target without its scheme prefix.
    private static readonly Regex AnchorHrefPattern = new Regex(
        "<a\\s+(?:[^>]*?\\s)?href\\s*=\\s*\"https?://([^\"]+)\"",
        RegexOptions.IgnoreCase);

    /// <summary>
    /// Fetches <paramref name="link"/> and returns the target of every double-quoted,
    /// absolute http/https <c>href</c> found in an <c>&lt;a&gt;</c> tag.
    /// </summary>
    /// <param name="link">Address of the resource to download; must not be null or empty.</param>
    /// <returns>
    /// The link targets in document order, each without its leading <c>http://</c> or
    /// <c>https://</c>. Empty when the document contains no matching anchor.
    /// </returns>
    /// <exception cref="ApplicationException">
    /// <paramref name="link"/> is null or empty, or the download failed (the original
    /// failure is available as <see cref="Exception.InnerException"/>).
    /// </exception>
    public List<string> Catch(string link)
    {
        if (String.IsNullOrEmpty(link))
        {
            throw new ApplicationException("link must not be null or empty.");
        }

        // Deliberately outside the try block: errors raised while parsing the address
        // keep propagating unwrapped, exactly as callers have seen them so far.
        WebRequest request = WebRequest.Create(link);

        string html;
        try
        {
            // using disposes the response and reader even when the request fails before
            // either object exists, so cleanup can never hide the real error.
            using (WebResponse response = request.GetResponse())
            using (StreamReader reader = new StreamReader(response.GetResponseStream(), Encoding.UTF8))
            {
                html = reader.ReadToEnd();
            }
        }
        catch (Exception ex)
        {
            // Same exception type and message as before, now with the cause attached.
            throw new ApplicationException(ex.Message, ex);
        }

        return ExtractLinks(html);
    }

    /// <summary>
    /// Returns group 1 of every anchor match in <paramref name="html"/>, in order.
    /// </summary>
    private static List<string> ExtractLinks(string html)
    {
        List<string> links = new List<string>();
        foreach (Match match in AnchorHrefPattern.Matches(html))
        {
            links.Add(match.Groups[1].Value);
        }

        return links;
    }
}
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment