Skip to content

Instantly share code, notes, and snippets.

@yemrekeskin
Created February 11, 2014 21:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save yemrekeskin/8944186 to your computer and use it in GitHub Desktop.
Save yemrekeskin/8944186 to your computer and use it in GitHub Desktop.
Sample for WebCrawler
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
namespace LinkCrawler
{
/// <summary>
/// Console host for the sample: runs the crawler against one fixed page
/// and writes each returned entry to standard output.
/// </summary>
class Program
{
    /// <summary>
    /// Program entry point.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    static void Main(string[] args)
    {
        var linkCrawler = new LinkCrawler();
        List<string> foundLinks = linkCrawler.Catch("http://blog.yemrekeskin.com/en/");

        // One entry per output line, in the order the crawler returned them.
        for (int index = 0; index < foundLinks.Count; index++)
        {
            Console.WriteLine(foundLinks[index]);
        }

        // Block until Enter is pressed so the output stays visible.
        Console.ReadLine();
    }
}
/// <summary>
/// Contract for a component that fetches a resource and extracts the links it contains.
/// </summary>
public interface ILinkCrawler
{
    /// <summary>
    /// Fetches the resource at <paramref name="link"/> and returns the link targets found in it.
    /// </summary>
    /// <param name="link">Address of the resource to scan.</param>
    /// <returns>The extracted link targets, in the order they appear.</returns>
    List<string> Catch(string link);
}

/// <summary>
/// <see cref="ILinkCrawler"/> implementation that downloads a page with
/// <see cref="WebRequest"/> and pulls absolute http/https anchor targets out of
/// the markup with a regular expression.
/// </summary>
public class LinkCrawler
    : ILinkCrawler
{
    // Matches an opening <a ...> tag carrying a double-quoted href that starts with
    // http:// or https://. Group 1 is the URL *without* its scheme.
    //  - "[^>]*?" keeps the match inside a single tag, so several anchors on one
    //    line yield several matches instead of one greedy match.
    //  - "https?" accepts plain http as well as https.
    //  - "[^\"]+" stops at the closing quote, so no post-match truncation is needed.
    // Built once: Regex instances are immutable and safe to share between calls.
    private static readonly Regex AnchorHrefPattern = new Regex(
        "<a\\b[^>]*?\\shref\\s*=\\s*\"https?://([^\"]+)\"",
        RegexOptions.IgnoreCase);

    /// <summary>
    /// Downloads the resource at <paramref name="link"/>, decodes it as UTF-8 and
    /// returns the target of every anchor whose href is an absolute http/https URL.
    /// </summary>
    /// <param name="link">
    /// Address to fetch; any URI scheme understood by <see cref="WebRequest.Create(string)"/>.
    /// </param>
    /// <returns>
    /// The matched URLs in document order, with the leading <c>http://</c> or
    /// <c>https://</c> removed (for example <c>example.com/page</c>). Empty when the
    /// document contains no matching anchors. Duplicates are kept.
    /// </returns>
    /// <exception cref="ApplicationException">
    /// <paramref name="link"/> is null or empty, or the resource could not be fetched
    /// or read; in the latter case the underlying failure is the inner exception.
    /// </exception>
    /// <remarks>
    /// Exceptions raised by <see cref="WebRequest.Create(string)"/> itself (for example
    /// a malformed URI) are not wrapped and propagate unchanged.
    /// </remarks>
    public List<string> Catch(string link)
    {
        if (String.IsNullOrEmpty(link))
            throw new ApplicationException("The link to crawl must not be null or empty.");

        WebRequest request = WebRequest.Create(link);

        string html;
        try
        {
            // "using" disposes only what was actually created, so a failure in
            // GetResponse() can no longer trigger a NullReferenceException during
            // cleanup and hide the real error.
            using (WebResponse response = request.GetResponse())
            using (StreamReader reader = new StreamReader(response.GetResponseStream(), Encoding.UTF8))
            {
                html = reader.ReadToEnd();
            }
        }
        catch (Exception ex)
        {
            // Same exception type and message as before; the cause is now preserved.
            throw new ApplicationException(ex.Message, ex);
        }

        List<string> links = new List<string>();
        foreach (Match match in AnchorHrefPattern.Matches(html))
        {
            links.Add(match.Groups[1].Value);
        }
        return links;
    }
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment