Skip to content

Instantly share code, notes, and snippets.

@jbubriski
Created May 24, 2012 13:50
Show Gist options
  • Save jbubriski/2781650 to your computer and use it in GitHub Desktop.
Save jbubriski/2781650 to your computer and use it in GitHub Desktop.
Find all links from a base URL, then check those against a new URL
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Windows;
using System.Windows.Controls;
using System.Windows.Data;
using System.Windows.Documents;
using System.Windows.Input;
using System.Windows.Media;
using System.Windows.Media.Imaging;
using System.Windows.Navigation;
using System.Windows.Shapes;
namespace MigrationBrokenLinkChecker
{
/// <summary>
/// Outcome of requesting a single URL, recorded both while crawling the source
/// site and while re-checking the same paths against the target site.
/// </summary>
public class LinkCheckResult
{
/// <summary>The full URL that was requested.</summary>
public string Url { get; set; }
/// <summary>The URL with the base source URL removed; appended to the target base URL when re-checking.</summary>
public string RelativeUrl { get; set; }
/// <summary>True when the request produced a response without throwing; false otherwise.</summary>
public bool Successful { get; set; }
}
/// <summary>
/// Interaction logic for MainWindow.xaml.
/// Crawls every same-site link reachable from a base source URL, then requests
/// the same relative paths against a base target URL and reports which ones fail.
/// </summary>
/// <remarks>
/// All network requests are synchronous and are issued from the click handler,
/// so the window does not repaint until the whole run has finished.
/// The ux* controls referenced here are presumably declared in MainWindow.xaml.
/// </remarks>
public partial class MainWindow : Window
{
    // Matches double-quoted href attribute values; group 1 is the raw link target.
    private static readonly Regex HrefPattern = new Regex("href=\"(.*?)\"", RegexOptions.Compiled);

    // URLs already requested during the current crawl (ordinal comparison, same as ==).
    private readonly HashSet<string> _visitedUrls = new HashSet<string>();

    // Results of crawling the source site.
    private List<LinkCheckResult> _links;

    // Results of re-checking the crawled paths against the target site.
    private List<LinkCheckResult> _links2;

    /// <summary>Base URL of the site that is crawled for links.</summary>
    public string _baseSourceUrl { get; set; }

    /// <summary>Base URL that the discovered relative paths are checked against.</summary>
    public string _baseTargetUrl { get; set; }

    /// <summary>Creates the window and loads its XAML-defined content.</summary>
    public MainWindow()
    {
        InitializeComponent();
    }

    /// <summary>
    /// Reads both base URLs from the UI, crawls the source site, then checks the
    /// discovered paths against the target site.
    /// </summary>
    private void uxGo_Click(object sender, RoutedEventArgs e)
    {
        _baseSourceUrl = uxBaseSourceUrl.Text;
        _baseTargetUrl = uxBaseTargetUrl.Text;
        _links = new List<LinkCheckResult>();
        _links2 = new List<LinkCheckResult>();
        _visitedUrls.Clear();

        // An empty base URL cannot be crawled or checked; report it instead of
        // letting every request fail (or the handler throw).
        if (string.IsNullOrEmpty(_baseSourceUrl) || string.IsNullOrEmpty(_baseTargetUrl))
        {
            Out("Please enter both a source URL and a target URL.");
            return;
        }

        Out("Gathering links...");
        GetLinks(_baseSourceUrl);
        Out("Done.");
        Out("");
        Out("");
        Out("Checking links against new URL...");
        CheckLinks(_baseTargetUrl);
    }

    /// <summary>Appends a line to the on-screen log and mirrors it to the debug output.</summary>
    /// <param name="text">The line to write, without a trailing newline.</param>
    private void Out(string text)
    {
        uxDebug.Text += text + "\r\n";
        Debug.WriteLine(text);
    }

    /// <summary>
    /// Requests <paramref name="url"/>, records whether it succeeded, and then
    /// recursively follows every not-yet-visited link on the page that lives
    /// under the base source URL and is not an excluded resource.
    /// </summary>
    /// <param name="url">Absolute URL of the page to fetch.</param>
    private void GetLinks(string url)
    {
        _visitedUrls.Add(url);

        List<string> childUrls;
        bool successful;
        try
        {
            // The page is downloaded and its response closed before any child
            // link is followed, so no connection stays open during recursion.
            childUrls = DownloadHrefs(url);
            successful = true;
        }
        catch (Exception ex)
        {
            // Best-effort crawl: any failure (bad URI, network error, HTTP error
            // status) marks this URL as broken and the crawl carries on.
            Debug.WriteLine("Request failed for " + url + ": " + ex.Message);
            childUrls = new List<string>();
            successful = false;
        }

        var relativeUrl = StripBase(url, _baseSourceUrl);
        _links.Add(new LinkCheckResult
        {
            Url = url,
            RelativeUrl = relativeUrl,
            Successful = successful
        });
        Out((successful ? "1 - " : "0 - ") + relativeUrl);

        foreach (var childUrl in childUrls)
        {
            // Checked at iteration time: an earlier sibling's crawl may already
            // have visited this URL.
            if (ShouldFollow(childUrl))
            {
                GetLinks(childUrl);
            }
        }
    }

    /// <summary>
    /// Requests each crawled relative path against <paramref name="baseUrl"/> and
    /// records whether the request succeeded.
    /// </summary>
    /// <param name="baseUrl">Base URL that each relative path is appended to.</param>
    private void CheckLinks(string baseUrl)
    {
        foreach (var link in _links)
        {
            var newLink = baseUrl + link.RelativeUrl;
            bool successful;
            try
            {
                var request = (HttpWebRequest)WebRequest.Create(newLink);

                // Only reachability matters here; dispose the response right away
                // so the connection is released.
                using (request.GetResponse())
                {
                    successful = true;
                }
            }
            catch (Exception ex)
            {
                // Best-effort check: any failure marks the link as broken.
                Debug.WriteLine("Request failed for " + newLink + ": " + ex.Message);
                successful = false;
            }

            _links2.Add(new LinkCheckResult
            {
                Url = newLink,
                RelativeUrl = link.RelativeUrl,
                Successful = successful
            });
            Out((successful ? "1 - " : "0 - ") + link.RelativeUrl);
        }
    }

    /// <summary>
    /// Downloads <paramref name="url"/> and returns the value of every
    /// double-quoted href attribute found in the body, in document order.
    /// </summary>
    /// <param name="url">Absolute URL to download.</param>
    /// <returns>The raw href values; empty when the page contains none.</returns>
    private static List<string> DownloadHrefs(string url)
    {
        var request = (HttpWebRequest)WebRequest.Create(url);

        string content;
        using (var response = request.GetResponse())
        using (var responseStream = response.GetResponseStream())
        using (var reader = new StreamReader(responseStream))
        {
            content = reader.ReadToEnd();
        }

        var hrefs = new List<string>();
        foreach (Match match in HrefPattern.Matches(content))
        {
            hrefs.Add(match.Groups[1].Value);
        }

        return hrefs;
    }

    /// <summary>
    /// Decides whether a discovered link should be crawled: it must be unvisited,
    /// live under the base source URL, and not be a stylesheet, image, archive,
    /// feed, tag page or category page.
    /// </summary>
    /// <param name="childUrl">The raw href value taken from a page.</param>
    private bool ShouldFollow(string childUrl)
    {
        return !_visitedUrls.Contains(childUrl)
            && childUrl.StartsWith(_baseSourceUrl, StringComparison.Ordinal)
            && !childUrl.EndsWith(".css", StringComparison.Ordinal)
            && !childUrl.EndsWith(".png", StringComparison.Ordinal)
            && !childUrl.EndsWith(".zip", StringComparison.Ordinal)
            && !childUrl.EndsWith("/feed/", StringComparison.Ordinal)
            && !childUrl.Contains("/tag/")
            && !childUrl.Contains("/category/");
    }

    /// <summary>
    /// Removes <paramref name="baseUrl"/> from the start of <paramref name="url"/>.
    /// Only the leading occurrence is removed, and an empty base leaves the URL
    /// unchanged instead of throwing.
    /// </summary>
    /// <param name="url">The absolute URL.</param>
    /// <param name="baseUrl">The prefix to strip; may be null or empty.</param>
    /// <returns>The remainder of the URL, or the URL itself when it does not start with the base.</returns>
    private static string StripBase(string url, string baseUrl)
    {
        if (!string.IsNullOrEmpty(baseUrl) && url.StartsWith(baseUrl, StringComparison.Ordinal))
        {
            return url.Substring(baseUrl.Length);
        }

        return url;
    }
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment