Skip to content

Instantly share code, notes, and snippets.

@AntMooreWebDev
Last active August 2, 2016 08:25
Show Gist options
  • Save AntMooreWebDev/f9a9beca4aea4c4a9d153198f12d0e03 to your computer and use it in GitHub Desktop.
Save AntMooreWebDev/f9a9beca4aea4c4a9d153198f12d0e03 to your computer and use it in GitHub Desktop.
[C#] Strip HTML From String
///////// Function implementation //////////
////////////////////////////////////////////
/// <summary>
/// Strips HTML markup from a string using regular expressions.
/// Tags (anything of the form <c>&lt;...&gt;</c>) are removed outright, each
/// <c>&amp;nbsp;</c> entity is turned into a regular space, the result is trimmed,
/// and every run of two or more whitespace characters is collapsed to one space.
/// </summary>
/// <param name="inputHtml">The string to be parsed; may be null or empty.</param>
/// <returns>
/// The text with HTML markup removed, or an empty string when
/// <paramref name="inputHtml"/> is null or empty.
/// </returns>
private string StripHtml(string inputHtml)
{
    // Regex.Replace throws on a null input, so short-circuit instead.
    if (string.IsNullOrEmpty(inputHtml))
    {
        return string.Empty;
    }

    // Single pass over the input: tags vanish, but &nbsp; becomes a real space so
    // that the words it separated are not glued together ("a&nbsp;b" -> "a b").
    string noHtml = System.Text.RegularExpressions.Regex.Replace(
        inputHtml,
        @"<[^>]+>|&nbsp;",
        match => match.Value == "&nbsp;" ? " " : string.Empty).Trim();

    // Collapse runs of 2+ whitespace characters; a lone whitespace character
    // (for example a single newline) is left untouched.
    return System.Text.RegularExpressions.Regex.Replace(noHtml, @"\s{2,}", " ");
}
////////// Extension implementation //////////
////////////////////////////////////////////
/// <summary>
/// Extension methods for the string class
/// </summary>
public static class StringExtension
{
    // Matches an HTML tag (<...>) or a literal &nbsp; entity. Built once and reused.
    private static readonly System.Text.RegularExpressions.Regex MarkupPattern =
        new System.Text.RegularExpressions.Regex(@"<[^>]+>|&nbsp;");

    // Matches a run of two or more whitespace characters. Built once and reused.
    private static readonly System.Text.RegularExpressions.Regex WhitespaceRunPattern =
        new System.Text.RegularExpressions.Regex(@"\s{2,}");

    /// <summary>
    /// Strips HTML markup from a string using regular expressions.
    /// Tags are removed outright, each <c>&amp;nbsp;</c> entity is turned into a
    /// regular space, the result is trimmed, and every run of two or more
    /// whitespace characters is collapsed to one space.
    /// </summary>
    /// <param name="str">The string to be parsed; may be null or empty.</param>
    /// <returns>
    /// The text with HTML markup removed, or an empty string when
    /// <paramref name="str"/> is null or empty.
    /// </returns>
    public static string StripHtml(this string str)
    {
        // Extension methods can be invoked on a null reference; avoid the
        // ArgumentNullException the regex call would otherwise raise.
        if (string.IsNullOrEmpty(str))
        {
            return string.Empty;
        }

        // Single pass: tags vanish, but &nbsp; becomes a real space so the words
        // it separated are not glued together ("a&nbsp;b" -> "a b").
        string noHtml = MarkupPattern
            .Replace(str, match => match.Value == "&nbsp;" ? " " : string.Empty)
            .Trim();

        // Collapse runs of 2+ whitespace characters; a lone whitespace character
        // (for example a single newline) is left untouched.
        return WhitespaceRunPattern.Replace(noHtml, " ");
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment