Skip to content

Instantly share code, notes, and snippets.

@mouhong
Created April 26, 2015 16:51
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save mouhong/c09487502e261f7ce53d to your computer and use it in GitHub Desktop.
Save mouhong/c09487502e261f7ce53d to your computer and use it in GitHub Desktop.
HtmlTagClosing
using System;
using System.Collections.Generic;
using System.Text;
namespace HtmlTagClosing
{
/// <summary>
/// String helpers for repairing HTML fragments that were cut off mid-document.
/// </summary>
public static class StringExtensions
{
    /// <summary>
    /// Appends the end tags that are missing from <paramref name="html"/> and drops a
    /// start or end tag that was cut off at the very end of the string.
    /// </summary>
    /// <param name="html">The (possibly truncated) HTML fragment.</param>
    /// <returns>
    /// The repaired fragment. A null, empty or whitespace-only input is returned unchanged.
    /// </returns>
    public static string CloseTags(this string html)
    {
        if (String.IsNullOrWhiteSpace(html))
        {
            return html;
        }
        return new TagCloser().CloseTags(html);
    }

    /// <summary>
    /// Single-pass, character-driven scanner that tracks which elements are still open.
    /// It is a heuristic, not a full HTML parser: comments, CDATA and script bodies get
    /// no special treatment beyond never being pushed as open elements.
    /// </summary>
    class TagCloser
    {
        // Elements that never have an end tag, so they must not be tracked as open.
        private static readonly HashSet<string> VoidElements = new HashSet<string>(StringComparer.OrdinalIgnoreCase)
        {
            "area", "base", "br", "col", "embed", "hr", "img", "input",
            "link", "meta", "param", "source", "track", "wbr"
        };

        private readonly Stack<string> _openTags = new Stack<string>();
        private string _html;
        private int _currentCharIndex;
        private int _pendingTagNameStartIndex;
        private string _pendingTagName;
        private int _currentEndTagStartIndex;
        // Quote character of the attribute value being read, or '\0' when outside a quoted value.
        private char _attributeQuote;
        private State _state;

        // True once the last character has been consumed (also true for an empty string).
        private bool Eof
        {
            get { return _currentCharIndex >= _html.Length - 1; }
        }

        /// <summary>
        /// Scans <paramref name="html"/> once and returns it with every still-open element
        /// closed, innermost first.
        /// </summary>
        /// <param name="html">The non-null HTML fragment to repair.</param>
        /// <returns>The fragment without a trailing partial tag, followed by the missing end tags.</returns>
        public string CloseTags(string html)
        {
            _html = html;
            _currentCharIndex = -1;
            _openTags.Clear();
            _attributeQuote = '\0';
            _state = State.Text;

            while (!Eof)
            {
                var ch = ReadNext();
                switch (_state)
                {
                    case State.InTagName:
                        ReadTagNameChar(ch);
                        break;
                    case State.InAttributes:
                        ReadAttributeChar(ch);
                        break;
                    case State.InEndTag:
                        ReadEndTagChar(ch);
                        break;
                    default:
                        ReadTextChar(ch);
                        break;
                }
            }

            // If the input stopped inside a tag, keep only the text before that tag's '<'.
            var keepLength = _html.Length;
            if (_state == State.InTagName || _state == State.InAttributes)
            {
                keepLength = _pendingTagNameStartIndex - 1;
            }
            else if (_state == State.InEndTag)
            {
                keepLength = _currentEndTagStartIndex;
            }

            var sb = new StringBuilder();
            sb.Append(_html, 0, keepLength);
            while (_openTags.Count > 0)
            {
                sb.Append("</").Append(_openTags.Pop()).Append('>');
            }
            return sb.ToString();
        }

        // Handles one character of text content; only '<' can start a tag.
        private void ReadTextChar(char ch)
        {
            if (ch != '<')
            {
                return;
            }

            var next = PeekNext();
            if (next == '/')
            {
                _currentEndTagStartIndex = _currentCharIndex;
                ReadNext();
                _state = State.InEndTag;
                return;
            }

            // A '<' at the very end is a cut-off tag. Otherwise it only opens a tag when a
            // name (or '!' / '?' declaration) follows; "a < b" stays plain text.
            if (Eof || char.IsLetter(next) || next == '!' || next == '?')
            {
                _pendingTagNameStartIndex = _currentCharIndex + 1;
                _state = State.InTagName;
            }
        }

        // Handles one character while reading the name of a start tag.
        private void ReadTagNameChar(char ch)
        {
            // Self-closing without attributes, e.g. <br/>.
            if (ch == '/' && PeekNext() == '>')
            {
                ReadNext();
                _state = State.Text;
                return;
            }

            if (ch == '>')
            {
                _pendingTagName = CurrentTagName();
                OpenPendingTag();
            }
            else if (char.IsWhiteSpace(ch))
            {
                // Any whitespace (space, tab, newline) ends the name and starts the attribute list.
                _pendingTagName = CurrentTagName();
                _attributeQuote = '\0';
                _state = State.InAttributes;
            }
        }

        // Handles one character of a start tag's attribute list.
        private void ReadAttributeChar(char ch)
        {
            // Inside a quoted value nothing is markup until the matching quote.
            if (_attributeQuote != '\0')
            {
                if (ch == _attributeQuote)
                {
                    _attributeQuote = '\0';
                }
                return;
            }

            // Quotes are only tracked for elements; a comment such as <!-- don't --> may
            // legitimately contain an unbalanced quote.
            if ((ch == '"' || ch == '\'') && !IsDeclaration(_pendingTagName))
            {
                _attributeQuote = ch;
                return;
            }

            // Self-closing with attributes, e.g. <img src="x" />.
            if (ch == '/' && PeekNext() == '>')
            {
                ReadNext();
                _state = State.Text;
                return;
            }

            if (ch == '>')
            {
                OpenPendingTag();
            }
        }

        // Handles one character of an end tag; acts only on the closing '>'.
        private void ReadEndTagChar(char ch)
        {
            if (ch != '>')
            {
                return;
            }

            var nameStart = _currentEndTagStartIndex + 2; // skip "</"
            var name = _html.Substring(nameStart, _currentCharIndex - nameStart).Trim();
            CloseElement(name);
            _state = State.Text;
        }

        // Text between the start of the pending tag name and the current character (exclusive).
        private string CurrentTagName()
        {
            return _html.Substring(_pendingTagNameStartIndex, _currentCharIndex - _pendingTagNameStartIndex);
        }

        // Records the completed start tag as open, unless it can never take an end tag.
        private void OpenPendingTag()
        {
            var name = _pendingTagName;
            if (name.Length > 0 && !IsDeclaration(name) && !VoidElements.Contains(name))
            {
                _openTags.Push(name);
            }
            _state = State.Text;
        }

        // Pops open elements down to and including the nearest one called name.
        // An end tag that matches no open element is ignored.
        private void CloseElement(string name)
        {
            var isOpen = false;
            foreach (var open in _openTags)
            {
                if (String.Equals(open, name, StringComparison.OrdinalIgnoreCase))
                {
                    isOpen = true;
                    break;
                }
            }
            if (!isOpen)
            {
                return;
            }

            while (_openTags.Count > 0)
            {
                var closed = _openTags.Pop();
                if (String.Equals(closed, name, StringComparison.OrdinalIgnoreCase))
                {
                    break;
                }
            }
        }

        // True for names beginning with '!' or '?' (comments, doctype, processing instructions).
        private static bool IsDeclaration(string tagName)
        {
            return tagName.Length > 0 && (tagName[0] == '!' || tagName[0] == '?');
        }

        // Advances to the next character and returns it, or '\0' at the end of input.
        private char ReadNext()
        {
            return Eof ? '\0' : _html[++_currentCharIndex];
        }

        // Returns the next character without consuming it, or '\0' at the end of input.
        private char PeekNext()
        {
            return Eof ? '\0' : _html[_currentCharIndex + 1];
        }

        enum State
        {
            Text, InTagName, InAttributes, InEndTag
        }
    }
}
/// <summary>
/// Console demo that runs <c>CloseTags</c> over a few truncated HTML fragments.
/// </summary>
class Program
{
    /// <summary>
    /// Prints the repaired form of each sample fragment, then waits for a key press.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        var samples = new[]
        {
            // Missing end tag -> end tags get appended
            "<div>Hello World",
            "<div>Hello, <b>World",
            // Broken end tag -> the partial end tag is replaced by a complete one
            "<div>Hello World</di",
            "<div>Hello, <b>World</",
            // Broken start tag -> the partial start tag is dropped
            "<div>Hello World. <span"
        };

        foreach (var sample in samples)
        {
            Console.WriteLine(sample.CloseTags());
        }

        Console.WriteLine("Press any key to continue...");
        Console.ReadKey();
    }
}
}
@fiatuno60
Copy link

fiatuno60 commented May 20, 2019

Hi
Great code piece thanks :)

It looks like line 43 needs to be changed as shown below:

if (_state == State.InTagName || _state == State.InAttributes)

I had an HTML snippet like the one below:

<P>We’re looking forward to seeing you at the ggg gg gg gg, please <A class=debt href="mailto: engage@ggg.ggg.gg?subject=gg%gg"><STRONG><B><U>engage@gg.ggg.gg</B></U></STRONG></A> if you have any questions.</P>|XXXX|<P>Happy paddling!</P>|XXXX|<P>ggg hhh dd& Sport</P>|XXXX|<P>hh hhh | Assistant Events Coordinator | dd& Sport</P>|XXXX|<P>hh 5,hhY h | gg Point | 2 gg Street | dfgdfg gdfg 4001 ||XXXX|e: s17.dfgdfg@fgdfg.gg.gg</P><P><BR>|XXXX||XXXX||XXXX||XXXX|<P><STRONG><B><input type="button" value="Yes, I have read the Guide to gg. Please close page." onclick="self.close()"></B></STRONG></P><BR><BR>

@fiatuno60
Copy link

fiatuno60 commented May 20, 2019

thanks

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment