wizard04wsu/Excel_stripHTML.vbs

## Excel_stripHTML.vbs
'requires reference to Microsoft VBScript Regular Expressions 5.5

Sub stripHTML()

    Dim cell As Object

    For Each cell In Selection
        If Not (IsEmpty(cell) Or IsNumeric(cell) Or cell.HasFormula) Then
            On Error Resume Next
            cell.Value = doStripHTML(cell.Formula)
            If Err > 0 Then
                cell.Value = doStripHTML(cell.Value)
            End If
            On Error GoTo 0
        End If
    Next

End Sub

Private Function doStripHTML(strHTML) As String
'Strips the HTML tags from strHTML

    Dim rxp
    Set rxp = New RegExp

    rxp.IgnoreCase = True
    rxp.Global = True
    rxp.Pattern = "</?[a-z]+([^<]|\n)*?>|<!--(.|\n)*?-->"

    'Replace all HTML tag matches with the empty string
    strHTML = rxp.Replace(strHTML, "")

    strHTML = Replace(strHTML, "&lt;", "<")
    strHTML = Replace(strHTML, "&gt;", ">")
    strHTML = Replace(strHTML, "&quot;", """")
    strHTML = Replace(strHTML, "&amp;", "&")

    rxp.Pattern = "(?:[\s\xA0\n]|&nbsp;)+"
    strHTML = rxp.Replace(strHTML, " ")
    strHTML = Trim(strHTML)

    If Left(strHTML, 1) = "=" Then strHTML = "'" & strHTML

    doStripHTML = strHTML

End Function
	'requires reference to Microsoft VBScript Regular Expressions 5.5

	Sub stripHTML()

	Dim cell As Object

	For Each cell In Selection
	If Not (IsEmpty(cell) Or IsNumeric(cell) Or cell.HasFormula) Then
	On Error Resume Next
	cell.Value = doStripHTML(cell.Formula)
	If Err > 0 Then
	cell.Value = doStripHTML(cell.Value)
	End If
	On Error GoTo 0
	End If
	Next

	End Sub

	Private Function doStripHTML(strHTML) As String
	'Strips the HTML tags from strHTML

	Dim rxp
	Set rxp = New RegExp

	rxp.IgnoreCase = True
	rxp.Global = True
	rxp.Pattern = "</?[a-z]+([^<]\|\n)?>\|<!--(.\|\n)?-->"

	'Replace all HTML tag matches with the empty string
	strHTML = rxp.Replace(strHTML, "")

	strHTML = Replace(strHTML, "<", "<")
	strHTML = Replace(strHTML, ">", ">")
	strHTML = Replace(strHTML, """, """")
	strHTML = Replace(strHTML, "&", "&")

	rxp.Pattern = "(?:[\s\xA0\n]\| )+"
	strHTML = rxp.Replace(strHTML, " ")
	strHTML = Trim(strHTML)

	If Left(strHTML, 1) = "=" Then strHTML = "'" & strHTML

	doStripHTML = strHTML

	End Function