kch (owner)

Revisions

gist: 99037 Download_button fork
public
Description:
my super duper html stripper regexp
Public Clone URL: git://gist.github.com/99037.git
strip_html.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# Core extension that adds HTML/XML tag stripping to String.
class String
  # Removes, in place, every substring that looks like an opening, closing or
  # self-closing tag (for example <b>, </b>, <br />, <a href="x">).
  #
  # Only the tag markup is removed; text between tags is left untouched.
  # Constructs whose "<" is not immediately followed by an optional "/" and a
  # tag-name character (comments such as <!-- -->, a lone "<") do not match
  # and are kept.
  #
  # Unlike most bang methods, this one returns the receiver even when nothing
  # was removed: gsub! yields nil in that case, so self is returned explicitly.
  #
  # NOTE(review): a quoted attribute value containing no whitespace or ">" can
  # also be matched by the unquoted-value alternative. On long unterminated
  # tags this ambiguity may cause heavy backtracking on regexp engines that do
  # not memoize -- confirm before running this on untrusted input.
  #
  # @return [String] self, with tags removed
  def strip_html!
    tag_pattern = /
<
\/? # optional end tag
([\w:-]+) # tag name (capturing)
(?: # optional attribute set (allowing even for end tags)
(?: # group for attribute repetition
\s+ # mandatory space before first attribute
[\w:-]+ # attribute name
(?: # optional attribute value
\s*=\s* # optionally space-wrapped equal sign
(?: # attribute value group (for |)
'[^']*' | # either a single quoted attribute '#happy color coding
"[^"]*" | # or a double quoted attribute "#happy color coding
[^\s>]+ # or a non-space non tag end value
) # end attribute value group
)? # attr value is optional
)* # can have zero or more attributes
)? # may not have attributes at all
\s* # optional trailing spaces
\/? # optional self-closing empty tag
> # end tag
/ix

    # The result of gsub! is deliberately ignored (it is nil when no tag matched).
    gsub!(tag_pattern, '')
    self
  end

  # Non-destructive counterpart of #strip_html!: the receiver is left as is
  # and the tags are stripped from a duplicate.
  #
  # @return [String] a copy of the receiver with tags removed
  def strip_html
    copy = dup
    copy.strip_html!
  end
end