# Codercise/gist:2827919

## gistfile1.txt
#################################
# FIRST VARIATION (only removes the first tag)
#################################
def remove_tags(html):
    """Return the words between the first tag and the first closing tag.

    Only the text that follows the first ``>`` in *html* and precedes the
    first ``</`` is looked at; any markup inside that span is left as is.

    Returns ``None`` when *html* contains no ``>`` at all.  When there is
    no ``</``, everything after the first ``>`` is used.
    """
    tag_close = html.find('>')
    # Test the search result, not the '>' literal: find() reports a
    # missing substring as -1.
    if tag_close == -1:
        return None
    closing_tag = html.find('</')
    if closing_tag == -1:
        # A -1 slice bound would silently drop the last character rather
        # than mean "not found", so run to the end of the string instead.
        closing_tag = len(html)
    return html[tag_close + 1:closing_tag].split()


#################################
# SECOND VARIATION (still need to separate the capitalised words)
#################################
from HTMLParser import HTMLParser

class MLStripper(HTMLParser):
    """Collect the text content of HTML markup, discarding the tags.

    Every run of text the parser reports is appended to ``self.fed``;
    ``get_data()`` returns those runs concatenated.
    """

    def __init__(self):
        # Run the base-class initialiser instead of calling reset()
        # directly: on Python 3 it also sets ``convert_charrefs``, which
        # feed() reads.  It is called explicitly rather than through
        # super() because Python 2's HTMLParser is an old-style class.
        HTMLParser.__init__(self)
        self.fed = []

    def handle_data(self, d):
        """Record one run of text reported by the parser."""
        self.fed.append(d)

    def get_data(self):
        """Return all recorded text joined with no separator."""
        return ''.join(self.fed)

def remove_tags(html):
    """Return the whitespace-separated words of *html* with markup removed.

    The markup is parsed by ``MLStripper``.  The text it collects is
    concatenated without a separator before being split, so text runs
    that are not separated by whitespace come back as a single word.
    """
    stripper = MLStripper()
    stripper.feed(html)
    text = stripper.get_data()
    return text.split()

##### TEST CASES
# The ``#>>>`` comment after each call gives the intended result.

heading_and_link = '''<h1>Title</h1><p>This is a
                    <a href="http://www.udacity.com">link</a>.<p>'''
print(remove_tags(heading_and_link))
#>>> ['Title','This','is','a','link','.']

table_markup = '''<table cellpadding='3'>
                     <tr><td>Hello</td><td>World!</td></tr>
                     </table>'''
print(remove_tags(table_markup))
#>>> ['Hello','World!']

tags_only = "<hello><goodbye>"
print(remove_tags(tags_only))
#>>> []
	#################################
	# FIRST VARIATION (only removes the first tag)
	#################################
	def remove_tags(html):
		"""Return the words between the first tag and the first closing tag.

		Only the text that follows the first ``>`` in *html* and precedes the
		first ``</`` is looked at; any markup inside that span is left as is.

		Returns ``None`` when *html* contains no ``>`` at all.  When there is
		no ``</``, everything after the first ``>`` is used.
		"""
		tag_close = html.find('>')
		# Test the search result, not the '>' literal: find() reports a
		# missing substring as -1.
		if tag_close == -1:
			return None
		closing_tag = html.find('</')
		if closing_tag == -1:
			# A -1 slice bound would silently drop the last character rather
			# than mean "not found", so run to the end of the string instead.
			closing_tag = len(html)
		return html[tag_close + 1:closing_tag].split()


	#################################
	# SECOND VARIATION (still need to separate the capitalised words)
	#################################
	from HTMLParser import HTMLParser

	class MLStripper(HTMLParser):
		"""Collect the text content of HTML markup, discarding the tags.

		Every run of text the parser reports is appended to ``self.fed``;
		``get_data()`` returns those runs concatenated.
		"""

		def __init__(self):
			# Run the base-class initialiser instead of calling reset()
			# directly: on Python 3 it also sets ``convert_charrefs``, which
			# feed() reads.  It is called explicitly rather than through
			# super() because Python 2's HTMLParser is an old-style class.
			HTMLParser.__init__(self)
			self.fed = []

		def handle_data(self, d):
			"""Record one run of text reported by the parser."""
			self.fed.append(d)

		def get_data(self):
			"""Return all recorded text joined with no separator."""
			return ''.join(self.fed)

	def remove_tags(html):
		"""Return the whitespace-separated words of *html* with markup removed.

		The markup is parsed by ``MLStripper``.  The text it collects is
		concatenated without a separator before being split, so text runs
		that are not separated by whitespace come back as a single word.
		"""
		stripper = MLStripper()
		stripper.feed(html)
		text = stripper.get_data()
		return text.split()

	##### TEST CASES
	# The ``#>>>`` comment after each call gives the intended result.

	heading_and_link = '''<h1>Title</h1><p>This is a
	<a href="http://www.udacity.com">link</a>.<p>'''
	print(remove_tags(heading_and_link))
	#>>> ['Title','This','is','a','link','.']

	table_markup = '''<table cellpadding='3'>
	<tr><td>Hello</td><td>World!</td></tr>
	</table>'''
	print(remove_tags(table_markup))
	#>>> ['Hello','World!']

	tags_only = "<hello><goodbye>"
	print(remove_tags(tags_only))
	#>>> []