Codercise/gist:2848914

## gistfile1.txt
# Question 4: Remove Tags

# When we add our words to the index, we don't really want to include
# html tags such as <body>, <head>, <table>, <a href="..."> and so on.

# Write a procedure, remove_tags, that takes as input a string and returns
# a list of words, in order, with the tags removed. Tags are defined to be
# strings surrounded by < >. Words are separated by whitespace or tags.
# You may assume the input does not include any unclosed tags, that is,
# there will be no '<' without a following '>'.
def remove_tags(html):
    """Return the words of html, in order, with every tag removed.

    A tag is any run of characters that starts with '<' and ends at the
    next '>'.  Words are separated by whitespace or by tags, so a tag
    acts as a word boundary exactly like a space does.

    html    -- the string to process.
    returns -- a list of word strings; an empty list when html holds no
               words (empty input, or nothing but tags and whitespace).
    """
    pieces = []
    pos = 0
    while True:
        open_at = html.find('<', pos)
        # Only look for the closing '>' once an opening '<' was found.
        close_at = html.find('>', open_at) if open_at != -1 else -1
        if close_at == -1:
            # No further complete tag: the rest of the input is plain text.
            pieces.append(html[pos:])
            break
        pieces.append(html[pos:open_at])
        pos = close_at + 1
    # Joining with a space makes each removed tag separate the words
    # around it; split() with no argument then drops all whitespace runs.
    return ' '.join(pieces).split()

def remove_tags(html):
    """Return the words of html, in order, with every tag removed.

    A tag is any run of characters that starts with '<' and ends at the
    next '>'.  Words are separated by whitespace or by tags.

    html    -- the string to process.
    returns -- a list of word strings; an empty list when html holds no
               words (empty input, or nothing but tags and whitespace).
    """
    # Imported locally because this file has no module-level import section.
    import re

    # Replace each tag with a space (not the empty string) so that the
    # words on either side of a tag stay separate, then split on whitespace.
    return re.sub(r'<[^>]*>', ' ', html).split()

# Demo calls from the exercise; the expected word list follows each call.
# print(...) with a single parenthesised argument runs on both Python 2
# and Python 3, whereas the bare print statement is Python 2 only.
print(remove_tags('''<h1>Title</h1><p>This is a
                    <a href="http://www.udacity.com">link</a>.<p>'''))
#>>> ['Title','This','is','a','link','.']

print(remove_tags('''<table cellpadding='3'>
                     <tr><td>Hello</td><td>World!</td></tr>
                     </table>'''))
#>>> ['Hello','World!']

print(remove_tags("<hello><goodbye>"))
#>>> []
	# Question 4: Remove Tags

	# When we add our words to the index, we don't really want to include
	# html tags such as <body>, <head>, <table>, <a href="..."> and so on.

	# Write a procedure, remove_tags, that takes as input a string and returns
	# a list of words, in order, with the tags removed. Tags are defined to be
	# strings surrounded by < >. Words are separated by whitespace or tags.
	# You may assume the input does not include any unclosed tags, that is,
	# there will be no '<' without a following '>'.
	def remove_tags(html):
	    """Return the words of html, in order, with every tag removed.

	    A tag is any run of characters that starts with '<' and ends at the
	    next '>'.  Words are separated by whitespace or by tags, so a tag
	    acts as a word boundary exactly like a space does.

	    html    -- the string to process.
	    returns -- a list of word strings; an empty list when html holds no
	               words (empty input, or nothing but tags and whitespace).
	    """
	    pieces = []
	    pos = 0
	    while True:
	        open_at = html.find('<', pos)
	        # Only look for the closing '>' once an opening '<' was found.
	        close_at = html.find('>', open_at) if open_at != -1 else -1
	        if close_at == -1:
	            # No further complete tag: the rest of the input is plain text.
	            pieces.append(html[pos:])
	            break
	        pieces.append(html[pos:open_at])
	        pos = close_at + 1
	    # Joining with a space makes each removed tag separate the words
	    # around it; split() with no argument then drops all whitespace runs.
	    return ' '.join(pieces).split()

	def remove_tags(html):
	    """Return the words of html, in order, with every tag removed.

	    A tag is any run of characters that starts with '<' and ends at the
	    next '>'.  Words are separated by whitespace or by tags.

	    html    -- the string to process.
	    returns -- a list of word strings; an empty list when html holds no
	               words (empty input, or nothing but tags and whitespace).
	    """
	    # Imported locally because this file has no module-level import section.
	    import re

	    # Replace each tag with a space (not the empty string) so that the
	    # words on either side of a tag stay separate, then split on whitespace.
	    return re.sub(r'<[^>]*>', ' ', html).split()

	# Demo calls from the exercise; the expected word list follows each call.
	# print(...) with a single parenthesised argument runs on both Python 2
	# and Python 3, whereas the bare print statement is Python 2 only.
	print(remove_tags('''<h1>Title</h1><p>This is a
	<a href="http://www.udacity.com">link</a>.<p>'''))
	#>>> ['Title','This','is','a','link','.']

	print(remove_tags('''<table cellpadding='3'>
	<tr><td>Hello</td><td>World!</td></tr>
	</table>'''))
	#>>> ['Hello','World!']

	print(remove_tags("<hello><goodbye>"))
	#>>> []