Chitrank-Dixit/extract_mail.py

## extract_mail.py
# this is the correct program
import re
import urllib2
# get_next_target() takes a page and returns every e-mail-like string found in it
def get_next_target(page):
    """Collect every e-mail-like token that appears in *page*.

    A token is a run of word characters, dots or hyphens, followed by '@',
    followed by another such run. The pattern is deliberately loose: it also
    accepts fragments such as 'name@host.' that end in a dot.

    Args:
        page: text to scan.

    Returns:
        A list of the matched strings in order of appearance, or None when
        nothing in *page* matches.
    """
    addresses = re.findall(r'[\w.-]+@[\w.-]+', page)
    # An empty result is reported as None rather than as an empty list.
    return addresses if addresses else None

def get_page(s):
    """Download the resource at URL *s* and extract e-mail-like strings from it.

    Args:
        s: URL passed straight to urllib2.urlopen().

    Returns:
        Whatever print_all_links() returns for the downloaded body: a list of
        matched strings, or None when nothing matches. (The result used to be
        computed and then thrown away, so callers always received None.)

    Errors raised by urllib2.urlopen() or by read() are not caught here and
    propagate to the caller.
    """
    response = urllib2.urlopen(s)
    try:
        body = response.read()
    finally:
        # Release the connection even if read() fails part-way through.
        response.close()
    return print_all_links(body)


def print_all_links(page):
    """Return the e-mail-like strings found in *page*, or None if there are none.

    Despite its name this function prints nothing; it forwards *page* to
    get_next_target() and hands the result back, normalising any falsy
    result to None.
    """
    found = get_next_target(page)
    return found or None


# Demo run at import time: print what print_all_links() returns for a sample
# sentence. The first token in the sample, 'chitrankdixit@hotmail.', is matched
# too, because the pattern in get_next_target() allows a trailing dot.
print print_all_links('hi this is a chitrankdixit@hotmail. "string" with no chitrankdixit@gmail.com link chitrankdixit@yahoo.com')


#url, endpos = print_all_links('this is a <a href="http://udacity.com">link!</a> MOre things happens here with all goes good <a href="http://www.trackleech.in/">')
#print (url,endpos)


'''url, endpos = print_all_links(get_page("http://www.xkcd.com/"))
print (url,endpos)'''
#url,endpos=get_page('http://www.xkcd.com/')
#print url,endpos
	# this is the correct program
	import re
	import urllib2
	# get_next_target() takes a page and returns every e-mail-like string found in it
	def get_next_target(page):
		"""Return every e-mail-like string found in *page*, or None if there are none.

		A match is a run of word characters, dots or hyphens, then '@', then
		another such run; fragments ending in a dot (e.g. 'name@host.') match too.

		Args:
			page: text to scan.

		Returns:
			A list of matched strings in order of appearance, or None when
			nothing matches.
		"""
		match = re.findall(r'[\w.-]+@[\w.-]+', page)
		if match:
			return match
		# No match: report None rather than an empty list.
		return None

	def get_page(s):
		"""Download the resource at URL *s* and extract e-mail-like strings from it.

		Args:
			s: URL passed straight to urllib2.urlopen().

		Returns:
			Whatever print_all_links() returns for the downloaded body: a list
			of matched strings, or None when nothing matches.

		Errors raised by urllib2.urlopen() or by read() are not caught here.
		"""
		sourceFile = urllib2.urlopen(s)
		try:
			contents = sourceFile.read()
		finally:
			# Release the connection even if read() fails part-way through.
			sourceFile.close()
		return print_all_links(contents)





	def print_all_links(page):
		"""Return the e-mail-like strings found in *page*, or None if there are none.

		Despite its name this function prints nothing; it forwards *page* to
		get_next_target() and returns that result, mapping any falsy result
		to None.
		"""
		url = get_next_target(page)
		if url:
			return url
		return None



	# Demo run: print what print_all_links() returns for a sample sentence.
	# The first token in the sample, 'chitrankdixit@hotmail.', is matched too,
	# because the pattern in get_next_target() allows a trailing dot.
	print print_all_links('hi this is a chitrankdixit@hotmail. "string" with no chitrankdixit@gmail.com link chitrankdixit@yahoo.com')


	#url, endpos = print_all_links('this is a <a href="http://udacity.com">link!</a> MOre things happens here with all goes good <a href="http://www.trackleech.in/">')
	#print (url,endpos)


	'''url, endpos = print_all_links(get_page("http://www.xkcd.com/"))
	print (url,endpos)'''
	#url,endpos=get_page('http://www.xkcd.com/')
	#print url,endpos