ivh/getemails.py

## getemails.py
#!/usr/bin/env python

import codecs
import lxml.html

def iter_links(lines):
    """Yield the link found on each non-blank line of *lines*.

    Surrounding whitespace is removed so that the line's trailing newline
    is not handed to the parser as part of the link; blank lines are
    skipped.
    """
    for line in lines:
        link = line.strip()
        if link:
            yield link


def extract_email(texts):
    """Return the address from the first chunk of *texts* starting with 'Email'.

    The address is the last whitespace-separated token of that chunk.
    Returns '' when no chunk starts with 'Email', so a profile without an
    email line still produces an entry instead of raising NameError.
    """
    for text in texts:
        if text.startswith('Email'):
            return text.split()[-1]
    return ''


def scrape_profile(link):
    """Parse *link* with lxml.html and return the profile's (name, email).

    *link* is passed unchanged to ``lxml.html.parse``. The name is the text
    of the second child of the first ``div`` with class ``profile_box``; the
    email is searched for in the text of its fourth child.

    Raises IndexError when the page has no such div, and ValueError when
    the div has fewer than four children.
    """
    page = lxml.html.parse(link)
    box = page.xpath('.//div[@class="profile_box"]')[0]
    # Images whose alt text is '.' or '@' stand in for those characters.
    # Give each image its character as text, then drop the <img> tags so
    # that text is merged back into the surrounding content.
    for dot in box.findall('.//img[@alt="."]'):
        dot.text = '.'
    for at in box.findall('.//img[@alt="@"]'):
        at.text = '@'
    lxml.html.etree.strip_tags(box, 'img')
    # Only the second and fourth children are used.
    _, name, _, details = list(box)[:4]
    return name.text, extract_email(details.itertext())


def main():
    """Collect the name and email of every profile listed in comlinks.txt.

    Each profile is written as ``"name" <email>`` to names_emails.dat
    (UTF-8) and echoed to stdout; the bare email goes to emails.dat.
    """
    # The context managers close all three files even if a page fails.
    with codecs.open('names_emails.dat', 'w', 'utf-8') as out, \
            open('emails.dat', 'w') as out2, \
            open('comlinks.txt') as links:
        for link in iter_links(links):
            name, email = scrape_profile(link)
            final = '"%s" <%s>' % (name, email)
            # Single-argument form prints the same on Python 2 and 3.
            print(final)
            out.write(final)
            out.write('\n')
            out2.write('%s\n' % email)


if __name__ == '__main__':
    main()
	#!/usr/bin/env python

	import codecs
	import lxml.html

	def iter_links(lines):
	    """Yield the link found on each non-blank line of *lines*.

	    Surrounding whitespace is removed so that the line's trailing newline
	    is not handed to the parser as part of the link; blank lines are
	    skipped.
	    """
	    for line in lines:
	        link = line.strip()
	        if link:
	            yield link


	def extract_email(texts):
	    """Return the address from the first chunk of *texts* starting with 'Email'.

	    The address is the last whitespace-separated token of that chunk.
	    Returns '' when no chunk starts with 'Email', so a profile without an
	    email line still produces an entry instead of raising NameError.
	    """
	    for text in texts:
	        if text.startswith('Email'):
	            return text.split()[-1]
	    return ''


	def scrape_profile(link):
	    """Parse *link* with lxml.html and return the profile's (name, email).

	    *link* is passed unchanged to ``lxml.html.parse``. The name is the text
	    of the second child of the first ``div`` with class ``profile_box``; the
	    email is searched for in the text of its fourth child.

	    Raises IndexError when the page has no such div, and ValueError when
	    the div has fewer than four children.
	    """
	    page = lxml.html.parse(link)
	    box = page.xpath('.//div[@class="profile_box"]')[0]
	    # Images whose alt text is '.' or '@' stand in for those characters.
	    # Give each image its character as text, then drop the <img> tags so
	    # that text is merged back into the surrounding content.
	    for dot in box.findall('.//img[@alt="."]'):
	        dot.text = '.'
	    for at in box.findall('.//img[@alt="@"]'):
	        at.text = '@'
	    lxml.html.etree.strip_tags(box, 'img')
	    # Only the second and fourth children are used.
	    _, name, _, details = list(box)[:4]
	    return name.text, extract_email(details.itertext())


	def main():
	    """Collect the name and email of every profile listed in comlinks.txt.

	    Each profile is written as ``"name" <email>`` to names_emails.dat
	    (UTF-8) and echoed to stdout; the bare email goes to emails.dat.
	    """
	    # The context managers close all three files even if a page fails.
	    with codecs.open('names_emails.dat', 'w', 'utf-8') as out, \
	            open('emails.dat', 'w') as out2, \
	            open('comlinks.txt') as links:
	        for link in iter_links(links):
	            name, email = scrape_profile(link)
	            final = '"%s" <%s>' % (name, email)
	            # Single-argument form prints the same on Python 2 and 3.
	            print(final)
	            out.write(final)
	            out.write('\n')
	            out2.write('%s\n' % email)


	if __name__ == '__main__':
	    main()