# xecgr/upwork_advanced_filters.py

## upwork_advanced_filters.py
import feedparser,requests,bs4
from datetime import datetime,timedelta
import sys,os,re
from time import mktime
from smtplib import SMTP_SSL as SMTP       # this invokes the secure SMTP protocol (port 465, uses SSL)
# from smtplib import SMTP                  # use this for standard SMTP protocol   (port 25, no encryption)
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText

##mail config
# Every setting can be overridden through an environment variable so that real
# credentials never have to be written into this file; the literals below are
# only placeholder defaults.
SMTPserver = os.environ.get('UPWORK_SMTP_SERVER', 'authsmtp.mailserver.com')
USERNAME   = os.environ.get('UPWORK_SMTP_USERNAME', "username@mailserver.com")
PASSWORD   = os.environ.get('UPWORK_SMTP_PASSWORD', "your_password")
sender     = os.environ.get('UPWORK_MAIL_SENDER', 'your@email.com')

subject = "[UPWORK] Summary"
# The summary is mailed back to the sender's own address.
destination = sender

#upwork config
# RSS feed of job postings; the security token in the URL is a placeholder.
feed_url = os.environ.get(
    'UPWORK_FEED_URL',
    'https://www.upwork.com/ab/feed/topics/rss?securityToken=AUTOGENERATED_TOKEN',
)
login_url = 'https://www.upwork.com/ab/account-security/login'
username = os.environ.get('UPWORK_USERNAME', 'upwork_username')
pwd      = os.environ.get('UPWORK_PASSWORD', 'upwork_password')
# Oldest publication date still included in the summary: feed entries
# published before this date are skipped.
yesterday = datetime.now().date() - timedelta(days=1)


# Filters evaluated against the HTML page of every job. Each entry names the
# tag to look for ('element') and the keyword arguments used for the search
# ('kwargs'), plus at most one mode switch:
#   'not_present': True -> the job passes when no such element is found
#   'avg_hour__gt': N   -> hourly-rate threshold used by the avg-hour check
#   (no switch)         -> the job passes when the element is found
# The filter names double as extra column names in the summary mail.
filters = {
    'has_no_job_score': {
        'element': 'span',
        'kwargs': {'class_': 'text-muted', 'text': 'Job Success Score:'},
        'not_present': True,
    },
    # Example of a plain presence filter (disabled):
    # 'has_rising_talent': {
    #     'element': 'span',
    #     'kwargs': {'class_': 'text-muted', 'text': 'Rising Talent:'},
    # },
    'is_not_hired': {
        'element': 'span',
        'kwargs': {'class_': 'text-muted', 'text': 'Hired:'},
        'not_present': True,
    },
    'avg_hour': {
        'element': 'span',
        'kwargs': {'class_': 'text-muted', 'text': '/hr'},
        'avg_hour__gt': 15,
    },
    # Further filters can be added here following the same layout.
}


def send_mail(jobs, extra_columns=None):
    """Send the job summary as a multipart (plain text + HTML) e-mail.

    Arguments:
        jobs: iterable of dict-like job entries. Every table cell is read with
            ``job.get(column)``; missing or falsy values give an empty cell.
        extra_columns: optional iterable of extra keys rendered as columns
            after 'title', 'budget', 'summary' and 'link'.

    The SMTP server, credentials, sender, destination and subject are taken
    from the module-level mail settings. Errors raised while building or
    sending the message propagate to the caller.
    """
    # list() accepts any iterable (for instance a dict keys view) and a None
    # default avoids sharing one mutable list between calls.
    fields = ['title', 'budget', 'summary', 'link'] + list(extra_columns or [])
    ths = u'\n'.join(u"<th>{}</th>".format(f.capitalize()) for f in fields)

    job_rows = []
    text_lines = []
    for job in jobs:
        # NOTE(review): cell values are inserted into the markup unescaped.
        cells = u''.join(
            u"<td>{}</td>".format(job.get(f, None) or u'') for f in fields
        )
        job_rows.append(u"<tr>" + cells + u"</tr>")
        # Plain-text fallback for mail clients that do not render HTML.
        text_lines.append(u"{}\n{}\n".format(
            job.get('title', None) or u'', job.get('link', None) or u''
        ))
    # Plain concatenation: the rows are never used as a format template, so
    # braces inside a column name or a value cannot break the rendering.
    jobs_table = (
        u"<table><tr>" + ths + u"</tr>" + u'\n'.join(job_rows) + u"</table>"
    )

    msg = MIMEMultipart('alternative')
    msg['Subject'] = subject
    msg['From'] = sender # some SMTP servers will do this automatically, not all
    msg['To'] = destination
    text = u'\n'.join(text_lines)
    html = u"""\
        <html>
          <head></head>
          <body>
            {}
          </body>
        </html>
        """.format(jobs_table)
    # Record the MIME types of both parts - text/plain and text/html.
    part1 = MIMEText(text, "plain", "utf-8")
    part2 = MIMEText(html, 'html', "utf-8")
    # Attach parts into message container.
    # According to RFC 2046, the last part of a multipart message, in this case
    # the HTML message, is best and preferred.
    msg.attach(part1)
    msg.attach(part2)

    conn = SMTP(SMTPserver)
    try:
        # The connection is closed even when the login or the delivery fails.
        conn.set_debuglevel(False)
        conn.login(USERNAME, PASSWORD)
        conn.sendmail(sender, destination, msg.as_string())
    finally:
        conn.quit()


# A single session is used for every request so that the cookies obtained by
# the login are sent along when the job pages are fetched.
s = requests.session()
#get login page, to generate current token
headers = {
    'Accept' : 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    # Only advertise encodings that can always be decoded: 'br' needs an
    # optional brotli package and 'sdch' is not supported at all, so a server
    # honouring them could return a login page that cannot be read.
    'Accept-Encoding' : 'gzip, deflate',
    'Accept-Language' : 'es,ca;q=0.8,en;q=0.6',
    'Connection' : 'keep-alive',
    'Upgrade-Insecure-Requests' : '1',
    'User-Agent' : 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.75 Safari/537.36',
}

login_page = s.get(login_url,headers=headers)
soup = bs4.BeautifulSoup(login_page.text, "html.parser")
# The login form carries a token that has to be posted back with the
# credentials; fail with a clear message when the form is not there.
token_input = soup.find('input',id='login__token')
if token_input is None:
    raise RuntimeError(
        "login token input (id='login__token') not found at %s (HTTP %s)"
        % (login_url, login_page.status_code)
    )
login_token = token_input.attrs['value']
login_iovation = ''

payload = {
    'login[username]' : username,
    'login[password]' : pwd,
    'login[redir]' : '/home',
    'login[_token]' : login_token,
    'login[iovation]' : login_iovation
}
#login
r = s.post(login_url,payload)
# feedparser reports published_parsed as a UTC time tuple, so it has to be
# converted with timegm(); time.mktime() would read it as local time.
from calendar import timegm

# Compiled once instead of on every feed entry.
# Fixed-price postings carry "<b>Budget</b>: $NNN" in their description.
_BUDGET_RE = re.compile(r'<b>Budget</b>: \$(\d+)')
# First decimal amount (optionally prefixed with '$') in the hourly-rate text.
_RATE_RE = re.compile(r'(\$?\d+\.\d+)')

feed = feedparser.parse(feed_url)
jobs = []
for item in feed['items']:
    published = item.get('published_parsed')
    if not published:
        # Without a usable date the entry cannot be checked for recency.
        continue
    job_dt = datetime.fromtimestamp(timegm(published)).date()
    if job_dt < yesterday:
        continue
    link = item.get('link', '')
    #get budget and slice description
    budget = _BUDGET_RE.findall(item['description'])
    if budget:
        item['budget'] = budget[0]
    item['description'] = item['description'][:100] + "..."
    if not link:
        # There is no job page to inspect, so the entry is never reported.
        continue
    r = s.get(link)
    soup = bs4.BeautifulSoup(r.text, "html.parser")
    matched_filters = {}
    for filter_name, config in filters.items():
        element = config.get('element', '')
        kwargs = config.get('kwargs', {})
        if not (element or kwargs):
            # Nothing to search for: the filter is ignored.
            continue
        values = soup.find_all(element, **kwargs)
        threshold = config.get('avg_hour__gt')
        if config.get('not_present'):
            #we search by not presence of field
            matched = not values
        elif not budget and threshold:
            #avg hour filtering
            # Passes by default: the page shows no hourly rate, or no decimal
            # amount could be read from it.
            matched = True
            if values:
                avg_rate = values[0].parent.get_text()
                rates = _RATE_RE.findall(avg_rate)
                if rates:
                    item['budget'] = rates[0] + "/h"
                    # NOTE(review): despite the "__gt" name, the job is kept
                    # when the rate is NOT above the threshold - confirm that
                    # this is the intended direction.
                    matched = not (float(rates[0].replace("$", "")) > threshold)
        else:
            #default case: only check presence
            matched = bool(values)
        matched_filters[filter_name] = matched
        #set filter value to allow it appear in summary mail
        item[filter_name] = matched
    if all(matched_filters.values()):
        jobs.append(item)
#send summary mail, with those jobs that match with all filters
if jobs:
    send_mail(jobs, extra_columns=list(filters))
	# NOTE: the script ends here. A second copy of everything above, with its
	# indentation stripped, used to follow; it could not be parsed and would have
	# repeated the login, the scraping and the mail, so it was removed.