# erikrichardlarson/quick_detect.py

## quick_detect.py
from marketorestpython.client import MarketoClient
import pandas as pd
import numpy as np

class MarketoQueue:
    """Fetch the leads of one Marketo list and score them with spam heuristics.

    Typical use: call ``receive_leads()`` to load the list into
    ``self.lead_df``, then ``score_leads()`` to add one 0/1 column per
    heuristic plus a ``spam_score`` total. Leads scoring at least 1 are
    exposed as ``self.spam_leads``.

    The heuristics read the ``firstName``, ``lastName`` and ``company``
    columns and ``score_leads`` indexes the frame by ``id``, so ``fields``
    needs to include those names.
    """

    # Indicator columns written by _label_spam. score_leads sums exactly
    # these, so text columns such as fullName never enter the total.
    SPAM_FLAG_COLUMNS = (
        'blacklisted_companies',
        'blacklisted_names',
        'contains_profanity',
        'short_company_name',
        'numbers_in_name',
        'first_last_null',
        'first_equals_last',
    )

    def __init__(self, marketo_list_id='', munchkin_id='',
                 marketo_client_id='', marketo_client_secret='', fields=None):
        """Store the connection settings and create the Marketo client.

        Args:
            marketo_list_id: Id of the Marketo list whose leads are fetched.
            munchkin_id: Marketo Munchkin account id passed to the client.
            marketo_client_id: API client id passed to the client.
            marketo_client_secret: API client secret passed to the client.
            fields: Lead field names to request. ``None`` (the default)
                means an empty list.
        """
        self.munchkin_id = munchkin_id
        self.marketo_client_id = marketo_client_id
        self.marketo_client_secret = marketo_client_secret
        self.marketo_list = marketo_list_id
        # Copy so instances never share (or mutate) a caller's/default list.
        self.fields = list(fields) if fields is not None else []
        self.mc = MarketoClient(self.munchkin_id, self.marketo_client_id,
                                self.marketo_client_secret)

    def _build_queue(self):
        """Download the list's leads and store them as ``self.lead_df``."""
        leads = self.mc.execute(method='get_multiple_leads_by_list_id',
                                listId=self.marketo_list,
                                fields=self.fields)

        self.lead_df = pd.DataFrame.from_dict(leads)

    def _format_queue(self):
        """Add a ``fullName`` column: ``firstName``, a space, ``lastName``."""
        self.lead_df['fullName'] = (self.lead_df['firstName'] + ' '
                                    + self.lead_df['lastName'])

    def receive_leads(self):
        """Fetch the leads and derive the helper columns used for scoring."""
        self._build_queue()
        self._format_queue()

    def _label_spam(self):
        """Add one 0/1 indicator column per spam heuristic to ``self.lead_df``.

        Reads three single-column CSV files relative to the working
        directory: ``./blacklisted_companies.txt`` (column ``company``),
        ``./blacklisted_names.txt`` (column ``fullName``) and
        ``./data/profane_words.txt`` (column ``words``).
        """
        import re  # local import: only needed to escape the word list

        leads = self.lead_df
        if 'fullName' not in leads.columns:
            # Allow scoring a frame that was assigned without receive_leads().
            self._format_queue()
            leads = self.lead_df

        blacklisted_companies = pd.read_csv('./blacklisted_companies.txt')['company'].tolist()
        leads['blacklisted_companies'] = np.where(
            leads['company'].isin(blacklisted_companies), 1, 0)

        # Names are matched against the lead's full name, not its company.
        blacklisted_names = pd.read_csv('./blacklisted_names.txt')['fullName'].tolist()
        leads['blacklisted_names'] = np.where(
            leads['fullName'].isin(blacklisted_names), 1, 0)

        profane_words = [
            str(word)
            for word in pd.read_csv('./data/profane_words.txt')['words'].dropna()
        ]
        if profane_words:
            # Escape each word so regex metacharacters are matched literally;
            # na=False keeps leads without a company from being flagged.
            pattern = '|'.join(re.escape(word) for word in profane_words)
            has_profanity = leads['company'].str.contains(pattern, regex=True, na=False)
        else:
            # An empty pattern would match every company; flag nothing instead.
            has_profanity = pd.Series(False, index=leads.index)
        leads['contains_profanity'] = np.where(has_profanity, 1, 0)

        leads['short_company_name'] = np.where(leads['company'].str.len() == 1, 1, 0)

        # Flags names holding more than one run of digits across first + last.
        digit_runs = (leads['lastName'].str.count(r'\d+')
                      + leads['firstName'].str.count(r'\d+'))
        leads['numbers_in_name'] = np.where(digit_runs > 1, 1, 0)

        leads['first_last_null'] = np.where(
            leads['firstName'].isnull() & leads['lastName'].isnull(), 1, 0)
        leads['first_equals_last'] = np.where(
            leads['firstName'] == leads['lastName'], 1, 0)

    def score_leads(self):
        """Label the leads, total the indicators and collect the spam leads.

        After the call ``self.lead_df`` is indexed by ``id`` and has a
        ``spam_score`` column (the sum of the indicator columns), and
        ``self.spam_leads`` holds the rows whose score is at least 1.
        """
        self._label_spam()
        # Guarded so a repeated call works once 'id' has become the index.
        if 'id' in self.lead_df.columns:
            self.lead_df.set_index('id', inplace=True)
        flag_columns = list(self.SPAM_FLAG_COLUMNS)
        self.lead_df['spam_score'] = self.lead_df[flag_columns].sum(axis=1)
        self.spam_leads = self.lead_df[self.lead_df['spam_score'] >= 1]


def quick_detect(marketo_list_id='', munchkin_id='',
                 marketo_client_id='', marketo_client_secret=''):
    """Fetch a Marketo list, score its leads and print the suspected spam.

    Args:
        marketo_list_id: Id of the Marketo list to scan.
        munchkin_id: Marketo Munchkin account id.
        marketo_client_id: Marketo API client id.
        marketo_client_secret: Marketo API client secret.

    All arguments default to the empty strings the script previously
    hard-coded, so a bare ``quick_detect()`` call keeps working.
    """
    # The former ``creds = utils.load_creds()`` line was dropped: ``utils``
    # is never imported (NameError at runtime) and its result was unused.
    marketo_queue = MarketoQueue(marketo_list_id=marketo_list_id,
                                 munchkin_id=munchkin_id,
                                 marketo_client_id=marketo_client_id,
                                 marketo_client_secret=marketo_client_secret,
                                 fields=['id', 'email', 'firstName',
                                         'lastName', 'company'])

    marketo_queue.receive_leads()
    marketo_queue.score_leads()

    # score_leads moves 'id' into the index; reset it so every record still
    # carries the lead id needed to update that lead.
    for record in marketo_queue.spam_leads.reset_index().to_dict('records'):
        # Update each Marketo record accordingly here
        print(record)


# Script entry point: run the detection only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    quick_detect()
	from marketorestpython.client import MarketoClient
	import pandas as pd
	import numpy as np

	class MarketoQueue:
	    """Fetch the leads of one Marketo list and score them with spam heuristics.

	    Typical use: call ``receive_leads()`` to load the list into
	    ``self.lead_df``, then ``score_leads()`` to add one 0/1 column per
	    heuristic plus a ``spam_score`` total. Leads scoring at least 1 are
	    exposed as ``self.spam_leads``.

	    The heuristics read the ``firstName``, ``lastName`` and ``company``
	    columns and ``score_leads`` indexes the frame by ``id``, so ``fields``
	    needs to include those names.
	    """

	    # Indicator columns written by _label_spam. score_leads sums exactly
	    # these, so text columns such as fullName never enter the total.
	    SPAM_FLAG_COLUMNS = (
	        'blacklisted_companies',
	        'blacklisted_names',
	        'contains_profanity',
	        'short_company_name',
	        'numbers_in_name',
	        'first_last_null',
	        'first_equals_last',
	    )

	    def __init__(self, marketo_list_id='', munchkin_id='',
	                 marketo_client_id='', marketo_client_secret='', fields=None):
	        """Store the connection settings and create the Marketo client.

	        Args:
	            marketo_list_id: Id of the Marketo list whose leads are fetched.
	            munchkin_id: Marketo Munchkin account id passed to the client.
	            marketo_client_id: API client id passed to the client.
	            marketo_client_secret: API client secret passed to the client.
	            fields: Lead field names to request. ``None`` (the default)
	                means an empty list.
	        """
	        self.munchkin_id = munchkin_id
	        self.marketo_client_id = marketo_client_id
	        self.marketo_client_secret = marketo_client_secret
	        self.marketo_list = marketo_list_id
	        # Copy so instances never share (or mutate) a caller's/default list.
	        self.fields = list(fields) if fields is not None else []
	        self.mc = MarketoClient(self.munchkin_id, self.marketo_client_id,
	                                self.marketo_client_secret)

	    def _build_queue(self):
	        """Download the list's leads and store them as ``self.lead_df``."""
	        leads = self.mc.execute(method='get_multiple_leads_by_list_id',
	                                listId=self.marketo_list,
	                                fields=self.fields)

	        self.lead_df = pd.DataFrame.from_dict(leads)

	    def _format_queue(self):
	        """Add a ``fullName`` column: ``firstName``, a space, ``lastName``."""
	        self.lead_df['fullName'] = (self.lead_df['firstName'] + ' '
	                                    + self.lead_df['lastName'])

	    def receive_leads(self):
	        """Fetch the leads and derive the helper columns used for scoring."""
	        self._build_queue()
	        self._format_queue()

	    def _label_spam(self):
	        """Add one 0/1 indicator column per spam heuristic to ``self.lead_df``.

	        Reads three single-column CSV files relative to the working
	        directory: ``./blacklisted_companies.txt`` (column ``company``),
	        ``./blacklisted_names.txt`` (column ``fullName``) and
	        ``./data/profane_words.txt`` (column ``words``).
	        """
	        import re  # local import: only needed to escape the word list

	        leads = self.lead_df
	        if 'fullName' not in leads.columns:
	            # Allow scoring a frame that was assigned without receive_leads().
	            self._format_queue()
	            leads = self.lead_df

	        blacklisted_companies = pd.read_csv('./blacklisted_companies.txt')['company'].tolist()
	        leads['blacklisted_companies'] = np.where(
	            leads['company'].isin(blacklisted_companies), 1, 0)

	        # Names are matched against the lead's full name, not its company.
	        blacklisted_names = pd.read_csv('./blacklisted_names.txt')['fullName'].tolist()
	        leads['blacklisted_names'] = np.where(
	            leads['fullName'].isin(blacklisted_names), 1, 0)

	        profane_words = [
	            str(word)
	            for word in pd.read_csv('./data/profane_words.txt')['words'].dropna()
	        ]
	        if profane_words:
	            # Words are alternatives joined with a plain '|' (an escaped
	            # '\|' would only match the literal joined text). Each word is
	            # escaped so regex metacharacters match literally; na=False
	            # keeps leads without a company from being flagged.
	            pattern = '|'.join(re.escape(word) for word in profane_words)
	            has_profanity = leads['company'].str.contains(pattern, regex=True, na=False)
	        else:
	            # An empty pattern would match every company; flag nothing instead.
	            has_profanity = pd.Series(False, index=leads.index)
	        leads['contains_profanity'] = np.where(has_profanity, 1, 0)

	        leads['short_company_name'] = np.where(leads['company'].str.len() == 1, 1, 0)

	        # Flags names holding more than one run of digits across first + last.
	        digit_runs = (leads['lastName'].str.count(r'\d+')
	                      + leads['firstName'].str.count(r'\d+'))
	        leads['numbers_in_name'] = np.where(digit_runs > 1, 1, 0)

	        leads['first_last_null'] = np.where(
	            leads['firstName'].isnull() & leads['lastName'].isnull(), 1, 0)
	        leads['first_equals_last'] = np.where(
	            leads['firstName'] == leads['lastName'], 1, 0)

	    def score_leads(self):
	        """Label the leads, total the indicators and collect the spam leads.

	        After the call ``self.lead_df`` is indexed by ``id`` and has a
	        ``spam_score`` column (the sum of the indicator columns), and
	        ``self.spam_leads`` holds the rows whose score is at least 1.
	        """
	        self._label_spam()
	        # Guarded so a repeated call works once 'id' has become the index.
	        if 'id' in self.lead_df.columns:
	            self.lead_df.set_index('id', inplace=True)
	        flag_columns = list(self.SPAM_FLAG_COLUMNS)
	        self.lead_df['spam_score'] = self.lead_df[flag_columns].sum(axis=1)
	        self.spam_leads = self.lead_df[self.lead_df['spam_score'] >= 1]


	def quick_detect(marketo_list_id='', munchkin_id='',
	                 marketo_client_id='', marketo_client_secret=''):
	    """Fetch a Marketo list, score its leads and print the suspected spam.

	    Args:
	        marketo_list_id: Id of the Marketo list to scan.
	        munchkin_id: Marketo Munchkin account id.
	        marketo_client_id: Marketo API client id.
	        marketo_client_secret: Marketo API client secret.

	    All arguments default to the empty strings the script previously
	    hard-coded, so a bare ``quick_detect()`` call keeps working.
	    """
	    # The former ``creds = utils.load_creds()`` line was dropped: ``utils``
	    # is never imported (NameError at runtime) and its result was unused.
	    marketo_queue = MarketoQueue(marketo_list_id=marketo_list_id,
	                                 munchkin_id=munchkin_id,
	                                 marketo_client_id=marketo_client_id,
	                                 marketo_client_secret=marketo_client_secret,
	                                 fields=['id', 'email', 'firstName',
	                                         'lastName', 'company'])

	    marketo_queue.receive_leads()
	    marketo_queue.score_leads()

	    # score_leads moves 'id' into the index; reset it so every record still
	    # carries the lead id needed to update that lead.
	    for record in marketo_queue.spam_leads.reset_index().to_dict('records'):
	        # Update each Marketo record accordingly here
	        print(record)


	# Script entry point: run the detection only when this file is executed
	# directly. The call is indented so it actually belongs to the guard.
	if __name__ == '__main__':
	    quick_detect()