Skip to content

Instantly share code, notes, and snippets.

@erikrichardlarson
Created November 26, 2018 00:55
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save erikrichardlarson/85c642fffc3ac78c7920bb48ecf18194 to your computer and use it in GitHub Desktop.
Save erikrichardlarson/85c642fffc3ac78c7920bb48ecf18194 to your computer and use it in GitHub Desktop.
import re

import numpy as np
import pandas as pd
from marketorestpython.client import MarketoClient
class MarketoQueue:
    """Fetch the leads of one Marketo list and flag the ones that look like spam.

    Typical use: call ``receive_leads()`` to populate ``lead_df`` and then
    ``score_leads()`` to add the indicator columns, ``spam_score`` and
    ``spam_leads``.
    """

    # CSV files (with a header row) that supply the blacklists. Each file is
    # read with pandas and one named column is used: ``company``, ``fullName``
    # and ``words`` respectively.
    BLACKLISTED_COMPANIES_PATH = './blacklisted_companies.txt'
    BLACKLISTED_NAMES_PATH = './blacklisted_names.txt'
    PROFANE_WORDS_PATH = './data/profane_words.txt'

    # 0/1 indicator columns written by _label_spam() and summed by score_leads().
    SPAM_FLAG_COLUMNS = (
        'blacklisted_companies',
        'blacklisted_names',
        'contains_profanity',
        'short_company_name',
        'numbers_in_name',
        'first_last_null',
        'first_equals_last',
    )

    def __init__(self, marketo_list_id='', munchkin_id='',
                 marketo_client_id='', marketo_client_secret='', fields=None):
        """Store the connection settings and create the Marketo client.

        Args:
            marketo_list_id: Id of the Marketo list whose leads are fetched.
            munchkin_id: Marketo Munchkin account id.
            marketo_client_id: Client id passed to ``MarketoClient``.
            marketo_client_secret: Client secret passed to ``MarketoClient``.
            fields: Lead field names to request; defaults to an empty list.
        """
        self.munchkin_id = munchkin_id
        self.marketo_client_id = marketo_client_id
        self.marketo_client_secret = marketo_client_secret
        self.marketo_list = marketo_list_id
        # A fresh list per instance: a ``[]`` default would be shared by every
        # instance created without an explicit ``fields`` argument.
        self.fields = list(fields) if fields is not None else []
        self.mc = MarketoClient(self.munchkin_id, self.marketo_client_id,
                                self.marketo_client_secret)

    def _build_queue(self):
        """Download the list's leads into ``self.lead_df``."""
        leads = self.mc.execute(method='get_multiple_leads_by_list_id',
                                listId=self.marketo_list,
                                fields=self.fields)
        self.lead_df = pd.DataFrame.from_dict(leads)

    def _format_queue(self):
        """Add a ``fullName`` column built as '<firstName> <lastName>'."""
        self.lead_df['fullName'] = (
            self.lead_df['firstName'] + ' ' + self.lead_df['lastName']
        )

    def receive_leads(self):
        """Fetch the leads and derive the helper columns used for scoring."""
        self._build_queue()
        self._format_queue()

    def _label_spam(self):
        """Add one 0/1 indicator column per spam heuristic to ``self.lead_df``.

        The columns written are exactly those in ``SPAM_FLAG_COLUMNS``.
        """
        leads = self.lead_df
        if 'fullName' not in leads.columns:
            # The name blacklist is matched on fullName; derive it when the
            # frame was populated without going through receive_leads().
            self._format_queue()

        # dropna(): a blank row in a blacklist file must not match leads whose
        # own value is missing.
        blacklisted_companies = (
            pd.read_csv(self.BLACKLISTED_COMPANIES_PATH)['company'].dropna().tolist()
        )
        leads['blacklisted_companies'] = np.where(
            leads['company'].isin(blacklisted_companies), 1, 0)

        blacklisted_names = (
            pd.read_csv(self.BLACKLISTED_NAMES_PATH)['fullName'].dropna().tolist()
        )
        # Names are compared with the lead's full name, not its company.
        leads['blacklisted_names'] = np.where(
            leads['fullName'].isin(blacklisted_names), 1, 0)

        profane_words = (
            pd.read_csv(self.PROFANE_WORDS_PATH)['words'].dropna().astype(str).tolist()
        )
        if profane_words:
            # Escape each word so entries such as "f*ck" are matched literally
            # instead of being interpreted as regular expressions.
            pattern = '|'.join(re.escape(word) for word in profane_words)
            # na=False: a missing company is "no match" rather than NaN, which
            # np.where would otherwise treat as truthy.
            has_profanity = leads['company'].str.contains(pattern, regex=True, na=False)
            leads['contains_profanity'] = np.where(has_profanity, 1, 0)
        else:
            # An empty word list would yield an empty pattern that matches
            # every company.
            leads['contains_profanity'] = 0

        leads['short_company_name'] = np.where(leads['company'].str.len() == 1, 1, 0)

        # Count runs of digits in each name part; a missing part counts as 0 so
        # it does not hide digits found in the other part.
        digit_runs = (leads['lastName'].str.count(r'\d+').fillna(0)
                      + leads['firstName'].str.count(r'\d+').fillna(0))
        # Flag only when more than one run of digits is present overall.
        leads['numbers_in_name'] = np.where(digit_runs > 1, 1, 0)

        leads['first_last_null'] = np.where(
            leads['firstName'].isnull() & leads['lastName'].isnull(), 1, 0)
        leads['first_equals_last'] = np.where(
            leads['firstName'] == leads['lastName'], 1, 0)

    def score_leads(self):
        """Compute ``spam_score`` per lead and collect flagged leads.

        After the call ``self.lead_df`` is indexed by ``id`` and has a
        ``spam_score`` column (the number of heuristics that fired), and
        ``self.spam_leads`` holds the rows with a score of at least 1.
        """
        self._label_spam()
        # Skip re-indexing when a previous call already moved 'id' into the
        # index, so scoring can be repeated on the same frame.
        if self.lead_df.index.name != 'id':
            self.lead_df.set_index('id', inplace=True)
        # Sum only the indicator columns; helper columns such as fullName hold
        # strings and must not take part in the total.
        self.lead_df['spam_score'] = (
            self.lead_df[list(self.SPAM_FLAG_COLUMNS)].sum(axis=1)
        )
        self.spam_leads = self.lead_df[self.lead_df['spam_score'] >= 1]
def quick_detect(marketo_list_id='', munchkin_id='',
                 marketo_client_id='', marketo_client_secret=''):
    """Fetch a Marketo list, score its leads and print the ones flagged as spam.

    Args:
        marketo_list_id: Id of the Marketo list to inspect.
        munchkin_id: Marketo Munchkin account id.
        marketo_client_id: Marketo API client id.
        marketo_client_secret: Marketo API client secret.

    All arguments default to empty strings, so real values must be supplied
    (or the defaults edited) before the Marketo request can succeed.
    """
    marketo_queue = MarketoQueue(
        marketo_list_id=marketo_list_id,
        munchkin_id=munchkin_id,
        marketo_client_id=marketo_client_id,
        marketo_client_secret=marketo_client_secret,
        fields=['id', 'email', 'firstName', 'lastName', 'company'],
    )
    marketo_queue.receive_leads()
    marketo_queue.score_leads()
    # score_leads() moves 'id' into the index; reset it so every record still
    # carries the lead id needed to update the matching Marketo record.
    for record in marketo_queue.spam_leads.reset_index().to_dict('records'):
        # Update each Marketo record accordingly here
        print(record)
# Run the spam detection only when the file is executed as a script, not when
# it is imported.
if __name__ == '__main__':
    quick_detect()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment