Skip to content

Instantly share code, notes, and snippets.

@raulgarreta
Created February 10, 2015 16:32
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save raulgarreta/2f74ef00e733349052c1 to your computer and use it in GitHub Desktop.
Save raulgarreta/2f74ef00e733349052c1 to your computer and use it in GitHub Desktop.
MonkeyLearn & Import.io webinar
import json
import pandas as pd
import requests
# Your API key goes here. It is sent as the token in the Authorization header
# of every classification request made further down in this script.
API_KEY = ''

# Load the scraped listings, skipping the first line of the file. Malformed
# rows are dropped with a warning instead of aborting the whole load.
# `error_bad_lines` was deprecated in pandas 1.3 and removed in 2.0 in favour
# of `on_bad_lines`; try the current keyword first and fall back to the legacy
# one so the script runs on both old and new pandas releases.
try:
    raw_df = pd.read_csv('linkedin-sf.csv', encoding='utf-8', skiprows=1,
                         on_bad_lines='warn')
except TypeError:
    raw_df = pd.read_csv('linkedin-sf.csv', encoding='utf-8', skiprows=1,
                         error_bad_lines=False)

# Keep only the three columns used below and give them readable names.
df = raw_df[['text_1', 'title_link/_text', 'snippet_text']]
df.columns = ['location', 'title', 'description']

# One text per row: "<title> <description>". Despite its name this is a plain
# Python list, not a DataFrame. Missing values are replaced with '' first:
# otherwise the concatenation yields NaN, which json.dumps would emit as a
# bare `NaN` token (not valid JSON) when the texts are posted to the API.
content_df = (df['title'].fillna('') + ' ' + df['description'].fillna('')).tolist()
# Labels returned by the classifier, one per entry of `content_df`, in order.
categories = []

# Number of texts sent to the API in a single request.
step = 150

# `range` (rather than the Python 2-only `xrange`) keeps the script runnable
# on both Python 2 and Python 3.
for start in range(0, len(content_df), step):
    end = start + step
    reply = requests.post(
        "https://api.monkeylearn.com/api/v1/categorizer/cl_4PFzSWVR/classify_batch_text/",
        data=json.dumps({
            'text_list': content_df[start:end]
        }),
        headers={
            'Authorization': 'Token {}'.format(API_KEY),
            'Content-Type': 'application/json'
        },
        # Without a timeout a stalled connection would block the script forever.
        timeout=60,
    )
    # Fail loudly on an HTTP error (bad API key, rate limit, server error)
    # instead of hitting an opaque KeyError on 'result' below.
    reply.raise_for_status()
    response = reply.json()

    # Each entry of 'result' corresponds to one submitted text; keep the label
    # of its first element.
    categories.extend(category[0]['label'] for category in response['result'])
# Turn the collected labels into a single-column frame and attach it to the
# listings; the join aligns the two frames on their index.
category_column = pd.DataFrame(categories, columns=['category'])
augmented_df = df.join(category_column)

# Write the augmented listings as UTF-8 CSV, without the index column and
# without a header row.
augmented_df.to_csv(
    'linkedin-sf-aug.csv',
    encoding='utf-8',
    index=False,
    header=False,
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment