Skip to content

Instantly share code, notes, and snippets.

@charliejllewellyn
Created May 17, 2019 13:17
Show Gist options
  • Save charliejllewellyn/8418325a32af2bb0aa76248a87683497 to your computer and use it in GitHub Desktop.
Save charliejllewellyn/8418325a32af2bb0aa76248a87683497 to your computer and use it in GitHub Desktop.
import re
#from email.parser import HeaderParser
import email
import json
import operator
# NOTE(review): `boto3` is not imported in the visible part of this file —
# confirm it is imported elsewhere before this line runs.
# Firehose client; docGenerator uses it to put records on the delivery stream.
client = boto3.client('firehose')
# Comprehend client; runComp uses it for entity, key-phrase and sentiment detection.
client2 = boto3.client('comprehend')
# Not referenced anywhere in the visible code.
DataSet = []
def docGenerator(df):
    """Send one JSON-encoded record to the ``enronemail`` Firehose stream.

    Args:
        df: JSON-serialisable object to deliver. Despite the name, the
            caller in this file passes the per-email dict, not a DataFrame.

    Raises:
        TypeError: If *df* cannot be serialised by ``json.dumps``.
    """
    # Serialise the argument itself; the previous body ignored it and read
    # the module-level ``Data`` global instead, which only worked because the
    # caller happened to pass that same object.
    # Encode explicitly: ``bytes(str)`` without an encoding raises TypeError
    # on Python 3, while ``.encode`` works on both Python 2 and 3 because
    # ``json.dumps`` emits ASCII by default.
    payload = json.dumps(df).encode('utf-8')
    client.put_record(
        DeliveryStreamName='enronemail',
        Record={'Data': payload},
    )
def getHighestVal(data):
    """Return the ``'Text'`` of the highest-scoring item in *data*.

    Args:
        data: Sequence of dicts, each carrying a ``'Text'`` and a ``'Score'``
            key (the shape of the ``Entities`` / ``KeyPhrases`` lists that
            runComp passes in).

    Returns:
        The ``'Text'`` value of the item with the greatest ``'Score'``. When
        several items tie, the first one in *data* wins. An empty list is
        returned for empty input, preserving the original contract.
    """
    if not data:
        return []
    # Compare the items directly instead of going through a dict keyed by
    # text: that avoided nothing, relied on the Python-2-only ``iteritems``,
    # and let a later low-scoring duplicate text mask an earlier high score.
    return max(data, key=operator.itemgetter('Score'))['Text']
def runComp(Data):
    """Annotate *Data* in place with Comprehend results and return it.

    Reads ``Data['Message']``, keeps its first 5000 elements, and sends that
    text (as English) to three Comprehend operations in turn. The results are
    stored under ``'Entities'``, ``'KeyPhrases'`` and ``'Sentiment'``.

    Args:
        Data: Dict holding at least a ``'Message'`` entry.

    Returns:
        The same dict object, with the three keys added.
    """
    # All three calls share the same text and language arguments.
    request = {'Text': Data['Message'][:5000], 'LanguageCode': 'en'}

    entity_result = client2.detect_entities(**request)
    Data['Entities'] = getHighestVal(entity_result['Entities'])

    phrase_result = client2.detect_key_phrases(**request)
    Data['KeyPhrases'] = getHighestVal(phrase_result['KeyPhrases'])

    sentiment_result = client2.detect_sentiment(**request)
    Data['Sentiment'] = sentiment_result['Sentiment']

    return Data
def _message_text(msg):
    """Return the body of an ``email.message.Message`` as a single string.

    Non-multipart messages return their payload unchanged. For multipart
    messages the leaf parts are gathered with ``walk()``: ``text/plain``
    parts are preferred, falling back to any other ``text/*`` parts, and the
    chosen parts are joined with newlines. An empty string is returned when
    a multipart message has no text parts at all.
    """
    if not msg.is_multipart():
        return msg.get_payload()

    # Containers are skipped, so get_payload() below returns a string rather
    # than a list of sub-messages even for nested multipart mail.
    leaves = [part for part in msg.walk() if not part.is_multipart()]
    plain = [
        part.get_payload()
        for part in leaves
        if part.get_content_type() == 'text/plain'
    ]
    if plain:
        return '\n'.join(plain)
    return '\n'.join(
        part.get_payload()
        for part in leaves
        if part.get_content_maintype() == 'text'
    )


# Parse every row's raw email, enrich it with Comprehend and ship it to
# Firehose.
# NOTE(review): `df` is not defined in the visible part of this file —
# presumably a pandas DataFrame with a "message" column; confirm.
for index, row in df.iterrows():
    msg = email.message_from_string(row["message"])
    Data = {}
    Data['Date'] = msg['Date']
    Data['To'] = msg['To']
    Data['From'] = msg['From']
    Data['Subject'] = msg['Subject']
    # Previously each multipart part overwrote the one before it, so only the
    # last part was kept, a nested multipart part stored a list, and a
    # multipart message with no parts left 'Message' unset.
    Data['Message'] = _message_text(msg)
    Data = runComp(Data)
    docGenerator(Data)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment