Skip to content

Instantly share code, notes, and snippets.

@vkurup
Created April 15, 2016 19:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save vkurup/b476d1bd0f2681935789ef8ff7c69ab9 to your computer and use it in GitHub Desktop.
Save vkurup/b476d1bd0f2681935789ef8ff7c69ab9 to your computer and use it in GitHub Desktop.
import urllib
import boto3
import requests
# Executed at module import, before any handler invocation: emit a marker line
# and build the S3 handles once so every call to lambda_handler reuses them.
print('Loading function')
# Low-level client; the handler uses it to download the source object
# (get_object).
s3 = boto3.client('s3')
# Resource-style handle; the handler uses it to upload the filtered text
# (Bucket(...).put_object).
# NOTE(review): a single handle could presumably serve both calls - confirm
# before consolidating.
s3_resource = boto3.resource('s3')
def _decode_object_key(raw_key):
    """Return the URL-decoded S3 object key from an event record as text.

    The key arrives URL-encoded (``+`` for spaces, ``%XX`` escapes for UTF-8
    bytes).  Works on both Python 2 and Python 3.
    """
    try:
        from urllib.parse import unquote_plus  # Python 3
    except ImportError:
        from urllib import unquote_plus  # Python 2
    if str is bytes and not isinstance(raw_key, str):
        # Python 2 only: unquoting a ``unicode`` value turns every %XX escape
        # into its own code point, which then cannot be decoded as UTF-8.
        # Encode first so the escapes are expanded at the byte level.
        raw_key = raw_key.encode('utf8')
    key = unquote_plus(raw_key)
    if isinstance(key, bytes):
        # Python 2 hands back a byte string; Python 3 already returns text.
        key = key.decode('utf8')
    return key


def lambda_handler(event, context):
    """Process the S3 object named by the first record of ``event``.

    Steps, in order:
      1. Download a list of the 1000 most common English words.
      2. Read the object referenced by the event from S3.
      3. Print its word count.
      4. Post its text to a sentiment-analysis API and print the reply.
      5. Write a copy containing only the words found in the common-word
         list to the ``lambda-vinod-dest`` bucket under ``'dest' + key``.

    Args:
        event: S3 notification event; only ``event['Records'][0]`` is read.
        context: Lambda context object (unused).

    Returns:
        None.

    Raises:
        requests.HTTPError: if the word list cannot be downloaded.
        Exception: anything raised while reading the source object or by the
            later steps is propagated to the caller.
    """
    record = event['Records'][0]['s3']
    bucket = record['bucket']['name']
    key = _decode_object_key(record['object']['key'])
    destbucket = 'lambda-vinod-dest'
    thousand_words_url = 'https://gist.githubusercontent.com/deekayen/4148741/raw/01c6252ccc5b5fb307c1bb899c95989a8a284616/1-1000.txt'

    word_list_response = requests.get(thousand_words_url)
    # Fail loudly on an HTTP error instead of treating an error page as the
    # vocabulary, which would silently filter out almost every word.
    word_list_response.raise_for_status()
    # A set gives O(1) membership tests for the per-word filter below.
    thousand_words = set(word_list_response.content.split())

    # Only the S3 read is guarded: the message below is specific to that call
    # and would be misleading for failures in the later steps.
    try:
        response = s3.get_object(Bucket=bucket, Key=key)
        content = response['Body'].read()
    except Exception as e:
        print(e)
        print('Error getting object {} from bucket {}. Make sure they exist and your bucket is in the same region as this function.'.format(key, bucket))
        # Bare raise keeps the original traceback (``raise e`` drops it on
        # Python 2).
        raise

    # how many words
    word_list = content.split()
    print('Word count: {}'.format(len(word_list)))

    # sentiment analysis
    # NOTE(review): the object's text is sent over plain HTTP to a third
    # party - confirm that is acceptable for the data being processed.
    r = requests.post('http://text-processing.com/api/sentiment/',
                      data={'text': content})
    print(r.text)

    # Remove any words not in the top 1000 and save to new bucket.
    # ``content`` is bytes, so join with a bytes separator (on Python 2
    # ``b' '`` is the same as ``' '``).
    fixed_content = b' '.join(
        word for word in word_list if word.lower() in thousand_words
    )
    s3_resource.Bucket(destbucket).put_object(Key='dest' + key, Body=fixed_content)
    return
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment