# Created April 15, 2016 19:09
# Save vkurup/b476d1bd0f2681935789ef8ff7c69ab9 to your computer and use it in GitHub Desktop.
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
import urllib

import boto3
import requests

try:  # Python 3
    from urllib.parse import unquote_plus
except ImportError:  # Python 2
    from urllib import unquote_plus
# Runs at import time, so this is printed once when the module is loaded.
print('Loading function')

# Client-style S3 handle; lambda_handler uses it to read the source object.
s3 = boto3.client('s3')
# Resource-style S3 handle; lambda_handler uses it to write the filtered copy.
# TODO: two separate S3 handles are created here; a single one would
# probably suffice once the AWS API is better understood.
s3_resource = boto3.resource('s3')
def lambda_handler(event, context):
    """Report on an S3 object and store a copy restricted to common words.

    For the first record in ``event`` this:

    1. reads the object from its source bucket,
    2. prints the number of whitespace-separated words it contains,
    3. posts the content to the text-processing.com sentiment API and
       prints the reply body,
    4. writes a copy containing only words found in the common-word list
       to the ``lambda-vinod-dest`` bucket under the key ``'dest' + key``.

    Args:
        event: S3 notification payload; only ``event['Records'][0]`` is read.
        context: Lambda context object (unused).

    Returns:
        None.

    Raises:
        Exception: any error from the S3 read is printed and re-raised.
            Errors from the HTTP calls or the upload propagate unchanged.
    """
    s3_info = event['Records'][0]['s3']
    bucket = s3_info['bucket']['name']
    key = _decode_object_key(s3_info['object']['key'])
    destbucket = 'lambda-vinod-dest'
    thousand_words_url = 'https://gist.githubusercontent.com/deekayen/4148741/raw/01c6252ccc5b5fb307c1bb899c95989a8a284616/1-1000.txt'
    thousand_words = _load_common_words(thousand_words_url)

    # Only the S3 read lives in the try body, so the message below is printed
    # solely for the failure it actually describes.
    try:
        response = s3.get_object(Bucket=bucket, Key=key)
        content = response['Body'].read()
    except Exception as e:
        print(e)
        print('Error getting object {} from bucket {}. Make sure they exist and your bucket is in the same region as this function.'.format(key, bucket))
        raise  # bare raise keeps the original traceback

    # how many words
    word_list = content.split()
    print('Word count: {}'.format(len(word_list)))

    # sentiment analysis
    r = requests.post('http://text-processing.com/api/sentiment/',
                      data={'text': content},
                      timeout=_HTTP_TIMEOUT_SECONDS)
    print(r.text)

    # Remove any words not in the top 1000 and save to new bucket.
    # Everything stays as bytes (object body and word list alike), so the
    # join must use a bytes separator to work on Python 3 as well as 2.
    fixed_content = b' '.join(
        word for word in word_list if word.lower() in thousand_words)
    s3_resource.Bucket(destbucket).put_object(Key='dest' + key,
                                              Body=fixed_content)


# Upper bound, in seconds, for each outbound HTTP request so a stalled remote
# host cannot hold the invocation open indefinitely.
_HTTP_TIMEOUT_SECONDS = 10

# url -> frozenset of lower-cased words (bytes); filled on first use so later
# calls within the same process skip the download.
_COMMON_WORDS_CACHE = {}


def _decode_object_key(raw_key):
    """Return the URL-decoded object key as text on both Python 2 and 3."""
    if not isinstance(raw_key, str):
        # Python 2 ``unicode``: unquote the UTF-8 bytes, not the code points.
        raw_key = raw_key.encode('utf8')
    decoded = unquote_plus(raw_key)
    if isinstance(decoded, bytes):
        # Python 2 ``str`` is bytes; Python 3 already returned decoded text.
        decoded = decoded.decode('utf8')
    return decoded


def _load_common_words(url):
    """Download the word list at *url* once and return it as a frozenset."""
    if url not in _COMMON_WORDS_CACHE:
        reply = requests.get(url, timeout=_HTTP_TIMEOUT_SECONDS)
        # Fail loudly rather than treating an error page as the word list.
        reply.raise_for_status()
        # Lower-case the entries because candidates are compared via
        # ``word.lower()``; a capitalized entry could otherwise never match.
        _COMMON_WORDS_CACHE[url] = frozenset(
            entry.lower() for entry in reply.content.split())
    return _COMMON_WORDS_CACHE[url]
# Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment