Skip to content

Instantly share code, notes, and snippets.

@AdroitAnandAI
Last active June 11, 2021 17:21
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save AdroitAnandAI/9febfdb1bab17c277b49f254f549b9bf to your computer and use it in GitHub Desktop.
Save AdroitAnandAI/9febfdb1bab17c277b49f254f549b9bf to your computer and use it in GitHub Desktop.
Read S3 files from Spark EMR Notebook
import os
import boto3
import pandas as pd
import sys
# Pick the in-memory text buffer that matches the running interpreter:
# Python 2 ships it in the StringIO module, Python 3 in io.
if sys.version_info[0] < 3:
    from StringIO import StringIO  # Python 2.x
else:
    from io import StringIO  # Python 3.x
######################################################
# Credentials are read from the environment instead of being hard-coded, so
# secrets never end up in a saved notebook, a shared gist or source control.
# Export AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY before starting the kernel.
# When both are unset the values are None and boto3 falls back to its default
# credential chain (shared config files, instance/EMR role, ...).
aws_id = os.environ.get('AWS_ACCESS_KEY_ID')
aws_secret = os.environ.get('AWS_SECRET_ACCESS_KEY')
client = boto3.client(
    's3',
    aws_access_key_id=aws_id,
    aws_secret_access_key=aws_secret,
)
def openFile(fileName, bucket='spark-anand'):
    """Download a CSV object from S3 and parse it into a DataFrame.

    Args:
        fileName: Key of the CSV object inside the bucket.
        bucket: Name of the S3 bucket to read from. Defaults to
            'spark-anand', the bucket this snippet originally hard-coded,
            so existing one-argument calls behave as before.

    Returns:
        The ``pandas.DataFrame`` produced by ``pd.read_csv`` from the
        object's contents decoded as UTF-8.
    """
    response = client.get_object(Bucket=bucket, Key=fileName)
    body = response['Body']
    try:
        csv_text = body.read().decode('utf-8')
    finally:
        # Release the underlying HTTP connection even if read/decode fails.
        body.close()
    return pd.read_csv(StringIO(csv_text))
######################################################
# Load "observed_1.csv" from S3 into a DataFrame and preview its first rows.
# The original note describes this file as the S&P 500 input data set.
# NOTE(review): nothing here sorts by date -- confirm the file is already
# ordered, or sort explicitly before relying on row order.
observed = openFile("observed_1.csv")
observed.head()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment