Skip to content

Instantly share code, notes, and snippets.

@neerav1985
Created December 13, 2013 16:34
Show Gist options
  • Save neerav1985/7947014 to your computer and use it in GitHub Desktop.
Save neerav1985/7947014 to your computer and use it in GitHub Desktop.
Python: Fetch data from an Oracle and Pandas for data analysis
import cx_Oracle
import logging
logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s', level=logging.DEBUG)
if __name__ == "__main__":
conn = cx_Oracle.connect(user,password,server)
cursor = conn.cursor()
cursor.arraysize = 10000
for city in range("New York", "San Francisco", "Chicago", "Boston"):
logging.info("Getting data for city:" + str(city))
#sql to be executed
sql = "select city, employee_age, employee_gender, employment_age, salary from employee_salary where city = " + str(city)
cursor.execute( sql )
#create dataframe from the data returned
df = DataFrame(cursor.fetchall())
#map the salary value to a lambda function to create the bucket
# the lambda function will create bucket in a step of 50000. For e.g. 0<x<50000, 50000<x<100000 and store in the new column 'bucket'
df['bucket'] = df[4].map(lambda x:str(int(math.floor(x/50000.0)*50000)) + "< x <" + str(int(math.ceil(x/50000.0)*50000)))
#group by
df2 = df.groupby([0,1,2,3,'bucket'])
#aggregate and calculate sum, mean and count
final = DataFrame({'sum(salary)':df2[4].sum(),'mean(salary)':df2[4].mean(), 'count(salary)':df2[4].size()}).reset_index()
logging.info("Writing data to excel for a city:" + str(city))
#write to excel
final.to_excel(writer,'Sheet' + str(city))
writer.save()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment