Skip to content

Instantly share code, notes, and snippets.

@john9631
Created October 6, 2013 04:36
Show Gist options
  • Save john9631/6849570 to your computer and use it in GitHub Desktop.
Save john9631/6849570 to your computer and use it in GitHub Desktop.
R.Ass.2
{
"metadata": {
"name": "R Programming Assignment 1"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "heading",
"level": 2,
"metadata": {},
"source": "CfDA R Course Programming Assignment 2"
},
{
"cell_type": "code",
"collapsed": false,
"input": "import pandas as pd\nimport numpy as np\nimport scipy\n\ndirectory = \"/home/john/Moocs/Computing for Data Analysis JH Coursera/L2/specdata\"\nID = range(1,333)",
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": "def getmonitor(id, directory = \"\", summary = False):\n \n df = pd.read_csv('{0}/{1:03.0f}.csv'.format(directory, int(id)))\n if summary: \n print df.describe()\n return df\n\n# getmonitor(\"1\", directory, True)",
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": "def complete(directory, id=range(1,332)):\n \n data = pd.DataFrame(columns = [\"nobs\"], index = [id])\n for i in id: \n df = getmonitor(id = i, directory = directory)\n data.ix[i] = len(df.dropna())\n return data\n\nprint complete(directory, [1,20,31,50]) ",
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": " nobs\n1 117\n20 124\n31 483\n50 459\n"
}
],
"prompt_number": 3
},
{
"cell_type": "code",
"collapsed": false,
"input": "def corr(directory, threshold = 0): \n # build df then extract indexes of rows > threshold na values\n df = complete(directory)\n filelist = df[df['nobs'] > threshold].index\n\n corrs = [] \n for i in filelist:\n corrs.append(getmonitor(i,directory).corr(method='pearson').ix[0,1])\n return corrs\n\n\ncr = corr(directory, 150)\nprint cr[0:6], '\\n'\nprint pd.DataFrame(cr).describe()",
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": "[-0.018957540970254896, -0.14051254401589205, -0.043897372138784689, -0.06815956229777316, -0.12350666584148721, -0.075888144218988859] \n\n 0\ncount 234.000000\nmean 0.125253\nstd 0.218957\nmin -0.210568\n25% -0.049993\n50% 0.094626\n75% 0.268445\nmax 0.763129\n"
}
],
"prompt_number": 4
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment