Skip to content

Instantly share code, notes, and snippets.

@john9631
Created October 15, 2013 22:00
Show Gist options
  • Save john9631/6999320 to your computer and use it in GitHub Desktop.
Save john9631/6999320 to your computer and use it in GitHub Desktop.
Correlation Exercise: Programming Assignment 2 / 3, Computing for Data Analysis, Coursera JH
{
"metadata": {
"language": "Julia",
"name": ""
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"using DataFrames\n",
"# ID holds the integers 1 through 332. Dividing the array's size in bytes by the\n",
"# size in bytes of its element type gives the element count as a Float64.\n",
"ID = [1:332]\n",
"sizeof(ID) / sizeof(eltype(ID))"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 2,
"text": [
"332.0"
]
}
],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# corr(directory, threshold = 0, ids = 1:332)\n",
"#\n",
"# For every integer i in `ids`, load <directory>/NNN.csv (NNN = i zero-padded to\n",
"# three digits) with readtable and correlate column 2 against column 3, using\n",
"# only the rows in which neither value is NA.\n",
"#\n",
"# A file contributes a result only when its number of complete rows is strictly\n",
"# greater than `threshold`. `ids` defaults to 1:332, the range that used to be\n",
"# hard-coded, so existing one- and two-argument calls behave exactly as before.\n",
"#\n",
"# Returns a Vector{Float64} with one correlation per qualifying file, in `ids` order.\n",
"function corr(directory, threshold = 0, ids = 1:332)\n",
"    pcorr = Float64[]\n",
"    for i in ids\n",
"        df = readtable(@sprintf(\"%s/%03d.csv\", directory, i))\n",
"        v2 = Float64[]\n",
"        v3 = Float64[]\n",
"        for j = 1:size(df)[1]\n",
"            # && short-circuits: column 3 is only inspected when column 2 is present\n",
"            if !isna(df[j,2]) && !isna(df[j,3])\n",
"                push!(v2, df[j,2])\n",
"                push!(v3, df[j,3])\n",
"            end\n",
"        end\n",
"        # files with too few complete rows are left out of the result entirely\n",
"        if length(v2) > threshold\n",
"            push!(pcorr, cor(v2, v3))\n",
"        end\n",
"    end\n",
"    return pcorr\n",
"end\n",
"\n",
"pcorr = corr(\"specdata\")\n",
"@time corr(\"specdata\") ;"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"elapsed time: 1"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
".676303646 seconds (511259896 bytes allocated)\n"
]
}
],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# corr2(directory, threshold = 0, ids = 1:332)\n",
"#\n",
"# Variant of the per-file correlation that loads each file with readcsv.\n",
"# For every integer i in `ids`, read <directory>/NNN.csv (NNN = i zero-padded to\n",
"# three digits), keep the rows whose column 2 and column 3 are both different\n",
"# from the string NA, convert those cells with float and correlate the columns.\n",
"#\n",
"# A file contributes a result only when its number of kept rows is strictly\n",
"# greater than `threshold`. `ids` defaults to 1:332, the range that used to be\n",
"# hard-coded, so existing one- and two-argument calls behave exactly as before.\n",
"#\n",
"# Returns a Vector{Float64} with one correlation per qualifying file, in `ids` order.\n",
"function corr2(directory, threshold = 0, ids = 1:332)\n",
"    pcorr = Float64[]\n",
"    for i in ids\n",
"        # with has_header=true the first element of the result is the data matrix;\n",
"        # take it once instead of re-indexing the result on every cell access\n",
"        data = readcsv(@sprintf(\"%s/%03d.csv\", directory, i), has_header=true)[1]\n",
"        v2 = Float64[]\n",
"        v3 = Float64[]\n",
"        for j = 1:size(data, 1)\n",
"            # missing measurements appear in the matrix as the string NA\n",
"            if data[j,2] != \"NA\" && data[j,3] != \"NA\"\n",
"                push!(v2, float(data[j,2]))\n",
"                push!(v3, float(data[j,3]))\n",
"            end\n",
"        end\n",
"        # files with too few complete rows are left out of the result entirely\n",
"        if length(v2) > threshold\n",
"            push!(pcorr, cor(v2, v3))\n",
"        end\n",
"    end\n",
"    return pcorr\n",
"end\n",
" \n",
" pcorr = corr2(\"specdata\",400)\n",
" @time corr2(\"specdata\") ;"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"elapsed time: 1"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
".157388875 seconds (374348556 bytes allocated)\n"
]
}
],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# corr3(directory, threshold = 0, ids = 1:332)\n",
"#\n",
"# Variant of the per-file correlation that parses every row by hand.\n",
"# For every integer i in `ids`, read <directory>/NNN.csv (NNN = i zero-padded to\n",
"# three digits) with readdlm and treat each entry as one raw text line such as\n",
"#     \"2004-06-24\",NA,NA,1\n",
"# The second field is taken to start at character 14, i.e. right after a\n",
"# 12-character quoted date and its comma (assumes every row has that layout).\n",
"# Rows containing NA anywhere from character 14 on are skipped; for the others\n",
"# the second and third comma-separated fields are converted with float.\n",
"#\n",
"# A file contributes a result only when its number of kept rows is strictly\n",
"# greater than `threshold`. `ids` defaults to 1:332, the range that used to be\n",
"# hard-coded, so existing one- and two-argument calls behave exactly as before.\n",
"#\n",
"# Returns a Vector{Float64} with one correlation per qualifying file, in `ids` order.\n",
"function corr3(directory, threshold = 0, ids = 1:332)\n",
"    pcorr = Float64[]\n",
"    for i in ids\n",
"        df = readdlm(@sprintf(\"%s/%03d.csv\", directory, i), has_header=true)[1]\n",
"        v2 = Float64[]\n",
"        v3 = Float64[]\n",
"        for j = 1:size(df, 1)\n",
"            line = df[j]\n",
"            # skip if you match a NA in the string\n",
"            if !ismatch(r\"NA\", line[14:end])\n",
"                # c1 / c2: index of the last character of the second / third field\n",
"                c1 = c2 = 0\n",
"                for k = 14:length(line)\n",
"                    if line[k] == ','\n",
"                        if c1 == 0\n",
"                            c1 = k - 1\n",
"                        else\n",
"                            c2 = k - 1\n",
"                            # stop at the second comma: scanning on would move c2 to a\n",
"                            # later comma and merge several fields into one slice\n",
"                            break\n",
"                        end\n",
"                    end\n",
"                end\n",
"                push!(v2, float(line[14:c1]))\n",
"                # c1 + 1 is the separating comma, so the third field starts at c1 + 2\n",
"                push!(v3, float(line[c1+2:c2]))\n",
"            end\n",
"        end\n",
"        # files with too few complete rows are left out of the result entirely\n",
"        if length(v2) > threshold\n",
"            push!(pcorr, cor(v2, v3))\n",
"        end\n",
"    end\n",
"    return pcorr\n",
"end\n",
" \n",
" pcorr = corr3(\"specdata\",400)\n",
" @time corr3(\"specdata\") ;"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"elapsed time: 1"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
".137150839 seconds (479843724 bytes allocated)\n"
]
}
],
"prompt_number": 4
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 17
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment