Created
October 15, 2013 22:00
-
-
Save john9631/6999320 to your computer and use it in GitHub Desktop.
Correlation Exercise: Programming Assignment 2 / 3, Computing for Data Analysis, Coursera JH
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"metadata": { | |
"language": "Julia", | |
"name": "" | |
}, | |
"nbformat": 3, | |
"nbformat_minor": 0, | |
"worksheets": [ | |
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"using DataFrames\n", | |
"# IDs 1 through 332, the same range the corr functions below use for file numbers.\n", | |
"ID = [1:332]\n", | |
"# Element count taken as total bytes over bytes per element; the division\n", | |
"# produces a float, which is why the recorded output reads 332.0.\n", | |
"sizeof(ID)/sizeof(ID[1])" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 2, | |
"text": [ | |
"332.0" | |
] | |
} | |
], | |
"prompt_number": 2 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# Correlation between columns 2 and 3 of each numbered CSV file in a directory.\n", | |
"#\n", | |
"# directory - folder holding files named 001.csv, 002.csv, ... (3-digit, zero padded)\n", | |
"# threshold - a file contributes only when it has more than this many rows with\n", | |
"#             both columns present (default 0)\n", | |
"# ids       - file numbers to read; defaults to 1:332, the range that was\n", | |
"#             previously hard-coded, so existing calls behave as before\n", | |
"#\n", | |
"# Returns a Float64 vector with one correlation per qualifying file, in ids order.\n", | |
"function corr(directory, threshold = 0, ids = 1:332)\n", | |
"    pcorr = Float64[]\n", | |
"    for i in ids\n", | |
"        df = readtable(@sprintf(\"%s/%03d.csv\",directory,i))\n", | |
"        v2 = Float64[]\n", | |
"        v3 = Float64[]\n", | |
"        for j = 1:size(df)[1]\n", | |
"            # keep a row only when neither value is NA; && short-circuits, so\n", | |
"            # column 3 is not inspected once column 2 is known to be missing\n", | |
"            if !isna(df[j,2]) && !isna(df[j,3])\n", | |
"                push!(v2, df[j,2])\n", | |
"                push!(v3, df[j,3])\n", | |
"            end\n", | |
"        end\n", | |
"        # strictly greater than: a file with exactly threshold complete rows is skipped\n", | |
"        if length(v2) > threshold\n", | |
"            push!(pcorr, cor(v2,v3))\n", | |
"        end\n", | |
"    end\n", | |
"    return pcorr\n", | |
"end\n", | |
"\n", | |
"# Correlations for the files under specdata with the default threshold of 0, kept in pcorr.\n", | |
"pcorr = corr(\"specdata\")\n", | |
"# Time another full run with the same arguments; the trailing ; suppresses display of the result.\n", | |
"@time corr(\"specdata\") ;" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"elapsed time: 1" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
".676303646 seconds (511259896 bytes allocated)\n" | |
] | |
} | |
], | |
"prompt_number": 2 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
" # Variant of the per-file correlation that reads each CSV with readcsv and\n", | |
" # treats the literal text NA in column 2 or 3 as a missing value.\n", | |
" #\n", | |
" # directory - folder holding files named 001.csv, 002.csv, ... (3-digit, zero padded)\n", | |
" # threshold - a file contributes only when it has more than this many rows with\n", | |
" #             both columns present (default 0)\n", | |
" # ids       - file numbers to read; defaults to 1:332, the range that was\n", | |
" #             previously hard-coded, so existing calls behave as before\n", | |
" #\n", | |
" # Returns a Float64 vector with one correlation per qualifying file, in ids order.\n", | |
" function corr2(directory, threshold = 0, ids = 1:332)\n", | |
"     pcorr = Float64[]\n", | |
"     for i in ids\n", | |
"         # the first element of the readcsv result holds the data cells; take it\n", | |
"         # once per file instead of re-indexing it on every cell access\n", | |
"         data = readcsv(@sprintf(\"%s/%03d.csv\",directory,i),has_header=true)[1]\n", | |
"         v2 = Float64[]\n", | |
"         v3 = Float64[]\n", | |
"         for j = 1:size(data,1)\n", | |
"             # && short-circuits: column 3 is only compared when column 2 is present\n", | |
"             if data[j,2] != \"NA\" && data[j,3] != \"NA\"\n", | |
"                 push!(v2, float(data[j,2]))\n", | |
"                 push!(v3, float(data[j,3]))\n", | |
"             end\n", | |
"         end\n", | |
"         # strictly greater than: a file with exactly threshold complete rows is skipped\n", | |
"         if length(v2) > threshold\n", | |
"             push!(pcorr, cor(v2,v3))\n", | |
"         end\n", | |
"     end\n", | |
"     return pcorr\n", | |
" end\n", | |
" \n", | |
" # Keep the correlations for files with more than 400 rows where both columns are present.\n", | |
" pcorr = corr2(\"specdata\",400)\n", | |
" # The timed run uses the default threshold of 0, so it is not the same call as the one stored above.\n", | |
" @time corr2(\"specdata\") ;" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"elapsed time: 1" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
".157388875 seconds (374348556 bytes allocated)\n" | |
] | |
} | |
], | |
"prompt_number": 2 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
" # Variant of the per-file correlation that reads every data line as one string\n", | |
" # through readdlm and pulls the two values out of that string itself.\n", | |
" #\n", | |
" # directory - folder holding files named 001.csv, 002.csv, ... (3-digit, zero padded)\n", | |
" # threshold - a file contributes only when it has more than this many rows with\n", | |
" #             both values present (default 0)\n", | |
" # ids       - file numbers to read; defaults to 1:332, the range that was\n", | |
" #             previously hard-coded, so existing calls behave as before\n", | |
" #\n", | |
" # Returns a Float64 vector with one correlation per qualifying file, in ids order.\n", | |
" function corr3(directory, threshold = 0, ids = 1:332)\n", | |
"     pcorr = Float64[]\n", | |
"     for i in ids\n", | |
"         rows = readdlm(@sprintf(\"%s/%03d.csv\",directory,i),has_header=true)[1]\n", | |
"         v2 = Float64[]\n", | |
"         v3 = Float64[]\n", | |
"         for j = 1:size(rows,1)\n", | |
"             # a row looks like: \"2004-06-24\",NA,NA,1\n", | |
"             # splitting on commas replaces the manual scan that started at a fixed\n", | |
"             # character offset, so the width of the first field no longer matters\n", | |
"             fields = split(rows[j], ',')\n", | |
"             # rows with fewer than three fields are skipped rather than parsed;\n", | |
"             # a row is used only when neither the 2nd nor the 3rd field is NA\n", | |
"             if length(fields) >= 3 && fields[2] != \"NA\" && fields[3] != \"NA\"\n", | |
"                 push!(v2, float(fields[2]))\n", | |
"                 push!(v3, float(fields[3]))\n", | |
"             end\n", | |
"         end\n", | |
"         # strictly greater than: a file with exactly threshold complete rows is skipped\n", | |
"         if length(v2) > threshold\n", | |
"             push!(pcorr, cor(v2,v3))\n", | |
"         end\n", | |
"     end\n", | |
"     return pcorr\n", | |
" end\n", | |
" \n", | |
" # Keep the correlations for files with more than 400 rows where both values are present.\n", | |
" pcorr = corr3(\"specdata\",400)\n", | |
" # The timed run uses the default threshold of 0, so it is not the same call as the one stored above.\n", | |
" @time corr3(\"specdata\") ;" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"elapsed time: 1" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
".137150839 seconds (479843724 bytes allocated)\n" | |
] | |
} | |
], | |
"prompt_number": 4 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 17 | |
} | |
], | |
"metadata": {} | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment