Created
December 12, 2020 15:41
-
-
Save joshuaulrich/336ece2a1e6af8474b90c8cdfbf7a14e to your computer and use it in GitHub Desktop.
Parse CRSP file into separate files by symbol
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/awk -f
# GNU Awk 3.1.8
#
# Tutorial:
# http://www.grymoire.com/Unix/Awk.html
#
# Split a comma-separated input file into one "<symbol>.csv" file per
# distinct value of the first column. Each output row holds a combined
# "YYYY-MM-DD HH:MM:SS" datetime (built from columns 2 and 3) followed by
# columns 4 through 9.
#
# NOTE(review): rows for a given symbol are assumed to be contiguous in the
# input. If a symbol reappears later, its file is reopened with ">" and the
# earlier rows are overwritten -- confirm the input is grouped by symbol.
BEGIN { FS = ","; OFS = ","; filename = ""; outfile = "" }

# The first record is the header line; remember it for every symbol file.
NR == 1 {
    # Drops the first 7 characters of the header line -- presumably the name
    # of the symbol column plus its trailing comma; verify against the input.
    # NOTE(review): the remaining header text is written unchanged, although
    # the data rows merge columns 2 and 3 into a single datetime column.
    header = substr($0, 8)
    next
}

{
    if (filename != $1) {               # first column changed: new symbol
        # Close the file that was actually opened (with the ".csv" suffix);
        # closing the bare symbol name would leave every output file open.
        if (outfile != "") {
            close(outfile)
        }
        filename = $1
        outfile = filename ".csv"
        print header > outfile          # start the new file with the header
    }

    # Extract year, month and day from the 2nd column (assumes YYYYMMDD).
    yyyy = substr($2, 1, 4)
    mm   = substr($2, 5, 2)
    dd   = substr($2, 7, 2)

    # Time of day from the 3rd column; a 7-character value such as H:MM:SS
    # is missing its leading zero, so prepend one.
    hms = $3
    if (length($3) == 7) {
        hms = "0" hms
    }

    # Datetime column followed by the remaining columns, joined by OFS.
    print yyyy "-" mm "-" dd " " hms, $4, $5, $6, $7, $8, $9 >> outfile
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment