Created
September 21, 2015 13:10
-
-
Save ElOceanografo/644cc616f1d0e7ea4419 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse
import os

import numpy as np
import pandas as pd
# Command-line interface: one or more input file paths, collected in
# ``args.input``.
parser = argparse.ArgumentParser()
parser.add_argument("input", nargs="+")

# Number of metadata lines that precede the data rows of every ensemble.
ENSEMBLE_HEADER_LENGTH = 6

# Names assigned, in order, to the whitespace-separated values on each data
# row of an ensemble.
COLUMN_NAMES = [
    "depth",
    "magnitude",
    "direction",
    "east",
    "north",
    "up",
    "error",
    "amplitude1",
    "amplitude2",
    "amplitude3",
    "amplitude4",
    "pct_good",
    "discharge",
]

# Columns checked for the -32768 missing-value sentinel: everything after
# "depth" and before "error" (i.e. "magnitude" through "up").
# NOTE(review): "error" is not included in this slice -- confirm whether that
# column can also carry the -32768 sentinel.
VELOCITY_VARIABLES = COLUMN_NAMES[1:COLUMN_NAMES.index("error")]
def get_metadata(metadata_lines):
    '''
    Extract the metadata of one ensemble from its header lines.

    Each line is split on whitespace and the following tokens are read:

    * line 1, tokens 1-7: year, month, day, hour, minute, second and
      hundredth (integers); token 8: ensemble (integer); token 13: temp
      (float)
    * line 3, token 1: distance (float)
    * line 4, tokens 1-2: lat and lon (floats)
    * line 6, token 1: nbins (integer)

    Lines 2 and 5 are not used.

    Returns a ``(names, values)`` tuple of two parallel lists; ``nbins`` is
    always the last entry.
    '''
    tokens = [line.strip().split() for line in metadata_lines]
    first_line = tokens[0]

    year, month, day, hour, minute, second, hundredth = (
        int(tok) for tok in first_line[:7])
    ensemble = int(first_line[7])
    temp = float(first_line[12])
    distance = float(tokens[2][0])
    lat, lon = (float(tok) for tok in tokens[3][:2])
    nbins = int(tokens[5][0])

    # Keep each name next to its value so the two output lists cannot drift
    # out of step.
    pairs = [
        ("year", year),
        ("month", month),
        ("day", day),
        ("hour", hour),
        ("minute", minute),
        ("second", second),
        ("hundredth", hundredth),
        ("ensemble", ensemble),
        ("temp", temp),
        ("lat", lat),
        ("lon", lon),
        ("distance", distance),
        ("nbins", nbins),
    ]
    names = [name for name, _ in pairs]
    values = [value for _, value in pairs]
    return (names, values)
def read_ascii(filename):
    '''
    Read an ASCII file of ensembles into a single pandas DataFrame.

    The first three lines of the file are skipped.  The rest is read as a
    sequence of ensembles, each made of ENSEMBLE_HEADER_LENGTH metadata lines
    (parsed by get_metadata) followed by ``nbins`` rows of whitespace-separated
    numbers, one value per entry of COLUMN_NAMES.  Every metadata value is
    added as a column and repeated on each row of its ensemble.

    Missing-value sentinels (-32768 in the VELOCITY_VARIABLES columns,
    2147483647 in "discharge", 30000 in "lat"/"lon") are replaced by NaN, and
    every row that contains a NaN is then dropped.

    Parameters
    ----------
    filename : str
        Path of the ASCII file to read.

    Returns
    -------
    pandas.DataFrame
        One row per remaining data row, with the COLUMN_NAMES columns followed
        by the metadata columns.  The index is the within-ensemble row number,
        so labels repeat across ensembles.
    '''
    # Use a context manager so the file handle is closed even if parsing
    # fails further down.
    with open(filename, "r") as f:
        lines = f.readlines()

    # Blank lines at the end of the file would otherwise be handed to
    # get_metadata as if they were an ensemble header.
    while lines and not lines[-1].strip():
        lines.pop()

    ensembles = []
    i = 3  # index of the first ensemble header line
    while i < len(lines):
        names, values = get_metadata(lines[i:i + ENSEMBLE_HEADER_LENGTH])
        start = i + ENSEMBLE_HEADER_LENGTH
        # get_metadata returns nbins as the last value: the number of data
        # rows that belong to this ensemble.
        stop = start + int(values[-1])
        rows = [line.strip().split() for line in lines[start:stop]]
        ensemble = pd.DataFrame(np.array(rows, dtype=float),
                                columns=COLUMN_NAMES)
        for name, value in zip(names, values):
            ensemble[name] = value
        ensembles.append(ensemble)
        i = stop

    data = pd.concat(ensembles)
    # Boolean-frame assignment: only cells in the selected columns that equal
    # the sentinel are overwritten with NaN.
    data[data[VELOCITY_VARIABLES] == -32768] = np.nan
    data[data[["discharge"]] == 2147483647] = np.nan
    data[data[["lat", "lon"]] == 30000] = np.nan
    return data.dropna()
if __name__ == '__main__':
    # Convert every input file named on the command line to a CSV file written
    # next to it, with the extension replaced by ".csv".
    args = parser.parse_args()
    for filename in args.input:
        # Function-call form works on both Python 2 and Python 3.
        print(filename)
        data = read_ascii(filename)
        # splitext handles extensions of any length (or none), where slicing
        # off the last four characters only worked for ".xyz"-style names.
        csv_name = os.path.splitext(filename)[0] + ".csv"
        data.to_csv(csv_name, index=False, na_rep="NA")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment