Skip to content

Instantly share code, notes, and snippets.

@ElOceanografo
Created September 21, 2015 13:10
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ElOceanografo/644cc616f1d0e7ea4419 to your computer and use it in GitHub Desktop.
Save ElOceanografo/644cc616f1d0e7ea4419 to your computer and use it in GitHub Desktop.
import argparse
import os

import numpy as np
import pandas as pd
# Number of metadata lines that precede the data rows of every ensemble.
ENSEMBLE_HEADER_LENGTH = 6

# Names assigned, in order, to the whitespace-separated columns of each
# ensemble's data rows.
COLUMN_NAMES = [
    "depth",
    "magnitude",
    "direction",
    "east",
    "north",
    "up",
    "error",
    "amplitude1",
    "amplitude2",
    "amplitude3",
    "amplitude4",
    "pct_good",
    "discharge",
]

# Entries 1..5 of COLUMN_NAMES: magnitude, direction, east, north, up.
# NOTE(review): "error" (entry 6) is not part of this slice -- confirm that
# leaving it out of the velocity variables is intentional.
VELOCITY_VARIABLES = COLUMN_NAMES[1:6]

# Command-line interface: one or more input file names.
parser = argparse.ArgumentParser()
parser.add_argument("input", nargs="+")
def get_metadata(metadata_lines):
    """
    Parse out relevant metadata from the six metadata lines at the start of
    every ensemble.

    Each line is stripped and split on whitespace.  Only the following
    tokens are read:

    * line 0, tokens 0-6: year, month, day, hour, minute, second, hundredth
      (parsed as int)
    * line 0, token 7: ensemble (int)
    * line 0, token 12: temp (float)
    * line 2, token 0: distance (float)
    * line 3, tokens 0-1: lat, lon (float)
    * line 5, token 0: nbins (int)

    Parameters
    ----------
    metadata_lines : sequence of str
        The header lines of a single ensemble, in file order.

    Returns
    -------
    tuple of (list of str, list)
        ``(names, values)``: two parallel lists holding the metadata field
        names and the parsed values.  ``nbins`` is always the last entry.

    Raises
    ------
    IndexError
        If a line or token that is read is missing.
    ValueError
        If a token cannot be converted to a number, or the first line has
        fewer than seven tokens.
    """
    fields = [line.strip().split() for line in metadata_lines]

    first_line = fields[0]
    # Unpacking (rather than indexing) makes a short first line fail loudly.
    year, month, day, hour, minute, second, hundredth = [
        int(token) for token in first_line[0:7]
    ]
    ensemble = int(first_line[7])
    temp = float(first_line[12])

    distance = float(fields[2][0])
    lat, lon = [float(token) for token in fields[3][0:2]]
    nbins = int(fields[5][0])

    names = ["year", "month", "day", "hour", "minute", "second", "hundredth",
             "ensemble", "temp", "lat", "lon", "distance", "nbins"]
    values = [year, month, day, hour, minute, second, hundredth,
              ensemble, temp, lat, lon, distance, nbins]
    return (names, values)
def read_ascii(filename):
    """
    Read every ensemble in an ASCII file into a single DataFrame.

    The first three lines of the file are skipped.  After that the file is
    read as a sequence of ensembles, each made of ENSEMBLE_HEADER_LENGTH
    metadata lines (parsed by get_metadata) followed by ``nbins`` data rows
    whose columns are named by COLUMN_NAMES.  The metadata values of an
    ensemble are repeated on each of its rows as extra columns.

    Sentinel values are then replaced by NaN (-32768 in the velocity
    variables, 2147483647 in "discharge", 30000 in "lat"/"lon") and every
    row that contains a NaN is dropped.

    Parameters
    ----------
    filename : str
        Path of the file to read.

    Returns
    -------
    pandas.DataFrame
        One row per remaining data row.  The per-ensemble frames are
        concatenated as-is, so the index restarts at 0 for each ensemble.

    Raises
    ------
    ValueError
        If the file contains no ensembles.
    """
    # The whole file is needed for index-based slicing below; read it in one
    # go and let the context manager close the handle straight away.
    with open(filename, "r") as f:
        lines = f.readlines()

    ensembles = []
    i = 3  # the first three lines of the file are not part of any ensemble
    while i < len(lines):
        if not lines[i].strip():
            # A blank line (e.g. trailing newlines at the end of the file)
            # cannot start an ensemble; skip it instead of handing it to
            # get_metadata, which would fail on the empty token list.
            i += 1
            continue

        names, values = get_metadata(lines[i : i + ENSEMBLE_HEADER_LENGTH])
        start = i + ENSEMBLE_HEADER_LENGTH
        stop = start + int(values[-1])  # the last metadata value is nbins

        rows = [x.strip().split() for x in lines[start:stop]]
        data = pd.DataFrame(np.array(rows, dtype=float), columns=COLUMN_NAMES)
        # Broadcast each scalar metadata value down its own column.
        for name, value in zip(names, values):
            data[name] = value
        ensembles.append(data)
        i = stop

    if not ensembles:
        raise ValueError("no ensembles found in %r" % (filename,))

    data = pd.concat(ensembles)
    # Turn the sentinel values into NaN so dropna() removes those rows.
    data[data[VELOCITY_VARIABLES] == -32768] = np.nan
    data[data[["discharge"]] == 2147483647] = np.nan
    data[data[["lat", "lon"]] == 30000] = np.nan
    return data.dropna()
# Script entry point: convert every input file named on the command line to
# a CSV file written next to it.
if __name__ == '__main__':
    args = parser.parse_args()
    for filename in args.input:
        # A parenthesised single-argument print behaves the same on
        # Python 2 and Python 3.
        print(filename)
        data = read_ascii(filename)
        # Replace whatever extension the input has (of any length, or none)
        # with ".csv" rather than blindly chopping the last four characters.
        csv_name = os.path.splitext(filename)[0] + ".csv"
        data.to_csv(csv_name, index=False, na_rep="NA")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment