Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save jpivarski/5660b4d5cf198198c8b90c718f324802 to your computer and use it in GitHub Desktop.
Converting Argo data from NetCDF4 to Parquet

First, I fetched all of the Argo data up through 2021 via ftp:

wget -r ftp://ftp.ifremer.fr/ifremer/argo

# wait a long time

tree ftp.ifremer.fr
└── ifremer
    └── argo
        └── geo
            ├── atlantic_ocean
            │   ├── 1997
            │   │   ├── 07
            │   │   │   ├── 19970728_prof.nc
            │   │   │   ├── 19970729_prof.nc
            │   │   │   └── 19970730_prof.nc
            │   │   ├── 08
            │   │   │   ├── 19970801_prof.nc
            │   │   │   ├── 19970802_prof.nc
            │   │   │   ├── 19970803_prof.nc
...

then I created a data-raw directory and ran the two attached Python scripts:

mkdir data-raw
python netcdf4-to-raw-2.py
python argo-from-raw.py

The first script, netcdf4-to-raw-2.py, uses the netCDF4 package to read every **/*.nc file and write its contents into raw files in data-raw. By "raw," I mean that the data in the file is byte-for-byte identical to an array in memory. These files concatenate all of the data from the NetCDF4 with "counts" arrays to indicate where one profile ends and another begins. The 24969 NetCDF4 files (135 GB) are reduced to 49 uncompressed raw files (118 GB) with no metadata, not even dtype. This is the second version of netcdf4-to-raw-2.py, after I had learned what to expect, commented out assertions (which all succeed if uncommented, but are slow), and switched from missing data handling in auxiliary arrays to just nan, following what argopy does. Also, I only included the fields used by expert level, and did the datetime conversions early.

The second script, argo-from-raw.py, uses awkward 1.9.0rc2 to load those raw files as two Awkward Arrays, standard and expert. The underlying data are memory-mapped, so it was possible to load them into an array even though I don't have 118 GB of RAM. I tried some calculations on this gigantic array and it's possible, though paging all of the data from disk to RAM took about 10 minutes for my disk, and any output arrays that are larger than my RAM can't be created. Instead of doing a demo based on memory-mapped arrays, I modified this script to just write the data to Parquet. The script predated the version 2 (v2) to_parquet function and I don't use v1 much anymore because we're finalizing v2, so it makes the Parquet file manually with to_arrow and pyarrow.parquet.ParquetWriter. Picking out columns for use_dictionary and use_byte_stream_split is just being fancy: it compresses well enough without that optimization.

The file should be world-downloadable at https://pivarski-princeton.s3.amazonaws.com/argo-floats-expert.parquet.

import glob
import numpy as np
import awkward as ak

# Memory-map every raw buffer file under data-raw/, keyed by its path.
# The filename extension encodes the dtype: B = uint8 (characters),
# i2/i4/i8 = integers, f4/f8 = floats.
buffers = {}
for ext, dtype in [
    ("B", np.uint8),
    ("i2", np.int16),
    ("i4", np.int32),
    ("i8", np.int64),
    ("f4", np.float32),
    ("f8", np.float64),
]:
    for name in glob.glob(f"data-raw/*.{ext}"):
        buffers[name] = np.memmap(name, dtype=dtype)

# Reinterpret the two time columns (stored as int64 nanoseconds) as datetimes.
for key in ("data-raw/time.i8", "data-raw/time_location.i8"):
    buffers[key] = buffers[key].view("datetime64[ns]")
# Awkward Form (as JSON) describing the "standard"-mode layout: per-profile
# scalars (position, time, identifiers, QC flags) plus a jagged "levels" list
# of pres/psal/temp measurements and their QC characters. Each "form_key" is
# the path of the raw buffer file that ak.from_buffers maps into that node;
# single-character QC fields are modeled as RegularArray(size=1) strings over
# uint8 "char" content.
standard_form = """
{
"class": "RecordArray",
"contents": {
"latitude": {
"class": "NumpyArray",
"primitive": "float64",
"form_key": "data-raw/latitude.f8"
},
"longitude": {
"class": "NumpyArray",
"primitive": "float64",
"form_key": "data-raw/longitude.f8"
},
"time": {
"class": "NumpyArray",
"primitive": "datetime64[ns]",
"form_key": "data-raw/time.i8"
},
"levels": {
"class": "ListOffsetArray",
"offsets": "i64",
"form_key": "data-raw/profiles-levels-OFFSETS.i8",
"content": {
"class": "RecordArray",
"contents": {
"pres": {
"class": "NumpyArray",
"primitive": "float32",
"form_key": "data-raw/pres.f4"
},
"pres_qc": {
"class": "RegularArray",
"size": 1,
"parameters": {"__array__": "string"},
"content": {
"class": "NumpyArray",
"primitive": "uint8",
"parameters": {"__array__": "char"},
"form_key": "data-raw/pres_qc.B"
}
},
"psal": {
"class": "NumpyArray",
"primitive": "float32",
"form_key": "data-raw/psal.f4"
},
"psal_qc": {
"class": "RegularArray",
"size": 1,
"parameters": {"__array__": "string"},
"content": {
"class": "NumpyArray",
"primitive": "uint8",
"parameters": {"__array__": "char"},
"form_key": "data-raw/psal_qc.B"
}
},
"temp": {
"class": "NumpyArray",
"primitive": "float32",
"form_key": "data-raw/temp.f4"
},
"temp_qc": {
"class": "RegularArray",
"size": 1,
"parameters": {"__array__": "string"},
"content": {
"class": "NumpyArray",
"primitive": "uint8",
"parameters": {"__array__": "char"},
"form_key": "data-raw/temp_qc.B"
}
}
}
}
},
"config_mission_number": {
"class": "NumpyArray",
"primitive": "int32",
"form_key": "data-raw/config_mission_number.i4"
},
"cycle_number": {
"class": "NumpyArray",
"primitive": "int32",
"form_key": "data-raw/cycle_number.i4"
},
"data_mode": {
"class": "RegularArray",
"size": 1,
"parameters": {"__array__": "string"},
"content": {
"class": "NumpyArray",
"primitive": "uint8",
"parameters": {"__array__": "char"},
"form_key": "data-raw/data_mode.B"
}
},
"direction": {
"class": "RegularArray",
"size": 1,
"parameters": {"__array__": "string"},
"content": {
"class": "NumpyArray",
"primitive": "uint8",
"parameters": {"__array__": "char"},
"form_key": "data-raw/direction.B"
}
},
"platform_number": {
"class": "NumpyArray",
"primitive": "int32",
"form_key": "data-raw/platform_number.i4"
},
"position_qc": {
"class": "RegularArray",
"size": 1,
"parameters": {"__array__": "string"},
"content": {
"class": "NumpyArray",
"primitive": "uint8",
"parameters": {"__array__": "char"},
"form_key": "data-raw/position_qc.B"
}
},
"time_qc": {
"class": "RegularArray",
"size": 1,
"parameters": {"__array__": "string"},
"content": {
"class": "NumpyArray",
"primitive": "uint8",
"parameters": {"__array__": "char"},
"form_key": "data-raw/time_qc.B"
}
}
}
}
"""
# Awkward Form (as JSON) for the "expert"-mode layout: a superset of the
# standard form that adds the *_adjusted, *_adjusted_error, *_adjusted_qc
# measurement columns plus the per-profile metadata strings (data centre,
# PI name, firmware version, sampling scheme, ...). Fixed-width strings use
# RegularArray with the appropriate "size"; variable-length strings use
# ListOffsetArray with a companion *-OFFSETS.i8 buffer. Each "form_key" names
# the raw buffer file that ak.from_buffers maps into that node.
expert_form = """
{
"class": "RecordArray",
"contents": {
"latitude": {
"class": "NumpyArray",
"primitive": "float64",
"form_key": "data-raw/latitude.f8"
},
"longitude": {
"class": "NumpyArray",
"primitive": "float64",
"form_key": "data-raw/longitude.f8"
},
"time": {
"class": "NumpyArray",
"primitive": "datetime64[ns]",
"form_key": "data-raw/time.i8"
},
"levels": {
"class": "ListOffsetArray",
"offsets": "i64",
"form_key": "data-raw/profiles-levels-OFFSETS.i8",
"content": {
"class": "RecordArray",
"contents": {
"pres": {
"class": "NumpyArray",
"primitive": "float32",
"form_key": "data-raw/pres.f4"
},
"pres_adjusted": {
"class": "NumpyArray",
"primitive": "float32",
"form_key": "data-raw/pres_adjusted.f4"
},
"pres_adjusted_error": {
"class": "NumpyArray",
"primitive": "float32",
"form_key": "data-raw/pres_adjusted_error.f4"
},
"pres_adjusted_qc": {
"class": "RegularArray",
"size": 1,
"parameters": {"__array__": "string"},
"content": {
"class": "NumpyArray",
"primitive": "uint8",
"parameters": {"__array__": "char"},
"form_key": "data-raw/pres_adjusted_qc.B"
}
},
"pres_qc": {
"class": "RegularArray",
"size": 1,
"parameters": {"__array__": "string"},
"content": {
"class": "NumpyArray",
"primitive": "uint8",
"parameters": {"__array__": "char"},
"form_key": "data-raw/pres_qc.B"
}
},
"psal": {
"class": "NumpyArray",
"primitive": "float32",
"form_key": "data-raw/psal.f4"
},
"psal_adjusted": {
"class": "NumpyArray",
"primitive": "float32",
"form_key": "data-raw/psal_adjusted.f4"
},
"psal_adjusted_error": {
"class": "NumpyArray",
"primitive": "float32",
"form_key": "data-raw/psal_adjusted_error.f4"
},
"psal_adjusted_qc": {
"class": "RegularArray",
"size": 1,
"parameters": {"__array__": "string"},
"content": {
"class": "NumpyArray",
"primitive": "uint8",
"parameters": {"__array__": "char"},
"form_key": "data-raw/psal_adjusted_qc.B"
}
},
"psal_qc": {
"class": "RegularArray",
"size": 1,
"parameters": {"__array__": "string"},
"content": {
"class": "NumpyArray",
"primitive": "uint8",
"parameters": {"__array__": "char"},
"form_key": "data-raw/psal_qc.B"
}
},
"temp": {
"class": "NumpyArray",
"primitive": "float32",
"form_key": "data-raw/temp.f4"
},
"temp_adjusted": {
"class": "NumpyArray",
"primitive": "float32",
"form_key": "data-raw/temp_adjusted.f4"
},
"temp_adjusted_error": {
"class": "NumpyArray",
"primitive": "float32",
"form_key": "data-raw/temp_adjusted_error.f4"
},
"temp_adjusted_qc": {
"class": "RegularArray",
"size": 1,
"parameters": {"__array__": "string"},
"content": {
"class": "NumpyArray",
"primitive": "uint8",
"parameters": {"__array__": "char"},
"form_key": "data-raw/temp_adjusted_qc.B"
}
},
"temp_qc": {
"class": "RegularArray",
"size": 1,
"parameters": {"__array__": "string"},
"content": {
"class": "NumpyArray",
"primitive": "uint8",
"parameters": {"__array__": "char"},
"form_key": "data-raw/temp_qc.B"
}
}
}
}
},
"config_mission_number": {
"class": "NumpyArray",
"primitive": "int32",
"form_key": "data-raw/config_mission_number.i4"
},
"cycle_number": {
"class": "NumpyArray",
"primitive": "int32",
"form_key": "data-raw/cycle_number.i4"
},
"data_centre": {
"class": "RegularArray",
"size": 2,
"parameters": {"__array__": "string"},
"content": {
"class": "NumpyArray",
"primitive": "uint8",
"parameters": {"__array__": "char"},
"form_key": "data-raw/data_centre.B"
}
},
"data_mode": {
"class": "RegularArray",
"size": 1,
"parameters": {"__array__": "string"},
"content": {
"class": "NumpyArray",
"primitive": "uint8",
"parameters": {"__array__": "char"},
"form_key": "data-raw/data_mode.B"
}
},
"data_state_indicator": {
"class": "RegularArray",
"size": 4,
"parameters": {"__array__": "string"},
"content": {
"class": "NumpyArray",
"primitive": "uint8",
"parameters": {"__array__": "char"},
"form_key": "data-raw/data_state_indicator.B"
}
},
"dc_reference": {
"class": "ListOffsetArray",
"offsets": "i64",
"parameters": {"__array__": "string"},
"form_key": "data-raw/dc_reference-OFFSETS.i8",
"content": {
"class": "NumpyArray",
"primitive": "uint8",
"parameters": {"__array__": "char"},
"form_key": "data-raw/dc_reference.B"
}
},
"direction": {
"class": "RegularArray",
"size": 1,
"parameters": {"__array__": "string"},
"content": {
"class": "NumpyArray",
"primitive": "uint8",
"parameters": {"__array__": "char"},
"form_key": "data-raw/direction.B"
}
},
"firmware_version": {
"class": "ListOffsetArray",
"offsets": "i64",
"parameters": {"__array__": "string"},
"form_key": "data-raw/firmware_version-OFFSETS.i8",
"content": {
"class": "NumpyArray",
"primitive": "uint8",
"parameters": {"__array__": "char"},
"form_key": "data-raw/firmware_version.B"
}
},
"float_serial_no": {
"class": "ListOffsetArray",
"offsets": "i64",
"parameters": {"__array__": "string"},
"form_key": "data-raw/float_serial_no-OFFSETS.i8",
"content": {
"class": "NumpyArray",
"primitive": "uint8",
"parameters": {"__array__": "char"},
"form_key": "data-raw/float_serial_no.B"
}
},
"pi_name": {
"class": "ListOffsetArray",
"offsets": "i64",
"parameters": {"__array__": "string"},
"form_key": "data-raw/pi_name-OFFSETS.i8",
"content": {
"class": "NumpyArray",
"primitive": "uint8",
"parameters": {"__array__": "char"},
"form_key": "data-raw/pi_name.B"
}
},
"platform_number": {
"class": "NumpyArray",
"primitive": "int32",
"form_key": "data-raw/platform_number.i4"
},
"platform_type": {
"class": "ListOffsetArray",
"offsets": "i64",
"parameters": {"__array__": "string"},
"form_key": "data-raw/platform_type-OFFSETS.i8",
"content": {
"class": "NumpyArray",
"primitive": "uint8",
"parameters": {"__array__": "char"},
"form_key": "data-raw/platform_type.B"
}
},
"positioning_system": {
"class": "RegularArray",
"size": 4,
"parameters": {"__array__": "string"},
"content": {
"class": "NumpyArray",
"primitive": "uint8",
"parameters": {"__array__": "char"},
"form_key": "data-raw/positioning_system.B"
}
},
"position_qc": {
"class": "RegularArray",
"size": 1,
"parameters": {"__array__": "string"},
"content": {
"class": "NumpyArray",
"primitive": "uint8",
"parameters": {"__array__": "char"},
"form_key": "data-raw/position_qc.B"
}
},
"profile_pres_qc": {
"class": "RegularArray",
"size": 1,
"parameters": {"__array__": "string"},
"content": {
"class": "NumpyArray",
"primitive": "uint8",
"parameters": {"__array__": "char"},
"form_key": "data-raw/profile_pres_qc.B"
}
},
"profile_psal_qc": {
"class": "RegularArray",
"size": 1,
"parameters": {"__array__": "string"},
"content": {
"class": "NumpyArray",
"primitive": "uint8",
"parameters": {"__array__": "char"},
"form_key": "data-raw/profile_psal_qc.B"
}
},
"profile_temp_qc": {
"class": "RegularArray",
"size": 1,
"parameters": {"__array__": "string"},
"content": {
"class": "NumpyArray",
"primitive": "uint8",
"parameters": {"__array__": "char"},
"form_key": "data-raw/profile_temp_qc.B"
}
},
"project_name": {
"class": "ListOffsetArray",
"offsets": "i64",
"parameters": {"__array__": "string"},
"form_key": "data-raw/project_name-OFFSETS.i8",
"content": {
"class": "NumpyArray",
"primitive": "uint8",
"parameters": {"__array__": "char"},
"form_key": "data-raw/project_name.B"
}
},
"time_location": {
"class": "NumpyArray",
"primitive": "datetime64[ns]",
"form_key": "data-raw/time_location.i8"
},
"time_qc": {
"class": "RegularArray",
"size": 1,
"parameters": {"__array__": "string"},
"content": {
"class": "NumpyArray",
"primitive": "uint8",
"parameters": {"__array__": "char"},
"form_key": "data-raw/time_qc.B"
}
},
"vertical_sampling_scheme": {
"class": "ListOffsetArray",
"offsets": "i64",
"parameters": {"__array__": "string"},
"form_key": "data-raw/vertical_sampling_scheme-OFFSETS.i8",
"content": {
"class": "NumpyArray",
"primitive": "uint8",
"parameters": {"__array__": "char"},
"form_key": "data-raw/vertical_sampling_scheme.B"
}
},
"wmo_inst_type": {
"class": "NumpyArray",
"primitive": "int16",
"form_key": "data-raw/wmo_inst_type.i2"
}
}
}
"""
# Materialize the memory-mapped buffers as Awkward Arrays. The outermost
# length is the last entry of the cumulative profiles-OFFSETS buffer, i.e.
# the total number of profiles across all input files. buffer_key maps each
# node's form_key directly to its dict key in `buffers`.
standard = ak._v2.from_buffers(
    standard_form,
    buffers["data-raw/profiles-OFFSETS.i8"][-1],
    buffers,
    buffer_key="{form_key}",
)
# Raise immediately if offsets/contents are internally inconsistent.
ak._v2.validity_error(standard, exception=True)
expert = ak._v2.from_buffers(
    expert_form,
    buffers["data-raw/profiles-OFFSETS.i8"][-1],
    buffers,
    buffer_key="{form_key}",
)
ak._v2.validity_error(expert, exception=True)
# Write the expert array to Parquet in 10000-profile batches (one
# write_table call, hence one row group, per batch).
starts = list(range(0, len(expert), 10000))
stops = [min(x + 10000, len(expert)) for x in starts]
import pyarrow.parquet as pq
with pq.ParquetWriter(
    "argo-floats-expert.parquet",
    # NOTE(review): only the .schema of this conversion is used, yet
    # expert[1000:] converts nearly the whole array to Arrow to obtain it;
    # a small slice (e.g. expert[:1]) would presumably give the same
    # schema far more cheaply — confirm before changing.
    ak._v2.to_arrow_table(expert[1000:], extensionarray=False).schema,
    # Dictionary-encode the low-cardinality metadata strings.
    use_dictionary=[
        "data_centre",
        "data_state_indicator",
        "firmware_version",
        "pi_name",
        "platform_type",
        "positioning_system",
        "project_name",
        "vertical_sampling_scheme",
        "wmo_inst_type",
    ],
    # Byte-stream-split helps zstd compress the float measurement columns.
    use_byte_stream_split=[
        "latitude",
        "longitude",
        "levels.pres_adjusted_error",
        "levels.pres_adjusted",
        "levels.pres",
        "levels.psal_adjusted_error",
        "levels.psal_adjusted",
        "levels.psal",
        "levels.temp_adjusted_error",
        "levels.temp_adjusted",
        "levels.temp",
    ],
    compression="zstd",
    compression_level=22,
) as writer:
    for i, (start, stop) in enumerate(zip(starts, stops)):
        # Progress report: batch index, total batches, fraction complete.
        print(i, len(starts), stop / len(expert))
        writer.write_table(ak._v2.to_arrow_table(expert[start:stop], extensionarray=False))
import glob
import re
import os
import struct
import sys
import datetime
import numpy as np
import netCDF4 as nc

# filenames = sorted(glob.glob(f"ftp.ifremer.fr/ifremer/argo/geo/indian_ocean/*/01/*01_prof.nc", recursive=True))
filenames = sorted(glob.glob("ftp.ifremer.fr/ifremer/**/*.nc", recursive=True))

# Nothing to do without input files or the output directory.
if not filenames:
    sys.exit(0)
if not os.path.exists("data-raw"):
    sys.exit(0)

files = {}  # open output file handles, keyed by logical column name
last = {}   # running cumulative total for each *-OFFSETS column
integer = struct.Struct("<q")  # little-endian int64 packer for offset values
datetime1950 = datetime.datetime(1950, 1, 1)  # presumably the Argo JULD reference epoch — used in later (unseen) conversion code
def newfile(name, ext):
    """Open data-raw/<name>.<ext> for binary writing and register it in `files`.

    An *-OFFSETS column must be int64 ("i8"); it is seeded with a single 0
    (the first offset) and its running total is initialized in `last`.
    A *-MASK column must be "bool"; every other column must not be.
    """
    path = os.path.join("data-raw", f"{name}.{ext}")
    files[name] = open(path, "wb")
    if name.endswith("-OFFSETS"):
        assert ext == "i8"
        last[name] = 0
        files[name].write(integer.pack(0))
    elif name.endswith("-MASK"):
        assert ext == "bool"
    else:
        assert ext != "bool"
def append_count(name, count):
    """Add one list's `count` to the running total of an *-OFFSETS column and
    write the new cumulative offset as a little-endian int64."""
    total = last[name] + count
    last[name] = total
    files[name].write(integer.pack(total))
def append_counts(name, counts):
    """Vectorized append_count: turn per-item counts into cumulative offsets
    (continuing from the stored running total) and write them all at once."""
    if len(counts) == 0:
        return
    offsets = last[name] + np.cumsum(counts)
    last[name] = offsets[-1]
    files[name].write(offsets.tobytes())
def append(name, batch):
    """Write a batch of fixed-width values (ndarray or bytes) verbatim to the
    column's raw file; empty batches are a no-op."""
    if len(batch) == 0:
        return
    files[name].write(np.asarray(batch).tobytes())
def append_chars(name, batch):
    """Write each non-empty byte string in `batch` back to back (no
    separators) into the column's raw file."""
    for chunk in (x for x in batch if len(x) > 0):
        files[name].write(chunk)
### argopy index has
#
# float64 LATITUDE (N_POINTS) float64 24.54 24.54 25.04 ... 24.96 24.96
# float64 LONGITUDE (N_POINTS) float64 -45.14 -45.14 ... -50.4 -50.4
# (JULD) TIME (N_POINTS) datetime64[ns] 2011-01-01T11:49:19 ... ...
### argopy standard mode has these variables:
#
# CONFIG_MISSION_NUMBER (N_POINTS) int64
# CYCLE_NUMBER (N_POINTS) int64
# DATA_MODE (N_POINTS) <U1
# DIRECTION (N_POINTS) <U1
# PLATFORM_NUMBER (N_POINTS) int64
# POSITION_QC (N_POINTS) int64
# PRES (N_POINTS) float64
# PRES_QC (N_POINTS) int64
# PSAL (N_POINTS) float64
# PSAL_QC (N_POINTS) int64
# TEMP (N_POINTS) float64
# TEMP_QC (N_POINTS) int64
# TIME_QC (N_POINTS) int64
### argopy expert mode has these variables:
#
# int32 CONFIG_MISSION_NUMBER (N_POINTS) int64 2 2
# int32 CYCLE_NUMBER (N_POINTS) int64 2 2
# fixed2 DATA_CENTRE (N_POINTS) <U2 'BO' 'BO'
# fixed1 DATA_MODE (N_POINTS) <U1 'R' 'R'
# fixed4 DATA_STATE_INDICATOR (N_POINTS) <U4 '2B ' '2B '
# string DC_REFERENCE (N_POINTS) <U32 '
# fixed1 DIRECTION (N_POINTS) <U1 'A' 'A'
# string FIRMWARE_VERSION (N_POINTS) <U32 '5900A04
# string FLOAT_SERIAL_NO (N_POINTS) <U32 'AI2600-17EU01
# string PI_NAME (N_POINTS) <U64 "Diarmuid O'Conchubhair
# int32 PLATFORM_NUMBER (N_POINTS) int64 6901929 6901929
# string PLATFORM_TYPE (N_POINTS) <U32 'ARVOR
# fixed8 POSITIONING_SYSTEM (N_POINTS) <U8 'GPS ' 'GPS '
# fixed1 POSITION_QC (N_POINTS) int64 1 1
# float32 PRES (N_POINTS) float32 0.4 0.5
# float32 PRES_ADJUSTED (N_POINTS) float32 nan nan
# float32 PRES_ADJUSTED_ERROR (N_POINTS) float32 nan nan
# fixed1 PRES_ADJUSTED_QC (N_POINTS) int64 0 0
# fixed1 PRES_QC (N_POINTS) int64 1 1
# fixed1 PROFILE_PRES_QC (N_POINTS) <U1 'A' 'A'
# fixed1 PROFILE_PSAL_QC (N_POINTS) <U1 'A' 'A'
# fixed1 PROFILE_TEMP_QC (N_POINTS) <U1 'F' 'F'
# string PROJECT_NAME (N_POINTS) <U64 'Argo Ireland
# float32 PSAL (N_POINTS) float32 35.34 35.34
# float32 PSAL_ADJUSTED (N_POINTS) float32 nan nan
# float32 PSAL_ADJUSTED_ERROR (N_POINTS) float32 nan nan
# fixed1 PSAL_ADJUSTED_QC (N_POINTS) int64 0 0
# fixed1 PSAL_QC (N_POINTS) int64 1 1
# float32 TEMP (N_POINTS) float32 10.03 10.03
# float32 TEMP_ADJUSTED (N_POINTS) float32 nan nan
# float32 TEMP_ADJUSTED_ERROR (N_POINTS) float32 nan nan
# fixed1 TEMP_ADJUSTED_QC (N_POINTS) int64 0 0
# fixed1 TEMP_QC (N_POINTS) int64 4 4
# (JULD) TIME_LOCATION (N_POINTS) datetime64[ns] 2018-02-24T09:16:24.9...
# fixed1 TIME_QC (N_POINTS) int64 1 1
# string VERTICAL_SAMPLING_SCHEME (N_POINTS) <U256 'Primary sampling: averaged [1...
# int16 WMO_INST_TYPE (N_POINTS) int64 844 844
# Create every output column up front; tuples are (column name, extension),
# where the extension encodes the dtype (B = uint8 chars, i2/i4/i8 = ints,
# f4/f8 = floats, *-OFFSETS columns are cumulative int64 counts).
_columns = [
    ("profiles-OFFSETS", "i8"),
    ("profiles-levels-OFFSETS", "i8"),
    # multiplicity: N_PROF
    ("platform_number", "i4"),
    ("project_name-OFFSETS", "i8"),
    ("project_name", "B"),
    ("pi_name-OFFSETS", "i8"),
    ("pi_name", "B"),
    ("data_centre", "B"),
    ("dc_reference-OFFSETS", "i8"),
    ("dc_reference", "B"),
    ("data_state_indicator", "B"),
    ("platform_type-OFFSETS", "i8"),
    ("platform_type", "B"),
    ("float_serial_no-OFFSETS", "i8"),
    ("float_serial_no", "B"),
    ("firmware_version-OFFSETS", "i8"),
    ("firmware_version", "B"),
    ("wmo_inst_type", "i2"),
    ("positioning_system", "B"),
    ("vertical_sampling_scheme-OFFSETS", "i8"),
    ("vertical_sampling_scheme", "B"),
    ("position_qc", "B"),
    ("profile_pres_qc", "B"),
    ("profile_temp_qc", "B"),
    ("profile_psal_qc", "B"),
    ("direction", "B"),
    ("data_mode", "B"),
    ("time_qc", "B"),
    # multiplicity: N_PROF * N_LEVELS
    ("pres_qc", "B"),
    ("pres_adjusted_qc", "B"),
    ("temp_qc", "B"),
    ("psal_qc", "B"),
    ("temp_adjusted_qc", "B"),
    ("psal_adjusted_qc", "B"),
    # multiplicity: N_PROF
    ("time", "i8"),
    ("time_location", "i8"),
    ("cycle_number", "i4"),
    ("latitude", "f8"),
    ("longitude", "f8"),
    ("config_mission_number", "i4"),
    # multiplicity: N_PROF * N_LEVELS
    ("pres", "f4"),
    ("pres_adjusted", "f4"),
    ("pres_adjusted_error", "f4"),
    ("temp", "f4"),
    ("psal", "f4"),
    ("temp_adjusted", "f4"),
    ("psal_adjusted", "f4"),
    ("temp_adjusted_error", "f4"),
    ("psal_adjusted_error", "f4"),
]
for _name, _ext in _columns:
    newfile(_name, _ext)
for index, filename in enumerate(filenames):
print(filename, round(100 * index / len(filenames)), "percent")
with nc.Dataset(filename) as dataset:
# assert dataset.Conventions == "Argo-3.1 CF-1.6"
# assert dataset.featureType == "trajectoryProfile"
# assert dataset.user_manual_version == "3.1"
filename = filename[len("ftp.ifremer.fr-HOLD/ifremer/argo/geo/") :]
d = dataset.dimensions
v = dataset.variables
################# dimensions
# assert list(d.keys()) == [
# "DATE_TIME",
# "STRING256",
# "STRING64",
# "STRING32",
# "STRING16",
# "STRING8",
# "STRING4",
# "STRING2",
# "N_PROF",
# "N_PARAM",
# "N_LEVELS",
# "N_CALIB",
# "N_HISTORY",
# ]
# assert d["DATE_TIME"].size == 14
# assert d["STRING256"].size == 256
# assert d["STRING64"].size == 64
# assert d["STRING32"].size == 32
# assert d["STRING16"].size == 16
# assert d["STRING8"].size == 8
# assert d["STRING4"].size == 4
# assert d["STRING2"].size == 2
# assert d["N_HISTORY"].size == 0
append_count("profiles-OFFSETS", d["N_PROF"].size)
for i in range(d["N_PROF"].size):
append_count("profiles-levels-OFFSETS", d["N_LEVELS"].size)
################# variables
# assert v["PLATFORM_NUMBER"].name == "PLATFORM_NUMBER"
# assert v["PLATFORM_NUMBER"].dimensions == ("N_PROF", "STRING8")
# assert v["PLATFORM_NUMBER"].dtype == np.dtype("S1")
# assert v["PLATFORM_NUMBER"].ncattrs() == [
# "long_name",
# "conventions",
# "_FillValue",
# ]
# assert v["PLATFORM_NUMBER"].long_name == "Float unique identifier"
# assert v["PLATFORM_NUMBER"].conventions == "WMO float identifier : A9IIIII"
# assert v["PLATFORM_NUMBER"]._FillValue == b" "
if d["N_PROF"].size != 0:
array = np.char.rstrip(np.asarray(v["PLATFORM_NUMBER"]).view("S8"), b" ").astype(np.int32)
append("platform_number", array)
# assert v["PROJECT_NAME"].name == "PROJECT_NAME"
# assert v["PROJECT_NAME"].dimensions == ("N_PROF", "STRING64")
# assert v["PROJECT_NAME"].dtype == np.dtype("S1")
# assert v["PROJECT_NAME"].ncattrs() == ["long_name", "_FillValue"]
# assert v["PROJECT_NAME"].long_name == "Name of the project"
# assert v["PROJECT_NAME"]._FillValue == b" "
if d["N_PROF"].size != 0:
array = np.char.rstrip(np.asarray(v["PROJECT_NAME"]).view("S64"), b" ").ravel()
append_counts("project_name-OFFSETS", np.char.str_len(array))
append_chars("project_name", array)
# assert v["PI_NAME"].name == "PI_NAME"
# assert v["PI_NAME"].dimensions == ("N_PROF", "STRING64")
# assert v["PI_NAME"].dtype == np.dtype("S1")
# assert v["PI_NAME"].ncattrs() == ["long_name", "_FillValue"]
# assert v["PI_NAME"].long_name == "Name of the principal investigator"
# assert v["PI_NAME"]._FillValue == b" "
if d["N_PROF"].size != 0:
array = np.char.rstrip(np.asarray(v["PI_NAME"]).view("S64"), b" ").ravel()
append_counts("pi_name-OFFSETS", np.char.str_len(array))
append_chars("pi_name", array)
# assert v["DATA_CENTRE"].name == "DATA_CENTRE"
# assert v["DATA_CENTRE"].dimensions == ("N_PROF", "STRING2")
# assert v["DATA_CENTRE"].dtype == np.dtype("S1")
# assert v["DATA_CENTRE"].ncattrs() == [
# "long_name",
# "conventions",
# "_FillValue",
# ]
# assert (
# v["DATA_CENTRE"].long_name == "Data centre in charge of float data processing"
# )
# assert v["DATA_CENTRE"].conventions == "Argo reference table 4"
# assert v["DATA_CENTRE"]._FillValue == b" "
if d["N_PROF"].size != 0:
array = np.asarray(v["DATA_CENTRE"]).view("S2")
append_chars("data_centre", array)
# assert v["DC_REFERENCE"].name == "DC_REFERENCE"
# assert v["DC_REFERENCE"].dimensions == ("N_PROF", "STRING32")
# assert v["DC_REFERENCE"].dtype == np.dtype("S1")
# assert v["DC_REFERENCE"].ncattrs() == [
# "long_name",
# "conventions",
# "_FillValue",
# ]
# assert v["DC_REFERENCE"].long_name == "Station unique identifier in data centre"
# assert v["DC_REFERENCE"].conventions == "Data centre convention"
# assert v["DC_REFERENCE"]._FillValue == b" "
if d["N_PROF"].size != 0:
array = np.char.rstrip(np.asarray(v["DC_REFERENCE"]).view("S32"), b" ").ravel()
append_counts("dc_reference-OFFSETS", np.char.str_len(array))
append_chars("dc_reference", array)
# assert v["DATA_STATE_INDICATOR"].name == "DATA_STATE_INDICATOR"
# assert v["DATA_STATE_INDICATOR"].dimensions == (
# "N_PROF",
# "STRING4",
# )
# assert v["DATA_STATE_INDICATOR"].dtype == np.dtype("S1")
# assert v["DATA_STATE_INDICATOR"].ncattrs() == [
# "long_name",
# "conventions",
# "_FillValue",
# ]
# assert (
# v["DATA_STATE_INDICATOR"].long_name
# == "Degree of processing the data have passed through"
# )
# assert v["DATA_STATE_INDICATOR"].conventions == "Argo reference table 6"
# assert v["DATA_STATE_INDICATOR"]._FillValue == b" "
if d["N_PROF"].size != 0:
array = np.asarray(v["DATA_STATE_INDICATOR"]).view("S4")
append_chars("data_state_indicator", array)
# assert v["PLATFORM_TYPE"].name == "PLATFORM_TYPE"
# assert v["PLATFORM_TYPE"].dimensions == ("N_PROF", "STRING32")
# assert v["PLATFORM_TYPE"].dtype == np.dtype("S1")
# assert v["PLATFORM_TYPE"].ncattrs() == [
# "long_name",
# "conventions",
# "_FillValue",
# ]
# assert v["PLATFORM_TYPE"].long_name == "Type of float"
# assert v["PLATFORM_TYPE"].conventions == "Argo reference table 23"
# assert v["PLATFORM_TYPE"]._FillValue == b" "
if d["N_PROF"].size != 0:
array = np.char.rstrip(np.asarray(v["PLATFORM_TYPE"]).view("S32"), b" ").ravel()
append_counts("platform_type-OFFSETS", np.char.str_len(array))
append_chars("platform_type", array)
# assert v["FLOAT_SERIAL_NO"].name == "FLOAT_SERIAL_NO"
# assert v["FLOAT_SERIAL_NO"].dimensions == ("N_PROF", "STRING32")
# assert v["FLOAT_SERIAL_NO"].dtype == np.dtype("S1")
# assert v["FLOAT_SERIAL_NO"].ncattrs() == [
# "long_name",
# "_FillValue",
# ]
# assert v["FLOAT_SERIAL_NO"].long_name == "Serial number of the float"
# assert v["FLOAT_SERIAL_NO"]._FillValue == b" "
if d["N_PROF"].size != 0:
array = np.char.rstrip(np.asarray(v["FLOAT_SERIAL_NO"]).view("S32"), b" ").ravel()
append_counts("float_serial_no-OFFSETS", np.char.str_len(array))
append_chars("float_serial_no", array)
# assert v["FIRMWARE_VERSION"].name == "FIRMWARE_VERSION"
# assert v["FIRMWARE_VERSION"].dimensions == ("N_PROF", "STRING32")
# assert v["FIRMWARE_VERSION"].dtype == np.dtype("S1")
# assert v["FIRMWARE_VERSION"].ncattrs() == [
# "long_name",
# "_FillValue",
# ]
# assert v["FIRMWARE_VERSION"].long_name == "Instrument firmware version"
# assert v["FIRMWARE_VERSION"]._FillValue == b" "
if d["N_PROF"].size != 0:
array = np.char.rstrip(np.asarray(v["FIRMWARE_VERSION"]).view("S32"), b" ").ravel()
append_counts("firmware_version-OFFSETS", np.char.str_len(array))
append_chars("firmware_version", array)
# assert v["WMO_INST_TYPE"].name == "WMO_INST_TYPE"
# assert v["WMO_INST_TYPE"].dimensions == ("N_PROF", "STRING4")
# assert v["WMO_INST_TYPE"].dtype == np.dtype("S1")
# assert v["WMO_INST_TYPE"].ncattrs() == [
# "long_name",
# "conventions",
# "_FillValue",
# ]
# assert v["WMO_INST_TYPE"].long_name == "Coded instrument type"
# assert v["WMO_INST_TYPE"].conventions == "Argo reference table 8"
# assert v["WMO_INST_TYPE"]._FillValue == b" "
if d["N_PROF"].size != 0:
array = np.char.rstrip(np.asarray(v["WMO_INST_TYPE"]).view("S4"), b" ").astype(np.int16)
append_chars("wmo_inst_type", array)
# assert v["POSITIONING_SYSTEM"].name == "POSITIONING_SYSTEM"
# assert v["POSITIONING_SYSTEM"].dimensions == ("N_PROF", "STRING8")
# assert v["POSITIONING_SYSTEM"].dtype == np.dtype("S1")
# assert v["POSITIONING_SYSTEM"].ncattrs() == [
# "long_name",
# "_FillValue",
# ]
# assert v["POSITIONING_SYSTEM"].long_name == "Positioning system"
# assert v["POSITIONING_SYSTEM"]._FillValue == b" "
if d["N_PROF"].size != 0:
array = np.asarray(v["POSITIONING_SYSTEM"]).view("S8")
append_chars("positioning_system", array)
# assert v["VERTICAL_SAMPLING_SCHEME"].name == "VERTICAL_SAMPLING_SCHEME"
# assert v["VERTICAL_SAMPLING_SCHEME"].dimensions == (
# "N_PROF",
# "STRING256",
# )
# assert v["VERTICAL_SAMPLING_SCHEME"].dtype == np.dtype("S1")
# assert v["VERTICAL_SAMPLING_SCHEME"].ncattrs() == [
# "long_name",
# "conventions",
# "_FillValue",
# ]
# assert v["VERTICAL_SAMPLING_SCHEME"].long_name == "Vertical sampling scheme"
# assert v["VERTICAL_SAMPLING_SCHEME"].conventions == "Argo reference table 16"
# assert v["VERTICAL_SAMPLING_SCHEME"]._FillValue == b" "
if d["N_PROF"].size != 0:
array = np.char.rstrip(np.asarray(v["VERTICAL_SAMPLING_SCHEME"]).view("S256"), b" ").ravel()
append_counts("vertical_sampling_scheme-OFFSETS", np.char.str_len(array))
append_chars("vertical_sampling_scheme", array)
# assert v["POSITION_QC"].name == "POSITION_QC"
# assert v["POSITION_QC"].dimensions == ("N_PROF",)
# assert v["POSITION_QC"].dtype == np.dtype("S1")
# assert v["POSITION_QC"].ncattrs() == [
# Per-profile (N_PROF) single-character fields, dtype S1 with a blank fill.
# The NetCDF4 attribute assertions (name / dimensions / dtype / long_name /
# conventions / _FillValue) were verified once over the whole dataset and
# are elided here for speed.
for nc_name, raw_name in [
    ("POSITION_QC", "position_qc"),
    ("PROFILE_PRES_QC", "profile_pres_qc"),
    ("PROFILE_TEMP_QC", "profile_temp_qc"),
]:
    if d["N_PROF"].size != 0:
        append_chars(raw_name, np.asarray(v[nc_name]).view("S1"))
# PROFILE_PSAL_QC is optional (files without a salinity sensor omit it);
# substitute a run of blanks — the fill value — to keep columns aligned.
if "PROFILE_PSAL_QC" in v:
    if d["N_PROF"].size != 0:
        append_chars("profile_psal_qc", np.asarray(v["PROFILE_PSAL_QC"]).view("S1"))
else:
    if d["N_PROF"].size != 0:
        append_chars("profile_psal_qc", np.frombuffer(b" " * d["N_PROF"].size, dtype="S1"))
for nc_name, raw_name in [
    ("DIRECTION", "direction"),
    ("DATA_MODE", "data_mode"),
    ("JULD_QC", "time_qc"),  # JULD is the time axis, hence the rename
]:
    if d["N_PROF"].size != 0:
        append_chars(raw_name, np.asarray(v[nc_name]).view("S1"))
# Per-level (N_PROF x N_LEVELS) quality-flag characters, dtype S1 with a
# blank fill.  Attribute assertions were checked once and are elided.
n_flat = d["N_PROF"].size * d["N_LEVELS"].size
for nc_name, raw_name in [
    ("PRES_QC", "pres_qc"),
    ("PRES_ADJUSTED_QC", "pres_adjusted_qc"),
    ("TEMP_QC", "temp_qc"),
]:
    if n_flat != 0:
        append_chars(raw_name, np.asarray(v[nc_name]).view("S1"))
# PSAL_QC is optional; blanks (the fill value) stand in when it is absent.
if n_flat != 0:
    if "PSAL_QC" in v:
        append_chars("psal_qc", np.asarray(v["PSAL_QC"]).view("S1"))
    else:
        append_chars("psal_qc", np.frombuffer(b" " * n_flat, dtype="S1"))
if n_flat != 0:
    append_chars("temp_adjusted_qc", np.asarray(v["TEMP_ADJUSTED_QC"]).view("S1"))
# PSAL_ADJUSTED_QC is optional for the same reason as PSAL_QC.
if n_flat != 0:
    if "PSAL_ADJUSTED_QC" in v:
        append_chars("psal_adjusted_qc", np.asarray(v["PSAL_ADJUSTED_QC"]).view("S1"))
    else:
        append_chars("psal_adjusted_qc", np.frombuffer(b" " * n_flat, dtype="S1"))
# JULD / JULD_LOCATION: float64 Julian days since 1950-01-01 00:00 UTC,
# converted here (early, per the plan) to int64 nanosecond timestamps.
# Fill values (999999.0) were asserted absent in the one-time checks.
def _julian_days_to_int64_ns(days):
    # The Python-datetime round trip is slow but keeps timedelta's exact
    # microsecond rounding; a vectorized multiply could round differently.
    stamps = [datetime1950 + datetime.timedelta(days=value) for value in days]
    return np.array(stamps, "datetime64[ns]").view(np.int64)

append("time", _julian_days_to_int64_ns(np.asarray(v["JULD"]).ravel()))
append("time_location", _julian_days_to_int64_ns(np.asarray(v["JULD_LOCATION"]).ravel()))
# Scalar-per-profile numeric fields, copied through unchanged (the one-time
# assertions confirmed their fill values never occur in this dataset):
#   CYCLE_NUMBER / CONFIG_MISSION_NUMBER are int32,
#   LATITUDE / LONGITUDE are float64 in degrees north / east.
for nc_name, raw_name in [
    ("CYCLE_NUMBER", "cycle_number"),
    ("LATITUDE", "latitude"),
    ("LONGITUDE", "longitude"),
    ("CONFIG_MISSION_NUMBER", "config_mission_number"),
]:
    append(raw_name, np.asarray(v[nc_name]).ravel())
# Per-level pressure columns (float32, decibar): PRES, PRES_ADJUSTED, and
# the delayed-mode PRES_ADJUSTED_ERROR.  The 99999.0 fill value is mapped
# to NaN in a writable copy, following argopy's missing-data convention.
for nc_name, raw_name in [
    ("PRES", "pres"),
    ("PRES_ADJUSTED", "pres_adjusted"),
    ("PRES_ADJUSTED_ERROR", "pres_adjusted_error"),
]:
    values = np.array(v[nc_name], copy=True).ravel()
    values[values == v[nc_name]._FillValue] = np.nan
    append(raw_name, values)
# TEMP (always present): per-level in-situ temperature, float32, with the
# 99999.0 fill value replaced by NaN per argopy's convention.
array = np.array(v["TEMP"], copy=True).ravel()
array[array == v["TEMP"]._FillValue] = np.nan
append("temp", array)
if "PSAL" in v:
    # PSAL: practical salinity, same (N_PROF, N_LEVELS) shape and fill
    # convention as TEMP.
    array = np.array(v["PSAL"], copy=True).ravel()
    array[array == v["PSAL"]._FillValue] = np.nan
    append("psal", array)
else:
    # No salinity sensor in this file: emit an all-NaN column of the same
    # flattened length so the raw columns stay aligned.  The length is
    # computed explicitly from the dimensions (consistent with the S1
    # fallbacks above) instead of reusing len(array) from TEMP, which
    # silently coupled this branch to the statement order above.
    append("psal", np.full(d["N_PROF"].size * d["N_LEVELS"].size, np.nan, np.float32))
# TEMP_ADJUSTED (always present): delayed-mode/adjusted temperature,
# float32, with the 99999.0 fill value replaced by NaN.
array = np.array(v["TEMP_ADJUSTED"], copy=True).ravel()
array[array == v["TEMP_ADJUSTED"]._FillValue] = np.nan
append("temp_adjusted", array)
if "PSAL_ADJUSTED" in v:
    # PSAL_ADJUSTED: adjusted practical salinity, same shape and fill
    # convention as TEMP_ADJUSTED.
    array = np.array(v["PSAL_ADJUSTED"], copy=True).ravel()
    array[array == v["PSAL_ADJUSTED"]._FillValue] = np.nan
    append("psal_adjusted", array)
else:
    # Missing salinity: all-NaN column of the flattened per-level length.
    # Computed explicitly from the dimensions rather than reusing
    # len(array) from TEMP_ADJUSTED, which hid a statement-order coupling.
    append("psal_adjusted", np.full(d["N_PROF"].size * d["N_LEVELS"].size, np.nan, np.float32))
# TEMP_ADJUSTED_ERROR (always present): delayed-mode QC error on the
# adjusted temperature, float32, with the 99999.0 fill replaced by NaN.
array = np.array(v["TEMP_ADJUSTED_ERROR"], copy=True).ravel()
array[array == v["TEMP_ADJUSTED_ERROR"]._FillValue] = np.nan
append("temp_adjusted_error", array)
if "PSAL_ADJUSTED_ERROR" in v:
    # PSAL_ADJUSTED_ERROR: same shape and fill convention.
    array = np.array(v["PSAL_ADJUSTED_ERROR"], copy=True).ravel()
    array[array == v["PSAL_ADJUSTED_ERROR"]._FillValue] = np.nan
    append("psal_adjusted_error", array)
else:
    # Missing salinity: all-NaN column of the flattened per-level length.
    # Computed explicitly from the dimensions rather than reusing
    # len(array) from TEMP_ADJUSTED_ERROR (a fragile statement-order
    # dependency in the original).
    append("psal_adjusted_error", np.full(d["N_PROF"].size * d["N_LEVELS"].size, np.nan, np.float32))
# All NetCDF4 inputs processed: close every raw output file handle.
for raw_file in files.values():
    raw_file.close()
# (end of script — trailing GitHub page footer removed; it was scrape residue, not part of the code)