Created
February 2, 2024 15:54
-
-
Save piotr-florek-mohc/071256205f9e5f055f8b523a52694a01 to your computer and use it in GitHub Desktop.
Utility functions for preparing CMOR dataset metadata JSON configurations.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os
import re
import json
from datetime import date  # NOTE(review): 'date' appears unused in the code shown — confirm before removing
# Module-wide configuration for building CMOR dataset metadata.
MIP_ERA = 'decadal'  # MIP era / project name used in table filenames
# NOTE(review): site-specific hard-coded paths — confirm they are valid on the target host.
DATASET_ROOT = '/data/users/hadlh/decxchg_data/files4sharing/'
MIP_TABLES_DIR = '/home/h04/pflorek/Documents/python/cdds_github/cmor_light/tables'
BASE_TIME_UNIT = 'days since 2000-1-1'  # default reference epoch for time coordinates
CV_VERSION = 'v1.0'  # controlled-vocabulary version recorded in dataset_info


def parse_time_unit(time_unit=BASE_TIME_UNIT):
    """Split a 'days since YYYY-M-D' unit string into (year, month, day) ints.

    Returns None when *time_unit* does not start with the expected pattern.
    """
    parsed = re.match(r'days since (\d{4})\-(\d{1,2})\-(\d{1,2})', time_unit)
    if parsed is None:
        return None
    year, month, day = parsed.groups()
    return int(year), int(month), int(day)
def parse_filename(filename):
    """Extract dataset facets from a climatology filename.

    Expected form:
        <var>_<mip_table>_<institution_id>_<source>_<ensemble>_<grid>_climatology-<start>-<end>.nc

    Returns:
        tuple of str: (mip_table, institution_id, ensemble_label, grid_label,
        climatology, initialization_index, physics_index, forcing_index).
        The index values are returned as strings, not ints.

    Raises:
        ValueError: if *filename* or its ensemble label does not match the
            expected pattern (previously this crashed with an opaque
            AttributeError on the None match object).
    """
    match = re.match(r"[a-z]+_(\w+)_(\w+)_\w+_([ripf\d\-]+)_(\w+)_climatology-(\d+)-(\d+)\.nc", filename)
    if match is None:
        raise ValueError("filename does not match the expected climatology pattern: {}".format(filename))
    mip_table = match.group(1)
    institution_id = match.group(2)
    ensemble_label = match.group(3)
    grid_label = match.group(4)
    climatology = match.group(5) + '-' + match.group(6)
    match = re.match(r"r\d-\d+i(\d+)p(\d+)f(\d+)", ensemble_label)
    if match is None:
        raise ValueError("ensemble label does not match r<n>-<n>i<n>p<n>f<n>: {}".format(ensemble_label))
    initialization_index = match.group(1)
    physics_index = match.group(2)
    forcing_index = match.group(3)
    return mip_table, institution_id, ensemble_label, grid_label, climatology, initialization_index, physics_index, forcing_index
def load_cv(mip_tables_dir, mip_era):
    """Read and return the controlled-vocabulary JSON ('<mip_era>_CV.json')."""
    cv_path = os.path.join(mip_tables_dir, '{}_CV.json'.format(mip_era))
    with open(cv_path) as cv_file:
        return json.load(cv_file)
def load_mip_table(mip_tables_dir, mip_era, mip_table):
    """Read and return a MIP table JSON ('<mip_era>_<mip_table>.json')."""
    table_path = os.path.join(mip_tables_dir, '{}_{}.json'.format(mip_era, mip_table))
    with open(table_path) as table_file:
        return json.load(table_file)
def generate_dataset_info(mip_era, activity_id, calendar, ensemble_label, experiment_id, institution_id,
                          source_id, source_type='AOGCM', grid='grid', grid_label='gn', nominal_resolution='100 km',
                          physics_index=1, forcing_index=1, initialization_index=1, realization_index=None):
    """Assemble the dataset-level metadata dictionary used to drive CMOR.

    Looks up *experiment_id*, *grid_label*, *institution_id* and *source_id*
    in the controlled-vocabulary (CV) JSON under MIP_TABLES_DIR and combines
    them with the supplied facets.

    Returns:
        dict: dataset_info ready to be serialised as a CMOR input JSON.
        Includes 'realization_index' only when one was supplied.

    Raises:
        KeyError: if any of the supplied ids is absent from the CV.
    """
    cv_json = load_cv(MIP_TABLES_DIR, mip_era)
    # Each CV lookup below doubles as validation: an unknown id raises
    # KeyError naming the offending key. (A previously computed 'version'
    # local was never used and has been removed.)
    experiment = cv_json['CV']['experiment_id'][experiment_id]
    cv_json['CV']['grid_label'][grid_label]  # existence check only; value unused
    institution = cv_json['CV']['institution_id'][institution_id]
    license_text = cv_json['CV']['license'][0]  # renamed local: avoid shadowing builtin 'license'
    source = cv_json['CV']['source_id'][source_id]
    if realization_index is None:
        # No realization supplied: trust the caller's pre-built ensemble label.
        variant_label = ensemble_label
    else:
        variant_label = 'r{}i{}p{}f{}'.format(realization_index, initialization_index, physics_index, forcing_index)
    dataset_info = {
        "_AXIS_ENTRY_FILE": "{}_coordinate.json".format(mip_era),
        "_FORMULA_VAR_FILE": "{}_formula_terms.json".format(mip_era),
        "_controlled_vocabulary_file": "{}_CV.json".format(mip_era),
        "activity_id": activity_id,
        "calendar": calendar,
        "cv_version": CV_VERSION,
        "ensemble_label": ensemble_label,
        "experiment": experiment['experiment'],
        "experiment_id": experiment_id,
        "forcing_index": forcing_index,
        "grid": grid,
        "grid_label": grid_label,
        "initialization_index": initialization_index,
        "institution": institution,
        "institution_id": institution_id,
        "license": license_text,
        "mip_era": mip_era,
        "nominal_resolution": nominal_resolution,
        "outpath": ".",
        "physics_index": physics_index,
        "source": source['source'],
        "source_id": source_id,
        "source_type": source_type,
        "variant_label": variant_label,
        "output_file_template": '<variable_id><table><source_id><experiment_id><ensemble_label><grid_label>'
    }
    if realization_index is not None:
        dataset_info.update({'realization_index': realization_index})
    return dataset_info
def create_cmor_json_config(contents, filepath):
    """Serialise *contents* as pretty-printed, key-sorted JSON at *filepath*.

    Returns *filepath* so the call can be chained by the caller.
    """
    with open(filepath, 'w') as output_handle:
        json.dump(contents, output_handle, sort_keys=True, indent=4)
    return filepath
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment