Skip to content

Instantly share code, notes, and snippets.

@piotr-florek-mohc
Created February 2, 2024 15:54
Show Gist options
  • Save piotr-florek-mohc/071256205f9e5f055f8b523a52694a01 to your computer and use it in GitHub Desktop.
Save piotr-florek-mohc/071256205f9e5f055f8b523a52694a01 to your computer and use it in GitHub Desktop.
utility functions
import os
import re
import json
from datetime import date
# Project-wide defaults for the helpers in this module.

# MIP era name; presumably the value passed as ``mip_era`` to the helpers
# below, which use it as the file-name prefix of the CV / MIP table JSON files.
MIP_ERA = "decadal"

# Root directory of the shared dataset files (not referenced by the helpers
# visible in this module).
DATASET_ROOT = "/data/users/hadlh/decxchg_data/files4sharing/"

# Directory that generate_dataset_info() reads the controlled vocabulary from.
MIP_TABLES_DIR = "/home/h04/pflorek/Documents/python/cdds_github/cmor_light/tables"

# Default time unit string parsed by parse_time_unit().
BASE_TIME_UNIT = "days since 2000-1-1"

# Value stored under "cv_version" by generate_dataset_info().
CV_VERSION = "v1.0"
def parse_time_unit(time_unit=BASE_TIME_UNIT):
    """Extract the reference date from a ``days since Y-M-D`` unit string.

    Args:
        time_unit: Unit string to parse; defaults to ``BASE_TIME_UNIT``.

    Returns:
        A ``(year, month, day)`` tuple of ints when ``time_unit`` starts with
        ``days since <4-digit year>-<1-2 digit month>-<1-2 digit day>``,
        otherwise ``None``.
    """
    found = re.match(r'days since (\d{4})\-(\d{1,2})\-(\d{1,2})', time_unit)
    if not found:
        # Unrecognised unit strings are reported as None, not as an error.
        return None
    year, month, day = (int(part) for part in found.groups())
    return year, month, day
def parse_filename(filename):
    """Split a climatology file name into its metadata components.

    The name must start with text of the form
    ``<lowercase token>_<table>_<institution>_<token>_<ensemble>_<grid>_climatology-<digits>-<digits>.nc``
    (the leading token is presumably the variable name and the uncaptured
    fourth token the source/model id -- confirm against the data producer).
    The ensemble label must in turn look like ``r<d>-<n>i<n>p<n>f<n>``.

    Args:
        filename: Base name of the file (no directory part).

    Returns:
        An 8-tuple of strings:
        ``(mip_table, institution_id, ensemble_label, grid_label, climatology,
        initialization_index, physics_index, forcing_index)`` where
        ``climatology`` is the two digit groups joined with ``-``.

    Raises:
        ValueError: If the file name or the ensemble label embedded in it does
            not follow the expected pattern.
    """
    name_match = re.match(
        r"[a-z]+_(\w+)_(\w+)_\w+_([ripf\d\-]+)_(\w+)_climatology-(\d+)-(\d+)\.nc",
        filename)
    if name_match is None:
        # Fail with a message naming the file instead of an AttributeError on None.
        raise ValueError(
            'File name {!r} does not match the expected climatology file pattern'.format(filename))
    (mip_table, institution_id, ensemble_label,
     grid_label, clim_start, clim_end) = name_match.groups()
    climatology = clim_start + '-' + clim_end

    ensemble_match = re.match(r"r\d-\d+i(\d+)p(\d+)f(\d+)", ensemble_label)
    if ensemble_match is None:
        raise ValueError(
            'Ensemble label {!r} in file name {!r} does not match the expected '
            'r<d>-<n>i<n>p<n>f<n> pattern'.format(ensemble_label, filename))
    initialization_index, physics_index, forcing_index = ensemble_match.groups()

    return (mip_table, institution_id, ensemble_label, grid_label, climatology,
            initialization_index, physics_index, forcing_index)
def load_cv(mip_tables_dir, mip_era):
    """Read the controlled-vocabulary JSON file for ``mip_era``.

    Args:
        mip_tables_dir: Directory containing the ``<mip_era>_CV.json`` file.
        mip_era: Prefix used to build the file name.

    Returns:
        The parsed JSON content of ``<mip_tables_dir>/<mip_era>_CV.json``.
    """
    cv_path = os.path.join(mip_tables_dir, '{}_CV.json'.format(mip_era))
    with open(cv_path) as cv_file:
        return json.load(cv_file)
def load_mip_table(mip_tables_dir, mip_era, mip_table):
    """Read the JSON definition of one MIP table.

    Args:
        mip_tables_dir: Directory containing the table files.
        mip_era: Prefix of the table file name.
        mip_table: Table name, used as the second part of the file name.

    Returns:
        The parsed JSON content of
        ``<mip_tables_dir>/<mip_era>_<mip_table>.json``.
    """
    table_path = os.path.join(mip_tables_dir, '{}_{}.json'.format(mip_era, mip_table))
    with open(table_path) as table_file:
        return json.load(table_file)
def generate_dataset_info(mip_era, activity_id, calendar, ensemble_label, experiment_id, institution_id,
                          source_id, source_type='AOGCM', grid='grid', grid_label='gn',
                          nominal_resolution='100 km', physics_index=1, forcing_index=1,
                          initialization_index=1, realization_index=None):
    """Assemble the dataset-level metadata dictionary for one dataset.

    The controlled vocabulary (CV) for ``mip_era`` is loaded from
    ``MIP_TABLES_DIR`` and used to look up the experiment, institution,
    license and source entries; the remaining values are copied from the
    arguments. The result is presumably meant to be written out with
    ``create_cmor_json_config`` -- confirm with the caller.

    Args:
        mip_era: MIP era name; also the prefix of the referenced JSON files.
        activity_id, calendar, source_type, grid, nominal_resolution:
            Copied verbatim into the result.
        ensemble_label: Stored as ``ensemble_label`` and, when
            ``realization_index`` is None, also as ``variant_label``.
        experiment_id, institution_id, source_id, grid_label: Keys that are
            looked up in the corresponding CV sections.
        physics_index, forcing_index, initialization_index: Copied into the
            result and used to build ``variant_label`` when
            ``realization_index`` is given.
        realization_index: Optional; when not None it is added to the result
            and ``variant_label`` becomes ``r<r>i<i>p<p>f<f>``.

    Returns:
        dict: The dataset metadata.

    Raises:
        KeyError: If a required section or key is absent from the CV
            (assuming the CV sections are dicts).
    """
    controlled_vocab = load_cv(MIP_TABLES_DIR, mip_era)['CV']

    # Parsed but not used below; kept so that a CV lacking a well-formed
    # collection version still fails at this point.
    _collection_version = (
        controlled_vocab['version_metadata']['CV_collection_version'].split(' ')[1])
    experiment_entry = controlled_vocab['experiment_id'][experiment_id]
    controlled_vocab['grid_label'][grid_label]  # lookup only: checks the label exists in the CV
    institution_entry = controlled_vocab['institution_id'][institution_id]
    cv_license = controlled_vocab['license'][0]
    source_entry = controlled_vocab['source_id'][source_id]

    has_realization = realization_index is not None
    if has_realization:
        variant_label = 'r{}i{}p{}f{}'.format(
            realization_index, initialization_index, physics_index, forcing_index)
    else:
        variant_label = ensemble_label

    dataset_info = {
        "_AXIS_ENTRY_FILE": "{}_coordinate.json".format(mip_era),
        "_FORMULA_VAR_FILE": "{}_formula_terms.json".format(mip_era),
        "_controlled_vocabulary_file": "{}_CV.json".format(mip_era),
        "activity_id": activity_id,
        "calendar": calendar,
        "cv_version": CV_VERSION,
        "ensemble_label": ensemble_label,
        "experiment": experiment_entry['experiment'],
        "experiment_id": experiment_id,
        "forcing_index": forcing_index,
        "grid": grid,
        "grid_label": grid_label,
        "initialization_index": initialization_index,
        "institution": institution_entry,
        "institution_id": institution_id,
        "license": cv_license,
        "mip_era": mip_era,
        "nominal_resolution": nominal_resolution,
        "outpath": ".",
        "physics_index": physics_index,
        "source": source_entry['source'],
        "source_id": source_id,
        "source_type": source_type,
        "variant_label": variant_label,
        "output_file_template": '<variable_id><table><source_id><experiment_id><ensemble_label><grid_label>',
    }
    if has_realization:
        dataset_info['realization_index'] = realization_index
    return dataset_info
def create_cmor_json_config(contents, filepath):
    """Serialise ``contents`` as JSON into ``filepath``.

    The file is opened in write mode (an existing file is overwritten) and the
    JSON is written with sorted keys and a four-space indent.

    Args:
        contents: JSON-serialisable object to store.
        filepath: Destination path.

    Returns:
        ``filepath``, unchanged.
    """
    with open(filepath, 'w') as config_file:
        json.dump(contents, config_file, indent=4, sort_keys=True)
    return filepath
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment