[27]:
import roocs_utils
[28]:
# list every name available in the roocs_utils package namespace
dir(roocs_utils)
[28]:
['AreaParameter',
 'CONFIG',
 'CollectionParameter',
 'LevelParameter',
 'TimeParameter',
 '__author__',
 '__builtins__',
 '__cached__',
 '__contact__',
 '__copyright__',
 '__doc__',
 '__file__',
 '__license__',
 '__loader__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__',
 '__version__',
 'area_parameter',
 'base_parameter',
 'collection_parameter',
 'config',
 'exceptions',
 'get_config',
 'level_parameter',
 'parameter',
 'parameterise',
 'roocs_utils',
 'time_parameter',
 'utils',
 'xarray_utils']

Parameters

Parameter classes are used to parse the collection, area, time and level inputs that are passed as arguments to the subsetting operation.

The area values can be input as:

* A string of comma separated values: "0.,49.,10.,65"
* A sequence of strings: ("0", "-10", "120", "40")
* A sequence of numbers: [0, 49.5, 10, 65]

[29]:
# parse a comma-separated string of four area values into an AreaParameter
area = roocs_utils.AreaParameter("0.,49.,10.,65")

# the lat/lon bounds can be returned in a dictionary
# (keys 'lon_bnds' and 'lat_bnds'; here the 1st and 3rd input values become the
# longitude bounds and the 2nd and 4th the latitude bounds)
print(area.asdict())

# the values can be returned as a tuple (floats, in the order they were given)
print(area.tuple)
{'lon_bnds': (0.0, 10.0), 'lat_bnds': (49.0, 65.0)}
(0.0, 49.0, 10.0, 65.0)

A collection can be input as:

* A string of comma separated values: "cmip5.output1.INM.inmcm4.rcp45.mon.ocean.Omon.r1i1p1.latest.zostoga,cmip5.output1.MPI-M.MPI-ESM-LR.rcp45.mon.ocean.Omon.r1i1p1.latest.zostoga"
* A sequence of strings: e.g. ("cmip5.output1.INM.inmcm4.rcp45.mon.ocean.Omon.r1i1p1.latest.zostoga", "cmip5.output1.MPI-M.MPI-ESM-LR.rcp45.mon.ocean.Omon.r1i1p1.latest.zostoga")

[30]:
# build a CollectionParameter from a single comma-separated string of two dataset ids
collection = roocs_utils.CollectionParameter(
    "cmip5.output1.INM.inmcm4.rcp45.mon.ocean.Omon.r1i1p1.latest.zostoga,cmip5.output1.MPI-M.MPI-ESM-LR.rcp45.mon.ocean.Omon.r1i1p1.latest.zostoga"
)

# each collection id comes back as its own element of the tuple
print(collection.tuple)
('cmip5.output1.INM.inmcm4.rcp45.mon.ocean.Omon.r1i1p1.latest.zostoga', 'cmip5.output1.MPI-M.MPI-ESM-LR.rcp45.mon.ocean.Omon.r1i1p1.latest.zostoga')

Level can be input as:

* A string of slash separated values: "1000/2000"
* A sequence of strings: e.g. ("1000.50", "2000.60")
* A sequence of numbers: e.g. (1000.50, 2000.60)

Level inputs should be a range of the levels you want to subset over

[31]:
# a pair of numbers gives the two ends of the level range
level = roocs_utils.LevelParameter((1000.50, 2000.60))

# the first and last level in the range provided can be returned in a dictionary
# (keys 'first_level' and 'last_level')
print(level.asdict())

# the values can be returned as a tuple
print(level.tuple)
{'first_level': 1000.5, 'last_level': 2000.6}
(1000.5, 2000.6)

Time can be input as:

* A string of slash separated values: "2085-01-01T12:00:00Z/2120-12-30T12:00:00Z"
* A sequence of strings: e.g. ("2085-01-01T12:00:00Z", "2120-12-30T12:00:00Z")

Time inputs should be the start and end of the time range you want to subset over

[32]:
# a "start/end" string holding two date-times gives the time range
time = roocs_utils.TimeParameter("2085-01-01T12:00:00Z/2120-12-30T12:00:00Z")

# the first and last time in the range provided can be returned in a dictionary
# (keys 'start_time' and 'end_time'; the trailing 'Z' is reported back as '+00:00')
print(time.asdict())

# the values can be returned as a tuple
print(time.tuple)
{'start_time': '2085-01-01T12:00:00+00:00', 'end_time': '2120-12-30T12:00:00+00:00'}
('2085-01-01T12:00:00+00:00', '2120-12-30T12:00:00+00:00')

The `parameterise` function converts raw inputs into instances of the parameter classes, which allows them to be used throughout roocs.

[33]:
# convert all four raw inputs in one call; the result is a dictionary keyed by
# 'collection', 'area', 'level' and 'time'
roocs_utils.parameter.parameterise(
    "cmip5.output1.INM.inmcm4.rcp45.mon.ocean.Omon.r1i1p1.latest.zostoga",  # collection
    "0.,49.,10.,65",  # area
    (1000.50, 2000.60),  # level
    "2085-01-01T12:00:00Z/2120-12-30T12:00:00Z",  # time
)
[33]:
{'collection': Datasets to analyse:
 cmip5.output1.INM.inmcm4.rcp45.mon.ocean.Omon.r1i1p1.latest.zostoga,
 'area': Area to subset over:
  (0.0, 49.0, 10.0, 65.0),
 'level': Level range to subset over
  first_level: 1000.5
  last_level: 2000.6,
 'time': Time period to subset over
  start time: 2085-01-01T12:00:00+00:00
  end time: 2120-12-30T12:00:00+00:00}

Xarray utils

Xarray utils can be used to identify the main variable in a dataset, as well as to identify the type of a coordinate or to return a coordinate based on an attribute or a type.

[34]:
from roocs_utils.xarray_utils import xarray_utils as xu
import xarray as xr
[35]:
# open all NetCDF files matching the glob as one dataset;
# use_cftime=True makes the time axis decode to cftime objects
ds = xr.open_mfdataset(
    "../tests/mini-esgf-data/test_data/badc/cmip5/data/cmip5/output1/MOHC/HadGEM2-ES/rcp85/mon/atmos/Amon/r1i1p1/latest/tas/*.nc",
    use_cftime=True,
    combine="by_coords",
)
[36]:
# find the main variable of the dataset (returned as its name, here 'tas')
main_var = xu.get_main_variable(ds)

print("main var =", main_var)

# index the dataset by that name to display the matching DataArray
ds[main_var]
main var = tas
[36]:
<xarray.DataArray 'tas' (time: 3530, lat: 2, lon: 2)>
dask.array<concatenate, shape=(3530, 2, 2), dtype=float32, chunksize=(300, 2, 2), chunktype=numpy.ndarray>
Coordinates:
    height   float64 1.5
  * lat      (lat) float64 -90.0 35.0
  * lon      (lon) float64 0.0 187.5
  * time     (time) object 2005-12-16 00:00:00 ... 2299-12-16 00:00:00
Attributes:
    standard_name:     air_temperature
    long_name:         Near-Surface Air Temperature
    comment:           near-surface (usually, 2 meter) air temperature.
    units:             K
    original_name:     mo: m01s03i236
    cell_methods:      time: mean
    cell_measures:     area: areacella
    history:           2010-12-04T13:50:30Z altered by CMOR: Treated scalar d...
    associated_files:  baseURL: http://cmip-pcmdi.llnl.gov/CMIP5/dataLocation...
[37]:
# report the type detected for each coordinate of the dataset

for coord in ds.coords:
    # the space before each "\n" mirrors the separator print() would insert between arguments
    print(f"\ncoord name = {coord} \ncoord type = {xu.get_coord_type(ds[coord])}")

print("\n There is a level, time, latitude and longitude coordinate in this dataset")

coord name = height
coord type = level

coord name = lat
coord type = latitude

coord name = lon
coord type = longitude

coord name = time
coord type = time

 There is a level, time, latitude and longitude coordinate in this dataset
[38]:
# to check the type of a coord

print(xu.is_level(ds["height"]))
# a coordinate that does not match the requested type gives None (not False),
# as the second line of output shows
print(xu.is_latitude(ds["lon"]))
True
None
[39]:
# to find a coordinate of a specific type

print("time =", xu.get_coord_by_type(ds, "time"))

# to find the level coordinate, set ignore_aux_coords to False
# (height is a scalar coordinate in this dataset, not one of its dimensions)
# NOTE(review): presumably it is skipped by default because it counts as an
# auxiliary coordinate - confirm against the roocs_utils documentation

print("\nlevel =", xu.get_coord_by_type(ds, "level", ignore_aux_coords=False))
time = <xarray.DataArray 'time' (time: 3530)>
array([cftime.Datetime360Day(2005, 12, 16, 0, 0, 0, 0),
       cftime.Datetime360Day(2006, 1, 16, 0, 0, 0, 0),
       cftime.Datetime360Day(2006, 2, 16, 0, 0, 0, 0), ...,
       cftime.Datetime360Day(2299, 10, 16, 0, 0, 0, 0),
       cftime.Datetime360Day(2299, 11, 16, 0, 0, 0, 0),
       cftime.Datetime360Day(2299, 12, 16, 0, 0, 0, 0)], dtype=object)
Coordinates:
    height   float64 1.5
  * time     (time) object 2005-12-16 00:00:00 ... 2299-12-16 00:00:00
Attributes:
    bounds:         time_bnds
    axis:           T
    long_name:      time
    standard_name:  time

level = <xarray.DataArray 'height' ()>
array(1.5)
Coordinates:
    height   float64 1.5
Attributes:
    units:          m
    axis:           Z
    positive:       up
    long_name:      height
    standard_name:  height
[40]:
# to find a coordinate based on an attribute you expect it to have
# (here: the coordinate whose "standard_name" attribute is "latitude")

xu.get_coord_by_attr(ds, "standard_name", "latitude")
[40]:
<xarray.DataArray 'lat' (lat: 2)>
array([-90.,  35.])
Coordinates:
    height   float64 1.5
  * lat      (lat) float64 -90.0 35.0
Attributes:
    bounds:         lat_bnds
    units:          degrees_north
    axis:           Y
    long_name:      latitude
    standard_name:  latitude

Other utilities

Other utilities allow parsing a memory size of any unit into bytes and converting a time object into an ISO 8601 string

[41]:
from roocs_utils.utils.common import parse_size
from roocs_utils.utils.time_utils import to_isoformat
from datetime import datetime
[42]:
# to parse a size into bytes
size = '50MiB'
size_in_b = parse_size(size)
# the result is a float: 50 MiB = 50 * 1024 * 1024 = 52428800.0 bytes
size_in_b
[42]:
52428800.0
[43]:
# to convert a time object into a time string
# (the datetime is named `timestamp` so that it does not overwrite the `time`
# TimeParameter created in an earlier cell, which keeps that cell re-runnable)
timestamp = datetime(2005, 7, 14, 12, 30)
time_str = to_isoformat(timestamp)
time_str
[43]:
'2005-07-14T12:30:00'