Skip to content

API reference

earthcarekit.utils.xarray

Utilities based on xarray.

Notes

This module does not depend on other internal modules.


concat_datasets

concat_datasets(ds1: Dataset, ds2: Dataset, dim: str) -> Dataset

Concatenate two xarray.Dataset objects along a specified dimension, padding other dimensions to match.

Pads all non-concatenation dimensions in both datasets to the maximum size among them (if they differ) before concatenating. Integer variables are padded with -9999 or the data type-specific minimum value (e.g., -128 for int8); non-integer variables are padded with NaN.

Parameters:

Name Type Description Default
ds1 Dataset

The first dataset to concatenate.

required
ds2 Dataset

The second dataset to concatenate.

required
dim str

The name of the dimension to concatenate along.

required

Returns:

Name Type Description
Dataset Dataset

A new dataset resulting from the concatenation.

Source code in earthcarekit/utils/xarray/_concat.py
def concat_datasets(ds1: Dataset, ds2: Dataset, dim: str) -> Dataset:
    """Concatenate two `xarray.Dataset` objects along a specified dimension, padding other dimensions to match.

    Pads all non-concatenation dimensions in both datasets to the maximum size among them
    (if they differ) before concatenating. Integer variables are padded with -9999 or the data
    type-specific minimum value (e.g., -128 for int8), non-integer variables are padded with NaN.

    Data variables that are scalar (0-dimensional) in at least one of the datasets are handled
    separately: their values from both datasets are collected and stored along a dimension
    named "concat_dim" in the result.

    If the `encoding` of either input contains a string under the key "source", the list of
    these strings is stored in the result under `encoding["sources"]`.

    Args:
        ds1 (Dataset): The first dataset to concatenate.
        ds2 (Dataset): The second dataset to concatenate.
        dim (str): The name of the dimension to concatenate along.

    Returns:
        Dataset: A new dataset resulting from the concatenation.
    """

    def get_scalars(ds: xr.Dataset) -> list:
        """Return the names of all 0-dimensional data variables of `ds`."""
        return [k for k, v in ds.data_vars.items() if v.ndim == 0]

    # Order-preserving de-duplication: going through a plain `set` would make the order in
    # which the scalar variables are (re-)added below depend on string hashing and therefore
    # vary between interpreter runs.
    scalar_vars: list = list(dict.fromkeys(get_scalars(ds1) + get_scalars(ds2)))

    # Gather the values of every scalar variable from both inputs. `np.atleast_1d` lets a
    # variable contribute one value (0-d) or several values (already 1-d in one dataset).
    scalar_data: dict = {v: [] for v in scalar_vars}
    for v in scalar_vars:
        if v in ds1:
            scalar_data[v].extend(np.atleast_1d(ds1[v].values))
        if v in ds2:
            scalar_data[v].extend(np.atleast_1d(ds2[v].values))

    # Target size for every dimension except the one being concatenated along.
    max_dim_sizes = {
        d: max(ds1.sizes.get(d, 0), ds2.sizes.get(d, 0))
        for d in set(ds1.dims).union(ds2.dims)
        if d != dim
    }

    ds1_padded = pad_dataset(ds1, max_dim_sizes)
    ds2_padded = pad_dataset(ds2, max_dim_sizes)

    ds_combined = xr.concat([ds1_padded, ds2_padded], dim=dim, data_vars="all")

    # Remove any existing "concat_dim" (and the variables using it) so that the freshly
    # collected scalar values below can define it anew.
    if "concat_dim" in ds_combined.dims:
        ds_combined = ds_combined.drop_dims("concat_dim", errors="ignore")

    for v in scalar_vars:
        ds_combined[v] = xr.DataArray(scalar_data[v], dims=["concat_dim"])

    # Record the origins of both inputs (only string-valued "source" entries are kept).
    source1 = ds1.encoding.get("source")
    source2 = ds2.encoding.get("source")
    sources = [s for s in [source1, source2] if isinstance(s, str)]

    if sources:
        ds_combined.encoding["sources"] = sources

    return ds_combined

convert_scalar_var_to_str

convert_scalar_var_to_str(ds: Dataset, var: str) -> Dataset

Converts a given scalar variable inside an xarray.Dataset to string.

Source code in earthcarekit/utils/xarray/_scalars.py
def convert_scalar_var_to_str(ds: xr.Dataset, var: str) -> xr.Dataset:
    """Replace the scalar variable `var` of an `xarray.Dataset` with a string value.

    A `bytes` value is decoded as UTF-8; any other value is converted with `str()`.
    The variable is reassigned on the given dataset, which is then returned.
    """
    raw_value = ds[var].item()
    text = raw_value.decode("utf-8") if isinstance(raw_value, bytes) else str(raw_value)
    ds[var] = xr.DataArray(text)
    return ds

demote_coords

demote_coords(ds: Dataset, var: str, new_dim_name: str) -> Dataset

Converts a coordinate to a variable and renames the related dimension.

Source code in earthcarekit/utils/xarray/_demote_coordinate_dimension.py
def demote_coords(
    ds: xr.Dataset,
    var: str,
    new_dim_name: str,
) -> xr.Dataset:
    """Turn the coordinate `var` into a data variable and rename its dimension to `new_dim_name`.

    If `var` is not a coordinate of `ds`, the dataset is returned unchanged.
    """
    if var not in ds.coords:
        return ds

    placeholder = "tmp_var____"
    coord_values = ds.coords[var].values

    # Park the coordinate's values (and attributes) under a temporary name so that the
    # coordinate itself can be dropped before its dimension is renamed.
    demoted = ds.assign({placeholder: (var, coord_values)})
    demoted[placeholder] = demoted[placeholder].assign_attrs(demoted[var].attrs)
    demoted = demoted.drop_vars([var])
    demoted = demoted.rename({var: new_dim_name})

    # Give the parked variable its original name back.
    return demoted.rename_vars({placeholder: var})

insert_var

insert_var(
    ds: Dataset,
    var: str,
    data: Any,
    index: int | None = None,
    before_var: str | None = None,
    after_var: str | None = None,
) -> Dataset

Inserts a new variable into an xarray.Dataset before or after a given variable or at a given index.

Parameters:

Name Type Description Default
ds Dataset

The original dataset to which the variable will be added.

required
var str

Name of the new variable to be added.

required
data Any

Data stored in the new variable.

required
index int | None

Index at which the new variable will be added. Will be ignored when either before_var or after_var are given and valid. Defaults to None.

None
before_var str | None

Name of the variable before which the new variable should be inserted. Defaults to None.

None
after_var str | None

Name of the variable after which the new variable should be inserted. Will be ignored when before_var is given and valid. Defaults to None.

None

Returns:

Name Type Description
Dataset Dataset

The original dataset with the new variable inserted.

Source code in earthcarekit/utils/xarray/_insert_var.py
def insert_var(
    ds: Dataset,
    var: str,
    data: Any,
    index: int | None = None,
    before_var: str | None = None,
    after_var: str | None = None,
) -> Dataset:
    """
    Adds a variable to a `xarray.Dataset` at a chosen position among its data variables.

    The position is resolved in this order of precedence: directly before `before_var`,
    directly after `after_var`, at `index`, or (if none of these applies) at the end.
    An existing data variable named `var` is removed first.

    Args:
        ds (Dataset):
            The original dataset to which the variable will be added.
        var (str):
            Name of the new variable to be added.
        data (Any):
            Data stored in the new variable.
        index (int | None, optional):
            Index at which the new variable will be added. Will be ignored when either `before_var` or
            `after_var` are given and valid. Defaults to None.
        before_var (str | None, optional):
            Name of the variable before which the new variable should be inserted. Defaults to None.
        after_var (str | None, optional):
            Name of the variable after which the new variable should be inserted. Will be ignored
            when `before_var` is given and valid. Defaults to None.

    Returns:
        Dataset: The original dataset with the new variable inserted.
    """
    if var in ds.data_vars:
        ds = ds.drop_vars(var)

    position_requested = any(
        (
            isinstance(index, int),
            isinstance(before_var, str),
            isinstance(after_var, str),
        )
    )
    if not position_requested:
        # No placement hint at all: simply add the variable.
        ds[var] = data
        return ds

    ordered_names = list(ds.data_vars)

    if isinstance(before_var, str) and before_var in ordered_names:
        position = ordered_names.index(before_var)
    elif isinstance(after_var, str) and after_var in ordered_names:
        position = ordered_names.index(after_var) + 1
    elif isinstance(index, int):
        position = index
    else:
        # Named anchors were given but not found and no index supplied.
        position = len(ordered_names)

    ordered_names.insert(position, var)

    ds[var] = data
    # Selecting by the ordered name list rearranges the data variables.
    return ds[ordered_names]

merge_datasets

merge_datasets(ds1: Dataset, ds2: Dataset, keep_sec: bool = False) -> Dataset

Merges two datasets while keeping all global attributes from one dataset.

Source code in earthcarekit/utils/xarray/_merge.py
def merge_datasets(
    ds1: xr.Dataset,
    ds2: xr.Dataset,
    keep_sec: bool = False,
) -> xr.Dataset:
    """Merge two datasets and take global attributes and encoding from one of them.

    The merged dataset receives copies of `attrs` and `encoding` from `ds2` when
    `keep_sec` is true, otherwise from `ds1`.
    """
    merged = xr.merge([ds1, ds2])
    metadata_source = ds2 if keep_sec else ds1
    merged.attrs = metadata_source.attrs.copy()
    merged.encoding = metadata_source.encoding.copy()
    return merged

remove_dims

remove_dims(ds: Dataset, dims_to_remove: list[str]) -> Dataset

Drops a list of dimensions and all associated variables and coordinates from a given xarray.Dataset.

Source code in earthcarekit/utils/xarray/_delete.py
def remove_dims(ds: xr.Dataset, dims_to_remove: list[str]) -> xr.Dataset:
    """Remove the given dimensions from an `xarray.Dataset`, together with every variable and coordinate that uses them."""

    def uses_removed_dim(name) -> bool:
        # True when the named variable/coordinate spans at least one dimension to remove.
        own_dims = ds[name].dims
        return any(dim in own_dims for dim in dims_to_remove)

    doomed_vars = [name for name in ds.variables if uses_removed_dim(name)]
    doomed_coords = [name for name in ds.coords if uses_removed_dim(name)]

    result = ds.drop_vars(doomed_vars + doomed_coords, errors="ignore")

    # Drop any requested dimension that is still present after removing the variables.
    for dim in dims_to_remove:
        if dim in result.dims:
            result = result.drop_dims(dim)

    return result