Skip to content

Commit bbd307d

Browse files
Cache regridding weights if possible (#2344)
Co-authored-by: Valeriu Predoi <valeriu.predoi@gmail.com>
1 parent 6cf32c7 commit bbd307d

File tree

6 files changed

+383
-181
lines changed

6 files changed

+383
-181
lines changed

doc/recipe/preprocessor.rst

Lines changed: 32 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -780,10 +780,6 @@ regridding is based on the horizontal grid of another cube (the reference
780780
grid). If the horizontal grids of a cube and its reference grid are sufficiently
781781
similar, regridding is automatically and silently skipped for performance reasons.
782782

783-
The underlying regridding mechanism in ESMValCore uses
784-
:obj:`iris.cube.Cube.regrid`
785-
from Iris.
786-
787783
The use of the horizontal regridding functionality is flexible depending on
788784
what type of reference grid and what interpolation scheme is preferred. Below
789785
we show a few examples.
@@ -821,7 +817,7 @@ cell specification is oftentimes used when operating on localized data.
821817
target_grid: 2.5x2.5
822818
scheme: nearest
823819
824-
In this case the ``NearestNeighbour`` interpolation scheme is used (see below
820+
In this case the nearest-neighbor interpolation scheme is used (see below
825821
for scheme definitions).
826822

827823
When using a ``MxN`` type of grid it is possible to offset the grid cell
@@ -917,9 +913,6 @@ Built-in regridding schemes
917913
:class:`~esmvalcore.preprocessor.regrid_schemes.ESMPyAreaWeighted`.
918914
Source data on an unstructured grid is not yet supported.
919915

920-
See also :func:`esmvalcore.preprocessor.regrid`
921-
922-
923916
.. _generic regridding schemes:
924917

925918
Generic regridding schemes
@@ -1017,6 +1010,37 @@ scheme available in :doc:`iris-esmf-regrid:index`:
10171010
reference: esmf_regrid.schemes:regrid_rectilinear_to_rectilinear
10181011
mdtol: 0.7
10191012
1013+
.. _caching_regridding_weights:
1014+
1015+
Reusing regridding weights
1016+
--------------------------
1017+
1018+
If desired, regridding weights can be cached to reduce run times (see `here
1019+
<https://scitools-iris.readthedocs.io/en/latest/userguide/interpolation_and_regridding.html#caching-a-regridder>`__
1020+
for technical details on this).
1021+
This can speed up the regridding of different datasets with similar source and
1022+
target grids massively, but may take up a lot of memory for extremely
1023+
high-resolution data.
1024+
By default, this feature is disabled; to enable it, use the option
1025+
``cache_weights: true`` in the preprocessor definition:
1026+
1027+
.. code-block:: yaml
1028+
1029+
preprocessors:
1030+
regrid_preprocessor:
1031+
regrid:
1032+
target_grid: 0.1x0.1
1033+
scheme: linear
1034+
cache_weights: true
1035+
1036+
Not all regridding schemes support weights caching. An overview of those that
1037+
do is given `here
1038+
<https://scitools-iris.readthedocs.io/en/latest/further_topics/which_regridder_to_use.html#which-regridder-to-use>`__
1039+
and in the docstrings :ref:`here <regridding_schemes>`.
1040+
1041+
See also :func:`esmvalcore.preprocessor.regrid`
1042+
1043+
10201044
.. _ensemble statistics:
10211045

10221046
Ensemble statistics

esmvalcore/preprocessor/_regrid.py

Lines changed: 92 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -500,8 +500,7 @@ def _get_target_grid_cube(
500500
elif isinstance(target_grid, (str, Path)) and os.path.isfile(target_grid):
501501
target_grid_cube = iris.load_cube(target_grid)
502502
elif isinstance(target_grid, str):
503-
# Generate a target grid from the provided cell-specification,
504-
# and cache the resulting stock cube for later use.
503+
# Generate a target grid from the provided cell-specification
505504
target_grid_cube = _global_stock_cube(
506505
target_grid, lat_offset, lon_offset
507506
)
@@ -639,12 +638,82 @@ def _load_generic_scheme(scheme: dict):
639638
return loaded_scheme
640639

641640

641+
# Cache of regridders used when weights caching is enabled. It is a two-level
# mapping: the outer key is the tuple built by `_get_name_and_shape_key`
# (scheme name plus coordinate shapes); the inner dict maps the coordinate
# tuple returned by `_get_coord_key` to the regridder created for it.
_CACHED_REGRIDDERS: dict[tuple, dict] = {}
642+
643+
644+
def _get_regridder(
    src_cube: Cube,
    tgt_cube: Cube,
    scheme: str | dict,
    cache_weights: bool,
):
    """Return the regridder that performs the actual regridding.

    Parameters
    ----------
    src_cube:
        Cube providing the source grid.
    tgt_cube:
        Cube providing the target grid.
    scheme:
        Regridding scheme (name or dictionary specification).
    cache_weights:
        If ``True``, look for a matching regridder in the module-level
        cache and, if none is found, create one and store it there. If
        ``False``, always create a new regridder and leave the cache
        untouched.

    Returns
    -------
    The regridder object for `src_cube`, `tgt_cube` and `scheme`.

    Note
    ----
    If possible, an existing regridder is reused to reduce runtime, see
    https://scitools-iris.readthedocs.io/en/latest/userguide/
    interpolation_and_regridding.html#caching-a-regridder

    """
    # Weights caching disabled: build a new regridder, do not touch the cache
    if not cache_weights:
        return _load_scheme(src_cube, scheme).regridder(src_cube, tgt_cube)

    # Weights caching enabled: the cache is searched in two stages. The
    # cheap stage compares the scheme name and the shapes of the source and
    # target coordinates; only if these match are the coordinates
    # themselves compared (which is much more expensive).
    coord_key = _get_coord_key(src_cube, tgt_cube)
    name_shape_key = _get_name_and_shape_key(src_cube, tgt_cube, scheme)

    # A plain `coord_key in candidates` test is not sufficient here since
    # the hash() of a coordinate is simply its id() (thus, coordinates
    # loaded from two different files would never be considered equal).
    # Comparing the keys with `==` one by one avoids this.
    candidates = _CACHED_REGRIDDERS.get(name_shape_key, {})
    for cached_key, cached_regridder in candidates.items():
        if cached_key == coord_key:
            return cached_regridder

    # No matching regridder in the cache -> create a new one and cache it
    new_regridder = _load_scheme(src_cube, scheme).regridder(
        src_cube, tgt_cube
    )
    _CACHED_REGRIDDERS.setdefault(name_shape_key, {})[coord_key] = (
        new_regridder
    )
    return new_regridder
688+
689+
690+
def _get_coord_key(src_cube: Cube, tgt_cube: Cube) -> tuple:
    """Build the cache key made of the horizontal coordinates.

    Returns
    -------
    tuple
        ``(source latitude, source longitude, target latitude, target
        longitude)`` as returned by ``cube.coord(...)``.

    """
    return tuple(
        cube.coord(coord_name)
        for cube in (src_cube, tgt_cube)
        for coord_name in ('latitude', 'longitude')
    )
697+
698+
699+
def _get_name_and_shape_key(
    src_cube: Cube,
    tgt_cube: Cube,
    scheme: str | dict,
) -> tuple:
    """Build the cache key made of scheme name and coordinate shapes.

    Returns
    -------
    tuple
        ``str(scheme)`` followed by the ``shape`` of each coordinate
        returned by :func:`_get_coord_key`, in the same order.

    """
    return (
        str(scheme),
        *(coord.shape for coord in _get_coord_key(src_cube, tgt_cube)),
    )
708+
709+
642710
def regrid(
643711
cube: Cube,
644712
target_grid: Cube | Dataset | Path | str | dict,
645713
scheme: str | dict,
646714
lat_offset: bool = True,
647715
lon_offset: bool = True,
716+
cache_weights: bool = False,
648717
) -> Cube:
649718
"""Perform horizontal regridding.
650719
@@ -691,6 +760,14 @@ def regrid(
691760
Offset the grid centers of the longitude coordinate w.r.t. Greenwich
692761
meridian by half a grid step. This argument is ignored if
693762
`target_grid` is a cube or file.
763+
cache_weights:
764+
If ``True``, cache regridding weights for later usage. This can speed
765+
up the regridding of different datasets with similar source and target
766+
grids massively, but may take up a lot of memory for extremely
767+
high-resolution data. This option is ignored for schemes that do not
768+
support weights caching. More details on this are given in the section
769+
on :ref:`caching_regridding_weights`. To clear the cache, use
770+
:func:`esmvalcore.preprocessor.regrid.cache_clear`.
694771
695772
Returns
696773
-------
@@ -757,16 +834,26 @@ def regrid(
757834
)
758835
return cube
759836

760-
# Load scheme, rechunk and regrid
837+
# Load scheme and reuse existing regridder if possible
761838
if isinstance(scheme, str):
762839
scheme = scheme.lower()
763-
loaded_scheme = _load_scheme(cube, scheme)
840+
regridder = _get_regridder(cube, target_grid_cube, scheme, cache_weights)
841+
842+
# Rechunk and actually perform the regridding
764843
cube = _rechunk(cube, target_grid_cube)
765-
cube = cube.regrid(target_grid_cube, loaded_scheme)
844+
cube = regridder(cube)
766845

767846
return cube
768847

769848

849+
def _cache_clear():
    """Remove all cached regridders.

    Empties the module-level ``_CACHED_REGRIDDERS`` dictionary in place.
    """
    _CACHED_REGRIDDERS.clear()
852+
853+
854+
# Attach the cache-clearing helper to the public `regrid` function so that the
# cache can be emptied via `regrid.cache_clear()`.
regrid.cache_clear = _cache_clear  # type: ignore
855+
856+
770857
def _rechunk(cube: Cube, target_grid: Cube) -> Cube:
771858
"""Re-chunk cube with optimal chunk sizes for target grid."""
772859
if not cube.has_lazy_data() or cube.ndim < 3:

esmvalcore/preprocessor/_regrid_esmpy.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@
4343
class ESMPyRegridder:
4444
"""General ESMPy regridder.
4545
46+
Does not support lazy regridding nor weights caching.
47+
4648
Parameters
4749
----------
4850
src_cube:

esmvalcore/preprocessor/regrid_schemes.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,9 @@
3131
class GenericRegridder:
3232
r"""Generic function regridder.
3333
34+
Does support lazy regridding if `func` does. Does not support weights
35+
caching.
36+
3437
Parameters
3538
----------
3639
src_cube:

0 commit comments

Comments
 (0)