diff --git a/docs/_code/prepare_hef.py b/docs/_code/prepare_hef.py index 297b75dad..e680d63bd 100644 --- a/docs/_code/prepare_hef.py +++ b/docs/_code/prepare_hef.py @@ -46,7 +46,7 @@ df = workflow.calibrate_inversion_from_consensus(gdir, volume_m3_reference=refv) np.testing.assert_allclose(refv, df.vol_oggm_m3, rtol=0.01) -cl = gdir.read_pickle('inversion_input')[-1] +cl = gdir.read_store('inversion_input')[-1] mbmod = massbalance.ConstantMassBalance(gdir, y0=1985) mbx = mbmod.get_annual_mb(cl['hgt']) * cfg.SEC_IN_YEAR * cfg.PARAMS['ice_density'] fdf = pd.DataFrame(index=np.arange(len(mbx))*cl['dx']) diff --git a/docs/api.rst b/docs/api.rst index f0be130d0..5e26b9821 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -476,7 +476,7 @@ level 5. prepro_border=80) gdir = gdirs[0] # init_glacier_directories always returns a list - fls = gdir.read_pickle('model_flowlines') + fls = gdir.read_store('model_flowlines') fls [fl.order for fl in fls] diff --git a/oggm/cfg.py b/oggm/cfg.py index 62376badd..5a009f18a 100644 --- a/oggm/cfg.py +++ b/oggm/cfg.py @@ -300,6 +300,10 @@ def _log_param_change(self, key, value): _doc = "The outlines of this glacier complex sub-entities (for RGI7C only!)." BASENAMES['complex_sub_entities'] = ('complex_sub_entities.shp', _doc) +_doc = "A zarr store containing previous pickled data." +BASENAMES['data_store'] = ('data_store.zarr', _doc) + + def set_logging_config(logging_level='INFO'): """Set the global logger parameters. diff --git a/oggm/cli/benchmark.py b/oggm/cli/benchmark.py index 227ef2a3c..ba96d7e57 100644 --- a/oggm/cli/benchmark.py +++ b/oggm/cli/benchmark.py @@ -73,8 +73,9 @@ def run_benchmark(rgi_version=None, rgi_reg=None, border=None, # Initialize OGGM and set up the run parameters cfg.initialize(logging_level=logging_level, params=override_params) - # Use multiprocessing? - cfg.PARAMS['use_multiprocessing'] = platform.system() != 'Darwin' + # Allow multiprocessing override in tests/platform + if 'use_multiprocessing' not in override_params: + cfg.PARAMS['use_multiprocessing'] = platform.system() != 'Darwin' # How many grid points around the glacier? # Make it large if you expect your glaciers to grow large diff --git a/oggm/cli/prepro_levels.py b/oggm/cli/prepro_levels.py index 891f455a4..e52c10579 100644 --- a/oggm/cli/prepro_levels.py +++ b/oggm/cli/prepro_levels.py @@ -392,27 +392,32 @@ def _time_log(): # Try to avoid concurrency if rgi_version == '70C': - fp = file_downloader('https://cluster.klima.uni-bremen.de/~oggm/' - 'ref_mb_params/oggm_v1.6/inv_rgi7/' - 'rgi7c_rgi_year_2025.1.csv') - rgi_year_by_id = pd.read_csv(fp, index_col=0)['rgi_year'].astype(int).astype(str) - rgidf['src_date'] = rgidf['rgi_id'].map(rgi_year_by_id) + '-01-01 00:00:00' + from oggm.utils._downloads import get_lock + with get_lock(): + fp = file_downloader('https://cluster.klima.uni-bremen.de/~oggm/' + 'ref_mb_params/oggm_v1.6/inv_rgi7/' + 'rgi7c_rgi_year_2025.1.csv') + rgi_year_by_id = pd.read_csv(fp, index_col=0)['rgi_year'].astype(int).astype(str) + rgidf['src_date'] = rgidf['rgi_id'].map(rgi_year_by_id) + '-01-01 00:00:00' # Add a new default source if not dem_source: fs_url = 'https://cluster.klima.uni-bremen.de/~oggm/gdirs/oggm_v1.6/rgitopo/2025.4/' if rgi_version == '62': - fs = utils.file_downloader(fs_url + 'chosen_dem_RGI62_20251029.csv') - dfs = pd.read_csv(fs, index_col=0) - rgidf['dem_source'] = dfs.loc[rgidf['RGIId'], 'dem_source'].values + with get_lock(): + fs = utils.file_downloader(fs_url + 'chosen_dem_RGI62_20251029.csv') + dfs = pd.read_csv(fs, index_col=0) + rgidf['dem_source'] = dfs.loc[rgidf['RGIId'], 'dem_source'].values if rgi_version == '70G': - fs = utils.file_downloader(fs_url + 'chosen_dem_RGI70G_20251029.csv') - dfs = pd.read_csv(fs, index_col=0) - rgidf['dem_source'] = dfs.loc[rgidf['rgi_id'], 'dem_source'].values + with get_lock(): + fs = utils.file_downloader(fs_url + 'chosen_dem_RGI70G_20251029.csv') + dfs = pd.read_csv(fs, index_col=0) + rgidf['dem_source'] = dfs.loc[rgidf['rgi_id'], 'dem_source'].values if rgi_version == '70C': - fs = utils.file_downloader(fs_url + 'chosen_dem_RGI70C_20251029.csv') - dfs = pd.read_csv(fs, index_col=0) - rgidf['dem_source'] = dfs.loc[rgidf['rgi_id'], 'dem_source'].values + with get_lock(): + fs = utils.file_downloader(fs_url + 'chosen_dem_RGI70C_20251029.csv') + dfs = pd.read_csv(fs, index_col=0) + rgidf['dem_sourc`e'] = dfs.loc[rgidf['rgi_id'], 'dem_source'].values # L0 - go if start_level == 0: diff --git a/oggm/core/centerlines.py b/oggm/core/centerlines.py index 4c5d038bc..48a7c682f 100644 --- a/oggm/core/centerlines.py +++ b/oggm/core/centerlines.py @@ -905,7 +905,7 @@ def compute_centerlines(gdir, heads=None): raise InvalidParamsError('`force_one_flowline` is deprecated') # open - geom = gdir.read_pickle('geometries') + geom = gdir.read_store('geometries') grids_file = gdir.get_filepath('gridded_data') with utils.ncDataset(grids_file) as nc: # Variables @@ -974,7 +974,7 @@ def compute_centerlines(gdir, heads=None): 'found!'.format(gdir.rgi_id)) # Write the data - gdir.write_pickle(cls, 'centerlines') + gdir.write_store(cls, 'centerlines') if is_first_call: # For diagnostics of filtered centerlines @@ -1009,7 +1009,7 @@ def compute_downstream_line(gdir): # Look for the starting points try: # Normal OGGM flowlines - p = gdir.read_pickle('centerlines')[-1].tail + p = gdir.read_store('centerlines')[-1].tail head = (int(p.y), int(p.x)) except FileNotFoundError: # Squeezes lines @@ -1050,7 +1050,7 @@ def compute_downstream_line(gdir): if line is None: raise GeometryError('Downstream line not found') - cl = gdir.read_pickle('inversion_flowlines')[-1] + cl = gdir.read_store('inversion_flowlines')[-1] if cl.line is not None: # normal OGGM lines lline, dline = _line_extend(cl.line, line, cl.dx) @@ -1060,7 +1060,7 @@ def compute_downstream_line(gdir): _, dline = _line_extend(shpg.LineString(), line, cl.dx) out = dict(full_line=None, downstream_line=dline) - gdir.write_pickle(out, 'downstream_line') + gdir.write_store(out, 'downstream_line') def _approx_parabola(x, y, y0=0): @@ -1282,8 +1282,8 @@ def compute_downstream_bedshape(gdir): return # We make a flowline out of the downstream for simplicity - tpl = gdir.read_pickle('inversion_flowlines')[-1] - cl = gdir.read_pickle('downstream_line')['downstream_line'] + tpl = gdir.read_store('inversion_flowlines')[-1] + cl = gdir.read_store('downstream_line')['downstream_line'] cl = Centerline(cl, dx=tpl.dx, map_dx=gdir.grid.dx) # Topography @@ -1316,11 +1316,13 @@ def compute_downstream_bedshape(gdir): assert np.all(w0s >= w0_min), 'np.all(w0s >= w0_min)' # write output - out = gdir.read_pickle('downstream_line') + out = gdir.read_store('downstream_line') out['bedshapes'] = bs out['surface_h'] = hgts out['w0s'] = w0s - gdir.write_pickle(out, 'downstream_line') + from oggm.utils.geozarr import convert_pickles_to_datatree + convert_pickles_to_datatree({'downstream_line': gdir.read_store('downstream_line')}) + gdir.write_store(out, 'downstream_line') def _mask_to_polygon(mask, gdir=None): @@ -1562,8 +1564,8 @@ def catchment_area(gdir): """ # Variables - cls = gdir.read_pickle('centerlines') - geom = gdir.read_pickle('geometries') + cls = gdir.read_store('centerlines') + geom = gdir.read_store('geometries') glacier_pix = geom['polygon_pix'] fpath = gdir.get_filepath('gridded_data') with utils.ncDataset(fpath) as nc: @@ -1576,7 +1578,7 @@ def catchment_area(gdir): if len(cls) == 1: cl_catchments = [np.array(np.nonzero(glacier_mask == 1)).T] geom['catchment_indices'] = cl_catchments - gdir.write_pickle(geom, 'geometries') + gdir.write_store(geom, 'geometries', use_pickle=True) return # Cost array @@ -1659,7 +1661,7 @@ def catchment_area(gdir): # Write the data geom['catchment_indices'] = cl_catchments - gdir.write_pickle(geom, 'geometries') + gdir.write_store(geom, 'geometries', use_pickle=True) @entity_task(log, writes=['flowline_catchments', 'catchments_intersects']) @@ -1676,7 +1678,7 @@ def catchment_intersections(gdir): where to write the data """ - catchment_indices = gdir.read_pickle('geometries')['catchment_indices'] + catchment_indices = gdir.read_store('geometries')['catchment_indices'] # Loop over the lines mask = np.zeros((gdir.grid.ny, gdir.grid.nx)) @@ -1722,7 +1724,7 @@ def initialize_flowlines(gdir): """ # variables - cls = gdir.read_pickle('centerlines') + cls = gdir.read_store('centerlines') # Initialise the flowlines dx = cfg.PARAMS['flowline_dx'] @@ -1811,7 +1813,7 @@ def initialize_flowlines(gdir): fl.set_flows_to(fls[cls.index(cl.flows_to)]) # Write the data - gdir.write_pickle(fls, 'inversion_flowlines') + gdir.write_store(fls, 'inversion_flowlines') gdir.add_to_diagnostics('flowline_type', 'centerlines') if do_filter: out = diag_n_bad_slopes/diag_n_pix @@ -1831,8 +1833,8 @@ def catchment_width_geom(gdir): """ # variables - flowlines = gdir.read_pickle('inversion_flowlines') - catchment_indices = gdir.read_pickle('geometries')['catchment_indices'] + flowlines = gdir.read_store('inversion_flowlines') + catchment_indices = gdir.read_store('geometries')['catchment_indices'] # Topography is to filter the unrealistic lines afterwards. # I take the non-smoothed topography @@ -1928,8 +1930,8 @@ def catchment_width_geom(gdir): fl.geometrical_widths = wlines fl.is_rectangular = is_rectangular - # Overwrite pickle - gdir.write_pickle(flowlines, 'inversion_flowlines') + # Overwrite + gdir.write_store(flowlines, 'inversion_flowlines') @entity_task(log, writes=['inversion_flowlines']) @@ -1949,8 +1951,8 @@ def catchment_width_correction(gdir): """ # variables - fls = gdir.read_pickle('inversion_flowlines') - catchment_indices = gdir.read_pickle('geometries')['catchment_indices'] + fls = gdir.read_store('inversion_flowlines') + catchment_indices = gdir.read_store('geometries')['catchment_indices'] # Topography for altitude-area distribution # I take the non-smoothed topography and remove the borders @@ -2077,8 +2079,8 @@ def catchment_width_correction(gdir): for fl in fls: fl.widths *= fac - # Overwrite centerlines - gdir.write_pickle(fls, 'inversion_flowlines') + # Overwrite + gdir.write_store(fls, 'inversion_flowlines') @entity_task(log, writes=['inversion_flowlines']) @@ -2099,7 +2101,7 @@ def terminus_width_correction(gdir, new_width=None): """ # variables - fls = gdir.read_pickle('inversion_flowlines') + fls = gdir.read_store('inversion_flowlines') fl = fls[-1] mapdx = gdir.grid.dx @@ -2120,8 +2122,8 @@ def terminus_width_correction(gdir, new_width=None): width[:-5] = width[:-5] * cor_factor fl.widths = width - # Overwrite centerlines - gdir.write_pickle(fls, 'inversion_flowlines') + # Overwrite + gdir.write_store(fls, 'inversion_flowlines') def intersect_downstream_lines(gdir, candidates=None): @@ -2152,7 +2154,7 @@ def intersect_downstream_lines(gdir, candidates=None): buffer = cfg.PARAMS['kbuffer'] # get main glacier downstream line and CRS - dline = gdir.read_pickle('downstream_line')['full_line'] + dline = gdir.read_store('downstream_line')['full_line'] crs = gdir.grid # return list @@ -2165,7 +2167,7 @@ def intersect_downstream_lines(gdir, candidates=None): continue # get tributary glacier downstream line and CRS - _dline = trib.read_pickle('downstream_line')['full_line'] + _dline = trib.read_store('downstream_line')['full_line'] _crs = trib.grid # use salem to transform the grids @@ -2449,5 +2451,5 @@ def fixed_dx_elevation_band_flowline(gdir, bin_variables=None, fl.is_rectangular[-5:] = True fl.is_trapezoid[-5:] = False - gdir.write_pickle([fl], 'inversion_flowlines') + gdir.write_store([fl], 'inversion_flowlines') gdir.add_to_diagnostics('flowline_type', 'elevation_band') diff --git a/oggm/core/dynamic_spinup.py b/oggm/core/dynamic_spinup.py index e4c8fd68c..9fd5407aa 100644 --- a/oggm/core/dynamic_spinup.py +++ b/oggm/core/dynamic_spinup.py @@ -3,6 +3,7 @@ import logging import copy import os +import shutil import warnings # External libs @@ -259,7 +260,7 @@ def run_dynamic_spinup(gdir, init_model_filesuffix=None, init_model_yr=None, init_model_fls = fmod.fls if init_model_fls is None: - fls_spinup = gdir.read_pickle('model_flowlines', + fls_spinup = gdir.read_store('model_flowlines', filesuffix=model_flowline_filesuffix) else: fls_spinup = copy.deepcopy(init_model_fls) @@ -1118,7 +1119,7 @@ def dynamic_melt_f_run_with_dynamic_spinup( # ATTENTION: it is assumed that the flowlines in gdir have the volume # we want to match during calibrate_inversion_from_consensus when we # set_local_variables - fls_ref = gdir.read_pickle('model_flowlines') + fls_ref = gdir.read_store('model_flowlines') local_variables['vol_m3_ref'] = np.sum([f.volume_m3 for f in fls_ref]) # we are done with preparing the local_variables for the upcoming iterations @@ -1150,7 +1151,7 @@ def dynamic_melt_f_run_with_dynamic_spinup( np.all(getattr(fl_prov, 'bed_h') == getattr(fl_orig, 'bed_h')) for fl_prov, fl_orig in - zip(fls_init, gdir.read_pickle('model_flowlines'))]): + zip(fls_init, gdir.read_store('model_flowlines'))]): raise InvalidWorkflowError('If you want to perform a dynamic ' 'melt_f calibration including an ' 'inversion, it is not possible to ' @@ -1400,6 +1401,16 @@ def dynamic_melt_f_run_with_dynamic_spinup_fallback( 'model_flowlines_dyn_melt_f_calib.pkl')): os.remove(os.path.join(gdir.dir, 'model_flowlines_dyn_melt_f_calib.pkl')) + zarr_fp = gdir.get_filepath("data_store").replace(".pkl", ".zarr") + zarr_group = os.path.join(zarr_fp, "model_flowlines__dyn_melt_f_calib") + if os.path.exists(zarr_group): + shutil.rmtree(zarr_group) + try: + import zarr as _zarr + + _zarr.consolidate_metadata(zarr_fp) + except Exception: + pass if target_yr is None: target_yr = gdir.rgi_date + 1 # + 1 converted to hydro years @@ -1869,7 +1880,7 @@ def run_dynamic_melt_f_calibration( init_model_fls = fmod.fls if init_model_fls is None: - fls_init = gdir.read_pickle('model_flowlines') + fls_init = gdir.read_store('model_flowlines') else: fls_init = copy.deepcopy(init_model_fls) diff --git a/oggm/core/flowline.py b/oggm/core/flowline.py index 7ae79f2ba..b7dda13c9 100644 --- a/oggm/core/flowline.py +++ b/oggm/core/flowline.py @@ -52,7 +52,7 @@ class Flowline(Centerline): def __init__(self, line=None, dx=1, map_dx=None, surface_h=None, bed_h=None, rgi_id=None, - water_level=None, gdir=None): + water_level=None, gdir=None, grid=None): """ Initialize a Flowline Parameters @@ -71,6 +71,7 @@ def __init__(self, line=None, dx=1, map_dx=None, The glacier's RGI identifier water_level : float The water level (to compute volume below sea-level) + grid : :py:class:`salem.Grid` """ # This is do add flexibility for testing @@ -79,6 +80,8 @@ def __init__(self, line=None, dx=1, map_dx=None, if line is None: coords = np.arange(len(surface_h)) * dx line = shpg.LineString(np.vstack([coords, coords * 0.]).T) + if gdir is not None: + grid = gdir.grid super(Flowline, self).__init__(line, dx, surface_h) @@ -91,8 +94,8 @@ def __init__(self, line=None, dx=1, map_dx=None, self._point_lons = None self._point_lats = None self.map_trafo = None - if gdir is not None: - self.map_trafo = partial(gdir.grid.ij_to_crs, crs=salem.wgs84) + if grid is not None: + self.map_trafo = partial(grid.ij_to_crs, crs=salem.wgs84) # volume not yet removed from the flowline self.calving_bucket_m3 = 0 @@ -458,7 +461,7 @@ class MixedBedFlowline(Flowline): def __init__(self, *, line=None, dx=None, map_dx=None, surface_h=None, bed_h=None, section=None, bed_shape=None, is_trapezoid=None, lambdas=None, widths_m=None, rgi_id=None, - water_level=None, gdir=None): + water_level=None, gdir=None, grid=None): """ Instantiate. Parameters @@ -472,7 +475,7 @@ def __init__(self, *, line=None, dx=None, map_dx=None, surface_h=None, bed_h=bed_h.copy(), rgi_id=rgi_id, water_level=water_level, - gdir=gdir) + gdir=gdir, grid=grid) # To speedup calculations if no trapezoid bed is present self._do_trapeze = np.any(is_trapezoid) @@ -3208,11 +3211,11 @@ def init_present_time_glacier(gdir, filesuffix='', """ # Some vars - invs = gdir.read_pickle('inversion_output') + invs = gdir.read_store('inversion_output') map_dx = gdir.grid.dx def_lambda = cfg.PARAMS['trapezoid_lambdas'] - cls = gdir.read_pickle('inversion_flowlines') + cls = gdir.read_store('inversion_flowlines') # Fill the tributaries new_fls = [] @@ -3272,7 +3275,7 @@ def init_present_time_glacier(gdir, filesuffix='', if not gdir.is_tidewater and inv['is_last']: # for valley glaciers, simply add the downstream line, depending on # selected shape parabola or trapezoidal - dic_ds = gdir.read_pickle('downstream_line') + dic_ds = gdir.read_store('downstream_line') if cfg.PARAMS['downstream_line_shape'] == 'parabola': bed_shape = np.append(bed_shape, dic_ds['bedshapes']) lambdas = np.append(lambdas, dic_ds['bedshapes'] * np.nan) @@ -3336,7 +3339,7 @@ def init_present_time_glacier(gdir, filesuffix='', fl.order = line_order(fl) # Write the data - gdir.write_pickle(new_fls, 'model_flowlines', filesuffix=filesuffix) + gdir.write_store(new_fls, 'model_flowlines', filesuffix=filesuffix) def decide_evolution_model(evolution_model=None): @@ -3500,7 +3503,7 @@ def flowline_model_run(gdir, output_filesuffix=None, mb_model=None, delete=True) if init_model_fls is None: - fls = gdir.read_pickle('model_flowlines') + fls = gdir.read_store('model_flowlines') else: fls = copy.deepcopy(init_model_fls) if zero_initial_glacier: @@ -4516,13 +4519,13 @@ def merge_to_one_glacier(main, tribs, filename='climate_historical', """ # read flowlines of the Main glacier - fls = main.read_pickle('model_flowlines') + fls = main.read_store('model_flowlines') mfl = fls.pop(-1) # remove main line from list and treat separately for trib in tribs: # read tributary flowlines and append to list - tfls = trib.read_pickle('model_flowlines') + tfls = trib.read_store('model_flowlines') # copy climate file and calib to new gdir # if we have a merge-merge situation we need to copy multiple files @@ -4589,7 +4592,7 @@ def merge_to_one_glacier(main, tribs, filename='climate_historical', fls = fls + [mfl] # Finally write the flowlines - main.write_pickle(fls, 'model_flowlines') + main.write_store(fls, 'model_flowlines') def clean_merged_flowlines(gdir, buffer=None): @@ -4615,7 +4618,7 @@ def clean_merged_flowlines(gdir, buffer=None): # Number of pixels to arbitrarily remove at junctions lid = int(cfg.PARAMS['flowline_junction_pix']) - fls = gdir.read_pickle('model_flowlines') + fls = gdir.read_store('model_flowlines') # separate the main main flowline mainfl = fls.pop(-1) @@ -4773,7 +4776,7 @@ def clean_merged_flowlines(gdir, buffer=None): assert allfls[-1] == mainfl # Finally write the flowlines - gdir.write_pickle(allfls, 'model_flowlines') + gdir.write_store(allfls, 'model_flowlines') @entity_task(log) diff --git a/oggm/core/gis.py b/oggm/core/gis.py index 4d1985da0..3f377f54d 100644 --- a/oggm/core/gis.py +++ b/oggm/core/gis.py @@ -981,7 +981,7 @@ def proj(x, y): geometries['polygon_hr'] = glacier_poly_hr geometries['polygon_pix'] = glacier_poly_pix geometries['polygon_area'] = geometry.area - gdir.write_pickle(geometries, 'geometries') + gdir.write_store(geometries, 'geometries', use_pickle=True) # write out the grids in the netcdf file with GriddedNcdfFile(gdir) as nc: @@ -1660,13 +1660,13 @@ def _fill_2d_like(data): # Hardest part - MB per catchment try: - cls = gdir.read_pickle('centerlines') + cls = gdir.read_store('centerlines') except FileNotFoundError: return # Make everything we need flat # Catchment areas - cis = gdir.read_pickle('geometries')['catchment_indices'] + cis = gdir.read_store('geometries')['catchment_indices'] for j, ci in enumerate(cis): catchment_mask_2d[tuple(ci.T)] = j @@ -1871,7 +1871,7 @@ def project(x, y): geometries['polygon_hr'] = glacier_poly_hr geometries['polygon_pix'] = glacier_poly_pix geometries['polygon_area'] = geometry.area - gdir.write_pickle(geometries, 'geometries') + gdir.write_store(geometries, 'geometries', use_pickle=True) @entity_task(log) diff --git a/oggm/core/inversion.py b/oggm/core/inversion.py index 6183f12cc..32ecf0bcd 100644 --- a/oggm/core/inversion.py +++ b/oggm/core/inversion.py @@ -68,7 +68,7 @@ def prepare_for_inversion(gdir, """ # variables - fls = gdir.read_pickle('inversion_flowlines') + fls = gdir.read_store('inversion_flowlines') towrite = [] for fl in fls: @@ -140,7 +140,7 @@ def prepare_for_inversion(gdir, towrite.append(cl_dic) # Write out - gdir.write_pickle(towrite, 'inversion_input') + gdir.write_store(towrite, 'inversion_input') def _inversion_poly(a3, a0): @@ -407,7 +407,7 @@ def mass_conservation_inversion(gdir, glen_a=None, fs=None, write=True, out_volume = 0. - cls = gdir.read_pickle('inversion_input') + cls = gdir.read_store('inversion_input') for cl in cls: # Clip slope to avoid negative and small slopes slope = cl['slope_angle'] @@ -495,7 +495,7 @@ def mass_conservation_inversion(gdir, glen_a=None, fs=None, write=True, out_volume += np.sum(volume) if write: - gdir.write_pickle(cls, 'inversion_output', filesuffix=filesuffix) + gdir.write_store(cls, 'inversion_output', filesuffix=filesuffix) gdir.add_to_diagnostics('inversion_glen_a', glen_a) gdir.add_to_diagnostics('inversion_fs', fs) @@ -529,7 +529,7 @@ def filter_inversion_output(gdir, n_smoothing=5, min_ice_thick=1., if gdir.is_tidewater: # No need for filter in tidewater case - cls = gdir.read_pickle('inversion_output') + cls = gdir.read_store('inversion_output') init_vol = np.sum([np.sum(cl['volume']) for cl in cls]) return init_vol @@ -539,8 +539,8 @@ def filter_inversion_output(gdir, n_smoothing=5, min_ice_thick=1., 'compute_dowstream_line and ' 'compute_downstream_bedshape tasks') - dic_ds = gdir.read_pickle('downstream_line') - cls = gdir.read_pickle('inversion_output') + dic_ds = gdir.read_store('downstream_line') + cls = gdir.read_store('inversion_output') cl = cls[-1] # check that their are enough grid points for smoothing @@ -616,7 +616,7 @@ def filter_inversion_output(gdir, n_smoothing=5, min_ice_thick=1., cl['is_trapezoid'][n_smoothing:] = new_is_trapezoid cl['volume'][n_smoothing:] = new_volume - gdir.write_pickle(cls, 'inversion_output') + gdir.write_store(cls, 'inversion_output') # Return volume for convenience return np.sum([np.sum(cl['volume']) for cl in cls]) @@ -625,7 +625,7 @@ def filter_inversion_output(gdir, n_smoothing=5, min_ice_thick=1., @entity_task(log) def get_inversion_volume(gdir): """Small utility task to get to the volume od all glaciers.""" - cls = gdir.read_pickle('inversion_output') + cls = gdir.read_store('inversion_output') return np.sum([np.sum(cl['volume']) for cl in cls]) @@ -675,7 +675,7 @@ def compute_inversion_velocities(gdir, glen_a=None, fs=None, filesuffix='', with_sliding = fs != 0 # Getting the data for the main flowline - cls = gdir.read_pickle('inversion_output') + cls = gdir.read_store('inversion_output') for cl in cls: # vol in m3 and dx in m @@ -722,7 +722,7 @@ def compute_inversion_velocities(gdir, glen_a=None, fs=None, filesuffix='', cl['u_basal'] = u_basal cl['u_deformation'] = u_deformation - gdir.write_pickle(cls, 'inversion_output', filesuffix=filesuffix) + gdir.write_store(cls, 'inversion_output', filesuffix=filesuffix) @entity_task(log, writes=['gridded_data']) @@ -774,8 +774,8 @@ def distribute_thickness_per_altitude(gdir, add_slope=True, slope_factor = 1. # Along the lines - cls = gdir.read_pickle('inversion_output') - fls = gdir.read_pickle('inversion_flowlines') + cls = gdir.read_store('inversion_output') + fls = gdir.read_store('inversion_flowlines') hs, ts, vs, xs, ys = [], [], [], [], [] for cl, fl in zip(cls, fls): hs = np.append(hs, fl.surface_h) @@ -905,8 +905,8 @@ def distribute_thickness_interp(gdir, add_slope=True, smooth_radius=None, thick[:, -1] = 0. # Along the lines - cls = gdir.read_pickle('inversion_output') - fls = gdir.read_pickle('inversion_flowlines') + cls = gdir.read_store('inversion_output') + fls = gdir.read_store('inversion_flowlines') vs = [] for cl, fl in zip(cls, fls): vs.extend(cl['volume']) @@ -998,7 +998,7 @@ def calving_flux_from_depth(gdir, k=None, water_level=None, water_depth=None, k = cfg.PARAMS['inversion_calving_k'] # Read necessary data - fl = gdir.read_pickle('inversion_flowlines')[-1] + fl = gdir.read_store('inversion_flowlines')[-1] # Altitude at the terminus and frontal width free_board = utils.clip_min(fl.surface_h[-1], 0) - water_level @@ -1006,7 +1006,7 @@ def calving_flux_from_depth(gdir, k=None, water_level=None, water_depth=None, # Calving formula if thick is None: - cl = gdir.read_pickle('inversion_output')[-1] + cl = gdir.read_store('inversion_output')[-1] thick = cl['thick'][-1] if water_depth is None: water_depth = thick - free_board @@ -1077,7 +1077,7 @@ def find_inversion_calving_from_any_mb(gdir, mb_model=None, mb_years=None, gdir.add_to_diagnostics('volume_before_calving', v_ref) # Get the relevant variables - cls = gdir.read_pickle('inversion_input')[-1] + cls = gdir.read_store('inversion_input')[-1] slope = cls['slope_angle'][-1] width = cls['width'][-1] @@ -1158,7 +1158,7 @@ def to_minimize(h): out = calving_flux_from_depth(gdir, water_level=water_level) - fl = gdir.read_pickle('inversion_flowlines')[-1] + fl = gdir.read_store('inversion_flowlines')[-1] f_calving = (fl.flux[-1] * (gdir.grid.dx ** 2) * 1e-9 / cfg.PARAMS['ice_density']) diff --git a/oggm/core/massbalance.py b/oggm/core/massbalance.py index 8f1ef9168..fa55fe0b4 100644 --- a/oggm/core/massbalance.py +++ b/oggm/core/massbalance.py @@ -757,7 +757,7 @@ def __init__(self, gdir, mb_model_class=MonthlyTIModel, # This is a quick'n dirty optimisation try: - fls = gdir.read_pickle('model_flowlines') + fls = gdir.read_store('model_flowlines') h = [] for fl in fls: # We use bed because of overdeepenings @@ -1213,11 +1213,11 @@ def __init__(self, gdir, fls=None, mb_model_class=MonthlyTIModel, # Read in the flowlines if use_inversion_flowlines: - fls = gdir.read_pickle('inversion_flowlines') + fls = gdir.read_store('inversion_flowlines') if fls is None: try: - fls = gdir.read_pickle('model_flowlines') + fls = gdir.read_store('model_flowlines') except FileNotFoundError: raise InvalidWorkflowError('Need a valid `model_flowlines` ' 'file. If you explicitly want to ' @@ -1645,7 +1645,7 @@ def mb_calibration_to_rmsd(gdir, *, temp_bias_max = cfg.PARAMS['temp_bias_max'] if not use_2d_mb: - fls = gdir.read_pickle('inversion_flowlines') + fls = gdir.read_store('inversion_flowlines') else: # if the 2D data is used, the flowline is not needed. fls = None @@ -2115,7 +2115,7 @@ def mb_calibration_from_scalar_mb(gdir, *, 'at the same time.') if not use_2d_mb: - fls = gdir.read_pickle('inversion_flowlines') + fls = gdir.read_store('inversion_flowlines') else: # if the 2D data is used, the flowline is not needed. fls = None @@ -2423,7 +2423,7 @@ def to_minimize(ela_h): # For each flowline compute the apparent MB rho = cfg.PARAMS['ice_density'] - fls = gdir.read_pickle('inversion_flowlines') + fls = gdir.read_store('inversion_flowlines') # Reset flux for fl in fls: fl.flux = np.zeros(len(fl.surface_h)) @@ -2435,9 +2435,9 @@ def to_minimize(ela_h): # Check and write _check_terminus_mass_flux(gdir, fls) - gdir.write_pickle(fls, 'inversion_flowlines') - gdir.write_pickle({'ela_h': ela_h, 'grad': mb_gradient}, - 'linear_mb_params') + gdir.write_store(fls, 'inversion_flowlines') + gdir.write_store({'ela_h': ela_h, 'grad': mb_gradient}, + 'linear_mb_params') @entity_task(log, writes=['inversion_flowlines']) @@ -2473,7 +2473,7 @@ def apparent_mb_from_any_mb(gdir, mb_model=None, is_calving = cmb != 0 # For each flowline compute the apparent MB - fls = gdir.read_pickle('inversion_flowlines') + fls = gdir.read_store('inversion_flowlines') if mb_model is None: mb_model = mb_model_class(gdir) @@ -2537,7 +2537,7 @@ def to_minimize(residual_to_opt): # Check and write _check_terminus_mass_flux(gdir, fls) gdir.add_to_diagnostics('apparent_mb_from_any_mb_residual', residual) - gdir.write_pickle(fls, 'inversion_flowlines') + gdir.write_store(fls, 'inversion_flowlines') @entity_task(log) diff --git a/oggm/graphics.py b/oggm/graphics.py index 9940d9296..7175e33af 100644 --- a/oggm/graphics.py +++ b/oggm/graphics.py @@ -271,7 +271,7 @@ def plot_raster(gdirs, var_name=None, cmap='viridis', ax=None, smap=None): crs = gdir.grid.center_grid try: - geom = gdir.read_pickle('geometries') + geom = gdir.read_store('geometries') # Plot boundaries poly_pix = geom['polygon_pix'] smap.set_geometry(poly_pix, crs=crs, fc='none', @@ -320,7 +320,7 @@ def plot_domain(gdirs, ax=None, smap=None, use_netcdf=False): crs = gdir.grid.center_grid try: - geom = gdir.read_pickle('geometries') + geom = gdir.read_store('geometries') # Plot boundaries poly_pix = geom['polygon_pix'] @@ -366,7 +366,7 @@ def plot_centerlines(gdirs, ax=None, smap=None, use_flowlines=False, smap.set_data(topo) for gdir in gdirs: crs = gdir.grid.center_grid - geom = gdir.read_pickle('geometries') + geom = gdir.read_store('geometries') # Plot boundaries poly_pix = geom['polygon_pix'] @@ -379,7 +379,7 @@ def plot_centerlines(gdirs, ax=None, smap=None, use_flowlines=False, smap.set_geometry(l, crs=crs, color='black', linewidth=0.5) # plot Centerlines - cls = gdir.read_pickle(filename) + cls = gdir.read_store(filename) # Go in reverse order for red always being the longest cls = cls[::-1] @@ -387,7 +387,7 @@ def plot_centerlines(gdirs, ax=None, smap=None, use_flowlines=False, color = gencolor(len(cls) + 1, cmap=lines_cmap) for i, (l, c) in enumerate(zip(cls, color)): if add_downstream and not gdir.is_tidewater and l is cls[0]: - line = gdir.read_pickle('downstream_line')['full_line'] + line = gdir.read_store('downstream_line')['full_line'] else: line = l.line @@ -428,7 +428,7 @@ def plot_catchment_areas(gdirs, ax=None, smap=None, lines_cmap='Set1', smap.set_topography(topo) crs = gdir.grid.center_grid - geom = gdir.read_pickle('geometries') + geom = gdir.read_store('geometries') # Plot boundaries poly_pix = geom['polygon_pix'] @@ -438,14 +438,14 @@ def plot_catchment_areas(gdirs, ax=None, smap=None, lines_cmap='Set1', smap.set_geometry(l, crs=crs, color='black', linewidth=0.5) # plot Centerlines - cls = gdir.read_pickle('centerlines')[::-1] + cls = gdir.read_store('centerlines')[::-1] color = gencolor(len(cls) + 1, cmap=lines_cmap) for l, c in zip(cls, color): smap.set_geometry(l.line, crs=crs, color=c, linewidth=2.5, zorder=50) # catchment areas - cis = gdir.read_pickle('geometries')['catchment_indices'] + cis = gdir.read_store('geometries')['catchment_indices'] for j, ci in enumerate(cis[::-1]): mask[tuple(ci.T)] = j+1 @@ -483,7 +483,7 @@ def plot_catchment_width(gdirs, ax=None, smap=None, corrected=False, for gdir in gdirs: crs = gdir.grid.center_grid - geom = gdir.read_pickle('geometries') + geom = gdir.read_store('geometries') # Plot boundaries poly_pix = geom['polygon_pix'] @@ -498,7 +498,7 @@ def plot_catchment_width(gdirs, ax=None, smap=None, corrected=False, smap.set_shapefile(gdf, color='k', linewidth=3.5, zorder=3) # plot Centerlines - cls = gdir.read_pickle('inversion_flowlines')[::-1] + cls = gdir.read_store('inversion_flowlines')[::-1] color = gencolor(len(cls) + 1, cmap=lines_cmap) for l, c in zip(cls, color): smap.set_geometry(l.line, crs=crs, color=c, @@ -564,8 +564,8 @@ def plot_inversion(gdirs, ax=None, smap=None, linewidth=3, vmax=None, vol = [] for gdir in gdirs: crs = gdir.grid.center_grid - geom = gdir.read_pickle('geometries') - inv = gdir.read_pickle('inversion_output') + geom = gdir.read_store('geometries') + inv = gdir.read_store('inversion_output') # Plot boundaries poly_pix = geom['polygon_pix'] smap.set_geometry(poly_pix, crs=crs, fc='none', zorder=2, @@ -574,7 +574,7 @@ def plot_inversion(gdirs, ax=None, smap=None, linewidth=3, vmax=None, smap.set_geometry(l, crs=crs, color='black', linewidth=0.5) # Plot Centerlines - cls = gdir.read_pickle('inversion_flowlines') + cls = gdir.read_store('inversion_flowlines') for l, c in zip(cls, inv): smap.set_geometry(l.line, crs=crs, color='gray', @@ -641,7 +641,7 @@ def plot_distributed_thickness(gdirs, ax=None, smap=None, varname_suffix=''): # Try to read geometries.pkl as the glacier boundary, # if it can't be found, we use the shapefile to instead. try: - geom = gdir.read_pickle('geometries') + geom = gdir.read_store('geometries') poly_pix = geom['polygon_pix'] smap.set_geometry(poly_pix, crs=crs, fc='none', zorder=2, linewidth=.2) for l in poly_pix.interiors: @@ -718,7 +718,7 @@ def plot_modeloutput_map(gdirs, ax=None, smap=None, model=None, modelyr = models[0].yr for gdir, model in zip(gdirs, models): - geom = gdir.read_pickle('geometries') + geom = gdir.read_store('geometries') poly_pix = geom['polygon_pix'] crs = gdir.grid.center_grid diff --git a/oggm/sandbox/distribute_2d.py b/oggm/sandbox/distribute_2d.py index 84a3c3ef2..dd00f86f7 100644 --- a/oggm/sandbox/distribute_2d.py +++ b/oggm/sandbox/distribute_2d.py @@ -146,7 +146,7 @@ def assign_points_to_band(gdir, topo_variable='glacier_topo_smoothed', weighting_per_pixel = topo_data * 0. # container # For the flowline we need the model flowlines only - fls = gdir.read_pickle('model_flowlines') + fls = gdir.read_store('model_flowlines') assert len(fls) == 1, 'Only works with one flowline.' fl = fls[0] diff --git a/oggm/sandbox/pygem_compat.py b/oggm/sandbox/pygem_compat.py index f19d4cefa..43873b48e 100644 --- a/oggm/sandbox/pygem_compat.py +++ b/oggm/sandbox/pygem_compat.py @@ -117,5 +117,5 @@ def present_time_glacier_from_bins(gdir, data=None, bed_shape=bed_shape, rgi_id=gdir.rgi_id) # Write the data - gdir.write_pickle([nfl], 'model_flowlines') + gdir.write_store([nfl], 'model_flowlines') return [nfl] diff --git a/oggm/tests/conftest.py b/oggm/tests/conftest.py index aa7251ee5..9b1231b62 100644 --- a/oggm/tests/conftest.py +++ b/oggm/tests/conftest.py @@ -102,6 +102,34 @@ def test_wrapper(*args, **kwargs): if not HAS_MPL_FOR_TESTS: item.add_marker(graphic_marker) +def _full_zarr_cleanup(): + """Shut down Zarr's background asyncio threads completely. + + This introduces a five second timeout *only* to tests that call + `restore_oggm_cfg()`, but a slow test is better than a failing one. + + .. note: Workaround since Zarr v3 doesn't seem to wait properly for + iothreads to stop, which can cause a deadlock. Works by stopping + executor before event loop is closed before cleanup, then explicitly + joining the queue. + """ + try: + import zarr.core.sync as zs + iothread = zs.iothread[0] # save ref before cleanup clears it + loop = zs.loop[0] + if loop is not None and not loop.is_closed(): + exc = getattr(loop, '_default_executor', None) + if exc is not None: + # to stop all worker threads cleanly + exc.shutdown(wait=True) + loop._default_executor = None + zs.cleanup_resources() + # ensure thread is truly dead before any subsequent fork + if iothread is not None and iothread.is_alive(): + iothread.join(timeout=5.0) + except Exception: + pass + @pytest.fixture(scope='function', autouse=True) def restore_oggm_cfg(): @@ -110,6 +138,7 @@ def restore_oggm_cfg(): reset_multiprocessing() yield cfg.unpack_config(old_cfg) + _full_zarr_cleanup() @pytest.fixture(scope='class', autouse=True) diff --git a/oggm/tests/test_benchmarks.py b/oggm/tests/test_benchmarks.py index f3151e5e3..16ba8ea75 100644 --- a/oggm/tests/test_benchmarks.py +++ b/oggm/tests/test_benchmarks.py @@ -520,7 +520,7 @@ def test_set_width(self): topo = nc.variables['topo_smoothed'][:] rhgt = topo[np.where(mask)][:] - fls = gdir.read_pickle('inversion_flowlines') + fls = gdir.read_store('inversion_flowlines') hgt, widths = gdir.get_inversion_flowline_hw() bs = 100 @@ -537,7 +537,7 @@ def test_set_width(self): centerlines.terminus_width_correction(gdir, new_width=714) - fls = gdir.read_pickle('inversion_flowlines') + fls = gdir.read_store('inversion_flowlines') hgt, widths = gdir.get_inversion_flowline_hw() # Check that the width is ok diff --git a/oggm/tests/test_geozarr.py b/oggm/tests/test_geozarr.py new file mode 100644 index 000000000..ab246c383 --- /dev/null +++ b/oggm/tests/test_geozarr.py @@ -0,0 +1,494 @@ +import os +import pickle +from functools import partial + +import pytest +import pyproj +import shapely.geometry as shpg +import numpy as np +import xarray as xr +from pathlib import Path +from numpy.testing import assert_allclose +import matplotlib.pyplot as plt +from oggm import Centerline + +salem = pytest.importorskip("salem") +gpd = pytest.importorskip("geopandas") + +# Locals +import oggm.cfg as cfg +from oggm.tests.funcs import get_test_dir +import oggm.utils.geozarr as oggmzarr + +# Globals +pytestmark = pytest.mark.test_env("workflow") +_TEST_DIR = os.path.join(get_test_dir(), "tmp_workflow") +CLI_LOGF = os.path.join(_TEST_DIR, "clilog.pkl") + + +def _make_centerline(n=5): + """Create a minimal Centerline.""" + coords = np.arange(n, dtype=float) + line = shpg.LineString(np.vstack([coords, np.zeros(n)]).T) + surface_h = np.linspace(3000.0, 2000.0, n) + cl = Centerline( + line, + dx=1.0, + surface_h=surface_h, + orig_head=shpg.Point(0, 0), + rgi_id="RGI60-11.00897", + map_dx=100.0, + ) + cl.order = 1 + cl._widths = np.ones(n) + cl.is_rectangular = np.zeros(n, dtype=bool) + cl.is_trapezoid = np.zeros(n, dtype=bool) + cl.apparent_mb = np.zeros(n) + cl.flux = np.zeros(n) + cl.flux_out = 0.0 + return cl + + +def _make_mixed_bed_flowline(n=5): + """Create a minimal MixedBedFlowline.""" + from oggm.core.flowline import MixedBedFlowline + + coords = np.arange(n, dtype=float) + line = shpg.LineString(np.vstack([coords, np.zeros(n)]).T) + surface_h = np.linspace(3000.0, 2000.0, n) + bed_h = surface_h # zero ice thickness + bed_shape = np.full(n, 3.0e-3) + is_trapezoid = np.zeros(n, dtype=bool) + lambdas = np.zeros(n) + section = np.zeros(n) + + return MixedBedFlowline( + line=line, + dx=1.0, + map_dx=100.0, + surface_h=surface_h, + bed_h=bed_h, + section=section, + bed_shape=bed_shape, + is_trapezoid=is_trapezoid, + lambdas=lambdas, + widths_m=np.zeros(n) + 10.0, + rgi_id="RGI60-11.00897", + ) + + +class TestZarrUtilities: + """Tests for any Zarr operations called via workflow or _workflow.""" + + # File operations + + def test_get_pickle_paths_returns_only_pkl(self, tmp_path): + for name in ("data.pkl", "other.pkl", "README.txt", "noextension"): + (tmp_path / name).touch() + + class _MockGDir: + dir = str(tmp_path) + + paths = oggmzarr.get_pickle_paths(_MockGDir().dir) + assert all(isinstance(p, Path) for p in paths) + assert all(str(p).endswith(".pkl") for p in paths) + assert len(paths) == 2 + + def test_get_pickle_paths_empty_dir(self, tmp_path): + class _MockGDir: + dir = str(tmp_path) + + assert not oggmzarr.get_pickle_paths(_MockGDir().dir) + + def test_get_pickle_data_reads_dict_pickle(self, tmp_path): + payload = {"array": np.array([1.0, 2.0]), "val": 42} + with open(tmp_path / "mydata.pkl", "wb") as f: + pickle.dump(payload, f) + + class _MockGDir: + dir = str(tmp_path) + + def read_pickle(self, stem): + with open(os.path.join(self.dir, stem + ".pkl"), "rb") as fh: + return pickle.load(fh) + + result = oggmzarr.get_pickle_data([Path("mydata.pkl")], _MockGDir()) + assert "mydata" in result + assert_allclose(result["mydata"]["array"], payload["array"]) + assert result["mydata"]["val"] == 42 + + def test_get_pickle_data_type_only(self, tmp_path): + payload = {"array": np.array([1.0, 2.0]), "val": 42} + with open(tmp_path / "mydata.pkl", "wb") as f: + pickle.dump(payload, f) + + class _MockGDir: + dir = str(tmp_path) + + def read_pickle(self, stem): + with open(os.path.join(self.dir, stem + ".pkl"), "rb") as fh: + return pickle.load(fh) + + result = oggmzarr.get_pickle_data( + [Path("mydata.pkl")], _MockGDir(), type_only=True + ) + assert result["mydata"]["array"] is np.ndarray + assert result["mydata"]["val"] is int + + def test_get_pickle_data_reads_list_of_dicts(self, tmp_path): + payload = [{"a": 1, "b": np.array([3.0])}, {"c": 2}] + with open(tmp_path / "listdata.pkl", "wb") as f: + pickle.dump(payload, f) + + class _MockGDir: + dir = str(tmp_path) + + def read_pickle(self, stem): + with open(os.path.join(self.dir, stem + ".pkl"), "rb") as fh: + return pickle.load(fh) + + result = oggmzarr.get_pickle_data([Path("listdata.pkl")], _MockGDir()) + assert "listdata" in result + # Each dict in the list is processed through get_tranche + assert isinstance(result["listdata"], list) + assert result["listdata"][0]["a"] == 1 + + def test_get_tranche_returns_values(self): + assert oggmzarr.get_tranche({}) == {} + data = {"a": 1, "b": "hello", "c": np.array([1, 2, 3])} + result = oggmzarr.get_tranche(data, type_only=False) + assert result["a"] == 1 + assert result["b"] == "hello" + assert_allclose(result["c"], data["c"]) + + def test_get_tranche_returns_types(self): + data = {"a": 1, "b": "hello", "c": np.array([1, 2, 3])} + result = oggmzarr.get_tranche(data, type_only=True) + assert result["a"] is int + assert result["b"] is str + assert result["c"] is np.ndarray + + def test_filter_arrays_from_dict(self): + assert oggmzarr.filter_arrays_from_dict({"x": 1, "y": "z"}) == {} + data = { + "array": np.array([1, 2, 3]), + "scalar": 42, + "string": "hello", + "lst": [1, 2], + } + result = oggmzarr.filter_arrays_from_dict(data) + assert set(result.keys()) == {"array"} + assert_allclose(result["array"], data["array"]) + + def test_filter_lists_from_dict(self): + assert ( + oggmzarr.filter_lists_from_dict({"x": 1, "y": np.array([1])}) == {} + ) + data = { + "arr": np.array([1, 2, 3]), + "scalar": 42, + "lst": [1, 2], + "tup": (3, 4), + } + result = oggmzarr.filter_lists_from_dict(data) + assert set(result.keys()) == {"lst"} + assert result["lst"] == [1, 2] + + # Downstream line + + def test_get_downstream_line_from_pkl_convert_linestring(self): + line = shpg.LineString([(0, 0), (1, 1), (2, 0)]) + data = {"downstream_line": line, "extra": 99} + result = oggmzarr.get_downstream_line_from_pkl(data) + assert isinstance(result["downstream_line"], xr.DataArray) + assert result["extra"] == 99 + expected = np.array(shpg.mapping(line)["coordinates"]) + assert_allclose(result["downstream_line"].values, expected) + + def test_get_downstream_line_from_pkl_skip_non_linestring(self): + array = np.array([1.0, 2.0, 3.0]) + data = {"downstream_line": array} + result = oggmzarr.get_downstream_line_from_pkl(data) + assert_allclose(result["downstream_line"], array) + + def test_get_downstream_line_from_pkl_errors(self): + with pytest.raises(TypeError): + oggmzarr.get_downstream_line_from_pkl("not a dict") + with pytest.raises(KeyError): + oggmzarr.get_downstream_line_from_pkl({"other_key": 42}) + + # Inversion flowlines + + def test_get_inversion_flowlines_extracts_expected_keys(self): + cl = _make_centerline() + result = oggmzarr.get_inversion_flowlines_from_pkl([cl]) + assert len(result) == 1 + data = result[0] + for key in ( + "line", + "dx", + "surface_h", + "orig_head", + "rgi_id", + "map_dx", + "order", + "_widths", + "is_rectangular", + "is_trapezoid", + "apparent_mb", + "flux", + "flux_out", + ): + assert key in data, f"Expected key '{key}' missing from result" + + def test_get_inversion_flowlines_correct_values(self): + assert oggmzarr.get_inversion_flowlines_from_pkl([]) == [] + cl = _make_centerline(n=7) + result = oggmzarr.get_inversion_flowlines_from_pkl([cl]) + data = result[0] + assert_allclose(data["surface_h"], cl.surface_h) + assert data["rgi_id"] == "RGI60-11.00897" + assert data["dx"] == 1.0 + assert data["map_dx"] == 100.0 + assert data["order"] == 1 + + def test_get_inversion_flowlines_raises_on_non_centerline(self): + with pytest.raises(TypeError): + oggmzarr.get_inversion_flowlines_from_pkl(["not a centerline"]) + + # Datacube operations + + def test_add_datacube_adds_group(self): + dt = xr.DataTree() + datacubes = {"var": xr.DataArray([1.0, 2.0])} + result = oggmzarr.add_datacube(dt, datacubes, "group_01") + assert "group_01" in result.children + + dt = xr.DataTree() + datacubes = {"var": xr.DataArray([1.0, 2.0])} + dt = oggmzarr.add_datacube(dt, datacubes, "group_01") + dt = oggmzarr.add_datacube(dt, datacubes, "group_01", overwrite=True) + assert "group_01" in dt.children + + dt = oggmzarr.add_datacube(dt, {"b": xr.DataArray([2.0])}, "group_02") + assert "group_01" in dt.children + assert "group_02" in dt.children + + def test_add_datacube_raises_on_non_dict_datacubes(self): + dt = xr.DataTree() + with pytest.raises(ValueError, match="dictionary"): + oggmzarr.add_datacube(dt, [1, 2, 3], "group_01") + + dt = xr.DataTree() + datacubes = {"var": xr.DataArray([1.0, 2.0])} + dt = oggmzarr.add_datacube(dt, datacubes, "group_01") + with pytest.raises(ValueError, match="already exists"): + oggmzarr.add_datacube(dt, datacubes, "group_01", overwrite=False) + + # Conversion + + def test_convert_pickles_to_datatree_downstream_line(self): + line = shpg.LineString([(0, 0), (1, 1), (2, 0)]) + pickle_data = {"downstream_line": {"downstream_line": line}} + result = oggmzarr.convert_pickles_to_datatree(pickle_data) + assert isinstance(result, xr.DataTree) + assert "downstream_line" in result.children + + def test_convert_pickles_to_datatree_inversion_flowlines(self): + cl = _make_centerline() + pickle_data = {"inversion_flowlines": [cl]} + result = oggmzarr.convert_pickles_to_datatree(pickle_data) + assert isinstance(result, xr.DataTree) + assert "inversion_flowlines" in result.children + + def test_convert_pickles_to_datatree_generic(self): + pickle_data = {"mydata": {"key": xr.DataArray([1.0, 2.0, 3.0])}} + result = oggmzarr.convert_pickles_to_datatree(pickle_data) + assert isinstance(result, xr.DataTree) + assert "mydata" in result.children + + pickle_data = {"unsupported": 42} + result = oggmzarr.convert_pickles_to_datatree(pickle_data) + assert isinstance(result, xr.DataTree) + assert "unsupported" not in result.children + + result = oggmzarr.convert_pickles_to_datatree({}) + assert isinstance(result, xr.DataTree) + assert len(result.children) == 0 + + def test_convert_linestring_to_dataarray(self): + line = shpg.LineString([(0, 0), (1, 1), (2, 0)]) + expected = np.array(shpg.mapping(line)["coordinates"]) + result = oggmzarr.convert_linestring_to_dataarray(line) + assert isinstance(result, xr.DataArray) + assert_allclose(result.values, expected) + assert set(result.dims) == {"x", "y"} + + def test_get_datatree_value(self): + ds = xr.Dataset({"surface_h": xr.DataArray([1.0, 2.0, 3.0])}) + dt = xr.DataTree(dataset=ds) + result = oggmzarr.get_datatree_value(dt, "surface_h") + assert_allclose(result, [1.0, 2.0, 3.0]) + + # returns None for missing attribute, empty child + dt = xr.DataTree() + result = oggmzarr.get_datatree_value(dt, "nonexistent") + assert result is None + dt = xr.DataTree() + dt["child"] = xr.DataTree() + result = oggmzarr.get_datatree_value(dt, "child") + assert result is None + + # get_dict_from_datatree + + def test_get_dict_from_datatree(self): + + # empties + dt = xr.DataTree() + result = oggmzarr.get_dict_from_datatree(dt) + assert result == {} + dt = xr.DataTree() + dt["child"] = xr.DataTree() + result = oggmzarr.get_dict_from_datatree(dt) + assert "child" in result + assert result["child"] is None + + array = xr.DataArray([1.0, 2.0, 3.0], dims=["x"]) + dt = xr.DataTree(dataset=xr.Dataset({"flux": array})) + result = oggmzarr.get_dict_from_datatree(dt) + assert "flux" in result + assert_allclose(result["flux"], [1.0, 2.0, 3.0]) + + array = xr.DataArray([10.0, 20.0], dims=["x"], coords={"x": [0, 1]}) + dt = xr.DataTree(dataset=xr.Dataset({"flux": array})) + result = oggmzarr.get_dict_from_datatree(dt) + assert "x" in result + assert_allclose(result["x"], [0, 1]) + + def test_restore_projection_converts_dict_to_proj(self): + crs = pyproj.CRS.from_epsg(32632) + dt = xr.DataTree() + dt.attrs["pyproj_srs"] = crs.to_json_dict() + assert isinstance(dt.attrs["pyproj_srs"], dict) + oggmzarr.restore_projection(dt) + assert isinstance(dt.attrs["pyproj_srs"], pyproj.Proj) + + dt = xr.DataTree() + dt.attrs["other"] = "value" + oggmzarr.restore_projection(dt) + assert "pyproj_srs" not in dt.attrs + + proj = pyproj.Proj("epsg:32632") + dt = xr.DataTree() + dt.attrs["pyproj_srs"] = proj + oggmzarr.restore_projection(dt) + assert dt.attrs["pyproj_srs"] == proj + + def test_get_grid_params_from_partial(self): + proj = pyproj.Proj("epsg:32632") + grid = salem.Grid( + proj=proj, + nxny=(10.0, 8.0), + dxdy=(200.0, 100.0), + x0y0=(500.0, 300.0), + pixel_ref="center", + ) + p = partial(grid.ij_to_crs, crs=salem.wgs84) + result = oggmzarr.get_grid_params_from_partial(p) + for key in ("pyproj_srs", "nxny", "dxdy", "x0y0", "pixel_ref"): + assert key in result, f"Expected key '{key}' missing" + + assert result["nxny"] == (10.0, 8.0) + assert result["dxdy"] == (200.0, 100.0) + assert result["x0y0"] == (500.0, 300.0) + assert result["pixel_ref"] == "center" + assert isinstance(result["pyproj_srs"], dict) + + def test_get_map_trafo_from_grid(self): + # Test with data tree + proj = pyproj.Proj("epsg:32632") + dt = xr.DataTree() + dt.attrs["pyproj_srs"] = proj + dt.attrs["nxny"] = (10.0, 8.0) + dt.attrs["dxdy"] = (200.0, 100.0) + dt.attrs["x0y0"] = (500.0, 300.0) + dt.attrs["pixel_ref"] = "center" + result = oggmzarr.get_map_trafo_from_grid(dt) + assert callable(result) + result = oggmzarr.get_grid_params_from_partial(result) + + assert result["nxny"] == (10.0, 8.0) + assert result["dxdy"] == (200.0, 100.0) + assert result["x0y0"] == (500.0, 300.0) + assert result["pixel_ref"] == "center" + assert isinstance(result["pyproj_srs"], dict) + + grid = salem.Grid( + proj=proj, + nxny=(10.0, 8.0), + dxdy=(200.0, 100.0), + x0y0=(500.0, 300.0), + pixel_ref="center", + ) + # reconstruction + p = partial(grid.ij_to_crs, crs=salem.wgs84) + params = oggmzarr.get_grid_params_from_partial(p) + + dt = xr.DataTree() + dt.attrs["pyproj_srs"] = proj + dt.attrs["nxny"] = params["nxny"] + dt.attrs["dxdy"] = params["dxdy"] + dt.attrs["x0y0"] = params["x0y0"] + dt.attrs["pixel_ref"] = params["pixel_ref"] + result = oggmzarr.get_map_trafo_from_grid(dt) + assert callable(result) + + def test_get_model_flowlines(self): + result = oggmzarr.get_model_flowlines_from_pkl([]) + assert not result and isinstance(result, list) + + fl = _make_mixed_bed_flowline() + result = oggmzarr.get_model_flowlines_from_pkl([fl]) + assert len(result) == 1 + data = result[0] + for key in ( + "line", + "dx", + "map_dx", + "surface_h", + "bed_h", + "section", + "bed_shape", + "is_trapezoid", + "lambdas", + "rgi_id", + ): + assert key in data, f"Expected key '{key}' missing" + assert isinstance(result[0]["line"], xr.DataArray) + + fl = _make_mixed_bed_flowline(n=7) + result = oggmzarr.get_model_flowlines_from_pkl([fl]) + data = result[0] + assert_allclose(data["surface_h"], fl.surface_h) + assert data["rgi_id"] == "RGI60-11.00897" + assert data["dx"] == 1.0 + assert data["map_dx"] == 100.0 + + cl = _make_centerline() + with pytest.raises(TypeError): + oggmzarr.get_model_flowlines_from_pkl([cl]) + + def test_get_pickle_data(self, tmp_path): + payload = 42 + with open(tmp_path / "scalar.pkl", "wb") as f: + pickle.dump(payload, f) + + class _MockGDir: + dir = str(tmp_path) + + def read_pickle(self, stem): + with open(os.path.join(self.dir, stem + ".pkl"), "rb") as fh: + return pickle.load(fh) + + result = oggmzarr.get_pickle_data([Path("scalar.pkl")], _MockGDir()) + assert "scalar" not in result diff --git a/oggm/tests/test_graphics.py b/oggm/tests/test_graphics.py index 029490fda..d992fc9c8 100644 --- a/oggm/tests/test_graphics.py +++ b/oggm/tests/test_graphics.py @@ -208,7 +208,7 @@ def test_modelsection(): gdir = init_hef() flowline.init_present_time_glacier(gdir) - fls = gdir.read_pickle('model_flowlines') + fls = gdir.read_store('model_flowlines') model = flowline.FlowlineModel(fls) fig = plt.figure(figsize=(12, 6)) @@ -223,7 +223,7 @@ def test_modelsection_withtrib(): gdir = init_hef() flowline.init_present_time_glacier(gdir) - fls = gdir.read_pickle('model_flowlines') + fls = gdir.read_store('model_flowlines') model = flowline.FlowlineModel(fls) fig = plt.figure(figsize=(14, 10)) @@ -237,7 +237,7 @@ def test_modeloutput_map(): gdir = init_hef() flowline.init_present_time_glacier(gdir) - fls = gdir.read_pickle('model_flowlines') + fls = gdir.read_store('model_flowlines') model = flowline.FlowlineModel(fls) fig, ax = plt.subplots() @@ -282,7 +282,7 @@ def test_multiple_models(): models = [] for gdir in gdirs: flowline.init_present_time_glacier(gdir) - fls = gdir.read_pickle('model_flowlines') + fls = gdir.read_store('model_flowlines') models.append(flowline.FlowlineModel(fls)) fig, ax = plt.subplots() @@ -335,7 +335,7 @@ def test_model_section_calving(): workflow.inversion_tasks(utils.tolist(gdir)) flowline.init_present_time_glacier(gdir) - fls = gdir.read_pickle('model_flowlines') + fls = gdir.read_store('model_flowlines') mb_mod = massbalance.LinearMassBalance(1600) model = flowline.FluxBasedModel(fls, mb_model=mb_mod, y0=0, inplace=True, @@ -394,7 +394,7 @@ def test_chhota_shigri(): models = [] for gdir in gdirs: flowline.init_present_time_glacier(gdir) - fls = gdir.read_pickle('model_flowlines') + fls = gdir.read_store('model_flowlines') models.append(flowline.FlowlineModel(fls)) fig, ax = plt.subplots() @@ -482,9 +482,9 @@ def test_coxe(): flowline.init_present_time_glacier(gdir) - fls = gdir.read_pickle('model_flowlines') + fls = gdir.read_store('model_flowlines') - p = gdir.read_pickle('linear_mb_params') + p = gdir.read_store('linear_mb_params') mb_mod = massbalance.LinearMassBalance(ela_h=p['ela_h'], grad=p['grad']) mb_mod.temp_bias = -0.3 diff --git a/oggm/tests/test_models.py b/oggm/tests/test_models.py index 636025595..e315cc319 100644 --- a/oggm/tests/test_models.py +++ b/oggm/tests/test_models.py @@ -82,8 +82,8 @@ def test_init_present_time_glacier(self, hef_gdir, downstream_line_shape): cfg.PARAMS['downstream_line_shape'] = downstream_line_shape init_present_time_glacier(gdir) - fls = gdir.read_pickle('model_flowlines') - inv = gdir.read_pickle('inversion_output') + fls = gdir.read_store('model_flowlines') + inv = gdir.read_store('inversion_output') assert gdir.rgi_date == 2003 assert len(fls) == 3 @@ -148,7 +148,7 @@ def test_init_present_time_glacier(self, hef_gdir, downstream_line_shape): # test if providing a filesuffix is working init_present_time_glacier(gdir, filesuffix='_test') - assert os.path.isfile(os.path.join(gdir.dir, 'model_flowlines_test.pkl')) + assert gdir.has_file('model_flowlines', filesuffix='_test') cfg.PARAMS['downstream_line_shape'] = 'free_shape' with pytest.raises(InvalidParamsError): @@ -174,7 +174,7 @@ def test_init_present_time_glacier_obs_thick(self, hef_elev_gdir, tasks.init_present_time_glacier(gdir, filesuffix='_consensus', use_binned_thickness_data=vn) - fl_consensus = gdir.read_pickle('model_flowlines', + fl_consensus = gdir.read_store('model_flowlines', filesuffix='_consensus')[0] # check that resulting flowline has the same volume as observation @@ -187,7 +187,7 @@ def test_init_present_time_glacier_obs_thick(self, hef_elev_gdir, # test that we can use fl in an dynamic model run without an error mb = LinearMassBalance(3000.) - model_ref = FluxBasedModel(gdir.read_pickle('model_flowlines'), + model_ref = FluxBasedModel(gdir.read_store('model_flowlines'), mb_model=mb) model_ref.run_until(100) model_consensus = FluxBasedModel([fl_consensus], mb_model=mb) @@ -203,7 +203,7 @@ def test_init_present_time_glacier_obs_thick(self, hef_elev_gdir, index_col=0) # Check consistency between csv and flowline object - fl_inv = gdir.read_pickle('inversion_flowlines')[0] + fl_inv = gdir.read_store('inversion_flowlines')[0] assert fl_inv.nx == len(df_fixed_dx) assert fl_inv.dx_meter == (df_fixed_dx.index[1] - df_fixed_dx.index[0]) np.testing.assert_allclose(fl_inv.dx_meter * np.arange(fl_inv.nx), df_fixed_dx.index) @@ -216,7 +216,7 @@ def test_init_present_time_glacier_obs_thick(self, hef_elev_gdir, tasks.init_present_time_glacier(gdir, filesuffix='_consensus_rect', use_binned_thickness_data=vn) - fl_consensus_rect = gdir.read_pickle('model_flowlines', + fl_consensus_rect = gdir.read_store('model_flowlines', filesuffix='_consensus_rect')[0] np.testing.assert_allclose(fl_consensus_rect.volume_m3, ref_vol_rect) @@ -229,7 +229,7 @@ def test_present_time_glacier_massbalance(self, hef_gdir): mb_mod = massbalance.MonthlyTIModel(gdir) - fls = gdir.read_pickle('model_flowlines') + fls = gdir.read_store('model_flowlines') glacier = FlowlineModel(fls) mbdf = gdir.get_ref_mb_data() @@ -350,20 +350,20 @@ def test_define_divides(self, class_case_dir): init_present_time_glacier(gdir) myarea = 0. - cls = gdir.read_pickle('inversion_flowlines') + cls = gdir.read_store('inversion_flowlines') for cl in cls: myarea += np.sum(cl.widths * cl.dx * gdir.grid.dx ** 2) np.testing.assert_allclose(myarea, gdir.rgi_area_m2, rtol=1e-2) myarea = 0. - cls = gdir.read_pickle('inversion_flowlines') + cls = gdir.read_store('inversion_flowlines') for cl in cls: myarea += np.sum(cl.widths * cl.dx * gdir.grid.dx ** 2) np.testing.assert_allclose(myarea, gdir.rgi_area_m2, rtol=1e-2) - fls = gdir.read_pickle('model_flowlines') + fls = gdir.read_store('model_flowlines') if cfg.PARAMS['grid_dx_method'] == 'square': assert len(fls) == 3 vol = 0. @@ -464,7 +464,7 @@ def test_past_mb_model(self, hef_gdir): assert_allclose(mb[50], mb[-50]) # Go for glacier wide now - fls = gdir.read_pickle('inversion_flowlines') + fls = gdir.read_store('inversion_flowlines') mb_gw_mod = massbalance.MultipleFlowlineMassBalance(gdir, fls=fls, repeat=True, ys=1901, ye=1950) @@ -567,7 +567,7 @@ def test_glacierwide_mb_model(self, hef_gdir, cl): gdir = hef_gdir init_present_time_glacier(gdir) - fls = gdir.read_pickle('model_flowlines') + fls = gdir.read_store('model_flowlines') h = np.array([]) w = np.array([]) for fl in fls: @@ -653,7 +653,7 @@ def test_get_annual_specific_mass_balance(self, hef_gdir, ref_year): gdir = hef_gdir init_present_time_glacier(gdir) - test_fls = gdir.read_pickle("model_flowlines") + test_fls = gdir.read_store("model_flowlines") assert len(test_fls) > 1 mb_mod = massbalance.MultipleFlowlineMassBalance( gdir, fls=test_fls, mb_model_class=massbalance.MonthlyTIModel @@ -665,7 +665,7 @@ def test_get_annual_specific_mass_balance(self, hef_gdir, ref_year): # Compare to old function flowline_models = mb_mod.flowline_mb_models - fls = gdir.read_pickle("model_flowlines") + fls = gdir.read_store("model_flowlines") year = ref_year mbs = [] widths = [] @@ -693,7 +693,7 @@ def test_get_specific_mb(self, hef_gdir): ye = 2019 years = np.arange(ys, ye + 1) - fls = gdir.read_pickle("inversion_flowlines") + fls = gdir.read_store("inversion_flowlines") mb_mod = massbalance.MultipleFlowlineMassBalance( gdir, fls=fls, @@ -731,7 +731,7 @@ def test_get_ela(self, hef_gdir): ye = 2019 years = np.arange(ys, ye + 1) - fls = gdir.read_pickle("inversion_flowlines") + fls = gdir.read_store("inversion_flowlines") mb_mod = massbalance.MultipleFlowlineMassBalance( gdir, fls=fls, @@ -2093,9 +2093,9 @@ def test_run_annual_step(self, class_case_dir): np.testing.assert_allclose(model.fls[0].section, fmodel.fls[0].section) - def test_gdir_copy(self, hef_gdir): + def test_gdir_copy(self, hef_gdir, tmpdir): - new_dir = os.path.join(get_test_dir(), 'tmp_testcopy') + new_dir = tmpdir.join('tmp_testcopy') if os.path.exists(new_dir): shutil.rmtree(new_dir) new_gdir = tasks.copy_to_basedir(hef_gdir, base_dir=new_dir, @@ -2127,7 +2127,7 @@ def test_hef(self, class_case_dir, hef_gdir): init_present_time_glacier(hef_gdir) - fls = hef_gdir.read_pickle('model_flowlines') + fls = hef_gdir.read_store('model_flowlines') model = FluxBasedModel(fls) model.to_geometry_netcdf(p) @@ -2143,7 +2143,7 @@ def test_hef(self, class_case_dir, hef_gdir): assert fl.flows_to_indice == fl_.flows_to_indice # mixed flowline - fls = hef_gdir.read_pickle('model_flowlines') + fls = hef_gdir.read_store('model_flowlines') model = FluxBasedModel(fls) p = os.path.join(class_case_dir, 'grp_hef_mix.nc') @@ -2190,7 +2190,7 @@ def inversion_gdir(class_case_dir): class TestIdealisedInversion(): def simple_plot(self, model, gdir): # pragma: no cover - ocls = gdir.read_pickle('inversion_output') + ocls = gdir.read_store('inversion_output') ithick = ocls[-1]['thick'] pg = np.where((model.fls[-1].thick > 0) & (model.fls[-1].widths_m > 1)) plt.figure() @@ -2206,7 +2206,7 @@ def simple_plot(self, model, gdir): # pragma: no cover plt.show() def double_plot(self, model, gdir): # pragma: no cover - ocls = gdir.read_pickle('inversion_output') + ocls = gdir.read_store('inversion_output') f, axs = plt.subplots(1, 2, figsize=(8, 4), sharey=True) for i, ax in enumerate(axs): ithick = ocls[i]['thick'] @@ -2238,7 +2238,7 @@ def test_inversion_rectangular(self, inversion_gdir): flo.widths = fl.widths[pg] flo.is_rectangular = np.ones(flo.nx).astype(bool) fls.append(flo) - inversion_gdir.write_pickle(copy.deepcopy(fls), 'inversion_flowlines') + inversion_gdir.write_store(copy.deepcopy(fls), 'inversion_flowlines') massbalance.apparent_mb_from_linear_mb(inversion_gdir) inversion.prepare_for_inversion(inversion_gdir) @@ -2250,7 +2250,7 @@ def test_inversion_rectangular(self, inversion_gdir): mb_on_z = mb.get_annual_mb(fl.surface_h[pg]) flux = np.cumsum(fl.widths_m[pg] * fl.dx_meter * mb_on_z) - inv_out = inversion_gdir.read_pickle('inversion_input') + inv_out = inversion_gdir.read_store('inversion_input') inv_flux = inv_out[0]['flux'] slope = - np.gradient(fl.surface_h[pg], fl.dx_meter) @@ -2283,7 +2283,7 @@ def test_inversion_rectangular(self, inversion_gdir): utils.rmsd(est_h_ofl[25:95], mod_h[25:95])) # And with our current inversion? - inv_out = inversion_gdir.read_pickle('inversion_output') + inv_out = inversion_gdir.read_store('inversion_output') our_h = inv_out[0]['thick'] assert_allclose(est_h[25:75], our_h[25:75], rtol=0.01) @@ -2312,7 +2312,7 @@ def test_inversion_trapeze(self, inversion_gdir): flo.widths = fl.widths[pg] flo.is_rectangular = np.ones(flo.nx).astype(bool) fls.append(flo) - inversion_gdir.write_pickle(copy.deepcopy(fls), 'inversion_flowlines') + inversion_gdir.write_store(copy.deepcopy(fls), 'inversion_flowlines') massbalance.apparent_mb_from_linear_mb(inversion_gdir) @@ -2350,14 +2350,14 @@ def test_inversion_parabolic(self, inversion_gdir): flo.widths = fl.widths[pg] flo.is_rectangular = np.zeros(flo.nx).astype(bool) fls.append(flo) - inversion_gdir.write_pickle(copy.deepcopy(fls), 'inversion_flowlines') + inversion_gdir.write_store(copy.deepcopy(fls), 'inversion_flowlines') massbalance.apparent_mb_from_linear_mb(inversion_gdir) inversion.prepare_for_inversion(inversion_gdir) v = inversion.mass_conservation_inversion(inversion_gdir) assert_allclose(v, model.volume_m3, rtol=0.01) - inv = inversion_gdir.read_pickle('inversion_output')[-1] + inv = inversion_gdir.read_store('inversion_output')[-1] bed_shape_gl = 4 * inv['thick'] / \ (flo.widths * inversion_gdir.grid.dx) ** 2 bed_shape_ref = (4 * fl.thick[pg] / @@ -2367,7 +2367,7 @@ def test_inversion_parabolic(self, inversion_gdir): mb_on_z = mb.get_annual_mb(fl.surface_h[pg]) flux = np.cumsum(fl.widths_m[pg] * fl.dx_meter * mb_on_z) - inv_out = inversion_gdir.read_pickle('inversion_input') + inv_out = inversion_gdir.read_store('inversion_input') inv_flux = inv_out[0]['flux'] slope = - np.gradient(fl.surface_h[pg], fl.dx_meter) @@ -2397,7 +2397,7 @@ def test_inversion_parabolic(self, inversion_gdir): utils.rmsd(est_h_ofl[25:95], mod_h[25:95])) # And with our current inversion? - inv_out = inversion_gdir.read_pickle('inversion_output') + inv_out = inversion_gdir.read_store('inversion_output') our_h = inv_out[0]['thick'] assert_allclose(est_h[25:75], our_h[25:75], rtol=0.01) @@ -2428,7 +2428,7 @@ def test_inversion_mixed(self, inversion_gdir): flo.widths = fl.widths[pg] flo.is_rectangular = fl.is_trapezoid[pg] fls.append(flo) - inversion_gdir.write_pickle(copy.deepcopy(fls), 'inversion_flowlines') + inversion_gdir.write_store(copy.deepcopy(fls), 'inversion_flowlines') massbalance.apparent_mb_from_linear_mb(inversion_gdir) inversion.prepare_for_inversion(inversion_gdir) @@ -2457,7 +2457,7 @@ def test_inversion_cliff(self, inversion_gdir): flo.widths = fl.widths[pg] flo.is_rectangular = np.ones(flo.nx).astype(bool) fls.append(flo) - inversion_gdir.write_pickle(copy.deepcopy(fls), 'inversion_flowlines') + inversion_gdir.write_store(copy.deepcopy(fls), 'inversion_flowlines') massbalance.apparent_mb_from_linear_mb(inversion_gdir) inversion.prepare_for_inversion(inversion_gdir) @@ -2484,7 +2484,7 @@ def test_inversion_noisy(self, inversion_gdir): flo.widths = fl.widths[pg] flo.is_rectangular = np.ones(flo.nx).astype(bool) fls.append(flo) - inversion_gdir.write_pickle(copy.deepcopy(fls), 'inversion_flowlines') + inversion_gdir.write_store(copy.deepcopy(fls), 'inversion_flowlines') massbalance.apparent_mb_from_linear_mb(inversion_gdir) inversion.prepare_for_inversion(inversion_gdir) @@ -2515,7 +2515,7 @@ def test_inversion_tributary(self, inversion_gdir): fls[0].set_flows_to(fls[1]) - inversion_gdir.write_pickle(copy.deepcopy(fls), 'inversion_flowlines') + inversion_gdir.write_store(copy.deepcopy(fls), 'inversion_flowlines') massbalance.apparent_mb_from_linear_mb(inversion_gdir) inversion.prepare_for_inversion(inversion_gdir) @@ -2547,7 +2547,7 @@ def test_inversion_non_equilibrium(self, inversion_gdir): flo.widths = fl.widths[pg] flo.is_rectangular = np.ones(flo.nx).astype(bool) fls.append(flo) - inversion_gdir.write_pickle(copy.deepcopy(fls), 'inversion_flowlines') + inversion_gdir.write_store(copy.deepcopy(fls), 'inversion_flowlines') massbalance.apparent_mb_from_linear_mb(inversion_gdir) inversion.prepare_for_inversion(inversion_gdir) @@ -2555,7 +2555,7 @@ def test_inversion_non_equilibrium(self, inversion_gdir): # expected errors assert v > model.volume_m3 - ocls = inversion_gdir.read_pickle('inversion_output') + ocls = inversion_gdir.read_store('inversion_output') ithick = ocls[0]['thick'] assert np.mean(ithick) > np.mean(model.fls[0].thick) * 1.1 if do_plot: # pragma: no cover @@ -2578,7 +2578,7 @@ def test_inversion_and_run(self, inversion_gdir): flo.widths = fl.widths[pg] flo.is_rectangular = np.zeros(flo.nx).astype(bool) fls.append(flo) - inversion_gdir.write_pickle(copy.deepcopy(fls), 'inversion_flowlines') + inversion_gdir.write_store(copy.deepcopy(fls), 'inversion_flowlines') massbalance.apparent_mb_from_linear_mb(inversion_gdir) inversion.prepare_for_inversion(inversion_gdir) @@ -2586,7 +2586,7 @@ def test_inversion_and_run(self, inversion_gdir): assert_allclose(v, model.volume_m3, rtol=0.01) - inv = inversion_gdir.read_pickle('inversion_output')[-1] + inv = inversion_gdir.read_store('inversion_output')[-1] bed_shape_gl = 4 * inv['thick'] / (flo.widths * inversion_gdir.grid.dx) ** 2 ithick = inv['thick'] @@ -2666,7 +2666,7 @@ def test_flux_gate_on_hef(self, hef_gdir, inversion_params): init_present_time_glacier(hef_gdir) mb_mod = massbalance.ScalarMassBalance() - fls = hef_gdir.read_pickle('model_flowlines') + fls = hef_gdir.read_store('model_flowlines') for fl in fls: fl.thick = fl.thick * 0 model = FluxBasedModel(fls, mb_model=mb_mod, y0=0., @@ -2906,7 +2906,7 @@ def test_equilibrium_glacier_wide(self, hef_gdir, inversion_params): glen_a=inversion_params['inversion_glen_a']) init_present_time_glacier(hef_gdir) - fls = hef_gdir.read_pickle('model_flowlines') + fls = hef_gdir.read_store('model_flowlines') model = FluxBasedModel(fls, mb_model=mb_mod, y0=0., fs=inversion_params['inversion_fs'], glen_a=inversion_params['inversion_glen_a'], @@ -2938,7 +2938,7 @@ def test_commitment(self, hef_gdir, inversion_params): mb_mod = massbalance.ConstantMassBalance(hef_gdir, y0=2002 - 15) - fls = hef_gdir.read_pickle('model_flowlines') + fls = hef_gdir.read_store('model_flowlines') model = FluxBasedModel(fls, mb_model=mb_mod, y0=0., fs=inversion_params['inversion_fs'], glen_a=inversion_params['inversion_glen_a']) @@ -2953,9 +2953,9 @@ def test_commitment(self, hef_gdir, inversion_params): init_present_time_glacier(hef_gdir) - glacier = hef_gdir.read_pickle('model_flowlines') + glacier = hef_gdir.read_store('model_flowlines') - fls = hef_gdir.read_pickle('model_flowlines') + fls = hef_gdir.read_store('model_flowlines') model = FluxBasedModel(fls, mb_model=mb_mod, y0=0., fs=inversion_params['inversion_fs'], glen_a=inversion_params['inversion_glen_a']) @@ -3085,7 +3085,7 @@ def test_start_from_spinup(self, hef_gdir): init_present_time_glacier(hef_gdir) cfg.PARAMS['store_model_geometry'] = True - fls = hef_gdir.read_pickle('model_flowlines') + fls = hef_gdir.read_store('model_flowlines') vol = 0 area = 0 for fl in fls: @@ -3116,7 +3116,7 @@ def test_start_from_spinup_minmax_ys(self, hef_gdir): init_present_time_glacier(hef_gdir) cfg.PARAMS['store_model_geometry'] = True - fls = hef_gdir.read_pickle('model_flowlines') + fls = hef_gdir.read_store('model_flowlines') vol = 0 area = 0 for fl in fls: @@ -3402,7 +3402,7 @@ class TestDynamicSpinup: def test_run_dynamic_spinup(self, hef_gdir, minimise_for): # value we want to match after dynamic spinup - fls = hef_gdir.read_pickle('model_flowlines') + fls = hef_gdir.read_store('model_flowlines') ref_value = 0 if minimise_for == 'area': unit = 'km2' @@ -3546,7 +3546,7 @@ def test_run_dynamic_spinup(self, hef_gdir, minimise_for): # spinup function and if 'ignore_errors' works # create a flowline with zero ice - fls_zero_ice = hef_gdir.read_pickle('model_flowlines') + fls_zero_ice = hef_gdir.read_store('model_flowlines') for i in range(len(fls_zero_ice)): fls_zero_ice[i].section = np.zeros(len(fls_zero_ice[i].section)) @@ -3844,7 +3844,7 @@ def reset_melt_f(): # value we want to match after dynamic melt_f calibration with dynamic # spinup - fls = gdir.read_pickle('model_flowlines') + fls = gdir.read_store('model_flowlines') ref_value_dynamic_spinup = 0 if minimise_for == 'area': unit = 'km2' @@ -3874,8 +3874,7 @@ def reset_melt_f(): # only important if inversion is included, so original # model_flowlines are unchagned (to be able to conduct more dynamic # calibration runs in the same gdir) - assert not os.path.isfile( - os.path.join(gdir.dir, 'model_flowlines_dyn_melt_f_calib.pkl')) + assert not gdir.has_file('model_flowlines', '_dyn_melt_f_calib') # conduct a run including a dynamic spinup and inversion melt_f_max = 1000 * 12 / 365 @@ -3925,14 +3924,13 @@ def reset_melt_f(): if do_inversion: # after the run, check that the dyn model flowlines exists and that # the original model flowlines are unchanged - assert os.path.isfile( - os.path.join(gdir.dir, 'model_flowlines_dyn_melt_f_calib.pkl')) + assert gdir.has_file('model_flowlines', '_dyn_melt_f_calib') assert np.all([np.all(getattr(fl_prev, 'surface_h') == getattr(fl_now, 'surface_h')) and np.all(getattr(fl_prev, 'bed_h') == getattr(fl_now, 'bed_h')) for fl_prev, fl_now in - zip(fls, gdir.read_pickle('model_flowlines'))]) + zip(fls, gdir.read_store('model_flowlines'))]) # test that error is raised if ignore_error=False reset_melt_f() @@ -3999,7 +3997,7 @@ def reset_melt_f(): dynamic_melt_f_run_with_dynamic_spinup_fallback( gdir, melt_f=gdir.read_json('mb_calib')['melt_f'], - fls_init=gdir.read_pickle('model_flowlines'), + fls_init=gdir.read_store('model_flowlines'), ys=gdir.get_climate_info()['baseline_yr_0'], ye=gdir.get_climate_info()['baseline_yr_1'] + 1, local_variables=None, @@ -4032,7 +4030,7 @@ def reset_melt_f(): # value we want to match after dynamic melt_f calibration with dynamic # spinup - fls = gdir.read_pickle('model_flowlines') + fls = gdir.read_store('model_flowlines') ref_value_dynamic_spinup = 0 if minimise_for == 'area': unit = 'km2' @@ -4239,11 +4237,11 @@ def reset_melt_f(): dynamic_melt_f_run_with_dynamic_spinup_fallback( gdir, melt_f=original_melt_f, - fls_init=gdir.read_pickle('model_flowlines'), + fls_init=gdir.read_store('model_flowlines'), ys=gdir.get_climate_info()['baseline_yr_0'], ye=gdir.get_climate_info()['baseline_yr_1'] + 1, local_variables={'vol_m3_ref': - gdir.read_pickle('model_flowlines')[0].volume_m3}, + gdir.read_store('model_flowlines')[0].volume_m3}, minimise_for=minimise_for ) assert original_melt_f == gdir.read_json('mb_calib')['melt_f'] @@ -5478,7 +5476,7 @@ def test_merged_simulation(self): main_rgi_id='RGI50-11.00897', glcdf=glcdf, buffer=0) assert 'RGI50-11.00897' == np.unique([fl.rgi_id for fl in - merge0.read_pickle( + merge0.read_store( 'model_flowlines')])[0] # merge, but with 50 buffer. overlapping glaciers should be excluded @@ -5486,9 +5484,9 @@ def test_merged_simulation(self): main_rgi_id='RGI50-11.00897', glcdf=glcdf, buffer=50) assert 'RGI50-11.00719_d01' in [fl.rgi_id for fl in - merge1.read_pickle('model_flowlines')] + merge1.read_store('model_flowlines')] assert 'RGI50-11.00779' not in [fl.rgi_id for fl in - merge1.read_pickle('model_flowlines')] + merge1.read_store('model_flowlines')] # merge HEF and Vernagt, include Gepatsch but it should not be merged gdir_merged = workflow.merge_glacier_tasks(gdirs, @@ -5496,7 +5494,7 @@ def test_merged_simulation(self): glcdf=glcdf) # test flowlines - fls = gdir_merged.read_pickle('model_flowlines') + fls = gdir_merged.read_store('model_flowlines') # check for gepatsch, should not be there assert 'RGI50-11.00746' not in [fl.rgi_id for fl in fls] @@ -5570,7 +5568,7 @@ def test_equilibrium(self, hef_elev_gdir, inversion_params): # year 1930 is used with equilibrium climate period in mind (old t*) mb_mod = massbalance.ConstantMassBalance(hef_elev_gdir, y0=1930) - fls = hef_elev_gdir.read_pickle('model_flowlines') + fls = hef_elev_gdir.read_store('model_flowlines') model = SemiImplicitModel(fls, mb_model=mb_mod, y0=0, fs=inversion_params['inversion_fs'], glen_a=inversion_params['inversion_glen_a'], @@ -5866,7 +5864,7 @@ def test_distribute(self, hef_elev_gdir, inversion_params): seed=4) mb_mod.temp_bias = 0.5 - fls = hef_elev_gdir.read_pickle('model_flowlines') + fls = hef_elev_gdir.read_store('model_flowlines') model = SemiImplicitModel(fls, mb_model=mb_mod, y0=2000, fs=inversion_params['inversion_fs'], glen_a=inversion_params['inversion_glen_a']) diff --git a/oggm/tests/test_prepro.py b/oggm/tests/test_prepro.py index ba0cc3b49..2533d8694 100644 --- a/oggm/tests/test_prepro.py +++ b/oggm/tests/test_prepro.py @@ -530,9 +530,10 @@ def test_custom_basename(self): cfg.add_to_basenames('mybn', 'testfb.pkl', docstr='Some docs') + # TODO: replace read & write pickle methods out = {'foo': 1.5} - gdir.write_pickle(out, 'mybn') - assert gdir.read_pickle('mybn') == out + gdir.write_store(out, 'mybn') + assert gdir.read_store('mybn') == out def test_gridded_data_var_to_geotiff(self): @@ -653,7 +654,7 @@ def test_centerlines(self): gis.glacier_masks(gdir) centerlines.compute_centerlines(gdir) - cls = gdir.read_pickle('centerlines') + cls = gdir.read_store('centerlines') for cl in cls: for j, ip, ob in zip(cl.inflow_indices, cl.inflow_points, cl.inflows): @@ -684,8 +685,8 @@ def test_downstream(self): centerlines.initialize_flowlines(gdir) centerlines.compute_downstream_line(gdir) - d = gdir.read_pickle('downstream_line') - cl = gdir.read_pickle('inversion_flowlines')[-1] + d = gdir.read_store('downstream_line') + cl = gdir.read_store('inversion_flowlines')[-1] self.assertEqual( len(d['full_line'].coords) - len(d['downstream_line'].coords), cl.nx) @@ -707,13 +708,13 @@ def test_downstream_bedshape(self): centerlines.compute_downstream_line(gdir) centerlines.compute_downstream_bedshape(gdir) - out = gdir.read_pickle('downstream_line') + out = gdir.read_store('downstream_line') for o, h in zip(out['bedshapes'], out['surface_h']): assert np.all(np.isfinite(o)) assert np.all(np.isfinite(h)) - tpl = gdir.read_pickle('inversion_flowlines')[-1] - c = gdir.read_pickle('downstream_line')['downstream_line'] + tpl = gdir.read_store('inversion_flowlines')[-1] + c = gdir.read_store('downstream_line')['downstream_line'] c = centerlines.Centerline(c, dx=tpl.dx) # Independent reproduction for a few points @@ -786,7 +787,7 @@ def test_baltoro_centerlines(self): centerlines.compute_centerlines(gdir) my_mask = np.zeros((gdir.grid.ny, gdir.grid.nx), dtype=np.uint8) - cls = gdir.read_pickle('centerlines') + cls = gdir.read_store('centerlines') assert gdir.rgi_date == 2009 @@ -918,7 +919,7 @@ def test_to_inversion_flowline(self): hgt = [] harea = [] - cls = gdir.read_pickle('inversion_flowlines') + cls = gdir.read_store('inversion_flowlines') for cl in cls: harea.extend(list(cl.widths * cl.dx)) hgt.extend(list(cl.surface_h)) @@ -1008,7 +1009,7 @@ def test_run(self): centerlines.elevation_band_flowline(gdir) centerlines.fixed_dx_elevation_band_flowline(gdir) centerlines.compute_downstream_line(gdir) - dl = gdir.read_pickle('downstream_line') + dl = gdir.read_store('downstream_line') np.testing.assert_allclose(dl['downstream_line'].length, 12, atol=0.5) centerlines.compute_downstream_bedshape(gdir) @@ -1067,7 +1068,7 @@ def test_catchment_area(self): centerlines.compute_centerlines(gdir) centerlines.catchment_area(gdir) - cis = gdir.read_pickle('geometries')['catchment_indices'] + cis = gdir.read_store('geometries')['catchment_indices'] # The catchment area must be as big as expected with utils.ncDataset(gdir.get_filepath('gridded_data')) as nc: @@ -1092,7 +1093,7 @@ def test_flowlines(self): centerlines.compute_centerlines(gdir) centerlines.initialize_flowlines(gdir) - cls = gdir.read_pickle('inversion_flowlines') + cls = gdir.read_store('inversion_flowlines') for cl in cls: for j, ip, ob in zip(cl.inflow_indices, cl.inflow_points, cl.inflows): @@ -1150,7 +1151,7 @@ def test_width(self): hgt = [] harea = [] - cls = gdir.read_pickle('inversion_flowlines') + cls = gdir.read_store('inversion_flowlines') for cl in cls: harea.extend(list(cl.widths * cl.dx)) hgt.extend(list(cl.surface_h)) @@ -1209,7 +1210,7 @@ def test_nodivides_correct_slope(self): centerlines.compute_centerlines(gdir) centerlines.initialize_flowlines(gdir) - fls = gdir.read_pickle('inversion_flowlines') + fls = gdir.read_store('inversion_flowlines') min_slope = np.deg2rad(cfg.PARAMS['min_slope']) for fl in fls: dx = fl.dx * gdir.grid.dx @@ -2075,12 +2076,12 @@ def test_mb_calibration_from_scalar_mb_multiple_fl(self): massbalance.apparent_mb_from_any_mb(gdir, mb_years=[1953, 2002]) # Artificially make some arms even lower to have multiple branches - fls = gdir.read_pickle('inversion_flowlines') + fls = gdir.read_store('inversion_flowlines') assert fls[0].flows_to is fls[-1] assert fls[1].flows_to is fls[-1] fls[0].surface_h -= 700 fls[1].surface_h -= 700 - gdir.write_pickle(fls, 'inversion_flowlines') + gdir.write_store(fls, 'inversion_flowlines') mb_calibration_from_scalar_mb(gdir, ref_mb=ref_mb, ref_period=ref_period) @@ -2197,7 +2198,7 @@ def test_invert_hef(self): # maxH = 242+-13 inversion.prepare_for_inversion(gdir) # Check how many clips: - cls = gdir.read_pickle('inversion_input') + cls = gdir.read_store('inversion_input') nabove = 0 maxs = 0. npoints = 0. @@ -2239,8 +2240,8 @@ def to_optimize(x): write=True) np.testing.assert_allclose(ref_v, v) - cls = gdir.read_pickle('inversion_output') - fls = gdir.read_pickle('inversion_flowlines') + cls = gdir.read_store('inversion_output') + fls = gdir.read_store('inversion_flowlines') maxs = 0. for cl, fl in zip(cls, fls): thick = cl['thick'] @@ -2252,7 +2253,7 @@ def to_optimize(x): maxs = 0. v = 0. - cls = gdir.read_pickle('inversion_output') + cls = gdir.read_store('inversion_output') for cl in cls: thick = cl['thick'] _max = np.max(thick) @@ -2264,7 +2265,7 @@ def to_optimize(x): np.testing.assert_allclose(ref_v, inversion.get_inversion_volume(gdir)) # Sanity check - velocities - inv = gdir.read_pickle('inversion_output')[-1] + inv = gdir.read_store('inversion_output')[-1] # vol in m3 and dx in m -> section in m2 section = inv['volume'] / inv['dx'] @@ -2278,7 +2279,7 @@ def to_optimize(x): # Some reference value I just computed - see if other computers agree np.testing.assert_allclose(np.mean(velocity[:-1]), 42, atol=5) inversion.compute_inversion_velocities(gdir, fs=fs, glen_a=glen_a) - inv = gdir.read_pickle('inversion_output')[-1] + inv = gdir.read_store('inversion_output')[-1] np.testing.assert_allclose(velocity, inv['u_integrated']) @pytest.mark.slow @@ -2431,7 +2432,7 @@ def test_invert_hef_water_level(self): inversion.prepare_for_inversion(gdir) v = inversion.mass_conservation_inversion(gdir, water_level=10000) - cls = gdir.read_pickle('inversion_output') + cls = gdir.read_store('inversion_output') v_bwl = np.nansum([np.nansum(fl.get('volume_bwl', 0)) for fl in cls]) n_trap = np.sum([np.sum(fl['is_trapezoid']) for fl in cls]) np.testing.assert_allclose(v, v_bwl) @@ -2461,7 +2462,7 @@ def test_invert_hef_from_linear_mb(self): inversion.prepare_for_inversion(gdir) # Check how many clips: - cls = gdir.read_pickle('inversion_input') + cls = gdir.read_store('inversion_input') nabove = 0 maxs = 0. npoints = 0. @@ -2503,8 +2504,8 @@ def to_optimize(x): write=True) np.testing.assert_allclose(ref_v, v) - cls = gdir.read_pickle('inversion_output') - fls = gdir.read_pickle('inversion_flowlines') + cls = gdir.read_store('inversion_output') + fls = gdir.read_store('inversion_flowlines') maxs = 0. for cl, fl in zip(cls, fls): thick = cl['thick'] @@ -2514,7 +2515,7 @@ def to_optimize(x): maxs = 0. v = 0. - cls = gdir.read_pickle('inversion_output') + cls = gdir.read_store('inversion_output') for cl in cls: thick = cl['thick'] _max = np.max(thick) @@ -2541,7 +2542,7 @@ def test_invert_hef_from_any_mb(self): # Reference massbalance.apparent_mb_from_linear_mb(gdir) inversion.prepare_for_inversion(gdir) - cls1 = gdir.read_pickle('inversion_input') + cls1 = gdir.read_store('inversion_input') v1 = inversion.mass_conservation_inversion(gdir) # New should be equivalent mb_model = massbalance.LinearMassBalance(ela_h=1800, grad=3) @@ -2549,7 +2550,7 @@ def test_invert_hef_from_any_mb(self): mb_years=np.arange(30)) inversion.prepare_for_inversion(gdir) v2 = inversion.mass_conservation_inversion(gdir) - cls2 = gdir.read_pickle('inversion_input') + cls2 = gdir.read_store('inversion_input') # Now the tests for cl1, cl2 in zip(cls1, cls2): @@ -2663,8 +2664,8 @@ def to_optimize(x): write=True) np.testing.assert_allclose(ref_v, v) - cls = gdir.read_pickle('inversion_output') - fls = gdir.read_pickle('inversion_flowlines') + cls = gdir.read_store('inversion_output') + fls = gdir.read_store('inversion_flowlines') maxs = 0. for cl, fl in zip(cls, fls): thick = cl['thick'] @@ -2687,7 +2688,7 @@ def to_optimize(x): write=True) np.testing.assert_allclose(v, ref_v, rtol=0.06) - cls = gdir.read_pickle('inversion_output') + cls = gdir.read_store('inversion_output') maxs = 0. for cl in cls: thick = cl['thick'] @@ -2697,7 +2698,7 @@ def to_optimize(x): inversion.compute_inversion_velocities(gdir, fs=0, glen_a=glen_a) - inv = gdir.read_pickle('inversion_output')[-1] + inv = gdir.read_store('inversion_output')[-1] # In the middle section the velocities look OK and should be close # to the no sliding assumption @@ -2789,13 +2790,13 @@ def test_inversion_with_calving(self): inversion.prepare_for_inversion(gdir) inversion.mass_conservation_inversion(gdir) - cls1 = gdir.read_pickle('inversion_output') + cls1 = gdir.read_store('inversion_output') # Increase calving for this one cfg.PARAMS['inversion_calving_k'] = 1 res_bef = gdir.get_diagnostics()['apparent_mb_from_any_mb_residual'] out = inversion.find_inversion_calving_from_any_mb(gdir) - cls2 = gdir.read_pickle('inversion_output') + cls2 = gdir.read_store('inversion_output') # Calving increases the volume and adds a residual v_ref = np.sum([np.sum(fl['volume']) for fl in cls1]) @@ -2808,7 +2809,7 @@ def test_inversion_with_calving(self): v_new_bsl = np.sum([np.sum(fl.get('volume_bsl', 0)) for fl in cls2]) v_new_bwl = np.sum([np.sum(fl.get('volume_bwl', 0)) for fl in cls2]) flowline.init_present_time_glacier(gdir) - flsg = gdir.read_pickle('model_flowlines') + flsg = gdir.read_store('model_flowlines') for fl in flsg: fl.water_level = out['calving_water_level'] v_new_bsl_g = np.sum([np.sum(fl.volume_bsl_m3) for fl in flsg]) @@ -2953,11 +2954,11 @@ def test_ideal_glacier(self): towrite.append(cl_dic) # Write out - gdir.write_pickle(towrite, 'inversion_input') + gdir.write_store(towrite, 'inversion_input') v = inversion.mass_conservation_inversion(gdir, glen_a=glen_a) np.testing.assert_allclose(v, model.volume_m3, rtol=0.01) - cl = gdir.read_pickle('inversion_output')[0] + cl = gdir.read_store('inversion_output')[0] rmsd = utils.rmsd(cl['thick'], model.fls[0].thick[:len(cl['thick'])]) assert rmsd < 10. @@ -2974,7 +2975,7 @@ def test_intersections(self): centerlines.catchment_width_correction(gdir) # see that we have as many catchments as flowlines - fls = gdir.read_pickle('inversion_flowlines') + fls = gdir.read_store('inversion_flowlines') gdfc = gdir.read_shapefile('flowline_catchments') self.assertEqual(len(fls), len(gdfc)) # and at least as many intersects @@ -3747,10 +3748,10 @@ def test_invert(self): massbalance.apparent_mb_from_linear_mb(gdir) inversion.prepare_for_inversion(gdir, invert_all_rectangular=True) v1 = inversion.mass_conservation_inversion(gdir) - tt1 = gdir.read_pickle('inversion_input')[0] + tt1 = gdir.read_store('inversion_input')[0] gdir1 = gdir - fl = gdir.read_pickle('inversion_flowlines')[0] + fl = gdir.read_store('inversion_flowlines')[0] map_dx = gdir.grid.dx gdir = utils.idealized_gdir(fl.surface_h, fl.widths * map_dx, @@ -3761,7 +3762,7 @@ def test_invert(self): inversion.prepare_for_inversion(gdir, invert_all_rectangular=True) v2 = inversion.mass_conservation_inversion(gdir) - tt2 = gdir.read_pickle('inversion_input')[0] + tt2 = gdir.read_store('inversion_input')[0] np.testing.assert_allclose(tt1['width'], tt2['width']) np.testing.assert_allclose(tt1['slope_angle'], tt2['slope_angle']) np.testing.assert_allclose(tt1['dx'], tt2['dx']) @@ -3962,7 +3963,7 @@ def test_flowlines_from_gmip_data(self): width_path=width_path) pygem_compat.present_time_glacier_from_bins(gdir, data=data) - fls = gdir.read_pickle('model_flowlines') + fls = gdir.read_store('model_flowlines') data = data.loc[::-1] area = np.asarray(data['area']) width = np.asarray(data['width']) diff --git a/oggm/tests/test_utils.py b/oggm/tests/test_utils.py index 4f5fd286c..36f5be4b3 100644 --- a/oggm/tests/test_utils.py +++ b/oggm/tests/test_utils.py @@ -803,7 +803,7 @@ def test_start_from_level_3(self): assert gdir.get_climate_info() # This we can read - gdir.read_pickle('inversion_flowlines') + gdir.read_store('inversion_flowlines') df = utils.compile_glacier_statistics(gdirs) assert 'dem_med_elev' in df @@ -836,7 +836,9 @@ def _read_shp(): return inter, rgidf -class TestPreproCLI(unittest.TestCase): +class TestPreproCLI: + + on_mac = platform.system() == 'Darwin' def setUp(self): from _pytest.monkeypatch import MonkeyPatch @@ -849,8 +851,10 @@ def tearDown(self): if os.path.exists(self.testdir): shutil.rmtree(self.testdir) - def reset_dir(self): - utils.mkdir(self.testdir, reset=True) + @pytest.fixture(scope="function", autouse=True) + def reset_dir(self, tmpdir): + utils.mkdir(tmpdir, reset=True) + # self.tearDown() def test_parse_args(self): @@ -1047,16 +1051,16 @@ def test_parse_args(self): assert kwargs['border'] == 120 @pytest.mark.slow - def test_full_run_defaults(self): + def test_full_run_defaults(self, tmpdir): from oggm.cli.prepro_levels import run_prepro_levels inter, rgidf = _read_shp() - wdir = os.path.join(self.testdir, 'wd') - utils.mkdir(wdir) - odir = os.path.join(self.testdir, 'my_levs') - topof = utils.get_demo_file('srtm_oetztal.tif') + wdir = tmpdir.mkdir("wd") + utils.mkdir(wdir, reset=True) + odir = tmpdir.mkdir("my_levs") + topof = utils.get_demo_file("srtm_oetztal.tif") np.random.seed(0) run_prepro_levels(rgi_version='61', rgi_reg='11', border=20, output_folder=odir, working_dir=wdir, is_test=True, @@ -1201,16 +1205,15 @@ def test_full_run_defaults(self): np.testing.assert_allclose(ods[vn].sel(time=1990), 0) @pytest.mark.slow - def test_distributed_thickness_and_geotiff_export(self): + def test_distributed_thickness_and_geotiff_export(self, tmpdir): from oggm.cli.prepro_levels import run_prepro_levels import rioxarray as rioxr inter, rgidf = _read_shp() - wdir = os.path.join(self.testdir, 'wd') - utils.mkdir(wdir) - odir = os.path.join(self.testdir, 'my_levs') + wdir = tmpdir.mkdir("wd") + odir = tmpdir.mkdir("my_levs") topof = utils.get_demo_file('srtm_oetztal.tif') np.random.seed(0) @@ -1265,16 +1268,15 @@ def test_distributed_thickness_and_geotiff_export(self): assert gtiff_ds.isel(band=0).sum() > 0 @pytest.mark.slow - def test_full_run_cru_centerlines(self): + def test_full_run_cru_centerlines(self, tmpdir): from oggm.cli.prepro_levels import run_prepro_levels inter, rgidf = _read_shp() - wdir = os.path.join(self.testdir, 'wd') - utils.mkdir(wdir) - odir = os.path.join(self.testdir, 'my_levs') - topof = utils.get_demo_file('srtm_oetztal.tif') + wdir = tmpdir.mkdir("wd") + odir = tmpdir.mkdir("my_levs") + topof = utils.get_demo_file("srtm_oetztal.tif") np.random.seed(0) ref_period = '2000-01-01_2010-01-01' run_prepro_levels(rgi_version='61', rgi_reg='11', border=20, @@ -1289,6 +1291,7 @@ def test_full_run_cru_centerlines(self): override_params={'geodetic_mb_period': ref_period, 'baseline_climate': 'CRU', 'prcp_fac': 2.5, + 'use_mp_spawn': True, } ) @@ -1420,7 +1423,7 @@ def test_full_run_cru_centerlines(self): np.testing.assert_allclose(ods[vn].sel(time=1990), 0) @pytest.mark.slow - def test_elev_bands_and_spinup_run_with_different_evolution_models(self): + def test_elev_bands_and_spinup_run_with_different_evolution_models(self, tmpdir): from oggm.cli.prepro_levels import run_prepro_levels @@ -1430,10 +1433,9 @@ def test_elev_bands_and_spinup_run_with_different_evolution_models(self): # Read in the RGI file inter, rgidf = _read_shp() - wdir = os.path.join(self.testdir, 'wd') - utils.mkdir(wdir, reset=True) - odir = os.path.join(self.testdir, 'my_levs') - topof = utils.get_demo_file('srtm_oetztal.tif') + wdir = tmpdir.mkdir(f"wd_{evolution_model}") + odir = tmpdir.mkdir(f"my_levs_{evolution_model}") + topof = utils.get_demo_file("srtm_oetztal.tif") np.random.seed(0) border = 80 bstr = 'b_080' @@ -1455,6 +1457,7 @@ def test_elev_bands_and_spinup_run_with_different_evolution_models(self): 'evolution_model': evolution_model, 'downstream_line_shape': downstream_line_shape, 'prcp_fac': 2.5, + 'use_mp_spawn': True, }) df = pd.read_csv(os.path.join(odir, 'RGI61', bstr, 'L0', 'summary', @@ -1577,17 +1580,16 @@ def test_elev_bands_and_spinup_run_with_different_evolution_models(self): rtol=0.02) @pytest.mark.slow - def test_geodetic_per_glacier_and_massredis_run(self): + def test_geodetic_per_glacier_and_massredis_run(self, tmpdir): from oggm.cli.prepro_levels import run_prepro_levels # Read in the RGI file inter, rgidf = _read_shp() - wdir = os.path.join(self.testdir, 'wd') - utils.mkdir(wdir) - odir = os.path.join(self.testdir, 'my_levs') - topof = utils.get_demo_file('srtm_oetztal.tif') + wdir = tmpdir.mkdir("wd") + odir = tmpdir.mkdir("my_levs") + topof = utils.get_demo_file("srtm_oetztal.tif") np.random.seed(0) params = {'geodetic_mb_period': '2000-01-01_2010-01-01', @@ -1602,7 +1604,7 @@ def test_geodetic_per_glacier_and_massredis_run(self): run_prepro_levels(rgi_version='61', rgi_reg='11', border=20, output_folder=odir, working_dir=wdir, is_test=True, rgi_file=rgidf, intersects_file=inter, - override_params=params, + override_params={**params, 'use_mp_spawn': True}, disable_mp=False, mb_calibration_strategy='melt_temp', test_topofile=topof, elev_bands=True) @@ -1679,7 +1681,7 @@ def test_geodetic_per_glacier_and_massredis_run(self): np.testing.assert_allclose(ds.hydro_month, 4) np.testing.assert_allclose(ds.calendar_month, 1) - def test_start_from_prepro(self): + def test_start_from_prepro(self, tmpdir): from oggm.cli.prepro_levels import run_prepro_levels @@ -1688,9 +1690,8 @@ def test_start_from_prepro(self): # Read in the RGI file inter, rgidf = _read_shp() - wdir = os.path.join(self.testdir, 'wd') - utils.mkdir(wdir) - odir = os.path.join(self.testdir, 'my_levs') + wdir = tmpdir.mkdir("wd") + odir = tmpdir.mkdir("my_levs") np.random.seed(0) ref_period = '2000-01-01_2010-01-01' run_prepro_levels(rgi_version='61', rgi_reg='11', border=20, @@ -1703,6 +1704,7 @@ def test_start_from_prepro(self): override_params={'geodetic_mb_period': ref_period, 'baseline_climate': 'CRU', 'prcp_fac': 2.5, + 'use_mp_spawn': True, } ) @@ -1726,9 +1728,9 @@ def test_start_from_prepro(self): rgi_file=rgidf, intersects_file=inter, start_level=2, max_level=4) - def test_source_run(self): + def test_source_run(self, tmpdir, monkeypatch): - self.monkeypatch.setattr(oggm.utils, 'DEM_SOURCES', ['USER']) + monkeypatch.setattr(oggm.utils, 'DEM_SOURCES', ['USER']) from oggm.cli.prepro_levels import run_prepro_levels @@ -1736,9 +1738,8 @@ def test_source_run(self): inter, rgidf = _read_shp() rgidf = rgidf.iloc[:4] - wdir = os.path.join(self.testdir, 'wd') - utils.mkdir(wdir) - odir = os.path.join(self.testdir, 'my_levs') + wdir = tmpdir.mkdir("wd") + odir = tmpdir.mkdir("my_levs") topof = utils.get_demo_file('srtm_oetztal.tif') np.random.seed(0) run_prepro_levels(rgi_version='61', rgi_reg='11', border=20, diff --git a/oggm/tests/test_workflow.py b/oggm/tests/test_workflow.py index 122d35e01..55d44287a 100644 --- a/oggm/tests/test_workflow.py +++ b/oggm/tests/test_workflow.py @@ -4,6 +4,7 @@ import pickle import pytest import json +import shapely import numpy as np import xarray as xr from numpy.testing import assert_allclose @@ -287,7 +288,10 @@ def test_shapefile_output(self): utils.mkdir(base_dir, reset=True) gdirs = workflow.execute_entity_task(utils.copy_to_basedir, gdirs, base_dir=base_dir, setup='all') - os.remove(gdirs[0].get_filepath('centerlines')) + path_centerline = gdirs[0].get_filepath('centerlines') + for path in [path_centerline,path_centerline.replace(".pkl", ".zarr")]: + if os.path.exists(path): + os.remove(path) cfg.PARAMS['continue_on_error'] = True write_centerlines_to_shape(gdirs) @@ -316,7 +320,7 @@ def test_random(self): from oggm.core.massbalance import LinearMassBalance from oggm.core.flowline import FluxBasedModel mb_mod = LinearMassBalance(ela_h=2500) - fls = gd.read_pickle('model_flowlines') + fls = gd.read_store('model_flowlines') model = FluxBasedModel(fls, mb_model=mb_mod) df.loc[gd.rgi_id, 'start_area_km2'] = model.area_km2 df.loc[gd.rgi_id, 'start_volume_km3'] = model.volume_km3 @@ -565,3 +569,154 @@ def test_rgi7_complex_glacier_dirs(): with xr.open_dataset(gdir.get_filepath('gridded_data')) as ds: assert ds.sub_entities.max().item() == (len(rgi7c_to_g_links[gdir.rgi_id]) - 1) + + +class TestZarrWorkflow: + """Tests for any Zarr operations called via workflow or _workflow.""" + + @pytest.mark.skip(reason="warning disabled for performance") + def test_pickle_warnings(self, hef_gdir): + """Test that write_pickle raises a warning if used.""" + gdir = hef_gdir + + with pytest.warns( + PendingDeprecationWarning, + match="gdir.write_pickle is deprecated and will be replaced by gdir.write_store in a future OGGM release.", + ): + gdir.write_pickle( + var={"array": [1, 2]}, + filename="inversion_input", + filesuffix="test_pickle", + ) + + @pytest.mark.skip + @pytest.mark.parametrize("arg_filesuffix", ["", "_exp01"]) + def test_write_zarr(tmp_path, hef_gdir, arg_filesuffix): + """Create a zarr store at the expected path.""" + cfg.initialize() + cfg.PATHS["working_dir"] = str(tmp_path) + gdir = hef_gdir + + ds = xr.Dataset( + { + "temperature": xr.DataArray( + np.array([1.0, 2.0, 3.0]), dims=["time"] + ) + } + ) + data_tree = xr.DataTree(dataset=ds) + + gdir.write_zarr( + data_tree=data_tree, + filename="data_store", + filesuffix=arg_filesuffix, + ) + + fp = gdir.get_filepath(filename="data_store", filesuffix=arg_filesuffix) + assert os.path.exists(fp) + + with xr.open_zarr(fp, consolidated=True) as ds_read: + np.testing.assert_array_equal( + ds_read["temperature"].values, [1.0, 2.0, 3.0] + ) + + # Test that overwrite=True replaces existing zarr content + + ds_new = xr.Dataset( + {"val": xr.DataArray(np.array([9.0, 8.0]), dims=["n"])} + ) + gdir.write_zarr( + xr.DataTree(dataset=ds_new), "data_store", overwrite=True + ) + + fp = gdir.get_filepath("data_store") + with xr.open_zarr(fp, consolidated=True) as ds_read: + np.testing.assert_array_equal(ds_read["val"].values, [9.0, 8.0]) + + # @pytest.mark.xfail(reason="zarr is not yet in oggm-sample-data") + def test_read_store(self, tmp_path, hef_gdir): + """Test that read_store returns xr.DataTree when zarr store exists.""" + cfg.initialize() + cfg.PATHS["working_dir"] = str(tmp_path) + gdir = hef_gdir + + ds = xr.Dataset( + {"flux": xr.DataArray(np.array([1.0, 2.0, 3.0]), dims=["time"])} + ) + data_tree = xr.DataTree() + data_tree["inversion_input_test"] = xr.DataTree.from_dict( + name="inversion_input_test", data=ds + ) + # until this is included via oggm-sample-data + gdir.write_zarr(data_tree=data_tree, filename="data_store", overwrite=False) + result = gdir.read_store(filename="inversion_input", filesuffix="test") + + assert isinstance(result, list) + assert isinstance(result[0], dict) + + # This will fail if it reads the pickle instead of the zarr + np.testing.assert_array_equal(result[0]["flux"], [1.0, 2.0, 3.0]) + + def test_read_store_fallback(self, hef_gdir): + """Test read_store falls back to read_pickle if zarr is not found.""" + gdir = hef_gdir + # Force the use of pickle or this test will never pass once + # switched to zarr + gdir.write_pickle( + var=[1, 2, 3], + filename="inversion_flowlines", + filesuffix="test_pickle", + ) + with pytest.warns( + Warning, + match="Zarr data not found, attempting to read pickle file instead.", + ): + ds_read = gdir.read_store( + filename="inversion_flowlines", filesuffix="test_pickle" + ) + assert not isinstance(ds_read, xr.DataTree) + assert isinstance(ds_read, list) + + def test_validate_store(self, hef_gdir): + gdir = hef_gdir + + ds = xr.Dataset( + {"flux": xr.DataArray(np.array([100.0, 200.0, 300.0]), dims=["x"])} + ) + data_tree = xr.DataTree(dataset=ds, name="inversion_input") + + result = gdir._validate_store(data_tree=data_tree) + + assert isinstance(result, list) + assert isinstance(result[0], dict) + np.testing.assert_array_equal(result[0]["flux"], [100.0, 200.0, 300.0]) + + def test_validate_store_logic(self, hef_gdir): + """Test that _validate_store modifies the data_tree as expected.""" + gdir = hef_gdir + downstream_line = gdir.read_pickle("downstream_line")["downstream_line"] + assert isinstance(downstream_line, shapely.LineString) + + # Create a DataTree with a downstream_line variable + data_tree = xr.DataTree() + ds = xr.Dataset( + { + "downstream_line": xr.DataArray( + np.array( + shapely.geometry.mapping(downstream_line)["coordinates"] + ), + dims=["x", "y"], + ) + } + ) + data_tree = xr.DataTree(dataset=ds, name="downstream_line") + assert data_tree.name == "downstream_line" + + result = gdir._validate_store(data_tree=data_tree) + + assert isinstance(result, dict) + assert "downstream_line" in result.keys() + assert isinstance(result["downstream_line"], shapely.LineString) + assert shapely.LineString(result["downstream_line"]).equals( + downstream_line + ) diff --git a/oggm/utils/_workflow.py b/oggm/utils/_workflow.py index 466feae22..b12e778a9 100644 --- a/oggm/utils/_workflow.py +++ b/oggm/utils/_workflow.py @@ -31,6 +31,7 @@ import numpy as np from scipy import stats import xarray as xr +import zarr import shapely.geometry as shpg import shapely.affinity as shpa from shapely.ops import transform as shp_trafo @@ -68,6 +69,7 @@ recursive_valid_polygons) from oggm.utils._downloads import (get_demo_file, get_wgms_files, get_rgi_glacier_entities) +import oggm.utils.geozarr as geozarr from oggm import cfg from oggm.exceptions import InvalidParamsError, InvalidWorkflowError @@ -701,9 +703,9 @@ def get_centerline_lonlat(gdir, a shapefile """ if flowlines_output or geometrical_widths_output or corrected_widths_output: - cls = gdir.read_pickle('inversion_flowlines') + cls = gdir.read_store('inversion_flowlines') else: - cls = gdir.read_pickle('centerlines') + cls = gdir.read_store('centerlines') exterior = None if ensure_exterior_match: @@ -1625,7 +1627,7 @@ def glacier_statistics(gdir, inversion_only=False, apply_func=None): vol = [] vol_bsl = [] vol_bwl = [] - cl = gdir.read_pickle('inversion_output') + cl = gdir.read_store('inversion_output') for c in cl: vol.extend(c['volume']) vol_bsl.extend(c.get('volume_bsl', [0])) @@ -1696,7 +1698,7 @@ def glacier_statistics(gdir, inversion_only=False, apply_func=None): try: # Centerlines - cls = gdir.read_pickle('centerlines') + cls = gdir.read_store('centerlines') longest = 0. for cl in cls: longest = np.max([longest, cl.dis_on_line[-1]]) @@ -1710,7 +1712,7 @@ def glacier_statistics(gdir, inversion_only=False, apply_func=None): h = np.array([]) widths = np.array([]) slope = np.array([]) - fls = gdir.read_pickle('inversion_flowlines') + fls = gdir.read_store('inversion_flowlines') dx = fls[0].dx * gdir.grid.dx for fl in fls: hgt = fl.surface_h @@ -2175,7 +2177,7 @@ def climate_statistics(gdir, add_climate_period=1995, halfsize=15, # Flowline related stuff h = np.array([]) widths = np.array([]) - fls = gdir.read_pickle('inversion_flowlines') + fls = gdir.read_store('inversion_flowlines') dx = fls[0].dx * gdir.grid.dx for fl in fls: hgt = fl.surface_h @@ -2571,7 +2573,8 @@ def idealized_gdir(surface_h, widths_m, map_dx, flowline_dx=1, fl = Centerline(line, dx=flowline_dx, surface_h=surface_h, map_dx=map_dx) fl.widths = widths_m / map_dx fl.is_rectangular = np.ones(fl.nx).astype(bool) - gdir.write_pickle([fl], 'inversion_flowlines') + + gdir.write_store([fl], 'inversion_flowlines') # Idealized map grid = salem.Grid(nxny=(1, 1), dxdy=(map_dx, map_dx), x0y0=(0, 0)) @@ -3229,7 +3232,14 @@ def has_file(self, filename, filesuffix='', _deprecation_check=True): fp = fp.replace('.shp', '.tar') if cfg.PARAMS['use_compression']: fp += '.gz' - return os.path.exists(fp) + if os.path.exists(fp): + return True + # check if data exists as a group in zarr store + zarr_fp = self.get_filepath("data_store").replace(".pkl", ".zarr") + group = "_".join(filter(None, (filename, filesuffix))) + if os.path.exists(os.path.join(zarr_fp, group)): + return True + return False def add_to_diagnostics(self, key, value): """Write a key, value pair to the gdir's runtime diagnostics. @@ -3310,6 +3320,242 @@ def read_pickle(self, filename, use_compression=None, filesuffix=''): return out + def read_zarr( + self, filename: str, filesuffix: str = "", chunks:dict=None,engine:str="zarr", consolidated:bool=True, decode_cf:bool=True, *kwargs + ) -> xr.DataTree: + """Reads a zarr file located in the glacier directory. + + The location of the zarr store for a given RGI-ID is invariant. + The zarr store is expected to have a group named `filename` + (without suffix). + + Parameters + ---------- + filename : str or None + File name (must be listed in cfg.BASENAMES). If `None`, the + entire zarr store is read. + filesuffix : str, optional + Append a suffix to the filename (useful for experiments). + **kwargs + Additional keyword arguments to pass to xarray.open_zarr(). + + Returns + ------- + xr.DataTree + An xarray.DataTree read from the zarr store. + """ + if chunks is None: + chunks = {} + fp = self.get_filepath("data_store") + if filename == "data_store": + group = None + else: + group = "_".join(filter(None,(filename, filesuffix))) + out = xr.open_datatree( + fp.replace(".pkl", ".zarr"), + group=group, + chunks=chunks, + engine=engine, + consolidated=consolidated, + decode_cf=decode_cf, + *kwargs) + + return out + + def read_store( + self, filename: str, filesuffix: str = "", *kwargs + ) -> dict | list: + """Reads a data store located in the glacier directory. + + Supports pickle and zarr. The location of a zarr store for a + given RGI-ID is invariant. A zarr store is expected to have a + group named `filename` (without suffix). If the zarr store is + not found, automatically falls back to reading a pickle file + with the same name (and suffix). + + For backwards compatibility reasons, the output of this method + is coerced imto the data structures expected from older pickle + files, e.g. all datatrees are converted to flattened + dictionaries. + + Parameters + ---------- + filename : str or None + File name (must be listed in cfg.BASENAMES). If `None`, the + entire zarr store is read. + filesuffix : str, optional + Append a suffix to the filename (useful for experiments). + **kwargs + Additional keyword arguments to pass to xarray.open_zarr(). + + Returns + ------- + list or dict + A list or dictionary of data read from the zarr store. + """ + + try: + out = self.read_zarr( + filename=filename, filesuffix=filesuffix, *kwargs + ) + out: list | dict = self._validate_store( + data_tree=out, group=filename + ) + except ( + FileNotFoundError, + KeyError, + ValueError, + ) as e: # fallback to pickle if zarr not found + warnings.warn( + "Zarr data not found, attempting to read pickle file instead." + ) + fp = self.get_filepath(filename, filesuffix=filesuffix) + if not os.path.exists(fp): + raise FileNotFoundError( + f"{e}\nNo zarr or pickle found for {fp}" + ) + + out: list | dict = self.read_pickle( + filename=filename, use_compression=None, filesuffix=filesuffix + ) + + return out + + def _validate_store(self, data_tree: xr.DataTree, group: str = "") -> dict: + """Ensure data structures in a data tree are OGGM-compatible. + + Some data structures used by the old pickle infrastructure + cannot be directly written to zarr via xarray. This method + ensures data structures within a data tree are compatible with + the types expected from older pickle files. + + Parameters + ---------- + data_tree : xarray.DataTree + The DataTree to reconstruct into a pickle-compatible + dictionary. + group : str, optional + The group within the zarr store that was read. This may be + necessary because the `name` attribute can be missing + depending on how the zarr store was read. + + Returns + ------- + dict | list + Either coerces a datatree into the dictionary, or + returns a list of OGGM objects to match the structures + expected from older pickle files. + """ + + if group: + name = group + elif data_tree.name: + name = data_tree.name + else: + name = "" + + if "downstream_line" in name: + # CAUTION: the downstream_line datatree contains a variable + # named downstream_line. Don't mix these up! + data_tree = geozarr.get_dict_from_datatree(data_tree) + if "downstream_line" in data_tree.keys(): + data_tree["downstream_line"] = geozarr._validate_linestring( + data_tree["downstream_line"] + ) + if "full_line" in data_tree.keys(): + data_tree["full_line"] = geozarr._validate_linestring( + data_tree["full_line"] + ) + else: + # avoid KeyError when storing as empty child which gets + # skipped + data_tree["full_line"] = None + return data_tree + elif "geometries" in name: + data_tree = geozarr.get_dict_from_datatree(data_tree) + for key in ["polygon_hr", "polygon_pix"]: + if key in data_tree.keys(): + data_tree[key] = geozarr._validate_polygon( + data_tree[key] + ) + return data_tree + elif "model_flowline" in name: + child_keys = list(data_tree.children.keys()) + if child_keys and all(k.isdigit() for k in child_keys): + sorted_keys = sorted(child_keys, key=int) + fls = [ + geozarr.get_flowline_from_datatree(data_tree[k]) + for k in sorted_keys + ] + # Restore flows_to connections + for i, k in enumerate(sorted_keys): + idx_da = geozarr.get_datatree_value( + data_tree[k], "_flows_to_list_idx" + ) + if idx_da is not None: + idx = int(idx_da) + if 0 <= idx < len(fls): + fls[i].set_flows_to(fls[idx]) + return fls + # Legacy single-flowline flat structure + flowline = geozarr.get_flowline_from_datatree(data_tree=data_tree) + return [flowline] + elif "centerlines" in name and "inversion" not in name: + child_keys = list(data_tree.children.keys()) + if child_keys and all(k.isdigit() for k in child_keys): + sorted_keys = sorted(child_keys, key=int) + fls = [ + geozarr.get_centerline_from_datatree(data_tree[k]) + for k in sorted_keys + ] + for i, k in enumerate(sorted_keys): + idx_da = geozarr.get_datatree_value( + data_tree[k], "_flows_to_list_idx" + ) + if idx_da is not None: + idx = int(idx_da) + if 0 <= idx < len(fls): + fls[i].set_flows_to(fls[idx]) + return fls + # Single centerline (legacy flat) + return [geozarr.get_centerline_from_datatree(data_tree=data_tree)] + elif "inversion_flowlines" in name: + child_keys = list(data_tree.children.keys()) + if child_keys and all(k.isdigit() for k in child_keys): + sorted_keys = sorted(child_keys, key=int) + fls = [ + geozarr.get_centerline_from_datatree(data_tree[k]) + for k in sorted_keys + ] + # Restore flows_to connections + for i, k in enumerate(sorted_keys): + idx_da = geozarr.get_datatree_value( + data_tree[k], "_flows_to_list_idx" + ) + if idx_da is not None: + idx = int(idx_da) + if 0 <= idx < len(fls): + fls[i].set_flows_to(fls[idx]) + return fls + # Legacy single-flowline flat structure + centerline = geozarr.get_centerline_from_datatree( + data_tree=data_tree + ) + return [centerline] + elif "inversion_" in name: + child_keys = list(data_tree.children.keys()) + if child_keys and all(k.isdigit() for k in child_keys): + # Multi-flowline format w. numbered children, one per flowline + return [ + geozarr.get_dict_from_datatree(data_tree[k]) + for k in sorted(child_keys, key=int) + ] + # single flowline + data_tree = geozarr.get_dict_from_datatree(data_tree) + return [data_tree] + + return geozarr.get_dict_from_datatree(data_tree) + def write_pickle(self, var, filename, use_compression=None, filesuffix=''): """ Writes a variable to a pickle on disk. @@ -3325,13 +3571,210 @@ def write_pickle(self, var, filename, use_compression=None, filesuffix=''): filesuffix : str append a suffix to the filename (useful for experiments). """ + + # TODO: disable warning as it tanks performance + # warnings.warn( + # "gdir.write_pickle is deprecated and will be replaced " + # "by gdir.write_store in a future OGGM release.", + # PendingDeprecationWarning, + # stacklevel=2, + # ) + use_comp = (use_compression if use_compression is not None else cfg.PARAMS['use_compression']) _open = gzip.open if use_comp else open fp = self.get_filepath(filename, filesuffix=filesuffix) - with _open(fp, 'wb') as f: + + # avoid serving stale data from a zarr if a user fell back to + # using a pickle. + group = "_".join(filter(None, (filename, filesuffix))) + zarr_fp = os.path.join(os.path.dirname(fp), "data_store.zarr") + group_dir = os.path.join(zarr_fp, group) + if os.path.exists(group_dir): + shutil.rmtree(group_dir) + try: + zarr.consolidate_metadata(zarr_fp) + except Exception: + pass + + with _open(fp, "wb") as f: pickle.dump(var, f, protocol=4) + def write_zarr( + self, + data_tree: xr.DataTree, + filename: str = "", + filesuffix: str = "", + overwrite: bool = False, + zarr_format: int = 2, + encoding: dict = None, + ) -> None: + """Write a datatree to Zarr file on disk. + + `read_store()` is set up to use the default filename of + `data_store` and an empty suffix. Data written to a different + location, may not be accessible. + + Parameters + ---------- + data_tree : xarray.DataTree + The datatree to write to Zarr. This must be Zarr-compatible, + so it cannot contain arbitrary Python objects. Use a helper + function or `write_store` instead. + filename : str, default empty + Name of a data store, which must be in cfg.BASENAMES. Note + that OGGM will expect data to be stored in `data_store`. + filesuffix : str, optional + Append a suffix to the filename. + overwrite : bool, default True + Whether to overwrite existing Zarr contents in the target + location. + zarr_format : int, default 2 + Zarr format version to use (2 or 3). + encoding : dict, optional + A dictionary specifying encoding options for the Zarr output. + """ + + fp = self.get_filepath("data_store").replace(".pkl", ".zarr") + + if not fp.endswith(".zarr"): + fp = f"{fp}.zarr" + + if overwrite: + data_tree.to_zarr( + fp, + mode="w", + consolidated=True, + zarr_format=zarr_format, + encoding=encoding, + ) + else: + """ + This writes each node's dataset independently to avoid + xarray's cross-group alignment validation (different groups + with the same dimension names but different sizes). + """ + + for node in data_tree.subtree: + node_ds = node.ds + if node_ds is None: + continue + if len(node_ds.data_vars) == 0 and len(node_ds.coords) == 0: + continue + node_path = node.path.lstrip("/") or None + node_ds.to_zarr( + fp, + group=node_path, + mode="a", + zarr_format=zarr_format, + encoding=encoding, + consolidated=False, + ) + zarr.consolidate_metadata(fp) + + def write_store( + self, + data, + filename: str = "", + filesuffix: str = "", + use_pickle: bool = False, + **kwargs, + ): + """Writes data to disk. + + Parameters + ---------- + data : xr.DataTree | object + Data or variable to write to disk + filename : str + File name (must be listed in cfg.BASENAME) + filesuffix : str + Append a suffix to the filename (useful for experiments). + use_pickle : bool, default True + Whether to use pickle for storage. If False, attempts to + write to zarr, and falls back to pickle if this fails. + **kwargs + Additional keyword arguments to pass either to + ``write_zarr()`` or ``write_pickle()``. + """ + + if not use_pickle: + try: + group = "_".join(filter(None, (filename, filesuffix))) + # Save original data in case of fallback to pickle + original_data = data + if not isinstance(data, xr.DataTree): + # Distinguish between supported and unsupported list types + if ( + isinstance(data, list) + and data + and not isinstance(data[0], dict) + ): + from oggm import Centerline + from oggm.core.flowline import MixedBedFlowline + _supported = (Centerline, MixedBedFlowline) + _supported_groups = ( + "inversion_flowlines", + "model_flowlines", + "centerlines", + ) + is_supported_flowline_list = all( + isinstance(item, _supported) for item in data + ) and any(kw in group for kw in _supported_groups) + if not is_supported_flowline_list: + raise NotImplementedError( + f"Cannot auto-convert list of " + f"{type(data[0]).__name__} to zarr. " + "Falling back to pickle." + ) + data = geozarr.convert_pickles_to_datatree( + {f"{group}": data} + ) + + zarr_fp = self.get_filepath("data_store").replace( + ".pkl", ".zarr" + ) + group_dir = os.path.join(zarr_fp, group) + """Use shutil to avoid creating mixed-format metadata. + (zarr.open_group defaults to v3 which adds a zarr.json + alongside the v2 .zgroup file). + """ + if os.path.exists(group_dir): + shutil.rmtree(group_dir) + self.write_zarr( + data_tree=data, + filename=filename, + filesuffix=filesuffix, + overwrite=False, + **kwargs, + ) + except Exception as e: + """Deal with failed writes. + Clean up interrupted group creation and prevent + corruption from subsequent writes. + """ + if "group_dir" in locals() and os.path.exists(group_dir): + shutil.rmtree(group_dir) + # Re-consolidate metadata so valid groups already in + # store stay readable after failed write. + if "zarr_fp" in locals() and os.path.exists(zarr_fp): + try: + zarr.consolidate_metadata(zarr_fp) + except Exception: + pass + warnings.warn( + f"{e} Failed to write zarr store, falling back to pickle.", + RuntimeWarning, + ) + # Use original_data so pickle doesn't contain a DataTree + self.write_pickle( + var=original_data, filename=filename, filesuffix=filesuffix + ) + else: + self.write_pickle( + var=data, filename=filename, filesuffix=filesuffix, **kwargs + ) + def read_json(self, filename, filesuffix='', allow_empty=False): """Reads a JSON file located in the directory. @@ -3628,7 +4071,7 @@ def get_inversion_flowline_hw(self): h = np.array([]) w = np.array([]) - fls = self.read_pickle('inversion_flowlines') + fls = self.read_store('inversion_flowlines') for fl in fls: w = np.append(w, fl.widths) h = np.append(h, fl.surface_h) @@ -4000,6 +4443,12 @@ def copy_to_basedir(gdir, base_dir=None, setup='run'): paths = ('*' + p + '*' for p in paths) shutil.copytree(gdir.dir, new_dir, ignore=include_patterns(*paths)) + """ + include_patterns never ignores directories, so data_store.zarr + is copied without internal files, resulting in empty store. + Remove it so next tasks can create a fresh store. + """ + _remove_zarr_store(new_dir, "data_store.zarr") elif setup == 'inversion': paths = ['inversion_params', 'downstream_line', 'outlines', 'inversion_flowlines', 'glacier_grid', 'diagnostics', @@ -4008,6 +4457,7 @@ def copy_to_basedir(gdir, base_dir=None, setup='run'): paths = ('*' + p + '*' for p in paths) shutil.copytree(gdir.dir, new_dir, ignore=include_patterns(*paths)) + _replace_zarr_store(gdir.dir, new_dir, "data_store.zarr") elif setup == 'run/spinup': paths = ['model_flowlines', 'inversion_params', 'outlines', 'mb_calib', 'climate_historical', 'glacier_grid', @@ -4016,6 +4466,7 @@ def copy_to_basedir(gdir, base_dir=None, setup='run'): paths = ('*' + p + '*' for p in paths) shutil.copytree(gdir.dir, new_dir, ignore=include_patterns(*paths)) + _replace_zarr_store(gdir.dir, new_dir, "data_store.zarr") elif setup == 'all': shutil.copytree(gdir.dir, new_dir) else: @@ -4023,6 +4474,51 @@ def copy_to_basedir(gdir, base_dir=None, setup='run'): return GlacierDirectory(gdir.rgi_id, base_dir=base_dir) +def _remove_zarr_store(new_dir: str, store: str = "data_store.zarr") -> str: + """Remove a zarr store if it exists. + + Parameters + ---------- + new_dir : str + The directory where the zarr store is located. + store : str, default data_store.zarr + The name of the zarr store to remove. + + Returns + ------- + str + The path to the zarr store used for new writes. + """ + zarr_store = os.path.join(new_dir, store) + if os.path.exists(zarr_store): + shutil.rmtree(zarr_store) + return zarr_store + + +def _replace_zarr_store(src_dir: str, new_dir: str, + store: str = "data_store.zarr") -> None: + """Replace an incomplete zarr store copy with a full copy from source. + + ``include_patterns`` in ``copy_to_basedir`` copies the zarr store + directory hierarchy but not the data chunk files inside it (because + chunk files don't match the include patterns). This function removes + that empty shell and copies the full store from *src_dir*. + + Parameters + ---------- + src_dir : str + The source glacier directory containing the zarr store. + new_dir : str + The destination glacier directory where the zarr store was copied. + store : str, default "data_store.zarr" + Name of the zarr store subdirectory. + """ + dst_zarr = _remove_zarr_store(new_dir, store) + src_zarr = os.path.join(src_dir, store) + if os.path.exists(src_zarr): + shutil.copytree(src_zarr, dst_zarr) + + def initialize_merged_gdir(main, tribs=[], glcdf=None, filename='climate_historical', input_filesuffix='', @@ -4062,7 +4558,7 @@ def initialize_merged_gdir(main, tribs=[], glcdf=None, tribs = tolist(tribs) # read flowlines of the Main glacier - mfls = main.read_pickle('model_flowlines') + mfls = main.read_store('model_flowlines') # ------------------------------ # 0. create the new GlacierDirectory from main glaciers GeoDataFrame @@ -4076,7 +4572,7 @@ def initialize_merged_gdir(main, tribs=[], glcdf=None, # add tributary geometries to maindf merged_geometry = maindf.loc[idx, 'geometry'].iloc[0].buffer(0) for trib in tribs: - geom = trib.read_pickle('geometries')['polygon_hr'] + geom = trib.read_store('geometries')['polygon_hr'] geom = salem.transform_geometry(geom, crs=trib.grid) merged_geometry = merged_geometry.union(geom).buffer(0) @@ -4161,7 +4657,7 @@ def initialize_merged_gdir(main, tribs=[], glcdf=None, mfls[nr] = fl # Write the reprojecflowlines - merged.write_pickle(mfls, 'model_flowlines') + merged.write_store(mfls, 'model_flowlines') return merged diff --git a/oggm/utils/geozarr.py b/oggm/utils/geozarr.py new file mode 100644 index 000000000..3e8697402 --- /dev/null +++ b/oggm/utils/geozarr.py @@ -0,0 +1,688 @@ +"""Zarr utilities for OGGM.""" + +import xarray as xr +from pathlib import Path +import numpy as np +import os +import shapely + +import pyproj +from salem import Grid, wgs84 +from functools import partial +from typing import Callable, Any + + +def get_pickle_paths(directory: str | Path) -> list[Path]: + """Get file paths for all available pickles in a directory. + + Parameters + ---------- + directory : str or Path + Path to the directory containing the pickles. + + Returns + ------- + list[Path] + A list of file paths to all the pickles in the directory. + """ + + return [Path(f) for f in os.listdir(directory) if f[-4:] == ".pkl"] + + +def get_tranche(data: dict, type_only: bool = False) -> dict: + """Extract a tranche of data from a dictionary. + + Parameters + ---------- + data : dict + The input dictionary to extract the tranche from. + type_only : bool, default True + If True, only the types of the data will be extracted. If False, + the actual data will be extracted. + """ + tranche = {} + for k, v in data.items(): + if not type_only: + tranche[k] = v + else: + tranche[k] = type(v) + return tranche + + +def filter_arrays_from_dict(x: dict) -> dict: + """Get all numpy array-type items from a dictionary.""" + return {k: v for k, v in x.items() if isinstance(v, np.ndarray)} + + +def filter_lists_from_dict(x: dict) -> dict: + """Filter list-type items from a dictionary.""" + return {k: v for k, v in x.items() if isinstance(v, list)} + + +def get_pickle_data(pickle_files: list[Path], gdir, type_only: bool = False): + """Read pickle files and extract their data into a dictionary. + + Parameters + ---------- + pickle_files : list[Path] + Paths to pickle files. + gdir : oggm.GlacierDirectory + GlacierDirectory object from which the pickles are read. + type_only : bool, default False + If True, only the types of the data will be extracted. If False, + the actual data will be extracted. + + Returns + ------- + dict + A dictionary with the pickle base names as keys and the + extracted data as values, or their types if `type_only` is True. + """ + pickle_data = {} + for pickle in pickle_files: + try: + stem = gdir.read_pickle(pickle.stem) + if isinstance(stem, list): + slices = [] + for i in stem: + if isinstance(i, dict): + slices.append(get_tranche(i, type_only=type_only)) + else: + slices.append(type(i)) + pickle_data[pickle.stem] = slices + elif isinstance(stem, dict): + pickle_data[pickle.stem] = get_tranche( + stem, type_only=type_only + ) + else: + print(f"Pickle {pickle.stem} not parseable.") + except Exception as e: + print(e) + print( + f"Pickle {pickle.stem} of type {type(pickle.stem)} not parseable." + ) + + return pickle_data + + +"""Convert data into zarr-compatible structures.""" + + +def _validate_linestring( + line: xr.DataArray | shapely.LineString, +) -> shapely.LineString | None: + """Coerce an object into a LineString.""" + if not isinstance(line, shapely.LineString) and (line is not None): + line = shapely.LineString(line) + return line + + +def _validate_polygon( + polygon: xr.DataArray | shapely.Polygon, +) -> shapely.Polygon | None: + """Coerce an object into a LineString.""" + if not isinstance(polygon, shapely.Polygon) and (polygon is not None): + polygon = shapely.Polygon(polygon) + return polygon + + +def _validate_point( + point: xr.DataArray | np.ndarray | shapely.Point | None, +) -> shapely.Point | None: + """Coerce a stored DataArray/ndarray back to a shapely Point.""" + if point is None: + return None + if isinstance(point, (xr.DataArray, np.ndarray)): + coords = np.asarray(point).flatten() + return shapely.geometry.Point(coords) + return point + + +def get_datatree_value( + data_tree: xr.DataTree, attribute: str +) -> xr.DataArray | None: + if hasattr(data_tree, attribute): + if isinstance(getattr(data_tree, attribute), xr.DataTree): + if getattr(data_tree, attribute).is_empty: + return None + return getattr(data_tree, attribute).values.copy() + return None + + +def get_flowline_from_datatree(data_tree: xr.DataTree): + """Reconstruct a Flowline object from a DataTree.""" + from oggm.core.flowline import MixedBedFlowline + + flowline = MixedBedFlowline( + line=_validate_linestring(get_datatree_value(data_tree, "line")), + dx=get_datatree_value(data_tree, "dx"), + map_dx=get_datatree_value(data_tree, "map_dx"), + surface_h=get_datatree_value(data_tree, "surface_h"), + bed_h=get_datatree_value(data_tree, "bed_h"), + section=get_datatree_value(data_tree, "section"), + bed_shape=get_datatree_value(data_tree, "bed_shape"), + is_trapezoid=get_datatree_value(data_tree, "is_trapezoid"), + lambdas=get_datatree_value(data_tree, "lambdas"), + widths_m=get_datatree_value(data_tree, "widths_m"), + rgi_id=get_datatree_value(data_tree, "rgi_id"), + water_level=get_datatree_value(data_tree, "water_level"), + gdir=get_datatree_value(data_tree, "gdir"), + ) + for attribute in [ + "order", + "_sqrt_bed", + "_w0_m", + ]: + setattr(flowline, attribute, get_datatree_value(data_tree, attribute)) + + # reconstruct Grid partial + map_trafo = get_map_trafo_from_grid(data_tree) + setattr(flowline, "map_trafo", map_trafo) + return flowline + + +def get_centerline_from_datatree(data_tree: xr.DataTree): + """Reconstruct a Centerline object from a DataTree. + + Note that it is not possible to reconstruct a Centerline with all + the same attributes as the original, because some of these cannot be + passed to the constructor. + """ + from oggm import Centerline + + centerline = Centerline( + line=_validate_linestring(get_datatree_value(data_tree, "line")), + dx=np.array(get_datatree_value(data_tree, "dx")), + surface_h=get_datatree_value(data_tree, "surface_h"), + orig_head=_validate_point(get_datatree_value(data_tree, "orig_head")), + rgi_id=get_datatree_value(data_tree, "rgi_id"), + map_dx=get_datatree_value(data_tree, "map_dx"), + ) + for attribute in [ + "order", + "_widths", + "is_rectangular", + "is_trapezoid", + "apparent_mb", + "flux", + "flux_out", + ]: + val = get_datatree_value(data_tree, attribute) + setattr(centerline, attribute, val) + + # For widths from (N,2,2) array to MultiLineString + # Matches original structure by converting NaN row to emtpy MLS + gw_data = get_datatree_value(data_tree, "geometrical_widths") + if gw_data is not None: + gw_array = np.array(gw_data) + widths = [] + for i in range(len(gw_array)): + if np.any(np.isnan(gw_array[i])): + widths.append(shapely.geometry.MultiLineString()) + else: + widths.append( + shapely.geometry.MultiLineString( + [shapely.geometry.LineString(gw_array[i])] + ) + ) + centerline.geometrical_widths = widths + + return centerline + + +def restore_projection(root: xr.DataTree) -> None: + if "pyproj_srs" in root.attrs: + if isinstance(root.attrs["pyproj_srs"], dict): + crs = pyproj.CRS.from_json_dict(root.attrs["pyproj_srs"]) + root.attrs["pyproj_srs"] = pyproj.Proj(crs) + + +def get_grid_params_from_partial(p: Callable) -> dict: + # TODO: Convert from glacier_grid instead of partial. + grid = p.func.__self__ + grid_parameters = { + "pyproj_srs": grid.proj.crs.to_json_dict(), + "nxny": (grid.nx, grid.ny), + "dxdy": (grid.dx, grid.dy), + "x0y0": (grid.x0, grid.y0), + "pixel_ref": grid.pixel_ref, + } + + return grid_parameters + + +def get_map_trafo_from_grid(data_tree: xr.DataTree) -> Callable: + # TODO: Move to change flowline object instead to take a Grid object instead of a gdir. + map_trafo = Grid( + proj=data_tree.attrs["pyproj_srs"], + nxny=(data_tree.attrs["nxny"]), + dxdy=(data_tree.attrs["dxdy"]), + x0y0=(data_tree.attrs["x0y0"]), + pixel_ref=data_tree.attrs["pixel_ref"], + ) + return partial(map_trafo.ij_to_crs, crs=wgs84) + + +def convert_linestring_to_dataarray(line: shapely.LineString) -> xr.DataArray: + """Convert a shapely LineString to a DataArray of coordinates.""" + if not isinstance(line, shapely.LineString): + return line + else: + return xr.DataArray( + np.array(shapely.geometry.mapping(line)["coordinates"]), + dims=["x", "y"], + ) + + +def convert_point_to_dataarray( + point: shapely.Point | None, +) -> xr.DataArray | None: + """Convert a shapely Point to a 1-D DataArray of coordinates.""" + if point is None: + return None + if not isinstance(point, shapely.Point): + return point + coords = np.array(shapely.get_coordinates(point)).flatten() + return xr.DataArray(coords, dims=["xy"]) + + +def convert_polygon_to_dataarray( + polygon: shapely.Polygon, +) -> xr.DataArray | shapely.Polygon: + """Convert a shapely Polygon to a DataArray of coordinates. + + WARNING: This function currently only supports simple Polygons + without holes. This means Polygons cannot be correctly reconstructed. + """ + if not isinstance(polygon, shapely.Polygon): + return polygon + elif not polygon.geom_type == "MultiPolygon": + raise NotImplementedError("MultiPolygons are not supported.") + else: + exterior_coords = shapely.geometry.mapping(polygon)["coordinates"][0] + try: + interior_coords = [] + for interior in polygon.interiors: + interior_coords += [interior.coords[:]] + raise NotImplementedError("Polygons with holes are not supported.") + except NotImplementedError as e: + print( + "Warning: Currently polygons with holes cannot be reconstructed." + ) + return xr.DataArray.from_dict( + np.array(exterior_coords), + dims=["x", "y"], + ) + + +def get_dict_from_datatree(data_tree: xr.DataTree) -> dict: + """Convert a DataTree back into a dictionary. + + This will flatten a datatree such that all coordinates and data + variables match what would be expected in the original pickles. + """ + data = {} + for coord in data_tree.coords: + data[coord] = data_tree.coords[coord].values.copy() + for var in data_tree.data_vars: + data[var] = data_tree[var].values.copy() + for name, child in data_tree.children.items(): + if isinstance(child, xr.DataTree): + if child.is_empty: + data[name] = None + else: + data[name] = get_dict_from_datatree(child) + else: + data[name] = child + return data + + +def get_downstream_line_from_pkl(pickle: dict) -> dict: + """Convert ``downstream_line`` pickle into zarr-compatible structure. + + Parameters + ---------- + pickle : dict + Data loaded directly from the ``downstream_line`` pickle. + + Returns + ------- + dict + The same items as the input, but with the ``downstream_line`` + key converted to a DataArray of coordinates if it was originally + a LineString. + """ + + try: + assert isinstance(pickle, dict) + downstream_line = pickle["downstream_line"] + except AssertionError: + raise TypeError( + "Input data must be a dictionary." + "Ensure you are loading from a pickle" + ) + except KeyError: + raise KeyError( + "The pickle must contain a 'downstream_line' key." + "Check the contents of the pickle." + ) + # Work on a shallow copy so the original dict is not mutated. If we + # modified the caller's dict in-place the original_data reference in + # write_store would also be changed, causing the pickle fallback to + # store a DataArray instead of the original shapely geometry. + pickle = dict(pickle) + coordinates = convert_linestring_to_dataarray(downstream_line) + pickle["downstream_line"] = coordinates + + return pickle + + +def get_model_flowlines_from_pkl(pickle: list) -> list: + """Convert ``model_flowlines`` pickle into zarr-compatible structure. + + Note that ``map_trafo`` is a partial and cannot be directly + serialised to zarr; it is stored separately as group attrs. + ``gdir`` is a GlacierDirectory and cannot be serialised either; + it is omitted (``map_trafo`` is reconstructed from the stored grid + params instead). + + Parameters + ---------- + pickle : list + Data loaded directly from the ``model_flowlines`` pickle. + + Returns + ------- + list[dict] + One dict per flowline with the attributes needed to reconstruct + the original :py:class:`oggm.core.flowline.MixedBedFlowline`. + """ + from oggm.core.flowline import MixedBedFlowline + + new_pickle = [] + if not isinstance(pickle, list) or not pickle: + return new_pickle + try: + assert all(isinstance(fl, MixedBedFlowline) for fl in pickle) + fl_id_to_idx = {id(fl): i for i, fl in enumerate(pickle)} + for fl in pickle: + data = { + "line": convert_linestring_to_dataarray( + getattr(fl, "line", None) + ), + "dx": getattr(fl, "dx", None), + "map_dx": getattr(fl, "map_dx", None), + "surface_h": getattr(fl, "surface_h", None), + "bed_h": getattr(fl, "bed_h", None), + "section": getattr(fl, "section", None), + "bed_shape": getattr(fl, "bed_shape", None), + "is_trapezoid": getattr(fl, "is_trapezoid", None), + "widths_m": getattr(fl, "widths_m", None), + "rgi_id": getattr(fl, "rgi_id", None), + "water_level": getattr(fl, "water_level", None), + "order": getattr(fl, "order", None), + "map_trafo": getattr(fl, "map_trafo", None), + "_sqrt_bed": getattr(fl, "_sqrt_bed", None), + "_w0_m": getattr(fl, "_w0_m", None), + } + lambdas = getattr(fl, "lambdas", None) + data["lambdas"] = ( + getattr(fl, "_lambdas", None) if lambdas is None else lambdas + ) + # Store the index of the flowline this one flows into (-1 = None). + flows_to = getattr(fl, "flows_to", None) + data["_flows_to_list_idx"] = np.int64( + fl_id_to_idx.get(id(flows_to), -1) + if flows_to is not None + else -1 + ) + # Drop non-serialisable and None values so xarray can infer dtypes + data = { + k: v + for k, v in data.items() + if v is not None and k != "map_trafo" + } + new_pickle.append(data) + except AssertionError: + raise TypeError( + "All items in the pickle list must be MixedBedFlowline instances." + ) + + return new_pickle + + +def get_inversion_flowlines_from_pkl(pickle: list) -> list[dict]: + """Convert ``inversion_flowlines`` (or ``centerlines``) pickle into + zarr-compatible structure. + + Parameters + ---------- + pickle : list + Data loaded directly from the pickle – a list of + :py:class:`oggm.Centerline` objects. + + Returns + ------- + list[dict] + One dict per flowline with the attributes needed to reconstruct + the original Centerline objects. + """ + from oggm import Centerline + + new_pickle = [] + if not isinstance(pickle, list) or not pickle: + return new_pickle + try: + assert all(isinstance(fl, Centerline) for fl in pickle) + # Build an id-to-index map so we can store the flows_to list index + fl_id_to_idx = {id(fl): i for i, fl in enumerate(pickle)} + for fl in pickle: + data = { + "line": convert_linestring_to_dataarray(fl.line), + "dx": fl.dx, + "surface_h": fl.surface_h, + # convert orig_head from shapely Point to DataArray + "orig_head": convert_point_to_dataarray( + getattr(fl, "orig_head", None) + ), + "rgi_id": getattr(fl, "rgi_id", None), + "map_dx": getattr(fl, "map_dx", None), + } + # These cannot be passed via Centerline.__init__ + for attribute in [ + "order", + "_widths", + "is_rectangular", + "is_trapezoid", + "apparent_mb", + "flux", + "flux_out", + ]: + data[attribute] = getattr(fl, attribute, None) + # Store index of flowline it flows into (-1 = None) + # Reconstruct flows_to connections after deserialisation. + flows_to = getattr(fl, "flows_to", None) + data["_flows_to_list_idx"] = np.int64( + fl_id_to_idx.get(id(flows_to), -1) + if flows_to is not None + else -1 + ) + gw = getattr(fl, "geometrical_widths", None) + if gw is not None: + nan_row = np.full((2, 2), np.nan, dtype=np.float64) + gw_rows = [] + for w in gw: + if w is None or w.is_empty: + gw_rows.append(nan_row) + elif hasattr(w, "geoms"): + # MultiLineString: take the first (only) member + sub = list(w.geoms) + if sub: + gw_rows.append( + np.array(list(sub[0].coords), dtype=np.float64) + ) + else: + gw_rows.append(nan_row) + else: + # Plain LineString + gw_rows.append( + np.array(list(w.coords), dtype=np.float64) + ) + data["geometrical_widths"] = xr.DataArray( + np.array(gw_rows, dtype=np.float64), + dims=["width_idx", "vertex", "coord"], + ) + # so xarray can infer dtypes for each variable + data = {k: v for k, v in data.items() if v is not None} + new_pickle.append(data) + except AssertionError: + raise TypeError( + "All items in the pickle list must be Centerline instances." + ) + + return new_pickle + + +def get_centerlines_from_pkl(pickle: list) -> list[dict]: + """Convert a ``centerlines`` pickle to zarr-compatible structure. + + Reuses the same format as ``inversion_flowlines`` since both store + lists of :py:class:`oggm.Centerline` objects. + """ + return get_inversion_flowlines_from_pkl(pickle) + + +def convert_pickles_to_datatree(pickle_data: dict) -> xr.DataTree: + """Convert a dictionary of pickles into an xarray DataTree.""" + data_tree = xr.DataTree() + for name, pickle in pickle_data.items(): + try: + # These are the pickles that require special handling. + if "downstream_line" in name: + data = get_downstream_line_from_pkl(pickle) + data_tree = add_datacube( + data_tree=data_tree, + datacubes=data, + datacube_name=name, + overwrite=True, + ) + continue + + # Centerline lists: centerlines, inversion_flowlines + if "inversion_flowlines" in name or ( + "centerlines" in name and "inversion" not in name + ): + dicts = ( + get_inversion_flowlines_from_pkl(pickle) + if "inversion_flowlines" in name + else get_centerlines_from_pkl(pickle) + ) + sub_tree = xr.DataTree() + for i, d in enumerate(dicts): + sub_tree = add_datacube( + data_tree=sub_tree, + datacubes=d, + datacube_name=str(i), + overwrite=True, + ) + data_tree[name] = sub_tree + continue + + if "model_flowlines" in name: + dicts = get_model_flowlines_from_pkl(pickle) + sub_tree = xr.DataTree() + for i, d in enumerate(dicts): + # map_trafo already filtered out + # retrieve from original flowline to get grid params. + from oggm.core.flowline import MixedBedFlowline + if isinstance(pickle, list) and i < len(pickle) and isinstance(pickle[i], MixedBedFlowline): + map_trafo = getattr(pickle[i], "map_trafo", None) + else: + map_trafo = None + sub_tree = add_datacube( + data_tree=sub_tree, + datacubes=d, + datacube_name=str(i), + overwrite=True, + ) + if map_trafo is not None: + grid_params = get_grid_params_from_partial(map_trafo) + sub_tree[str(i)].attrs.update(grid_params) + data_tree[name] = sub_tree + continue + + # Fallback for implicitly supported pickles + if isinstance(pickle, list) and all( + isinstance(item, dict) for item in pickle + ): + # List of dicts (e.g. inversion_input/output with multiple + # flowlines): store each dict as a numbered child group. + sub_tree = xr.DataTree() + for i, item in enumerate(pickle): + sub_tree = add_datacube( + data_tree=sub_tree, + datacubes=item, + datacube_name=str(i), + overwrite=True, + ) + data_tree[name] = sub_tree + continue + + if isinstance(pickle, list): + data = pickle[0] + elif isinstance(pickle, dict): + data = pickle + else: + raise NotImplementedError + + if isinstance(data, dict): + data_tree = add_datacube( + data_tree=data_tree, + datacubes=data, + datacube_name=name, + overwrite=True, + ) + # if "model_flowlines" in name: + # data_tree.model_flowlines.attrs = data_tree.attrs + except NotImplementedError as e: + print(f"Pickle '{name}' is unsupported and was skipped: {e}") + + return data_tree + + +def add_datacube( + data_tree: xr.DataTree, + datacubes: dict, + datacube_name: str, + overwrite: bool = False, +) -> xr.DataTree: + """Add a new dataset as a child group of the DataTree at the root. + + .. note:: The arguments should match those in ``dtcg.GeoZarrHandler. + + Parameters + ---------- + datacubes : dict + The dataset to be added. + datacube_name : str + Layer name to be used for this node of the tree. + overwrite : bool + If True, allow a layer of the same name to be overwritten. + + Returns + ------- + xr.DataTree + The updated DataTree with the new datasets. + """ + + if datacube_name in data_tree.children and not overwrite: + raise ValueError(f"Group '{datacube_name}' already exists.") + + if not isinstance(datacubes, dict): + raise ValueError(f"Datacubes need to be provided within a dictionary.") + + data_tree[datacube_name] = xr.DataTree.from_dict( + name=datacube_name, data=datacubes + ) + + return data_tree diff --git a/oggm/workflow.py b/oggm/workflow.py index 74a19efd6..42ed677f5 100644 --- a/oggm/workflow.py +++ b/oggm/workflow.py @@ -28,6 +28,17 @@ # Module logger log = logging.getLogger(__name__) +""" +For dealing with multiprocessing with fork. Child processes must reset +zarr's async globals so they don't inherit stale references that cause +deadlocks when starting new I/O threads. +""" +try: + from zarr.core.sync import reset_resources_after_fork as _zarr_reset + os.register_at_fork(after_in_child=_zarr_reset) +except (ImportError, AttributeError): + pass + # Multiprocessing Pool _mp_manager = None _mp_pool = None @@ -47,6 +58,7 @@ def init_mp_pool(reset=False): cfg.CONFIG_MODIFIED = False if _mp_pool: _mp_pool.terminate() + _mp_pool.join() # wait for workers to exit before shutting down manager _mp_pool = None if _mp_manager: cfg.set_manager(None) @@ -116,11 +128,36 @@ def reset_multiprocessing(): Call this if you changed configuration parameters mid-run and need them to be re-propagated to child processes. """ - global _mp_pool + global _mp_pool, _mp_manager if _mp_pool: _mp_pool.terminate() + _mp_pool.join() # wait for workers to fully exit _mp_pool = None + if _mp_manager: + # next test should start with clean state + cfg.set_manager(None) + _mp_manager.shutdown() + _mp_manager = None cfg.CONFIG_MODIFIED = False + # Flush zarr's background async I/O threads so they don't deadlock + # child processes after fork + try: + import zarr.core.sync as _zs + + _iothread = _zs.iothread[0] # save ref before cleanup clears it + _loop = _zs.loop[0] + if _loop is not None and not _loop.is_closed(): + _exc = getattr(_loop, "_default_executor", None) + if _exc is not None: + _exc.shutdown(wait=True) + _loop._default_executor = None + _zs.cleanup_resources() + # cleanup_resources() waits only 0.2 s for iothread; ensure it is + # truly dead before any subsequent fork (Pool/Manager creation). + if _iothread is not None and _iothread.is_alive(): + _iothread.join(timeout=5.0) # TODO: Play around with timeout + except Exception: + pass def execute_entity_task(task, gdirs, **kwargs): diff --git a/pyproject.toml b/pyproject.toml index c84cc1215..0642a4f1f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,6 +32,8 @@ dependencies = [ "scipy", "shapely", "xarray", + "xvec>=0.5.2", + "zarr", ] [project.optional-dependencies]