Source code for huracanpy.calc._track_stats

"""
Module containing functions to compute track statistics
"""

import numpy as np
import pandas as pd


def track_duration(time, track_ids):
    """
    Compute how long each track lasts.

    Parameters
    ----------
    time : xarray.DataArray
        Time values, grouped by ``track_ids``.
    track_ids : array_like
        Grouping key identifying which track each time value belongs to.

    Returns
    -------
    xarray.DataArray
        Duration of each track (in hours), named "duration" and carrying a
        ``units`` attribute set to "hours".
    """

    def _time_span(track_times):
        # Latest time minus earliest time within a single track
        return track_times.max() - track_times.min()

    one_hour = np.timedelta64(1, "h")

    span = time.groupby(track_ids).map(_time_span)
    span = span.rename("duration")

    # Dividing by a one-hour timedelta expresses the span as a number of hours
    hours = span / one_hour
    hours.attrs["units"] = "hours"
    return hours
def gen_vals(tracks, time, track_id):
    """
    Shows the attributes for the genesis point of each track

    The genesis point of a track is its record with the smallest ``time``. If
    several records of the same track share that smallest time, the one that
    comes first in the input order is selected.

    Parameters
    ----------
    tracks : xarray.Dataset
        Dataset to select the genesis points from.
    time : array_like
        Time of each record, used to order the records within a track.
    track_id : xarray.DataArray
        Track identifier of each record. Its first dimension is the dimension
        along which ``tracks`` is indexed, and its name must be a variable of
        ``tracks``.

    Returns
    -------
    xarray.Dataset
        Dataset containing only genesis points, with track_id as index.
    """
    # Switching to a dataframe is much faster than grouping in xarray.
    # Use sort/groupby with pandas to find the relevant positions in the original
    # Dataset by carrying a positional index (named idx to not clash with "index")
    records = pd.DataFrame(
        data=dict(
            idx=np.arange(len(track_id)),
            time=np.asarray(time),
            track_id=np.asarray(track_id),
        )
    )

    # A stable sort keeps the input order among records with equal time, so the
    # record picked for each track is deterministic (the default quicksort gives
    # no such guarantee).
    first_by_track = (
        records.sort_values("time", kind="stable").groupby("track_id").first()
    )
    idx = np.asarray(first_by_track["idx"])

    # Could check that track_id is 1d, but the function would already have failed
    # by now if not
    dim = track_id.dims[0]
    tracks = tracks.isel(**{dim: idx})

    # Promote track_id to a coordinate and make it the indexing dimension in place
    # of the original one
    return tracks.assign_coords(**{track_id.name: tracks[track_id.name]}).swap_dims(
        **{dim: track_id.name}
    )
def apex_vals(tracks, variable, track_id, stat="max"):
    """
    Shows the attribute for the extremum point of each track

    If the extremum is reached by several records of the same track, the one
    that comes first in the input order is selected.

    Parameters
    ----------
    tracks : xarray.Dataset
        Dataset to select the extremum points from.
    variable : array_like
        The extremum variable
    track_id : xarray.DataArray
        Track identifier of each record. Its first dimension is the dimension
        along which ``tracks`` is indexed, and its name must be a variable of
        ``tracks``.
    stat : str, optional
        Type of extremum. Can be "min" or "max". The default is "max".

    Raises
    ------
    NotImplementedError
        If another value than "min" and "max" is given to stat

    Returns
    -------
    xarray.Dataset
        Dataset containing only extremum points, with track_id as index.
    """
    # Records are sorted along the variable and then the first row of each
    # track_id is used.
    # asc determines whether the sorting must be ascending (True, for the minimum)
    # or descending (False, for the maximum)
    if stat == "max":
        asc = False
    elif stat == "min":
        asc = True
    else:
        raise NotImplementedError("stat not recognized. Please use one of {min, max}")

    # Switching to a dataframe is much faster than grouping in xarray.
    # Carry a positional index (named idx to not clash with "index") so the
    # selected rows can be mapped back onto the original Dataset.
    records = pd.DataFrame(
        data=dict(
            idx=np.arange(len(variable)),
            var=np.asarray(variable),
            track_id=np.asarray(track_id),
        )
    )

    # A stable sort keeps the input order among records with equal values (also
    # when sorting in descending order), so ties on the extremum are resolved
    # deterministically instead of depending on quicksort internals.
    extremum_by_track = (
        records.sort_values("var", ascending=asc, kind="stable")
        .groupby("track_id")
        .first()
    )
    idx = np.asarray(extremum_by_track["idx"])

    dim = track_id.dims[0]
    tracks = tracks.isel(**{dim: idx})

    # Promote track_id to a coordinate and make it the indexing dimension in place
    # of the original one
    return tracks.assign_coords(**{track_id.name: tracks[track_id.name]}).swap_dims(
        **{dim: track_id.name}
    )