Source code for huracanpy.calc._track_stats

"""
Module containing functions to compute track statistics
"""

import numpy as np
import pandas as pd



[docs]
def track_duration(time, track_ids):
    """
    Compute how long each track lasts.

    The duration of a track is the difference between the latest and the
    earliest value of ``time`` among the points sharing the same track id.

    Parameters
    ----------
    time : xarray.DataArray
        Time stamp of every track point.
    track_ids : array_like
        Track identifier of every point, used as the grouping key.

    Returns
    -------
    xarray.DataArray
        Array named "duration" holding one value per track, expressed in
        hours (``attrs["units"] == "hours"``).

    """

    def _time_span(points):
        # Elapsed time between the first and the last point of a single track
        return points.max() - points.min()

    span = time.groupby(track_ids).map(_time_span)
    span = span.rename("duration")

    # Dividing by a one-hour timedelta turns the timedelta values into a number
    # of hours
    hours = span / np.timedelta64(1, "h")
    hours.attrs["units"] = "hours"
    return hours




[docs]
def gen_vals(tracks, time, track_id):
    """
    Extract the genesis (earliest) point of each track.

    Parameters
    ----------
    tracks : xarray.Dataset
        Dataset holding all the track points.
    time : array_like
        Time of every point; the point with the smallest time in a track is
        its genesis point.
    track_id : xarray.DataArray
        Track identifier of every point. Its first dimension is the dimension
        along which ``tracks`` is subset, and its name is the variable promoted
        to index in the result.

    Returns
    -------
    xarray.Dataset
        Dataset containing only genesis points, with track_id as index.

    """
    # Working on a small pandas table is much faster than doing the
    # sort/groupby in xarray (a factor of about 470 was reported by the
    # original author). The table carries the positional index of every point
    # (column "idx", named so to avoid clashing with "index") so that the
    # selected rows can be mapped back onto the original Dataset.
    lookup = pd.DataFrame(
        {
            "idx": np.arange(len(track_id)),
            "time": np.asarray(time),
            "track_id": np.asarray(track_id),
        }
    )
    chronological = lookup.sort_values("time")
    genesis_idx = np.asarray(chronological.groupby("track_id").first()["idx"])

    # track_id is expected to be 1d; if it were not, building the table above
    # would already have failed
    record_dim = track_id.dims[0]
    id_name = track_id.name
    genesis = tracks.isel(**{record_dim: genesis_idx})

    # Turn the track id into a coordinate, then use it as the dimension in
    # place of the record dimension
    genesis = genesis.assign_coords(**{id_name: genesis[id_name]})
    return genesis.swap_dims(**{record_dim: id_name})




[docs]
def apex_vals(tracks, variable, track_id, stat="max"):
    """
    Extract the point of each track where a variable reaches its extremum

    Parameters
    ----------
    tracks : xarray.Dataset
        Dataset holding all the track points.
    variable : array_like
        The extremum variable, one value per track point.
    track_id : xarray.DataArray
        Track identifier of every point. Its first dimension is the dimension
        along which ``tracks`` is subset, and its name is the variable promoted
        to index in the result.
    stat : str, optional
        Type of extremum. Can be "min" or "max". The default is "max".

    Raises
    ------
    NotImplementedError
        If another value than "min" and "max" is given to stat

    Returns
    -------
    xarray.Dataset
        Dataset containing only extremum points, with track_id as index.

    Notes
    -----
    When several points of a track share the extreme value, the one that
    comes first in the input is selected. Missing values are sorted after
    valid ones (pandas default), so they are only selected for a track that
    has no valid value.

    """

    # The points are sorted along the variable and the first row of each track
    # is kept; `ascending` tells whether that first row must be the smallest
    # value (True) or the largest one (False)
    if stat == "max":
        ascending = False
    elif stat == "min":
        ascending = True
    else:
        raise NotImplementedError("stat not recognized. Please use one of {min, max}")

    # Working on a small pandas table is much faster than doing the
    # sort/groupby in xarray (a factor of about 350 was reported by the
    # original author). The table carries the positional index of every point
    # (column "idx") so that the selected rows can be mapped back onto the
    # original Dataset.
    df = pd.DataFrame(
        data=dict(
            idx=np.arange(len(variable)),
            var=np.asarray(variable),
            track_id=np.asarray(track_id),
        )
    )
    # A stable sort is required: the default quicksort may reorder equal
    # values, which would make the selected point arbitrary whenever the
    # extremum is reached more than once within a track.
    ordered = df.sort_values("var", ascending=ascending, kind="stable")
    idx = np.asarray(ordered.groupby("track_id").first().idx)

    dim = track_id.dims[0]
    tracks = tracks.isel(**{dim: idx})

    # Promote track_id to a coordinate and use it as the dimension in place of
    # the record dimension
    return tracks.assign_coords(**{track_id.name: tracks[track_id.name]}).swap_dims(
        **{dim: track_id.name}
    )