Source code for huracanpy.plot._venn

"""Venn diagrams for tracks matching visualisation"""

from matplotlib_venn import venn2, venn2_circles, venn3, venn3_circles
import numpy as np


[docs] def venn(datasets, match, labels, colors=None, circle_color="k"): """ Plot venn diagram to compare the datasets. Parameters ---------- datasets : list of xr.dataset list of the datasets compared. match : pandas.DataFrame match dataframe issued from :func:`huracanpy.assess.match`. labels : list of str labels of the datasets. colors : list of str, optional list of colors to be used for each dataset. The default is None. circle_color : str, optional color of the overlaid circles. The default is "k". Raises ------ NotImplementedError If more than three or less than two datasets are given. Returns ------- None. """ if len(datasets) == 2: f = _venn_2datasets elif len(datasets) == 3: f = _venn_3datasets else: raise NotImplementedError( "We cannot plot Venn diagrams for more than 3 datasets." ) if len(datasets) != len(labels): raise ValueError("datasets and labels must have the same length") if colors is None: colors = ["w"] * len(datasets) else: if len(colors) != len(datasets): raise ValueError("datasets and colors must have the same length") f(*datasets, match, colors, labels, circle_color)
def _venn_2datasets(data1, data2, match, colors, labels=None, circle_color="k"): n1 = len(np.unique(data1.track_id.values)) # Number of tracks in dataset 1 n2 = len(np.unique(data2.track_id.values)) # Number of tracks in dataset 2 m = len(match) # Number of tracks matching venn2((n1 - m, n2 - m, m), set_colors=colors, set_labels=labels) venn2_circles((n1 - m, n2 - m, m), color=circle_color) def _venn_3datasets( data1, data2, data3, matches, colors, labels=None, circle_color="k" ): n1 = len(np.unique(data1.track_id.values)) # Number of tracks in dataset 1 n2 = len(np.unique(data2.track_id.values)) # Number of tracks in dataset 2 n3 = len(np.unique(data3.track_id.values)) # Number of tracks in dataset 3 m_not1 = len(matches[matches.iloc[:, 0].isna()]) m_not2 = len(matches[matches.iloc[:, 1].isna()]) m_not3 = len(matches[matches.iloc[:, 2].isna()]) m_all = len(matches[matches.isna().sum(axis=1) == 0]) subsets = ( (n1 - m_all - m_not2 - m_not3), (n2 - m_all - m_not1 - m_not3), m_not3, (n3 - m_all - m_not1 - m_not2), m_not2, m_not1, m_all, ) venn3( subsets, set_labels=labels, set_colors=colors, ) venn3_circles(subsets, color=circle_color)