Project-MONAI · csudre · Apr 8, 2026 · Apr 8, 2026 · Apr 10, 2026 · Apr 13, 2026
diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml
@@ -9,7 +9,7 @@ name: Unit Tests
 
 on:
   push:
-    branches: [ main, docs_tests ]  # run when anything is pushed to these branches
+    branches: [ main, docs_test, docs_tests ]  # run when anything is pushed to these branches
   pull_request:
     branches: [ main ]  # run for the code submitted as a PR to these branches
 

diff --git a/MetricsReloaded/metrics/calibration_measures.py b/MetricsReloaded/metrics/calibration_measures.py
@@ -51,6 +51,25 @@
 
 
 class CalibrationMeasures(object):
+    """
+    Class allowing the derivation of calibration measures given probability input:
+    The possible metrics are:
+
+    * expected calibration error (ece)
+    * Brier Score
+    * Root Brier score
+    * Logarithmic score
+    * Class wise expectation calibration error
+    * Kernel based ECE
+    * negative log likelihood
+
+    :param pred_proba: predicted probabilities
+    :param ref: reference
+    :param case: if required list of cases to consider
+    :param measures: list of measures to extract
+    :param empty: flag indicating whether there are empty references
+    :param dict_args: dictionary with additional arguments for the metrics if needed
+    """
     def __init__(
         self,
         pred_proba,
@@ -89,7 +108,7 @@ def class_wise_expectation_calibration_error(self):
 
         .. math::
 
-            cwECE = \dfrac{1}{K}\sum_{k=1}^{K}\sum_{i=1}^{N}\dfrac{\vert B_{i,k} \vert}{N} \left(y_{k}(B_{i,k}) - p_{k}(B_{i,k})\right)
+            cwECE = \\dfrac{1}{K}\sum_{k=1}^{K}\sum_{i=1}^{N}\\dfrac{\\vert B_{i,k} \\vert}{N} \\left(y_{k}(B_{i,k}) - p_{k}(B_{i,k})\\right)
 
         :return: cwece
         """
@@ -146,7 +165,7 @@ def expectation_calibration_error(self):
 
         .. math::
 
-            ECE = \sum_{m=1}^{M} \dfrac{|B_m|}{n}(\dfrac{1}{|B_m|}\sum_{i \in B_m}1(pred_ik==ref_ik)-\dfrac{1}{|B_m|}\sum_{i \in B_m}pred_i)
+            ECE = \sum_{m=1}^{M} \dfrac{|B_m|}{n}(\dfrac{1}{|B_m|}\sum_{i \in B_m}1(pred_{ik}==ref_{ik})-\dfrac{1}{|B_m|}\sum_{i \in B_m}pred_i)
 
         :return: ece
 
@@ -193,7 +212,7 @@ def maximum_calibration_error(self):
 
         .. math::
 
-            MCE = max(|\dfrac{1}{|B_m|}\sum_{i \in B_m}1(pred_ik==ref_ik)-\dfrac{1}{|B_m|}\sum_{i \in B_m}pred_i|)
+            MCE = max(|\dfrac{1}{|B_m|}\sum_{i \in B_m}1(pred_{ik}==ref_{ik})-\dfrac{1}{|B_m|}\sum_{i \in B_m}pred_i|)
 
         :return: mce
 
@@ -274,7 +293,7 @@ def logarithmic_score(self):
 
         .. math::
 
-            LS = 1/N\sum_{i=1}^{N}\log{pred_ik}ref_{ik}
+            LS = 1/N\sum_{i=1}^{N}\log{pred_{ik}}ref_{ik}
 
         :return: ls
         """
@@ -289,6 +308,9 @@ def distance_ij(self,i,j):
         """
         Determines the euclidean distance between two vectors of prediction for two samples i and j
 
+        :param i: index of first sample
+        :param j: index of second sample with which to calculate distance
+
         :return: distance
         """
         pred_i = self.pred[i,:]
@@ -299,7 +321,10 @@ def distance_ij(self,i,j):
 
     def kernel_calculation(self, i,j):
         """
-        Defines the kernel value for two samples i and j with the following definition for k(x_i,x_j)
+        Defines the kernel value for two samples i and j with the following definition for :math:`k(x_i,x_j)`
+
+        :param i: index of first sample
+        :param j: index of second sample
 
         .. math::
 
@@ -414,13 +439,16 @@ def gamma_ik(self, i, k):
         """
         Definition of gamma value for sample i class k of the predictions
 
+        :param i: index of the sample
+        :param k: index of the class
+
         .. math::
 
-            gamma_{ik} = \Gamma(pred_{ik}/h + 1)
+            \gamma_{ik} = \Gamma(pred_{ik}/h + 1)
 
         where h is the bandwidth value set as default to 0.5
 
-        :return gamma_ik
+        :return: gamma_ik
 
         """
         pred_ik = self.pred[i, k]
@@ -436,6 +464,9 @@ def dirichlet_kernel(self, j, i):
         """
         Calculation of Dirichlet kernel value for predictions of samples i and j
 
+        :param i: index of first sample to consider
+        :param j: index of second sample to consider
+
         .. math::
 
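-            k_{Dir}(x_j,x_i) = \dfrac{\Gamma(\sum_{k=1}^{K}\\alpha_{ik})}{\prod_{k=1}^{K}\\alpha_{ik}}\prod_{k=1}^{K}x_jk^{\\alpha_{ik}-1}
+            k_{Dir}(x_j,x_i) = \dfrac{\Gamma(\sum_{k=1}^{K}\\alpha_{ik})}{\prod_{k=1}^{K}\\alpha_{ik}}\prod_{k=1}^{K}x_{jk}^{\\alpha_{ik}-1}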
@@ -470,10 +501,10 @@ def negative_log_likelihood(self):
 
         .. math::
 
-            NLL = -\dfrac{1}{N}\sum_{i=1}^{N}\sum_{k=1}^{C} y_{ik} \dot log(p_{i,k})
+            NLL = -\dfrac{1}{N}\sum_{i=1}^{N}\sum_{k=1}^{C} y_{ik}\log(p_{i,k})
 
-        where :math: `y_{ik}` the outcome is 1 if the class of :math: `y_{i}` is k and :math: `p_{ik}` is the predicted 
-        probability for sample :math: `x_i` and class k
+        where :math:`y_{ik}`, the outcome, is 1 if the class of :math:`y_{i}` is k and :math:`p_{ik}` is the predicted
+        probability for sample :math:`x_i` and class k
 
         :return: NLL
 
@@ -485,7 +516,11 @@ def negative_log_likelihood(self):
         return nll
 
     def to_dict_meas(self, fmt="{:.4f}"):
-        """Given the selected metrics provides a dictionary with relevant metrics"""
+        """
+        Given the selected metrics provides a dictionary with relevant metrics
+
+        :return: result_dict dictionary of results
+        """
         result_dict = {}
         for key in self.measures:
             result = self.measures_dict[key][0]()

diff --git a/MetricsReloaded/metrics/pairwise_measures.py b/MetricsReloaded/metrics/pairwise_measures.py
@@ -64,9 +64,19 @@
 class MultiClassPairwiseMeasures(object):
     """
 
-    Class dealing with measures of direct multi-class such as MCC, Cohen's kappa, Expected cost
-    or balanced accuracy
-
+    Class dealing with measures of direct multi-class. Included metrics are:
+
+    * Matthews Correlation Coefficient (MCC)
+    * Weighted Cohen's kappa
+    * Balanced accuracy
+    * Expected Cost 
+    * Normalised expected cost
+
+    :param pred: Prediction
+    :param ref: Reference
+    :param list_values: List of label values to consider
+    :param measures: list of measures to extract
+    :param dict_args: dictionary of additional arguments for the metrics
 
     """
 
@@ -81,9 +91,26 @@ def __init__(self, pred, ref, list_values, measures=[], dict_args={}):
             "wck": (self.weighted_cohens_kappa, "WCK"),
             "ba": (self.balanced_accuracy, "BAcc"),
             "ec": (self.expected_cost, "EC"),
+            "nec": (self.normalised_expected_cost,"NEC"),
+
         }
 
     def expected_cost(self):
+        """
+        Calculates the expected cost defined as:
+
+        Luciana Ferrer - Analysis and comparison of classification metrics - https://arxiv.org/pdf/2209.05355
+
+        .. math::
+
+            EC = \sum_{r}\sum_p c_{rp} P_rD_{rp}
+
+        where :math:`c_{rp}` is the cost of misclassifying class r as class p, :math:`P_r` is the probability of
+        class r in the reference data, and :math:`D_{rp}` is the fraction of samples of class r that are classified as
+        class p
+
+
+        """
         cm = self.confusion_matrix()
         priors = np.sum(cm, 0) / np.sum(cm)
         numb_perc = np.sum(cm, 0)
@@ -100,6 +127,9 @@ def expected_cost(self):
         return ec
 
     def best_naive_ec(self):
+        """
+        Calculate the naive expected cost that can be used for normalisation purposes
+        """
         cm = self.confusion_matrix()
         priors = np.sum(cm, 0) / np.sum(cm)
         prior_matrix = np.tile(priors, [cm.shape[0], 1])
@@ -115,6 +145,9 @@ def best_naive_ec(self):
         return np.min(total_cost)
 
     def normalised_expected_cost(self):
+        """
+        Calculates the normalised expected cost as the ratio of the expected cost to the naive expected cost.
+        """
         naive_cost = self.best_naive_ec()
         ec = self.expected_cost()
         return ec / naive_cost
@@ -230,7 +263,11 @@ def weighted_cohens_kappa(self):
         return weighted_cohens_kappa
 
     def to_dict_meas(self, fmt="{:.4f}"):
-        """Given the selected metrics provides a dictionary with relevant metrics"""
+        """
+
+        Given the selected metrics provides a dictionary with relevant metrics
+
+        """
         result_dict = {}
         for key in self.measures:
             result = self.measures_dict[key][0]()
@@ -239,6 +276,43 @@ def to_dict_meas(self, fmt="{:.4f}"):
 
 
 class BinaryPairwiseMeasures(object):
+    """
+    Class allowing for the derivation of pairwise measures when using binary input, measures include:
+
+    * accuracy
+    * net benefit treated
+    * normalised expected cost
+    * balanced accuracy
+    * cohen's kappa
+    * positive likelihood ratio
+    * positive predictive value
+    * negative predictive value
+    * sensitivity
+    * specificity
+    * intersection over union
+    * youden index
+    * intersection over reference
+    * fbeta
+    * Dice score
+    * centreline Dice
+    * Matthew Correlation coefficient
+    * Average symmetric surface distance
+    * Mean Average surface distance
+    * Hausdorff distance
+    * Percentile of Hausdorff distance 
+    * Normalised surface distance
+    * boundary IoU
+    * absolute volume difference ratio
+
+    Input includes:
+    :param pred: Prediction
+    :param ref: Reference
+    :param measures: list of measures to extract
+    :param connectivity_type: Type of connectivity to use
+    :param pixdim: list of pixel dimensions
+    :param empty: Whether empty-reference/prediction handling flags are enabled for metric computation.
+    :param dict_args: Dictionary with additional arguments for the different metrics
+    """
     def __init__(
         self,
         pred,
@@ -298,6 +372,12 @@ def __init__(
         self.dict_args = dict_args
 
     def calculate_worse_dist(self):
+        """
+        From an image for which pixel dimensions and full shape are known, calculates the worst possible distance value.
+        Used when the distance cannot be calculated because reference or prediction is empty and the worst value is assigned.
+
+        :return: max_dist, maximum distance for the given case
+        """
         shape = self.ref.shape
         pixdim = self.pixdim
         if pixdim is not None:
@@ -657,6 +737,15 @@ def matthews_correlation_coefficient(self):
         return mcc
 
     def expected_matching_ck(self):
+        """
+        Derives p_e for the Cohen's kappa calculation. p_e, the expected chance matching, is defined as
+
+        .. math::
+
+            p_e = \sum_k \dfrac{n_{k\\text{ref}}}{N}\dfrac{n_{k\\text{pred}}}{N}
+
+        :return: p_e
+        """
         list_values = np.unique(self.ref)
         p_e = 0
         for val in list_values:
@@ -681,7 +770,7 @@ def cohens_kappa(self):
 
             CK = \dfrac{p_o - p_e}{1-p_e}
 
-        where :math: `p_e = ` expected chance matching and :math: `p_o = `observed accuracy
+        where :math:`p_e` is the expected chance matching and :math:`p_o` is the observed accuracy
 
         Cohen, J. A coefficient of agreement for nominal scales - Educational and Psychological Measurement (1960) 20 37-46
 
@@ -784,10 +873,11 @@ def dsc(self):
 
-        ..math::
+        .. math::
 
+
             DSC = \dfrac{2TP}{2TP+FP+FN}
 
 
-        This is also F:math:`{\\beta}` for :math:`{\\beta}`=1
+        This is also the :math:`F_{\\beta}` score for :math:`\\beta = 1`
 
         :return: dsc
 
@@ -906,6 +996,7 @@ def fppi(self):
          image, assuming that the cases are collated on the last axis of the array
 
         Bram Van Ginneken, Samuel G Armato III, Bartjan de Hoop, Saskia van Amelsvoort-van de Vorst, Thomas Duindam, Meindert Niemeijer, Keelin Murphy, Arnold Schilham, Alessandra Retico, Maria Evelina Fantacci, et al. Comparing and combining algorithms for computer-aided detection of pulmonary nodules in computed tomography scans: the anode09 study. Medical image analysis, 14(6):707–722, 2010.
+
         Andriy I Bandos, Howard E Rockette, Tao Song, and David Gur. Area under the free-response roc curve (froc) and a related summary index. Biometrics, 65(1):247–256, 2009.
 
         """
@@ -924,7 +1015,7 @@ def intersection_over_reference(self):
 
         .. math::
 
-            IoR = \dfrac{| \text{Pred} \cap \text{Ref} |}{| Ref |}
+            IoR = \dfrac{| \\text{Pred} \cap \\text{Ref} |}{| Ref |}
 
         :return: IoR
 
@@ -962,8 +1053,7 @@ def com_dist(self):
         of mass of the reference and prediction.
 
 
-        :return: Euclidean distance between centre of mass when reference and prediction not empty
-        -1 otherwise
+        :return: Euclidean distance between centre of mass when reference and prediction are not empty, -1 otherwise
 
         """
 
@@ -1133,7 +1223,8 @@ def boundary_iou(self):
 
             B_{IoU}(A,B) = \dfrac{| A_{d} \cap B_{d} |}{|A_d| + |B_d| - |A_d \cap B_d|}
 
-        where :math:A_d are the pixels of A within a distance d of the boundary
+        where :math:`A_d` are the pixels of A within a distance d of the boundary
+
         :return: boundary_iou
 
         """
@@ -1317,7 +1408,8 @@ def measured_masd(self):
 
         .. math::
 
-            MASD(A,B) = \dfrac{1}{2}(\dfrac{\sum_{a\in A}d(a,B)}{|A|} + \dfrac{\sum_{b\in B}d(b,A)}{|B|})
+            MASD(A,B) = \dfrac{1}{2}\dfrac{\sum_{a\in A}d(a,B)}{|A|} + \dfrac{1}{2}\dfrac{\sum_{b\in B}d(b,A)}{|B|}
+
 
 
         :return: masd
@@ -1352,6 +1444,11 @@ def measured_hausdorff_distance_perc(self):
         return hausdorff_distance_perc
 
     def to_dict_meas(self, fmt="{:.4f}"):
+        """
+        Transform to a dictionary the results of the different calculated measures
+
+        :return: result_dict
+        """
         result_dict = {}
         for key in self.measures:
             if len(self.measures_dict[key]) == 2: