HEP-PBSP · comane · Oct 7, 2024 · Oct 7, 2024 · Oct 7, 2024 · Oct 7, 2024
diff --git a/wmin/app.py b/wmin/app.py
@@ -11,6 +11,7 @@
     "wmin.model",
     "wmin.utils",
     "wmin.basis",
+    "wmin.ultranest_fit",
 ]
 
 

diff --git a/wmin/config.py b/wmin/config.py
@@ -8,9 +8,13 @@
 import dill
 from validphys.core import PDF
 from wmin.model import WMinPDF
+import logging
+from reportengine.configparser import ConfigError
 
 from colibri.config import Environment, colibriConfig
 
+log = logging.getLogger(__name__)
+
 
 class Environment(Environment):
     pass
@@ -22,7 +26,9 @@ class WminConfig(colibriConfig):
     """
 
     def parse_prior_settings(self, settings):
-
+        """
+        Parse the prior settings for the wmin fit.
+        """
         if "type" not in settings.keys():
             raise ValueError("Missing key type for prior_settings")
 
@@ -37,6 +43,35 @@ def parse_prior_settings(self, settings):
 
         return settings
 
+    def parse_wmin_settings(self, settings):
+        """
+        Parse the wmin settings onto a dictionary.
+
+        ``n_basis`` and ``wminpdfset`` are mandatory (``ValueError`` if missing);
+        ``wmin_inherited_evolution`` defaults to ``False``. Unknown keys only
+        trigger a warning and are not copied to the returned dictionary.
+        """
+        known_keys = {"n_basis", "wminpdfset", "wmin_inherited_evolution"}
+
+        # Unknown keys are not fatal: the ConfigError is only logged, never raised.
+        for k in settings.keys() - known_keys:
+            log.warning(
+                ConfigError(f"Key '{k}' in wmin_settings not known.", k, known_keys)
+            )
+
+        # Mandatory keys, checked in the order n_basis, wminpdfset.
+        for required in ("n_basis", "wminpdfset"):
+            if required not in settings:
+                raise ValueError(f"Missing key {required} for wmin_settings")
+
+        return {
+            "n_basis": settings["n_basis"],
+            "wminpdfset": settings["wminpdfset"],
+            "wmin_inherited_evolution": settings.get(
+                "wmin_inherited_evolution", False
+            ),
+        }
+
     def produce_pdf_model(self, wmin_settings, output_path, dump_model=True):
         """
         Weight minimization grid is in the evolution basis.

diff --git a/wmin/export_results.py b/wmin/export_results.py
@@ -0,0 +1,144 @@
+"""
+wmin.export_results.py
+
+This module contains functions for exporting the results of fits performed in
+the wmin parameterization.
+"""
+
+import pathlib
+import lhapdf
+import os
+import logging
+import numpy as np
+
+from validphys.lhio import (
+    load_all_replicas,
+    rep_matrix,
+    write_replica,
+    generate_replica0,
+)
+from validphys.core import PDF
+
+log = logging.getLogger(__name__)
+
+
+def write_wmin_combined_replicas(wmin_parameters, replicas_df, new_wmin_pdf):
+    """
+    Writes a new LHAPDF set from the results of an UltraNest fit.
+    The UltraNest fit must have been performed using a wmin parameterization so that the
+    new set can be written as a sum rule conserving linear combination of the replicas of
+    the basis set.
+
+    Each posterior sample ``w`` is written as member ``i + 1`` of the new set, built as
+    ``(1 - sum(w)) * column 1 + sum_j w[j] * column (j + 2)`` of ``replicas_df``.
+
+    Parameters
+    ----------
+    wmin_parameters: Array
+        wmin parameters posterior samples, shape (n_posterior_samples, n_params)
+
+    replicas_df: DataFrame
+        DataFrame containing replicas of the basis set at all scales
+
+    new_wmin_pdf: Path
+        Path to the new wmin PDF set
+    """
+    n_params = wmin_parameters.shape[1]
+
+    # The basis columns do not depend on the sample: select them once, not per iteration.
+    wmin_centr_rep = replicas_df.loc[:, [1]]
+    replica = replicas_df.loc[:, range(2, n_params + 2)]
+
+    for i, wmin_weight in enumerate(wmin_parameters):
+        wm_replica = wmin_centr_rep.dot([1.0 - np.sum(wmin_weight)]) + replica.dot(
+            wmin_weight
+        )
+        wm_headers = f"PdfType: replica\nFormat: lhagrid1\nFromMCReplica: {i}\n"
+        log.info(f"Writing replica {i + 1} to {new_wmin_pdf}")
+        write_replica(i + 1, new_wmin_pdf, wm_headers.encode("UTF-8"), wm_replica)
+
+
+def write_new_lhapdf_info_file_from_previous_pdf(
+    path_old_pdfset,
+    name_old_pdfset,
+    path_new_pdfset,
+    name_new_pdfset,
+    num_members,
+    description_set="Weight-minimized set",
+    errortype="replicas",
+):
+    """
+    Writes a new LHAPDF set info file based on an existing set: the old info file is
+    copied line by line, with the lines containing ``SetDesc:``, ``NumMembers:`` or
+    ``ErrorType:`` replaced by the values passed in.
+    """
+    info_old = path_old_pdfset / f"{name_old_pdfset}.info"
+    info_new = path_new_pdfset / f"{name_new_pdfset}.info"
+    with open(info_old, "r") as in_stream, open(info_new, "w") as out_stream:
+        for line in in_stream:
+            if "SetDesc:" in line:
+                out_stream.write(f'SetDesc: "{description_set}"\n')
+            elif "NumMembers:" in line:
+                out_stream.write(f"NumMembers: {num_members}\n")
+            elif "ErrorType:" in line:
+                out_stream.write(f"ErrorType: {errortype}\n")
+            else:
+                out_stream.write(line)
+    log.info(f"Info file written to {info_new}")
+
+
+def write_lhapdf_from_ultranest_result(
+    wmin_settings,
+    ultranest_fit,
+    ns_settings,
+    output_path,
+    errortype: str = "replicas",
+):
+    """
+    Writes a new LHAPDF set from the results of an UltraNest fit.
+    The UltraNest fit must have been performed using a wmin parameterization so that the
+    new set can be written as a linear combination of the replicas of the basis set.
+
+    The new set is named after the last component of ``output_path`` and is created in
+    the last directory listed by ``lhapdf.paths()``.
+
+    Parameters
+    ----------
+    wmin_settings: dict
+        wmin settings; ``wmin_settings["wminpdfset"]`` names the basis PDF set
+    ultranest_fit:
+        Object whose ``resampled_posterior`` holds the wmin parameter samples
+    ns_settings: dict
+        Nested sampling settings; ``n_posterior_samples`` plus one (for the central
+        member generated at the end) is written as ``NumMembers``
+    output_path: str or Path
+        Output folder of the fit; its name is used as the name of the new set
+    errortype: str, default "replicas"
+        Value written as ``ErrorType`` in the info file
+    """
+    wminpdfset = PDF(wmin_settings["wminpdfset"])
+    lhapdf_path = pathlib.Path(lhapdf.paths()[-1])
+    wmin_fit_name = pathlib.Path(output_path).name
+
+    # create the folder of the new set if needed (avoids the exists()/makedirs race)
+    new_wmin_pdf = lhapdf_path / wmin_fit_name
+    new_wmin_pdf.mkdir(parents=True, exist_ok=True)
+
+    write_new_lhapdf_info_file_from_previous_pdf(
+        path_old_pdfset=lhapdf_path / str(wminpdfset),
+        name_old_pdfset=wminpdfset,
+        path_new_pdfset=new_wmin_pdf,
+        name_new_pdfset=wmin_fit_name,
+        num_members=ns_settings["n_posterior_samples"] + 1,
+        description_set=f"Weight-minimized set using {wminpdfset} as basis",
+        errortype=errortype,
+    )
+
+    # load replicas from basis set at all scales; the headers are not needed here
+    _, grids = load_all_replicas(wminpdfset)
+    replicas_df = rep_matrix(grids)
+    wmin_parameters_sample = ultranest_fit.resampled_posterior
+    write_wmin_combined_replicas(wmin_parameters_sample, replicas_df, new_wmin_pdf)
+
+    log.info(f"Generating central replica for {new_wmin_pdf}")
+    generate_replica0(PDF(wmin_fit_name))
diff --git a/wmin/runcards/wmin_fit_runcards/wmin_analytic_dis.yaml b/wmin/runcards/wmin_fit_runcards/wmin_analytic_dis.yaml
@@ -0,0 +1,65 @@
+meta: 'An example of an analytic fit using wmin parameterisation on a reduced DIS dataset.'
+
+#######################
+# Data and theory specs
+#######################
+
+dataset_inputs:    
+  # DIS                
+  - {'dataset': 'NMC_NC_NOTFIXED_P_EM-SIGMARED', 'variant': 'legacy'}
+  - {'dataset': 'HERA_NC_318GEV_EM-SIGMARED', 'variant': 'legacy'} 
+  - {'dataset': 'HERA_NC_251GEV_EP-SIGMARED', 'variant': 'legacy'}
+  - {'dataset': 'HERA_NC_300GEV_EP-SIGMARED', 'variant': 'legacy'}
+
+
+theoryid: 40001000                          # The theory from which the predictions are drawn.
+use_cuts: internal                     # The kinematic cuts to be applied to the data.
+
+closure_test_level: 0                  # The closure test level: False for experimental, level 0
+                                       # for pseudodata with no noise, level 1 for pseudodata with
+                                       # noise.
+closure_test_pdf: NNPDF40_nnlo_as_01180 # The closure test PDF used if closure_test_level is not False
+
+
+#####################
+# Loss function specs
+#####################
+
+positivity:                            # Positivity datasets, used in the positivity penalty.
+  posdatasets:
+  - {dataset: POSF2U, maxlambda: 1e6}
+
+alpha: 1e-7                           
+lambda_positivity: 0                 
+
+use_fit_t0: True                       # Whether the t0 covariance is used in the chi2 loss.
+t0pdfset: NNPDF40_nnlo_as_01180         # The t0 PDF used to build the t0 covariance matrix.
+
+
+#############
+# Model specs
+#############
+wmin_settings:                         # Settings for the weight minimisation model
+  wminpdfset: NNPDF40_nnlo_as_01180     # PDF set from which the replicas are selected
+  n_basis: 10                          # Number of weights/replicas used in the fit
+
+
+
+###################
+# Methodology specs
+###################
+
+# Analytic settings
+analytic_settings:
+  n_posterior_samples: 10
+  full_sample_size: 1000
+  sampling_seed: 123456
+  optimal_prior: True
+
+prior_settings:
+  type: 'uniform_parameter_prior'            # The type of prior used in Nested Sampling (model dependent)
+  max_val: 10.0
+  min_val: -10.0
+
+actions_:
+- run_analytic_fit                        # Choose from ultranest_fit, monte_carlo_fit, analytic_fit
diff --git a/wmin/runcards/wmin_fit_runcards/wmin_bayes_dis.yaml b/wmin/runcards/wmin_fit_runcards/wmin_bayes_dis.yaml
@@ -0,0 +1,75 @@
+meta: 'An example of a bayesian fit using wmin parameterisation on a reduced DIS dataset'
+
+#######################
+# Data and theory specs
+#######################
+
+dataset_inputs:    
+  # DIS                
+  - {'dataset': 'NMC_NC_NOTFIXED_P_EM-SIGMARED', 'variant': 'legacy'}
+  - {'dataset': 'HERA_NC_318GEV_EM-SIGMARED', 'variant': 'legacy'} 
+  - {'dataset': 'HERA_NC_251GEV_EP-SIGMARED', 'variant': 'legacy'}
+  - {'dataset': 'HERA_NC_300GEV_EP-SIGMARED', 'variant': 'legacy'}
+
+
+theoryid: 700                          # The theory from which the predictions are drawn.
+use_cuts: internal                     # The kinematic cuts to be applied to the data.
+
+closure_test_level: 0                  # The closure test level: False for experimental, level 0
+                                       # for pseudodata with no noise, level 1 for pseudodata with
+                                       # noise.
+closure_test_pdf: NNPDF40_nnlo_as_01180 # The closure test PDF used if closure_test_level is not False
+
+
+#####################
+# Loss function specs
+#####################
+
+positivity:                            # Positivity datasets, used in the positivity penalty.
+  posdatasets:
+  - {dataset: POSF2U, maxlambda: 1e6}
+
+positivity_penalty_settings:
+  positivity_penalty: False
+  alpha: 1e-7                           
+  lambda_positivity: 0             
+
+
+use_fit_t0: True                       # Whether the t0 covariance is used in the chi2 loss.
+t0pdfset: NNPDF40_nnlo_as_01180         # The t0 PDF used to build the t0 covariance matrix.
+
+
+#############
+# Model specs
+#############
+
+# Weight minimisation settings
+wmin_settings:
+  wminpdfset: NNPDF40_nnlo_as_01180
+  n_basis: 10
+  wmin_inherited_evolution: true
+
+###################
+# Methodology specs
+###################
+
+# Nested Sampling settings
+
+ns_settings:
+  sampler_plot: True # is slow for large number of parameters
+  n_posterior_samples: 10
+  ReactiveNS_settings:
+    vectorized: False
+    ndraw_max: 500
+  Run_settings:
+    min_num_live_points: 500
+    min_ess: 50
+    frac_remain: 0.01
+
+prior_settings:
+  type: 'uniform_parameter_prior'            # The type of prior used in Nested Sampling (model dependent)
+  max_val: 10.0
+  min_val: -10.0
+
+actions_:
+- run_ultranest_fit