Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions wmin/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
"wmin.model",
"wmin.utils",
"wmin.basis",
"wmin.ultranest_fit",
]


Expand Down
37 changes: 36 additions & 1 deletion wmin/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,13 @@
import dill
from validphys.core import PDF
from wmin.model import WMinPDF
import logging
from reportengine.configparser import ConfigError

from colibri.config import Environment, colibriConfig

log = logging.getLogger(__name__)


class Environment(Environment):
    # Thin subclass of the Environment imported from colibri.config: the body is
    # empty, so no attributes or methods are added or overridden here.
    pass
Expand All @@ -22,7 +26,9 @@ class WminConfig(colibriConfig):
"""

def parse_prior_settings(self, settings):

"""
Parse the prior settings for the wmin fit.
"""
if "type" not in settings.keys():
raise ValueError("Missing key type for prior_settings")

Expand All @@ -37,6 +43,35 @@ def parse_prior_settings(self, settings):

return settings

def parse_wmin_settings(self, settings):
    """
    Validate the ``wmin_settings`` runcard entry and return it as a dictionary.

    Parameters
    ----------
    settings: dict
        Mapping read from the runcard. ``n_basis`` and ``wminpdfset`` are
        required; ``wmin_inherited_evolution`` is optional and defaults to
        ``False``.

    Returns
    -------
    dict
        Dictionary holding exactly the keys ``n_basis``, ``wminpdfset`` and
        ``wmin_inherited_evolution``.

    Raises
    ------
    ValueError
        If ``n_basis`` or ``wminpdfset`` is missing from ``settings``.
    """
    known_keys = {"n_basis", "wminpdfset", "wmin_inherited_evolution"}

    # Unknown keys are only reported, not fatal; they are left out of the result.
    for k in settings.keys() - known_keys:
        log.warning(
            ConfigError(f"Key '{k}' in wmin_settings not known.", k, known_keys)
        )

    wmin_settings = {}

    # Mandatory keys, checked in the same order as they are reported to the user.
    for required in ("n_basis", "wminpdfset"):
        if required not in settings:
            raise ValueError(f"Missing key {required} for wmin_settings")
        wmin_settings[required] = settings[required]

    wmin_settings["wmin_inherited_evolution"] = settings.get(
        "wmin_inherited_evolution", False
    )

    return wmin_settings

def produce_pdf_model(self, wmin_settings, output_path, dump_model=True):
"""
Weight minimization grid is in the evolution basis.
Expand Down
144 changes: 144 additions & 0 deletions wmin/export_results.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
"""
wmin.export_results.py

This module contains functions for exporting the results of fits
performed in the wmin parameterization.
"""

import pathlib
import lhapdf
import os
import logging
import numpy as np

from validphys.lhio import (
load_all_replicas,
rep_matrix,
write_replica,
generate_replica0,
)
from validphys.core import PDF

log = logging.getLogger(__name__)


def write_wmin_combined_replicas(wmin_parameters, replicas_df, new_wmin_pdf):
    """
    Writes the replicas of a new LHAPDF set from the results of an UltraNest fit.
    The UltraNest fit must have been performed using a wmin parameterization so that the
    new set can be written as a sum rule conserving linear combination of the replicas of
    the basis set.

    For every posterior sample ``w`` the written replica is

        (1 - sum(w)) * replicas_df[1] + sum_k w[k] * replicas_df[k + 2]

    and it is stored as member ``i + 1`` of the new set, where ``i`` is the
    index of the sample.

    Parameters
    ----------
    wmin_parameters: Array
        wmin parameters posterior samples, shape (n_posterior_samples, n_params)

    replicas_df: DataFrame
        DataFrame containing replicas of the basis set at all scales.
        Columns ``1`` to ``n_params + 1`` are read.

    new_wmin_pdf: Path
        Path to the new wmin PDF set
    """
    n_params = wmin_parameters.shape[1]

    # The column selections do not depend on the posterior sample, so they are
    # taken once instead of once per replica.
    wmin_centr_rep = replicas_df.loc[:, [1]]
    basis_replicas = replicas_df.loc[:, range(2, n_params + 2)]

    for i, wmin_weight in enumerate(wmin_parameters):
        # The reference column gets the weight that makes all weights sum to one.
        wm_replica = wmin_centr_rep.dot(
            [1.0 - np.sum(wmin_weight)]
        ) + basis_replicas.dot(wmin_weight)

        wm_headers = f"PdfType: replica\nFormat: lhagrid1\nFromMCReplica: {i}\n"
        log.info(f"Writing replica {i + 1} to {new_wmin_pdf}")
        write_replica(i + 1, new_wmin_pdf, wm_headers.encode("UTF-8"), wm_replica)


def write_new_lhapdf_info_file_from_previous_pdf(
    path_old_pdfset,
    name_old_pdfset,
    path_new_pdfset,
    name_new_pdfset,
    num_members,
    description_set="Weight-minimized set",
    errortype="replicas",
):
    """
    Writes a new LHAPDF set info file based on an existing set.

    The file ``<path_old_pdfset>/<name_old_pdfset>.info`` is copied line by line
    to ``<path_new_pdfset>/<name_new_pdfset>.info``; lines containing
    ``SetDesc:``, ``NumMembers:`` or ``ErrorType:`` are replaced by the values
    given here, every other line is copied unchanged.

    Parameters
    ----------
    path_old_pdfset: Path
        Folder of the existing set (must support the ``/`` operator).

    name_old_pdfset: str or PDF
        Name of the existing set; it is formatted into the file name.

    path_new_pdfset: Path
        Folder of the new set; it must already exist.

    name_new_pdfset: str
        Name of the new set.

    num_members: int
        Value written to the ``NumMembers`` field.

    description_set: str
        Text written, in double quotes, to the ``SetDesc`` field.

    errortype: str
        Value written to the ``ErrorType`` field.
    """
    old_info = path_old_pdfset / f"{name_old_pdfset}.info"
    new_info = path_new_pdfset / f"{name_new_pdfset}.info"

    # write LHAPDF info file for a new pdf set
    with open(old_info, "r") as in_stream, open(new_info, "w") as out_stream:
        for line in in_stream:
            if "SetDesc:" in line:
                # The description is a quoted string in the info file.
                out_stream.write(f'SetDesc: "{description_set}"\n')
            elif "NumMembers:" in line:
                out_stream.write(f"NumMembers: {num_members}\n")
            elif "ErrorType:" in line:
                out_stream.write(f"ErrorType: {errortype}\n")
            else:
                out_stream.write(line)
    log.info(f"Info file written to {new_info}")


def write_lhapdf_from_ultranest_result(
    wmin_settings,
    ultranest_fit,
    ns_settings,
    output_path,
    errortype: str = "replicas",
):
    """
    Writes a new LHAPDF set from the results of an UltraNest fit.
    The UltraNest fit must have been performed using a wmin parameterization so that the
    new set can be written as a linear combination of the replicas of the basis set.

    The new set is named after the last component of ``output_path`` and is
    created inside the last folder returned by ``lhapdf.paths()``, which is also
    where the basis set is looked up.

    Parameters
    ----------
    wmin_settings: dict
        Must contain the key ``wminpdfset``, the name of the basis PDF set.

    ultranest_fit:
        Fit result; its ``resampled_posterior`` attribute provides the wmin
        parameter samples.

    ns_settings: dict
        Must contain the key ``n_posterior_samples``.

    output_path: str or Path
        Output folder of the fit; only its name is used.

    errortype: str
        Value written to the ``ErrorType`` field of the new info file.
    """
    wminpdfset = PDF(wmin_settings["wminpdfset"])

    lhapdf_path = pathlib.Path(lhapdf.paths()[-1])

    # path to pdf set that was used as a basis for the wmin fit
    wmin_basis_pdf = lhapdf_path / str(wminpdfset)

    wmin_fit_name = pathlib.Path(output_path).name

    # path to new wmin pdf set
    new_wmin_pdf = lhapdf_path / wmin_fit_name

    # create new wmin pdf set folder in lhapdf path if it does not exist
    # (exist_ok avoids the check-then-create race of a separate exists() test)
    new_wmin_pdf.mkdir(parents=True, exist_ok=True)

    # write LHAPDF info file for new wmin pdf set
    # NOTE(review): the "+ 1" presumably accounts for the central member written
    # by generate_replica0 below -- confirm.
    write_new_lhapdf_info_file_from_previous_pdf(
        path_old_pdfset=wmin_basis_pdf,
        name_old_pdfset=wminpdfset,
        path_new_pdfset=new_wmin_pdf,
        name_new_pdfset=wmin_fit_name,
        num_members=ns_settings["n_posterior_samples"] + 1,
        description_set=f"Weight-minimized set using {wminpdfset} as basis",
        errortype=errortype,
    )

    # load replicas from basis set at all scales (the headers are not needed)
    _, grids = load_all_replicas(wminpdfset)
    replicas_df = rep_matrix(grids)

    wmin_parameters_sample = ultranest_fit.resampled_posterior

    # write replicas to new wmin pdf set
    write_wmin_combined_replicas(wmin_parameters_sample, replicas_df, new_wmin_pdf)

    # Generate central replica
    log.info(f"Generating central replica for {new_wmin_pdf}")
    generate_replica0(PDF(wmin_fit_name))
65 changes: 65 additions & 0 deletions wmin/runcards/wmin_fit_runcards/wmin_analytic_dis.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
meta: 'An example of an analytic fit using the wmin parameterisation on a reduced DIS dataset.'

#######################
# Data and theory specs
#######################

dataset_inputs:
# DIS
- {'dataset': 'NMC_NC_NOTFIXED_P_EM-SIGMARED', 'variant': 'legacy'}
- {'dataset': 'HERA_NC_318GEV_EM-SIGMARED', 'variant': 'legacy'}
- {'dataset': 'HERA_NC_251GEV_EP-SIGMARED', 'variant': 'legacy'}
- {'dataset': 'HERA_NC_300GEV_EP-SIGMARED', 'variant': 'legacy'}


theoryid: 40001000 # The theory from which the predictions are drawn.
use_cuts: internal # The kinematic cuts to be applied to the data.

closure_test_level: 0 # The closure test level: False for experimental, level 0
# for pseudodata with no noise, level 1 for pseudodata with
# noise.
closure_test_pdf: NNPDF40_nnlo_as_01180 # The closure test PDF used if closure_test_level is not False


#####################
# Loss function specs
#####################

positivity: # Positivity datasets, used in the positivity penalty.
posdatasets:
- {dataset: POSF2U, maxlambda: 1e6}

alpha: 1e-7
lambda_positivity: 0

use_fit_t0: True # Whether the t0 covariance is used in the chi2 loss.
t0pdfset: NNPDF40_nnlo_as_01180 # The t0 PDF used to build the t0 covariance matrix.


#############
# Model specs
#############
wmin_settings: # Settings for the weight minimisation model
wminpdfset: NNPDF40_nnlo_as_01180 # PDF set from which the replicas are selected
n_basis: 10 # Number of weights/replicas used in the fit



###################
# Methodology specs
###################

# Analytic settings
analytic_settings:
n_posterior_samples: 10
full_sample_size: 1000
sampling_seed: 123456
optimal_prior: True

prior_settings:
type: 'uniform_parameter_prior' # The type of prior used in Nested Sampling (model dependent)
max_val: 10.0
min_val: -10.0

actions_:
- run_analytic_fit # Choose from ultranest_fit, monte_carlo_fit, analytic_fit
75 changes: 75 additions & 0 deletions wmin/runcards/wmin_fit_runcards/wmin_bayes_dis.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
meta: 'An example of a bayesian fit using wmin parameterisation on a reduced DIS dataset'

#######################
# Data and theory specs
#######################

dataset_inputs:
# DIS
- {'dataset': 'NMC_NC_NOTFIXED_P_EM-SIGMARED', 'variant': 'legacy'}
- {'dataset': 'HERA_NC_318GEV_EM-SIGMARED', 'variant': 'legacy'}
- {'dataset': 'HERA_NC_251GEV_EP-SIGMARED', 'variant': 'legacy'}
- {'dataset': 'HERA_NC_300GEV_EP-SIGMARED', 'variant': 'legacy'}


theoryid: 700 # The theory from which the predictions are drawn.
use_cuts: internal # The kinematic cuts to be applied to the data.

closure_test_level: 0 # The closure test level: False for experimental, level 0
# for pseudodata with no noise, level 1 for pseudodata with
# noise.
closure_test_pdf: NNPDF40_nnlo_as_01180 # The closure test PDF used if closure_test_level is not False


#####################
# Loss function specs
#####################

positivity: # Positivity datasets, used in the positivity penalty.
posdatasets:
- {dataset: POSF2U, maxlambda: 1e6}

positivity_penalty_settings:
positivity_penalty: False
alpha: 1e-7
lambda_positivity: 0


use_fit_t0: True # Whether the t0 covariance is used in the chi2 loss.
t0pdfset: NNPDF40_nnlo_as_01180 # The t0 PDF used to build the t0 covariance matrix.


#############
# Model specs
#############

# Weight minimisation settings
wmin_settings:
wminpdfset: NNPDF40_nnlo_as_01180
n_basis: 10
wmin_inherited_evolution: true

###################
# Methodology specs
###################

# Nested Sampling settings

ns_settings:
sampler_plot: True # is slow for large number of parameters
n_posterior_samples: 10
ReactiveNS_settings:
vectorized: False
ndraw_max: 500
Run_settings:
min_num_live_points: 500
min_ess: 50
frac_remain: 0.01

prior_settings:
type: 'uniform_parameter_prior' # The type of prior used in Nested Sampling (model dependent)
max_val: 10.0
min_val: -10.0

actions_:
- run_ultranest_fit
Loading