diff --git a/pyproject.toml b/pyproject.toml index 804de13..dc38b9e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,4 +25,4 @@ doc = [ ] [tool.poetry.scripts] -wmin = "wmin.app:main" +wmin = "wmin.app:main" \ No newline at end of file diff --git a/wmin/app.py b/wmin/app.py index cf679f1..ffbfb29 100644 --- a/wmin/app.py +++ b/wmin/app.py @@ -11,6 +11,7 @@ "wmin.model", "wmin.utils", "wmin.basis", + "wmin.completeness", ] diff --git a/wmin/completeness.py b/wmin/completeness.py new file mode 100644 index 0000000..b10bb4f --- /dev/null +++ b/wmin/completeness.py @@ -0,0 +1,200 @@ +""" +wmin.completeness.py + +This module contains the functions for checking the completeness of a weight-minimisation +basis. It includes plotting tools. +""" + +from validphys.core import PDF +from reportengine.figure import figure, figuregen + +import matplotlib.pyplot as plt + +import numpy as np + + +def basis_to_target_distances( + target_pdfs, + wmin_basis_pdfs, + nweights, + pdf_aliases, + dist_type=0, + Q=1.65, + x_grid=np.logspace(-5, 0, 50), + flavours=[1, -1, 2, -2, 3, -3, 4, 21], +): + """Computes the distances from the target PDFs to the wmin_basis_pdfs.""" + + target_pdf_sets = [] + for target_pdf in target_pdfs: + name = pdf_aliases.get(target_pdf, target_pdf) + target_pdf_sets.append( + {"name": name, "members": PDF(target_pdf).load().members[1:]} + ) + + wmin_bases = [] + for pdf_basis in wmin_basis_pdfs: + name = pdf_aliases.get(pdf_basis, pdf_basis) + loaded_pdf = PDF(pdf_basis).load() + wmin_bases.append( + { + "name": name, + "central": pdf_grid_allflav( + loaded_pdf.central_member, flavours, x_grid, Q + ), + "members": loaded_pdf.members[1 : nweights + 1], + } + ) + + distances = {} + for basis in wmin_bases: + distances[basis["name"]] = [] + for pdf in target_pdf_sets: + distance = [] + for member in pdf["members"]: + original, reco, w, d = wmin_distance( + member, + basis["central"], + basis["members"], + flavours, + x_grid, + Q, + dist_type=dist_type, + ) + distance.append(d) + distances[basis["name"]].append( + {"name": pdf["name"], "distances": distance} + ) + + return distances + + +@figuregen +def histogram_individual_target_sets(basis_to_target_distances): + """Produces plots showing the error in the approximation when the + wmin_basis_pdf is used to approximate the target_pdfs. + """ + + for basis, targets in basis_to_target_distances.items(): + fig, ax = plt.subplots(figsize=(7, 5)) + overflow_threshold = 0.05 + bins = np.linspace(0, overflow_threshold, 30) + + for index, target in enumerate(targets): + d = np.array(target["distances"]) + d[d > overflow_threshold] = overflow_threshold + 0.001 + targets[index] = {"name": target["name"], "distances": d} + + ax.hist( + [target["distances"] for target in targets], + label=[target["name"] for target in targets], + # alpha=0.6, + bins=np.append(bins, 0.052), + # edgecolor="black", + linewidth=2, + histtype="step", + density=False, + stacked=True, + ) + ax.set_title("Distances of PDF sets from %s " % basis) + ax.set_xlabel("Score", fontsize=16) + ax.set_ylabel("Count", fontsize=16) + ax.legend(frameon=False, fontsize=14) + # set xticks to specific values + ax.set_xticks([0, 0.01, 0.02, 0.03, 0.04]) + # add a tick after 0.05, overflow, with label > 0.5 + ax.set_xticks([0.051], minor=True) + ax.set_xticklabels(["$>$0.05"], minor=True) + + yield fig + + +@figure +def histogram_complete_target_set(basis_to_target_distances): + """Produces a plot showing the error in the approximation when + the wmin_basis_pdf is used to approximate the target_pdfs. + """ + + fig, ax = plt.subplots(figsize=(7, 5)) + overflow_threshold = 0.05 + bins = np.linspace(0, overflow_threshold, 30) + for basis, targets in basis_to_target_distances.items(): + all_distances = [] + for target in targets: + all_distances.extend(target["distances"]) + d = np.array(all_distances) + d[d > overflow_threshold] = overflow_threshold + 0.001 + ax.hist( + d, + label="Basis: %s" % basis, + # alpha=0.6, + bins=np.append(bins, 0.052), + # edgecolor="black", + linewidth=2, + histtype="step", + density=False, + ) + ax.set_title("Distances of weight-minimisation bases to target set") + ax.set_xlabel("Score", fontsize=16) + ax.set_ylabel("Count", fontsize=16) + ax.legend(frameon=False, fontsize=14) + # set xticks to specific values + ax.set_xticks([0, 0.01, 0.02, 0.03, 0.04]) + # add a tick after 0.05, overflow, with label > 0.5 + ax.set_xticks([0.051], minor=True) + ax.set_xticklabels(["$>$0.05"], minor=True) + + return fig + + +def pdf_grid(pdf, pid, x_grid, Q): + out = [] + for x in x_grid: + out.append(pdf.xfxQ(pid, x, Q)) + return np.array(out) + + +def pdf_grid_allflav(pdf, flavours, x_grid, Q): + out = np.array([]) + for pid in flavours: + out = np.append(out, pdf_grid(pdf, pid, x_grid, Q)) + return out + + +def wmin_distance( + pdf_target, center_pdf_grid, wmin_basis, flavours, x_grid, Q, dist_type=0 +): + Y = pdf_grid_allflav(pdf_target, flavours, x_grid, Q) - center_pdf_grid + X = np.array( + [ + pdf_grid_allflav(replica, flavours, x_grid, Q) - center_pdf_grid + for replica in wmin_basis + ] + ).T + + if dist_type == 0: + Sigma = np.identity(len(Y)) + + elif dist_type == 1: + Sigma = np.diag(np.abs(1 / (Y + center_pdf_grid))) + + elif dist_type == 2: + Sigma = np.diag(np.abs(1 / (Y + center_pdf_grid) ** 2)) + + elif dist_type == 3: + X_grid = np.tile(x_grid, len(flavours)) + Sigma = np.diag(np.abs(X_grid / (Y + center_pdf_grid))) + + elif dist_type == 4: + X_grid = np.tile(x_grid, len(flavours)) + Sigma = np.diag(np.abs(X_grid)) + + w = np.linalg.inv(X.T @ Sigma @ X) @ X.T @ Sigma @ Y + distance = (Y - X @ w) @ Sigma @ (Y - X @ w) + + original = pdf_grid_allflav(pdf_target, flavours, x_grid, Q).reshape( + len(flavours), len(x_grid) + ) + reco = (center_pdf_grid + X @ w).reshape(len(flavours), len(x_grid)) + + return original, reco, w, distance diff --git a/wmin/runcards/basis_completeness_plots.yaml b/wmin/runcards/basis_completeness_plots.yaml new file mode 100644 index 0000000..8a0eb05 --- /dev/null +++ b/wmin/runcards/basis_completeness_plots.yaml @@ -0,0 +1,39 @@ +meta: + author: James Moore + title: Plotting + keywords: [wmin basis] + +pdf_aliases: + NNPDF23_nnlo_as_0118: NNPDF2.3 + 240918_mnc_wmin_nnpdf40_basis_disonly_pca: NNPDF40dis_pca + 240919_mnc_pdf4lhc_basis_pca: PDF4LHC_pca + 240919_mnc_mixed_pdf4lhc_nnpdf40_basis_pca: PDF4LHC_NNPDF40_pca + +target_pdfs: + - NNPDF23_nnlo_as_0118 + - JAM19PDF_proton_nlo + - NNPDF40_nlo_as_01180 + - NNPDF40_lo_as_01180 + +wmin_basis_pdfs: + - 240918_mnc_wmin_nnpdf40_basis_disonly_pca + - 240919_mnc_pdf4lhc_basis_pca + - 240919_mnc_mixed_pdf4lhc_nnpdf40_basis_pca + +dist_types: + - dist_type: 0 + - dist_type: 1 + +nweights: 60 + +template_text: | + {@with dist_types@} + # Distances with distance type {@dist_type@} + ## Distances to complete target set + {@histogram_complete_target_set@} + ## Distances from bases to each target set + {@histogram_individual_target_sets@} + {@endwith@} + +actions_: + - report(main=True)