Skip to content

Input Correlations API#

Correlations between input variables can be visualised with the input_correlations.py script, which can be found here. It plots a linear correlation matrix and pairwise scatter plots for all variables listed in a YAML variable file.

Correlation Matrix

input_correlations

 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
def correlation_matrix(jets, jet_vars: list, fig_size: tuple = (9, 11)) -> None:
    """
    Plot the correlation matrix of the selected jet variables.

    The correlations are computed with ``DataFrame.corr()`` on the selected
    columns. The diagonal and the upper triangle are masked out, so only the
    strictly lower triangle is drawn. The figure is saved as
    ``correlation_matrix.png`` and closed afterwards.

    Parameters
    ----------
    jets : pandas.DataFrame
        The jets, one column per variable
    jet_vars : list
        List of variables (column names of ``jets``) to plot
    fig_size : tuple(int, int)
        size of figure, passed to matplotlib as ``figsize``
    """

    logger.info("Plotting Correlation Matrix ...")

    corr = jets[jet_vars].corr()

    # Cells where the mask is True are hidden. np.triu keeps the diagonal,
    # so the trivial self-correlations are hidden together with the
    # redundant upper triangle.
    mask = np.triu(np.ones_like(corr, dtype=bool))

    # Keep a handle on the figure so exactly this figure is saved and closed,
    # independent of whatever pyplot considers the "current" figure.
    fig, axis = plt.subplots(figsize=fig_size)
    try:
        # Custom diverging colormap, centred on zero correlation below
        cmap = sns.diverging_palette(230, 20, as_cmap=True)

        # Draw the heatmap with the mask and correct aspect ratio
        sns.heatmap(
            corr,
            mask=mask,
            cmap=cmap,
            center=0,
            square=True,
            linewidths=0.5,
            cbar_kws={"shrink": 0.5},
            ax=axis,
        )
        fig.tight_layout()
        fig.savefig("correlation_matrix.png")
    finally:
        # pyplot keeps every figure alive until it is closed explicitly;
        # without this, repeated calls accumulate open figures.
        plt.close(fig)

Scatterplot Matrix

input_correlations

114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
def scatter_matrix(
    jets,
    jet_vars: list,
    std_outliers: float = 5,
    show_contours: bool = True,
    contour_level: int = 4,
    del_upper_right_triangle: bool = True,
) -> None:
    """
    Plot 2D scatter plots between all selected variables, coloured by flavour.

    Rows containing NaN are dropped, then every row that has a z-score of
    ``std_outliers`` or more (in absolute value) in any column of ``jets`` is
    removed. The flavour is read from the ``Umami_labels`` column
    (0 -> "b-jets", 1 -> "c-jets", 2 -> "u-jets"; other values are kept
    unchanged). The figure is saved as ``scatterplot_matrix.png`` and closed
    afterwards. Neither ``jets`` nor ``jet_vars`` is modified.

    Parameters
    ----------
    jets : pandas.DataFrame
        The jets, one column per variable; must contain ``Umami_labels``
    jet_vars : list
        List of variables (column names of ``jets``) to plot
    std_outliers : float
        outside of how many std's distance sort out outliers
    show_contours : bool
        Show contour lines on lower triangle (expensive)
    contour_level : int
        how many contour levels
    del_upper_right_triangle : bool
        If True, the upper right triangle of the grid is not drawn
        (passed to seaborn as ``corner``)
    """

    flavour_names = {0: "b-jets", 1: "c-jets", 2: "u-jets"}
    flavour_colours = {
        "b-jets": "#1f77b4",
        "c-jets": "#ff7f0e",
        "u-jets": "#2ca02c",
    }

    # delete NaN
    jets = jets.dropna()

    # Keep only rows that are within std_outliers standard deviations in
    # every column. Copy the selection so the new column below is written
    # to an independent frame and not to a slice of the caller's data.
    inliers = (np.abs(stats.zscore(jets)) < std_outliers).all(axis=1)
    jets = jets[inliers].copy()

    # Build the flavour strings in one pass; labels without a known name
    # are passed through unchanged.
    jets["Flavour"] = jets["Umami_labels"].map(
        lambda label: flavour_names.get(label, label)
    )

    logger.info("Plotting Scatter Matrix ... ")
    logger.info("This can take a while depending on the amount of variables and jets.")

    # Work on a copy of the variable list: appending to jet_vars itself would
    # leak "Flavour" into the caller's list.
    plot_vars = list(jet_vars)
    if "Flavour" not in plot_vars:
        plot_vars.append("Flavour")
    jets_for_plot = jets[plot_vars]

    # Map colours by label rather than by position, so each flavour keeps
    # its colour no matter in which order the labels occur in the data.
    # If labels without an assigned colour are present, fall back to a
    # positional list so seaborn can still colour every hue level.
    present_labels = jets_for_plot["Flavour"].unique()
    if all(label in flavour_colours for label in present_labels):
        palette = flavour_colours
    else:
        palette = list(flavour_colours.values())

    # seaborn plot
    sns.set_theme(style="ticks")
    graph = sns.pairplot(
        jets_for_plot,
        hue="Flavour",
        palette=palette,
        corner=del_upper_right_triangle,
        height=3,
    )
    # The grid's figure is the current pyplot figure at this point; keep a
    # handle so that exactly this figure is closed at the end.
    figure = plt.gcf()
    try:
        # Truthiness instead of "is True" so that e.g. numpy booleans coming
        # from a config are honoured as well.
        if show_contours:
            graph.map_lower(sns.kdeplot, levels=contour_level, fill=True, alpha=0.4)
        plt.tight_layout()
        plt.savefig("scatterplot_matrix.png")
    finally:
        # pyplot keeps every figure alive until it is closed explicitly.
        plt.close(figure)

Back to top