Correlations between input variables can be visualised with the `input_correlations.py` script, which can be found here. It plots a linear correlation matrix and pairwise scatter plots for all variables listed in a YAML variable file.
def correlation_matrix(jets, jet_vars, fig_size=(9, 11)) -> None:
    """Plot the correlation matrix of the selected jet variables.

    The pairwise correlations of the ``jet_vars`` columns are drawn as a
    heatmap. The upper triangle, including the diagonal, is masked, so only
    the lower triangle is visible. The figure is written to
    ``correlation_matrix.png`` (relative to the current working directory).

    Parameters
    ----------
    jets : pandas.DataFrame
        Jets, with one column per variable.
    jet_vars : list
        Names of the columns of ``jets`` to correlate.
    fig_size : tuple(int, int)
        Figure size, forwarded to matplotlib as ``figsize``.
    """
    logger.info("Plotting Correlation Matrix ...")

    correlations = jets[jet_vars].corr()

    # The matrix is symmetric: hide the upper triangle and the diagonal.
    hidden_cells = np.triu(np.ones_like(correlations, dtype=bool))

    heatmap_options = {
        "mask": hidden_cells,
        # Diverging colormap so the sign of the correlation is visible.
        "cmap": sns.diverging_palette(230, 20, as_cmap=True),
        "vmax": None,
        "center": 0,
        "square": True,
        "linewidths": 0.5,
        "cbar_kws": {"shrink": 0.5},
    }

    # Create the figure the heatmap is drawn into.
    plt.subplots(figsize=fig_size)
    sns.heatmap(correlations, **heatmap_options)

    plt.tight_layout()
    plt.savefig("correlation_matrix.png")
def scatter_matrix(
    jets,
    jet_vars,
    std_outliers=5,
    show_contours=True,
    contour_level=4,
    del_upper_right_triangle=True,
) -> None:
    """Plot 2D scatter plots between all selected variables, split by flavour.

    Rows containing NaN are dropped, then every jet that lies
    ``std_outliers`` or more standard deviations from the mean in *any*
    column of ``jets`` is removed. The remaining jets are drawn as a seaborn
    pair plot coloured by flavour and written to ``scatterplot_matrix.png``
    (relative to the current working directory).

    Neither ``jets`` nor ``jet_vars`` is modified.

    Parameters
    ----------
    jets : pandas.DataFrame
        Jets, with one column per variable. Must contain the column
        ``Umami_labels`` (0 = b-jets, 1 = c-jets, 2 = u-jets).
    jet_vars : list
        Names of the columns of ``jets`` to plot.
    std_outliers : float
        Jets further than this many standard deviations from the mean in
        any column are treated as outliers and removed.
    show_contours : bool
        Draw filled KDE contours on the lower triangle (expensive).
    contour_level : int
        Number of contour levels, forwarded to ``seaborn.kdeplot``.
    del_upper_right_triangle : bool
        If True, the upper-right triangle of the grid is left out
        (forwarded as seaborn's ``corner`` option).
    """
    flavour_names = {0: "b-jets", 1: "c-jets", 2: "u-jets"}
    flavour_colours = {
        "b-jets": "#1f77b4",
        "c-jets": "#ff7f0e",
        "u-jets": "#2ca02c",
    }

    # Delete NaN.
    jets = jets.dropna()

    # Remove outliers. The z-score is evaluated on every column of `jets`,
    # not only on `jet_vars`. The explicit copy ensures that adding the
    # "Flavour" column below never writes into the caller's frame.
    within_range = (np.abs(stats.zscore(jets)) < std_outliers).all(axis=1)
    jets = jets[within_range].copy()

    # Translate the integer labels into flavour strings in one step instead
    # of writing strings into a numeric column. Labels without a known name
    # are kept unchanged.
    jets["Flavour"] = jets["Umami_labels"].map(
        lambda label: flavour_names.get(label, label)
    )

    logger.info("Plotting Scatter Matrix ... ")
    logger.info(
        "This can take a while depending on the amount of variables and jets."
    )

    # Build a new column list rather than appending to the caller's list.
    plot_columns = list(jet_vars)
    if "Flavour" not in plot_columns:
        plot_columns.append("Flavour")
    jets_for_plot = jets[plot_columns]

    # Tie each colour to its flavour name so the colours do not depend on the
    # order in which flavours first appear in the data.
    present_flavours = jets_for_plot["Flavour"].unique()
    if all(flavour in flavour_colours for flavour in present_flavours):
        palette = {
            flavour: flavour_colours[flavour] for flavour in present_flavours
        }
    else:
        # Unknown labels present: fall back to the previous positional
        # palette so plotting stays best-effort.
        palette = ["#1f77b4", "#2ca02c", "#ff7f0e"]

    # seaborn plot
    sns.set_theme(style="ticks")
    graph = sns.pairplot(
        jets_for_plot,
        hue="Flavour",
        palette=palette,
        corner=del_upper_right_triangle,
        height=3,
    )

    if show_contours:
        graph.map_lower(
            sns.kdeplot, levels=contour_level, fill=True, alpha=0.4
        )

    plt.tight_layout()
    plt.savefig("scatterplot_matrix.png")