Select Git revision
Combined_Methods_2.0.py
Henrik Schiller authored
Combined_Methods_2.0.py 9.63 KiB
from sklearn.metrics import classification_report, confusion_matrix, roc_curve, auc
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from scipy.spatial import ConvexHull, convex_hull_plot_2d
from itertools import compress, product
import ROOT
from hipe4ml.model_handler import ModelHandler
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sn
import os
import joblib
from sklearn.ensemble import RandomForestClassifier
import matplotlib.transforms as mtransforms
from sklearn.metrics import roc_curve, roc_auc_score
import random
import alphashape
from descartes import PolygonPatch
from operator import itemgetter
from PIDclass import *
import pickle
import xgboost as xgb
####################
#1 = 'TRD&TOF&RICH'
#2 = ,'TRD&RICH',
#3 = 'TRD&TOF',
#4 = 'TRD'
#5 = 'RICH'
def Run_RF_compair():
rf_model = RandomForestClassifier(n_estimators=500, verbose=True, n_jobs = -1)
rf_model.fit( ALL.X_train, ALL.y_train)
joblib.dump(rf_model, "./AllRFClassifierTest.joblib")
return rf_model
def Run_xg_reg():
model_clf = xgb.XGBClassifier()
model_hdl = ModelHandler(model_clf, TrainVar)
hyper_pars_ranges = {'n_estimators': (100, 1000), 'max_depth': (
2, 10), 'learning_rate': (0.01, 0.1)}
model_hdl.optimize_params_optuna(train_test_data, hyper_pars_ranges, cross_val_scoring='roc_auc', timeout=120,
n_jobs=-1, n_trials=100, direction='maximize')
xg_reg = model_hdl.model
xg_reg.fit(ALL.X_train, ALL.y_train)
joblib.dump(xg_reg, "./AllXGBRegressor.joblib")
return xg_reg
def Test_Single_xg():
xg_reg = joblib.load("./AllXGBRegressor.joblib")
y_predict_gb = xg_reg.predict_proba(ALL.X_test)
np.save("YTest" ,np.array(ALL.y_test) )
np.save("y_predict_xgTest" ,np.array(y_predict_gb) )
def Test_Single_RF():
rf_model = joblib.load("./AllRFClassifierTest.joblib")
y_predict_rf = rf_model.predict_proba(ALL.X_test)[:,1]
np.save("YTest" ,np.array(ALL.y_test) )
np.save("y_predict_rfTest" ,np.array(y_predict_rf) )
def Run_RandomForest(i,Ne):
rf_model = RandomForestClassifier(n_estimators=Ne, verbose=True, n_jobs = -1)
rf_model.fit(X_trainL[i], y_trainL[i])
joblib.dump(rf_model, "./AllRFClassifier"+str(i)+".joblib")
return rf_model
def TestModel(i):
rf_model = joblib.load("./AllRFClassifier"+str(i)+".joblib")
y_predict_rf = rf_model.predict_proba(X_testL[i])[:,1]
np.save("Ytrue"+str(i) ,np.array( y_testL[i]) )
np.save("y_predict_rftrue" +str(i) ,np.array(y_predict_rf) )
########################################################################################
def PlotROCall():
fig, axs = plt.subplot_mosaic([['a)','b)'],['c)','d)']], constrained_layout=True, figsize=(9,5))
for j, labelz, letter, pion, ele in list(zip([0, 1, 2, 3],['TRD&TOF&RICH','TRD&RICH','TRD&TOF','TRD'],['a)','b)','c)' , 'd)'],[0.06,0.01,0.71,0.22],[0.64, 0.09, 0.15, 0.12])):
Y = np.load("Ytrue"+str(j) +".npy")
proba = np.load("y_predict_rftrue"+str(j)+".npy")
YC = np.load("YtrueC"+str(i) +".npy")
probaC = np.load("y_predict_rftrueC"+str(i)+".npy")
PionSupressionC, ElectronEfficencyC, SC = roc_curve(Y, proba)
axs[letter].plot(pion*PionSupressionC,ele*ElectronEfficencyC,linewidth=2.0, linestyle = 'dashed', c = 'green')
PionSupression, ElectronEfficency, S = roc_curve(Y, proba)
if j == 0:
P_Alt = pion*PionSupression
E_Alt = ele*ElectronEfficency
S_Alt = [[s] for s in S]
e_Alt = [[e] for e in ele*ElectronEfficency]
p_Alt = [[p] for p in pion*PionSupression]
else:
PionSupressionS = [a+b for a in P_Alt for b in pion*PionSupression]
ElectronEfficencyS = [a+b for a in E_Alt for b in ele*ElectronEfficency]
S_List = [s_alt+ [s] for s_alt in S_Alt for s in S ]
E_List = [e_alt+ [b] for e_alt in e_Alt for b in ele*ElectronEfficency]
P_List = [p_alt+ [b] for p_alt in p_Alt for b in pion*PionSupression ]
points = np.column_stack((PionSupressionS,ElectronEfficencyS) )
hull = ConvexHull(points)
P_Alt = [ PionSupressionS[i] for i in hull.vertices ]
E_Alt = [ ElectronEfficencyS[i] for i in hull.vertices ]
S_Alt = [S_List[i] for i in hull.vertices]
e_Alt = [E_List[i] for i in hull.vertices]
p_Alt = [P_List[i] for i in hull.vertices]
axs[letter].plot(pion*PionSupression,ele*ElectronEfficency,linewidth=2.0)
axs[letter].set_xlabel("Hadron Supression")
axs[letter].set_title(labelz)
axs[letter].grid()
axs[letter].set_ylim([0,ele])
axs[letter].set_xlim([0,pion])
Filter = [p <= e and p != 0 for p,e in zip(P_Alt ,E_Alt)]
P_Alt = list(compress(P_Alt, Filter)) + [0]
E_Alt = list(compress(E_Alt, Filter)) + [0]
S_Alt = list(compress(S_Alt, Filter)) + [[1, 1, 1, 1]]
QualityList = [Quality(e,p) for e,p in zip( E_Alt,P_Alt)]
MaxIndx = QualityList.index(max(QualityList))
E = e_Alt[MaxIndx]
P = p_Alt[MaxIndx]
axs['a)'].set_ylabel("Electron Efficency")
axs['c)'].set_ylabel("Electron Efficency")
axs['a)'].scatter(P[0], E[0] ,s=40, c = 'r')
axs['b)'].scatter(P[1],E[1] ,s=40, c = 'r')
axs['c)'].scatter(P[2],E[2] ,s=40, c = 'r')
axs['d)'].scatter(P[3], E[3] ,s=40, c = 'r')
axs['a)'].locator_params(nbins=3)
axs['b)'].locator_params(nbins=4)
axs['c)'].locator_params(nbins=3)
axs['d)'].locator_params(nbins=3)
plt.savefig("ROCsALL.pdf")
plt.show()
with open("SPE_Alt.pkl", 'wb') as f:
pickle.dump([S_Alt,P_Alt,E_Alt ], f)
def PlotROC():
with open("SPE_Alt.pkl", 'rb') as f:
S_Alt,P_Alt,E_Alt = pickle.load(f)
fig, axs = plt.subplot_mosaic([['a)','b)']], constrained_layout=True, figsize=(8,4))
xlim = 0.004
axs['a)'].set_xlabel("Hadron Purity")
axs['a)'].set_ylabel("Electron Efficency")
axs['a)'].grid()
Y = np.load("YTest.npy")
proba = np.load("y_predict_rfTest.npy")
probaXG = np.load("y_predict_xgTest.npy")[:,1]
PionSupressionT, ElectronEfficencyT, S = roc_curve(Y, proba)
PionSupressionXG, ElectronEfficencyXG, S_XG = roc_curve(Y, probaXG)
axs['a)'].plot(PionSupressionXG ,ElectronEfficencyXG, color='green',linewidth=2.0, label = "single XG Boost")
axs['b)'].plot(PionSupressionXG ,ElectronEfficencyXG, color='green',linewidth=2.0, label = "single XG Boost")
axs['a)'].plot(PionSupressionT ,ElectronEfficencyT, color='blue',linewidth=2.0, label = "single clf")
axs['a)'].plot(P_Alt , E_Alt , color='red',linewidth=2.0, label = "Add Roc")
axs['b)'].plot(P_Alt , E_Alt , color='red',linewidth=2.0)
axs['b)'].plot(PionSupressionT ,ElectronEfficencyT, color='blue',linewidth=2.0)
QualityList = [Quality(e,p) for e,p in zip( ElectronEfficencyT,PionSupressionT)]
MaxIndx = QualityList.index(max(QualityList))
axs['b)'].scatter([PionSupressionT[MaxIndx]],[ElectronEfficencyT[MaxIndx]] ,label = "max. Significance in singel Classifier" , color='red')
axs['a)'].legend(loc='lower right')
x = np.linspace(0, xlim, 100)
y = np.linspace(0, 1, 100)
z = np.array([Quality(e,p) for e in y for p in x])
Z = z.reshape(100, 100)
X1, Y1 = np.meshgrid(x, y)
c = axs['b)'].pcolormesh(X1, Y1, Z,alpha=0.5)
cbar = fig.colorbar(c, ax=axs['b)'])
cbar.set_label('Significance', rotation=270, labelpad=15)
axs['b)'].set_ylim([0,1])
axs['b)'].set_xlim([0,xlim])
axs['b)'].legend(loc='lower right')
axs['b)'].set_xlabel("Hadron Purity")
plt.savefig("AlphaALL.pdf")
plt.show()
##########################Main######################
Quality = lambda e,p: e**2/np.sqrt((24*p+1*e)**2) if (e!=0 or p!=0) else 0
TrainVar = ['P','phi','theta', 'fAaxis','fBaxis','fPhi','RadialPosition','RadialAngle','fChi2NDF','NoRichHit','distance', 'NoTrdHit', 'Likelihood','DeltaBetaEL','fCharge','fChi2']
TV1 = ['P','phi','theta', 'fAaxis','fBaxis','fPhi','RadialPosition','RadialAngle','fChi2NDF','NoRichHit','distance', 'NoTrdHit', 'Likelihood','DeltaBetaEL','fCharge','fChi2']
TV2 = ['P','phi','theta', 'fAaxis','fBaxis','fPhi','RadialPosition','RadialAngle','fChi2NDF','NoRichHit','distance', 'NoTrdHit', 'Likelihood','fCharge','fChi2']
TV3 = ['P','phi','theta', 'NoTrdHit', 'Likelihood','DeltaBetaEL','fCharge','fChi2']
TV4 = ['P','phi','theta', 'NoTrdHit', 'Likelihood','fCharge','fChi2']
X_trainL, y_trainL, X_testL, y_testL = [],[],[],[]
for TR,cut in zip([TV1,TV2, TV3, TV4 ],[[['NoTrdHit',1,99],['NoRichHit',6,99],['beta',0.01,99999]], [['NoTrdHit',1,99],['NoRichHit',6,99],['beta',0,0]], [['NoTrdHit',1,99],['NoRichHit',0,5],['beta',0.01,999]] , [['NoTrdHit',1,99],['NoRichHit',0,5],['beta',0,0]]]):
Like = PID( test_size = 0.5, electrons=10000, hadrons=10000,train_var = TR, cut = cut)
X_trainL.append( Like.X_train)
y_trainL.append( Like.y_train)
X_testL.append( Like.X_test)
y_testL.append(Like.y_test)
for i,Ne in enumerate([1000,1000,1000,1000]):
Run_RandomForest(i,Ne)
TestModel(i)
#path = Path('~/Dokumente/jun22_Test/').expanduser()
ALL = PID(test_size = 0.5, electrons=50000, hadrons=50000,train_var = TrainVar , cut = None)
train_test_data = [pd.DataFrame(ALL.X_trainTotal, columns=list(np.load("PapaBIB3.npy"))), ALL.y_train, pd.DataFrame(ALL.X_testTotal, columns=list(np.load("PapaBIB3.npy"))),ALL.y_test ]
Run_RF_compair()
Test_Single_RF()
Run_xg_reg()
Test_Single_xg()
PlotROCall()
PlotROC()
###################################################################