Select Git revision
      
  CombinedCompair.py
              Henrik Schiller authored 
   CombinedCompair.py  7.73 KiB 
from sklearn.metrics import classification_report, confusion_matrix, roc_curve, auc
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
import ROOT
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sn
import os
import joblib
from sklearn.ensemble import RandomForestClassifier
import matplotlib.transforms as mtransforms
from sklearn.metrics import roc_curve, roc_auc_score
import random
import alphashape
from descartes import PolygonPatch
from operator  import itemgetter
from PIDclass import *
####################
#1 = 'TRD&TOF&RICH'
#2 = ,'TRD&RICH',
#3 = 'TRD&TOF', 
#4 = 'TRD'
#5 =  'RICH'
def Run_RF_compair():
    rf_model = RandomForestClassifier(n_estimators=2000, verbose=True, n_jobs = -1)
    rf_model.fit( ALL.X_train, ALL.y_train)
    joblib.dump(rf_model, "./AllRFClassifierTest.joblib")
    return rf_model
def TestModelcompair(): 
    rf_model = joblib.load("./AllRFClassifierTest.joblib")
    y_predict_rf = rf_model.predict_proba(ALL.X_test)[:,1]
    np.save("YTest" ,np.array(ALL.y_test) ) 
    np.save("y_predict_rfTest"  ,np.array(y_predict_rf) ) 
def Run_RandomForest(i,Ne):
    rf_model = RandomForestClassifier(n_estimators=Ne, verbose=True, n_jobs = -1)
    rf_model.fit(X_trainL[i], y_trainL[i])
    joblib.dump(rf_model, "./AllRFClassifier"+str(i)+".joblib")
    return rf_model
def TestModel(i): 
    rf_model = joblib.load("./AllRFClassifier"+str(i)+".joblib")
    y_predict_rf = rf_model.predict_proba(X_testL[i])[:,1]
    np.save("YtrueC"+str(i) ,np.array( y_testL[i]) ) 
    np.save("y_predict_rftrueC" +str(i) ,np.array(y_predict_rf) ) 
########################################################################################
   
def PlotROCall():   
    fig, axs = plt.subplot_mosaic([['a)','b)','c)','d)']], constrained_layout=True, figsize=(14,4))
    for label, ax in axs.items():
        trans = mtransforms.ScaledTranslation(10/72, -5/72, fig.dpi_scale_trans)
        ax.text(0.84, 1.0, label, transform=ax.transAxes + trans,
                fontsize=12, verticalalignment='top', fontfamily='serif',
                bbox=dict(facecolor='0.7', edgecolor='none', pad=6.0))
    l = [0, 0, 0, 0]
    for i, labelz, letter, pion, ele in  list(zip([0, 1, 2, 3],['TRD&TOF&RICH','TRD&RICH','TRD&TOF','TRD'],['a)','b)','c)' , 'd)'],[0.06,0.01,0.71,0.22],[0.64, 0.09, 0.15, 0.12])):
        Y = np.load("YtrueC"+str(i) +".npy")
        proba = np.load("y_predict_rftrueC"+str(i)+".npy") 
        PionSupression, ElectronEfficency, S = roc_curve(Y, proba)
        axs[letter].plot(pion*PionSupression,ele*ElectronEfficency,linewidth=2.0)
        l[i] = list(zip(pion*PionSupression,ele*ElectronEfficency,S))
        axs[letter].set_xlabel("Pion Supression")
        axs[letter].set_title(labelz)
        axs[letter].grid()
        axs[letter].set_ylim([0,ele])
        axs[letter].set_xlim([0,pion])
    MaxQ = 0
    PionSupressionS, ElectronEfficencyS = [],[]
    for k in range(100000):
        Pl = np.array([ np.array(list(random.choice(ROC))) for ROC in l])
        E = sum(Pl[:,1])
        P = sum(Pl[:,0])
        if Quality(E,P) >MaxQ:
            MaxQ, MaxE, MaxP, S = Quality(E,P) ,Pl[:,1],Pl[:,0],Pl[:,2]
        ElectronEfficencyS.append(E)
        PionSupressionS.append(P)
    print(S)
    axs['a)'].set_ylabel("Electron Efficency")
    axs['a)'].scatter(MaxP[0],MaxE[0],s=40)
    axs['b)'].scatter(MaxP[1],MaxE[1],s=40)
    axs['c)'].scatter(MaxP[2],MaxE[2],s=40)
    axs['d)'].scatter(MaxP[3],MaxE[3],s=40)
    plt.savefig("ROCsALL.pdf")
    plt.show()
    fig, axs = plt.subplot_mosaic([['a)','b)']], constrained_layout=True, figsize=(8,4))
    
    
    xlim = 0.01
    points_2d = list(zip(PionSupressionS,ElectronEfficencyS ))
    alpha_shape = alphashape.alphashape(points_2d, 2.0)
    axs['a)'].scatter( PionSupressionS[:100] ,ElectronEfficencyS[:100] , alpha=0.2)
    axs['a)'].add_patch(PolygonPatch(alpha_shape, alpha=0.3))
    
    axs['a)'].set_xlabel("Hadron Purity")
    axs['a)'].set_ylabel("Electron Efficency")
    axs['a)'].grid()
    
    Y = np.load("YTest.npy") 
    proba = np.load("y_predict_rfTest.npy") 
    PionSupressionT, ElectronEfficencyT, S = roc_curve(Y, proba)
    axs['b)'].plot(PionSupressionT ,ElectronEfficencyT, color='red',linewidth=2.0)
    QualityList = [Quality(e,p) for e,p in zip( ElectronEfficencyT,PionSupressionT)]
    MaxIndx = QualityList.index(max(QualityList))
    axs['b)'].scatter([PionSupressionT[MaxIndx]],[ElectronEfficencyT[MaxIndx]] ,label = "max. Significance in singel Classifier" , color='red')
    print([PionSupressionT[MaxIndx]],[ElectronEfficencyT[MaxIndx]])
   
    x = np.linspace(0, xlim, 100)
    y = np.linspace(0, 1, 100)
    z = np.array([Quality(e,p) for e in y for p in x])
    Z = z.reshape(100, 100)
    X1, Y1 = np.meshgrid(x, y)
    c = axs['b)'].pcolormesh(X1, Y1, Z,alpha=0.5)
    cbar = fig.colorbar(c, ax=axs['b)'])
    cbar.set_label('Significance', rotation=270, labelpad=15)
    
 
    sort = sorted(  PolygonPatch(alpha_shape, alpha=0.3).get_path().vertices, key=itemgetter(0))
    filtered = list(filter(lambda c : (c[0]-1)**2+c[1]**2 >1,  sort))
    PionSupressionS  = [filtered[i][0] for i in range(len(filtered))]
    ElectronEfficencyS  = [filtered[i][1] for i in range(len(filtered))]
    axs['b)'].plot(PionSupressionS, ElectronEfficencyS)
    QualityList = [Quality(e,p) for e,p in zip(ElectronEfficencyS, PionSupressionS)]
    MaxIndx = QualityList.index(max(QualityList))
    axs['b)'].scatter([PionSupressionS[MaxIndx]],[ElectronEfficencyS[MaxIndx]] ,label = "max. Significance in AddROC " , color='blue' )
    print([PionSupressionS[MaxIndx]],[ElectronEfficencyS[MaxIndx]])
    axs['b)'].set_ylim([0,1])
    axs['b)'].set_xlim([0,xlim])
    axs['b)'].legend(loc='lower right')
    axs['b)'].set_xlabel("Hadron Purity")
    plt.savefig("AlphaALL.pdf")
    plt.show()
    return alpha_shape
##########################Main######################
Quality = lambda e,p: e**2/np.sqrt((24*p+1*e)**2) if (e!=0 or p!=0) else 0
Electron =  pd.DataFrame( np.load("Electron.npy").T, columns=list(np.load("PapaBIB3.npy")))  
Hadron =   pd.DataFrame(  np.load("Hadron.npy").T, columns=list(np.load("PapaBIB3.npy"))) 
Pion =  pd.DataFrame( np.load("Pion.npy").T , columns=list(np.load("PapaBIB3.npy")))   
TrainVar = ['P','phi','theta',  'fAaxis','fBaxis','fPhi','RadialPosition','RadialAngle','fChi2NDF','NoRichHit','distance', 'NoTrdHit', 'Likelihood','DeltaBetaEL','fCharge','fChi2']
TV1 = ['P','phi','theta',  'fAaxis','fBaxis','fPhi','RadialPosition','RadialAngle','fChi2NDF','NoRichHit','distance', 'NoTrdHit', 'Likelihood','DeltaBetaEL','fCharge','fChi2']
TV2 = ['P','phi','theta',  'fAaxis','fBaxis','fPhi','RadialPosition','RadialAngle','fChi2NDF','NoRichHit','distance', 'NoTrdHit', 'Likelihood','fCharge','fChi2']
TV3 = ['P','phi','theta', 'NoTrdHit', 'Likelihood','DeltaBetaEL','fCharge','fChi2']
TV4 =  ['P','phi','theta', 'NoTrdHit', 'Likelihood','fCharge','fChi2']
TV5 =   ['P','phi','theta',  'fAaxis','fBaxis','fPhi','RadialPosition','RadialAngle','fChi2NDF','NoRichHit','distance','fCharge','fChi2']
X_trainL,  y_trainL, X_testL, y_testL = [],[],[],[]
for TR in [TV1,TV2, TV3, TV4, TV5 ]:
    print(TR)
    Like = PID( test_size = 0.5, electrons=10000,  hadrons=10000,train_var = TR, cut = [['NoTrdHit',1,99],['NoRichHit',6,99],['beta',0.01,99999]])
    X_trainL.append( Like.X_train)
    y_trainL.append( Like.y_train)
    X_testL.append( Like.X_test)
    y_testL.append(Like.y_test)
for i,Ne  in enumerate([2000,2000,2000,2000,2000]):
    Run_RandomForest(i,Ne)
    TestModel(i)
# ALL = PID( test_size = 0.5, electrons=50000,  hadrons=50000,train_var = TrainVar , cut = None)
# Run_RF_compair()
# TestModelcompair()
alpha_shape = PlotROCall()
###################################################################