Skip to content
Snippets Groups Projects
Select Git revision
  • main
  • h.schiller-main-patch-61561
2 results

CombinedCompair.py

Blame
  • CombinedCompair.py 7.73 KiB
    from sklearn.metrics import classification_report, confusion_matrix, roc_curve, auc
    from sklearn.model_selection import cross_val_score
    from sklearn.model_selection import train_test_split
    import ROOT
    import matplotlib.pyplot as plt
    import numpy as np
    import pandas as pd
    import seaborn as sn
    import os
    import joblib
    from sklearn.ensemble import RandomForestClassifier
    import matplotlib.transforms as mtransforms
    from sklearn.metrics import roc_curve, roc_auc_score
    import random
    import alphashape
    from descartes import PolygonPatch
    from operator  import itemgetter
    from PIDclass import *
    
    ####################
    #1 = 'TRD&TOF&RICH'
    #2 = ,'TRD&RICH',
    #3 = 'TRD&TOF', 
    #4 = 'TRD'
    #5 =  'RICH'
    
    
    
    def Run_RF_compair():
        rf_model = RandomForestClassifier(n_estimators=2000, verbose=True, n_jobs = -1)
        rf_model.fit( ALL.X_train, ALL.y_train)
        joblib.dump(rf_model, "./AllRFClassifierTest.joblib")
        return rf_model
    
    
    
    def TestModelcompair(): 
        rf_model = joblib.load("./AllRFClassifierTest.joblib")
        y_predict_rf = rf_model.predict_proba(ALL.X_test)[:,1]
        np.save("YTest" ,np.array(ALL.y_test) ) 
        np.save("y_predict_rfTest"  ,np.array(y_predict_rf) ) 
    
    
    
    
    def Run_RandomForest(i,Ne):
        rf_model = RandomForestClassifier(n_estimators=Ne, verbose=True, n_jobs = -1)
        rf_model.fit(X_trainL[i], y_trainL[i])
        joblib.dump(rf_model, "./AllRFClassifier"+str(i)+".joblib")
        return rf_model
    
    
    
    def TestModel(i): 
        rf_model = joblib.load("./AllRFClassifier"+str(i)+".joblib")
        y_predict_rf = rf_model.predict_proba(X_testL[i])[:,1]
        np.save("YtrueC"+str(i) ,np.array( y_testL[i]) ) 
        np.save("y_predict_rftrueC" +str(i) ,np.array(y_predict_rf) ) 
    
    
    ########################################################################################
    
       
    
    def PlotROCall():   
        fig, axs = plt.subplot_mosaic([['a)','b)','c)','d)']], constrained_layout=True, figsize=(14,4))
    
    
        for label, ax in axs.items():
            trans = mtransforms.ScaledTranslation(10/72, -5/72, fig.dpi_scale_trans)
            ax.text(0.84, 1.0, label, transform=ax.transAxes + trans,
                    fontsize=12, verticalalignment='top', fontfamily='serif',
                    bbox=dict(facecolor='0.7', edgecolor='none', pad=6.0))
    
        l = [0, 0, 0, 0]
    
        for i, labelz, letter, pion, ele in  list(zip([0, 1, 2, 3],['TRD&TOF&RICH','TRD&RICH','TRD&TOF','TRD'],['a)','b)','c)' , 'd)'],[0.06,0.01,0.71,0.22],[0.64, 0.09, 0.15, 0.12])):
            Y = np.load("YtrueC"+str(i) +".npy")
            proba = np.load("y_predict_rftrueC"+str(i)+".npy") 
            PionSupression, ElectronEfficency, S = roc_curve(Y, proba)
            axs[letter].plot(pion*PionSupression,ele*ElectronEfficency,linewidth=2.0)
            l[i] = list(zip(pion*PionSupression,ele*ElectronEfficency,S))
            axs[letter].set_xlabel("Pion Supression")
            axs[letter].set_title(labelz)
            axs[letter].grid()
            axs[letter].set_ylim([0,ele])
            axs[letter].set_xlim([0,pion])
    
        MaxQ = 0
        PionSupressionS, ElectronEfficencyS = [],[]
        for k in range(100000):
            Pl = np.array([ np.array(list(random.choice(ROC))) for ROC in l])
            E = sum(Pl[:,1])
            P = sum(Pl[:,0])
            if Quality(E,P) >MaxQ:
                MaxQ, MaxE, MaxP, S = Quality(E,P) ,Pl[:,1],Pl[:,0],Pl[:,2]
            ElectronEfficencyS.append(E)
            PionSupressionS.append(P)
        print(S)
        axs['a)'].set_ylabel("Electron Efficency")
        axs['a)'].scatter(MaxP[0],MaxE[0],s=40)
        axs['b)'].scatter(MaxP[1],MaxE[1],s=40)
        axs['c)'].scatter(MaxP[2],MaxE[2],s=40)
        axs['d)'].scatter(MaxP[3],MaxE[3],s=40)
        plt.savefig("ROCsALL.pdf")
        plt.show()
    
    
        fig, axs = plt.subplot_mosaic([['a)','b)']], constrained_layout=True, figsize=(8,4))
        
        
        xlim = 0.01
        points_2d = list(zip(PionSupressionS,ElectronEfficencyS ))
        alpha_shape = alphashape.alphashape(points_2d, 2.0)
        axs['a)'].scatter( PionSupressionS[:100] ,ElectronEfficencyS[:100] , alpha=0.2)
        axs['a)'].add_patch(PolygonPatch(alpha_shape, alpha=0.3))
        
        axs['a)'].set_xlabel("Hadron Purity")
        axs['a)'].set_ylabel("Electron Efficency")
        axs['a)'].grid()
        
        Y = np.load("YTest.npy") 
        proba = np.load("y_predict_rfTest.npy") 
        PionSupressionT, ElectronEfficencyT, S = roc_curve(Y, proba)
        axs['b)'].plot(PionSupressionT ,ElectronEfficencyT, color='red',linewidth=2.0)
        QualityList = [Quality(e,p) for e,p in zip( ElectronEfficencyT,PionSupressionT)]
        MaxIndx = QualityList.index(max(QualityList))
        axs['b)'].scatter([PionSupressionT[MaxIndx]],[ElectronEfficencyT[MaxIndx]] ,label = "max. Significance in singel Classifier" , color='red')
        print([PionSupressionT[MaxIndx]],[ElectronEfficencyT[MaxIndx]])
       
        x = np.linspace(0, xlim, 100)
        y = np.linspace(0, 1, 100)
        z = np.array([Quality(e,p) for e in y for p in x])
        Z = z.reshape(100, 100)
        X1, Y1 = np.meshgrid(x, y)
        c = axs['b)'].pcolormesh(X1, Y1, Z,alpha=0.5)
        cbar = fig.colorbar(c, ax=axs['b)'])
        cbar.set_label('Significance', rotation=270, labelpad=15)
        
     
        sort = sorted(  PolygonPatch(alpha_shape, alpha=0.3).get_path().vertices, key=itemgetter(0))
        filtered = list(filter(lambda c : (c[0]-1)**2+c[1]**2 >1,  sort))
        PionSupressionS  = [filtered[i][0] for i in range(len(filtered))]
        ElectronEfficencyS  = [filtered[i][1] for i in range(len(filtered))]
        axs['b)'].plot(PionSupressionS, ElectronEfficencyS)
        QualityList = [Quality(e,p) for e,p in zip(ElectronEfficencyS, PionSupressionS)]
        MaxIndx = QualityList.index(max(QualityList))
        axs['b)'].scatter([PionSupressionS[MaxIndx]],[ElectronEfficencyS[MaxIndx]] ,label = "max. Significance in AddROC " , color='blue' )
        print([PionSupressionS[MaxIndx]],[ElectronEfficencyS[MaxIndx]])
        axs['b)'].set_ylim([0,1])
        axs['b)'].set_xlim([0,xlim])
        axs['b)'].legend(loc='lower right')
        axs['b)'].set_xlabel("Hadron Purity")
        plt.savefig("AlphaALL.pdf")
        plt.show()
        return alpha_shape
    
    
    
    
    
    
    ##########################Main######################
    
    Quality = lambda e,p: e**2/np.sqrt((24*p+1*e)**2) if (e!=0 or p!=0) else 0
    
    
    
    
    Electron =  pd.DataFrame( np.load("Electron.npy").T, columns=list(np.load("PapaBIB3.npy")))  
    Hadron =   pd.DataFrame(  np.load("Hadron.npy").T, columns=list(np.load("PapaBIB3.npy"))) 
    Pion =  pd.DataFrame( np.load("Pion.npy").T , columns=list(np.load("PapaBIB3.npy")))   
    
    
    
    TrainVar = ['P','phi','theta',  'fAaxis','fBaxis','fPhi','RadialPosition','RadialAngle','fChi2NDF','NoRichHit','distance', 'NoTrdHit', 'Likelihood','DeltaBetaEL','fCharge','fChi2']
    
    TV1 = ['P','phi','theta',  'fAaxis','fBaxis','fPhi','RadialPosition','RadialAngle','fChi2NDF','NoRichHit','distance', 'NoTrdHit', 'Likelihood','DeltaBetaEL','fCharge','fChi2']
    TV2 = ['P','phi','theta',  'fAaxis','fBaxis','fPhi','RadialPosition','RadialAngle','fChi2NDF','NoRichHit','distance', 'NoTrdHit', 'Likelihood','fCharge','fChi2']
    TV3 = ['P','phi','theta', 'NoTrdHit', 'Likelihood','DeltaBetaEL','fCharge','fChi2']
    TV4 =  ['P','phi','theta', 'NoTrdHit', 'Likelihood','fCharge','fChi2']
    TV5 =   ['P','phi','theta',  'fAaxis','fBaxis','fPhi','RadialPosition','RadialAngle','fChi2NDF','NoRichHit','distance','fCharge','fChi2']
    
    
    X_trainL,  y_trainL, X_testL, y_testL = [],[],[],[]
    for TR in [TV1,TV2, TV3, TV4, TV5 ]:
        print(TR)
        Like = PID( test_size = 0.5, electrons=10000,  hadrons=10000,train_var = TR, cut = [['NoTrdHit',1,99],['NoRichHit',6,99],['beta',0.01,99999]])
        X_trainL.append( Like.X_train)
        y_trainL.append( Like.y_train)
        X_testL.append( Like.X_test)
        y_testL.append(Like.y_test)
    
    
    
    for i,Ne  in enumerate([2000,2000,2000,2000,2000]):
        Run_RandomForest(i,Ne)
        TestModel(i)
    
    # ALL = PID( test_size = 0.5, electrons=50000,  hadrons=50000,train_var = TrainVar , cut = None)
    # Run_RF_compair()
    # TestModelcompair()
    alpha_shape = PlotROCall()
    
    
    
    
    
    ###################################################################