Skip to content
Snippets Groups Projects
Select Git revision
  • main
  • h.schiller-main-patch-61561
2 results

Combined_Methods_2.0.py

Blame
  • Combined_Methods_2.0.py 9.63 KiB
    from sklearn.metrics import classification_report, confusion_matrix, roc_curve, auc
    from sklearn.model_selection import cross_val_score
    from sklearn.model_selection import train_test_split
    from scipy.spatial import ConvexHull, convex_hull_plot_2d
    from itertools import compress, product
    import ROOT
    from hipe4ml.model_handler import ModelHandler
    import matplotlib.pyplot as plt
    import numpy as np
    import pandas as pd
    import seaborn as sn
    import os
    import joblib
    from sklearn.ensemble import RandomForestClassifier
    import matplotlib.transforms as mtransforms
    from sklearn.metrics import roc_curve, roc_auc_score
    import random
    import alphashape
    from descartes import PolygonPatch
    from operator  import itemgetter
    from PIDclass import *
    import pickle
    import xgboost as xgb
    ####################
    #1 = 'TRD&TOF&RICH'
    #2 = ,'TRD&RICH',
    #3 = 'TRD&TOF', 
    #4 = 'TRD'
    #5 =  'RICH'
    
    
    
    
    def Run_RF_compair():
        rf_model = RandomForestClassifier(n_estimators=500, verbose=True, n_jobs = -1)
        rf_model.fit( ALL.X_train, ALL.y_train)
        joblib.dump(rf_model, "./AllRFClassifierTest.joblib")
        return rf_model
    
    
    
    def Run_xg_reg():
        model_clf = xgb.XGBClassifier()
        model_hdl = ModelHandler(model_clf, TrainVar)
        hyper_pars_ranges = {'n_estimators': (100, 1000), 'max_depth': (
            2, 10), 'learning_rate': (0.01, 0.1)}
        model_hdl.optimize_params_optuna(train_test_data, hyper_pars_ranges, cross_val_scoring='roc_auc', timeout=120,
                                         n_jobs=-1, n_trials=100, direction='maximize')
    
        xg_reg = model_hdl.model
        xg_reg.fit(ALL.X_train, ALL.y_train)
        joblib.dump(xg_reg, "./AllXGBRegressor.joblib")
        return xg_reg
    
    
    def Test_Single_xg(): 
        xg_reg = joblib.load("./AllXGBRegressor.joblib")
        y_predict_gb = xg_reg.predict_proba(ALL.X_test)
    
        np.save("YTest" ,np.array(ALL.y_test) ) 
        np.save("y_predict_xgTest"  ,np.array(y_predict_gb) ) 
    
    def Test_Single_RF(): 
        rf_model = joblib.load("./AllRFClassifierTest.joblib")
        y_predict_rf = rf_model.predict_proba(ALL.X_test)[:,1]
        np.save("YTest" ,np.array(ALL.y_test) ) 
        np.save("y_predict_rfTest"  ,np.array(y_predict_rf) ) 
    
    
    
    def Run_RandomForest(i,Ne):
        rf_model = RandomForestClassifier(n_estimators=Ne, verbose=True, n_jobs = -1)
        rf_model.fit(X_trainL[i], y_trainL[i])
        joblib.dump(rf_model, "./AllRFClassifier"+str(i)+".joblib")
        return rf_model
    
    
    
    def TestModel(i): 
        rf_model = joblib.load("./AllRFClassifier"+str(i)+".joblib")
        y_predict_rf = rf_model.predict_proba(X_testL[i])[:,1]
        np.save("Ytrue"+str(i) ,np.array( y_testL[i]) ) 
        np.save("y_predict_rftrue" +str(i) ,np.array(y_predict_rf) ) 
    
    
    ########################################################################################
    
       
    
    def PlotROCall():   
        fig, axs = plt.subplot_mosaic([['a)','b)'],['c)','d)']], constrained_layout=True, figsize=(9,5))
        for j, labelz, letter, pion, ele in  list(zip([0, 1, 2, 3],['TRD&TOF&RICH','TRD&RICH','TRD&TOF','TRD'],['a)','b)','c)' , 'd)'],[0.06,0.01,0.71,0.22],[0.64, 0.09, 0.15, 0.12])):
            Y = np.load("Ytrue"+str(j) +".npy")
            proba = np.load("y_predict_rftrue"+str(j)+".npy") 
            YC = np.load("YtrueC"+str(i) +".npy")
            probaC = np.load("y_predict_rftrueC"+str(i)+".npy") 
            PionSupressionC, ElectronEfficencyC, SC = roc_curve(Y, proba)
            axs[letter].plot(pion*PionSupressionC,ele*ElectronEfficencyC,linewidth=2.0, linestyle = 'dashed', c = 'green')
            PionSupression, ElectronEfficency, S = roc_curve(Y, proba)
          
            if j == 0: 
                P_Alt = pion*PionSupression
                E_Alt  = ele*ElectronEfficency
                S_Alt = [[s] for s in S]
                e_Alt = [[e] for e in ele*ElectronEfficency]
                p_Alt = [[p] for p in pion*PionSupression]
                
            else: 
                PionSupressionS = [a+b for a in  P_Alt for b in pion*PionSupression] 
                ElectronEfficencyS =  [a+b for a in  E_Alt for b in ele*ElectronEfficency]  
                S_List = [s_alt+ [s] for s_alt in S_Alt  for s in  S ]
                E_List = [e_alt+ [b] for e_alt in e_Alt  for b in   ele*ElectronEfficency]
                P_List = [p_alt+ [b] for p_alt in p_Alt  for b in  pion*PionSupression ]
                points = np.column_stack((PionSupressionS,ElectronEfficencyS) )
                hull = ConvexHull(points)
                P_Alt = [ PionSupressionS[i] for i in hull.vertices ]
                E_Alt =  [ ElectronEfficencyS[i] for i in hull.vertices ]
                S_Alt = [S_List[i] for i in hull.vertices]
                e_Alt = [E_List[i] for i in hull.vertices]
                p_Alt = [P_List[i] for i in hull.vertices]
                
            axs[letter].plot(pion*PionSupression,ele*ElectronEfficency,linewidth=2.0)
            axs[letter].set_xlabel("Hadron Supression")
            axs[letter].set_title(labelz)
            axs[letter].grid()
            axs[letter].set_ylim([0,ele])
            axs[letter].set_xlim([0,pion])
    
        Filter = [p <= e and p != 0 for p,e in zip(P_Alt ,E_Alt)]
        P_Alt = list(compress(P_Alt, Filter)) + [0]
        E_Alt = list(compress(E_Alt, Filter)) + [0]
        S_Alt = list(compress(S_Alt, Filter)) + [[1, 1, 1, 1]]
        QualityList = [Quality(e,p) for e,p in zip( E_Alt,P_Alt)]
        MaxIndx = QualityList.index(max(QualityList))
        E = e_Alt[MaxIndx]  
        P = p_Alt[MaxIndx]  
       
        axs['a)'].set_ylabel("Electron Efficency")
        axs['c)'].set_ylabel("Electron Efficency")
        axs['a)'].scatter(P[0], E[0] ,s=40, c = 'r')
        axs['b)'].scatter(P[1],E[1]  ,s=40, c = 'r')
        axs['c)'].scatter(P[2],E[2]  ,s=40, c = 'r')
        axs['d)'].scatter(P[3], E[3] ,s=40, c = 'r')
        axs['a)'].locator_params(nbins=3)
        axs['b)'].locator_params(nbins=4)
        axs['c)'].locator_params(nbins=3)
        axs['d)'].locator_params(nbins=3)
        plt.savefig("ROCsALL.pdf")
        plt.show()
        with open("SPE_Alt.pkl", 'wb') as f:
            pickle.dump([S_Alt,P_Alt,E_Alt ], f)
       
            
            
    def PlotROC():
        with open("SPE_Alt.pkl", 'rb') as f:
            S_Alt,P_Alt,E_Alt  = pickle.load(f)
    
        fig, axs = plt.subplot_mosaic([['a)','b)']], constrained_layout=True, figsize=(8,4))
        xlim = 0.004
        axs['a)'].set_xlabel("Hadron Purity")
        axs['a)'].set_ylabel("Electron Efficency")
        axs['a)'].grid()
        
        Y = np.load("YTest.npy") 
        proba = np.load("y_predict_rfTest.npy") 
        probaXG = np.load("y_predict_xgTest.npy")[:,1]
        PionSupressionT, ElectronEfficencyT, S = roc_curve(Y, proba)
        PionSupressionXG, ElectronEfficencyXG, S_XG = roc_curve(Y, probaXG)
        axs['a)'].plot(PionSupressionXG ,ElectronEfficencyXG, color='green',linewidth=2.0, label = "single XG Boost")
        axs['b)'].plot(PionSupressionXG ,ElectronEfficencyXG, color='green',linewidth=2.0, label = "single XG Boost")
        axs['a)'].plot(PionSupressionT ,ElectronEfficencyT, color='blue',linewidth=2.0, label = "single clf")
        axs['a)'].plot(P_Alt ,  E_Alt , color='red',linewidth=2.0, label = "Add Roc")
        axs['b)'].plot(P_Alt ,  E_Alt , color='red',linewidth=2.0)
        axs['b)'].plot(PionSupressionT ,ElectronEfficencyT, color='blue',linewidth=2.0)
        QualityList = [Quality(e,p) for e,p in zip( ElectronEfficencyT,PionSupressionT)]
        MaxIndx = QualityList.index(max(QualityList))
        axs['b)'].scatter([PionSupressionT[MaxIndx]],[ElectronEfficencyT[MaxIndx]] ,label = "max. Significance in singel Classifier" , color='red')
        axs['a)'].legend(loc='lower right')
       
        x = np.linspace(0, xlim, 100)
        y = np.linspace(0, 1, 100)
        z = np.array([Quality(e,p) for e in y for p in x])
        Z = z.reshape(100, 100)
        X1, Y1 = np.meshgrid(x, y)
        c = axs['b)'].pcolormesh(X1, Y1, Z,alpha=0.5)
        cbar = fig.colorbar(c, ax=axs['b)'])
        cbar.set_label('Significance', rotation=270, labelpad=15)
        
     
    
        axs['b)'].set_ylim([0,1])
        axs['b)'].set_xlim([0,xlim])
        axs['b)'].legend(loc='lower right')
        axs['b)'].set_xlabel("Hadron Purity")
        plt.savefig("AlphaALL.pdf")
        plt.show()
    
    
    
    
    
    
    
    ##########################Main######################
    
    Quality = lambda e,p: e**2/np.sqrt((24*p+1*e)**2) if (e!=0 or p!=0) else 0
    
    
    TrainVar = ['P','phi','theta',  'fAaxis','fBaxis','fPhi','RadialPosition','RadialAngle','fChi2NDF','NoRichHit','distance', 'NoTrdHit', 'Likelihood','DeltaBetaEL','fCharge','fChi2']
    TV1 = ['P','phi','theta',  'fAaxis','fBaxis','fPhi','RadialPosition','RadialAngle','fChi2NDF','NoRichHit','distance', 'NoTrdHit', 'Likelihood','DeltaBetaEL','fCharge','fChi2']
    TV2 = ['P','phi','theta',  'fAaxis','fBaxis','fPhi','RadialPosition','RadialAngle','fChi2NDF','NoRichHit','distance', 'NoTrdHit', 'Likelihood','fCharge','fChi2']
    TV3 = ['P','phi','theta', 'NoTrdHit', 'Likelihood','DeltaBetaEL','fCharge','fChi2']
    TV4 =  ['P','phi','theta', 'NoTrdHit', 'Likelihood','fCharge','fChi2']
    
    X_trainL,  y_trainL, X_testL, y_testL = [],[],[],[]
    for TR,cut in zip([TV1,TV2, TV3, TV4 ],[[['NoTrdHit',1,99],['NoRichHit',6,99],['beta',0.01,99999]], [['NoTrdHit',1,99],['NoRichHit',6,99],['beta',0,0]], [['NoTrdHit',1,99],['NoRichHit',0,5],['beta',0.01,999]] , [['NoTrdHit',1,99],['NoRichHit',0,5],['beta',0,0]]]):
        Like = PID( test_size = 0.5, electrons=10000,  hadrons=10000,train_var = TR, cut = cut)
        X_trainL.append( Like.X_train)
        y_trainL.append( Like.y_train)
        X_testL.append( Like.X_test)
        y_testL.append(Like.y_test)
    
    
    for i,Ne  in enumerate([1000,1000,1000,1000]):
        Run_RandomForest(i,Ne)
        TestModel(i)
    
    #path = Path('~/Dokumente/jun22_Test/').expanduser()
    ALL = PID(test_size = 0.5, electrons=50000,  hadrons=50000,train_var = TrainVar , cut = None)
    train_test_data =  [pd.DataFrame(ALL.X_trainTotal, columns=list(np.load("PapaBIB3.npy"))), ALL.y_train, pd.DataFrame(ALL.X_testTotal, columns=list(np.load("PapaBIB3.npy"))),ALL.y_test ]
    
    Run_RF_compair()
    Test_Single_RF()
    Run_xg_reg()
    Test_Single_xg()
    PlotROCall()
    PlotROC()
    
    
    
    
    ###################################################################