Skip to content
Snippets Groups Projects
Select Git revision
  • master
  • cbm_master
  • updated_ecp5_serdes
  • GbeLargeSctrl
  • dirich5s1_new_reset_cdr_fix
  • blackcat
  • deepsea
  • cbm_rich
  • trb5_GbE
  • retransmission
  • syncTRB
  • fixedIP
  • update
  • newGBE
  • oldGBE
15 results

signal_sync.vhd

Blame
  • TRDclf.py 13.84 KiB
    from sklearn.metrics import classification_report, confusion_matrix, roc_curve, auc
    from sklearn.datasets import make_hastie_10_2
    from sklearn.ensemble import GradientBoostingClassifier
    from keras.models import Sequential
    from keras.layers import Dense
    from keras.layers import Dropout
    from keras.layers import Flatten
    from keras.models import Sequential
    from tensorflow import keras
    from keras.wrappers.scikit_learn import KerasClassifier
    from tensorflow.keras.optimizers import SGD
    from sklearn.model_selection import cross_val_score
    from sklearn.preprocessing import LabelEncoder
    from sklearn.model_selection import StratifiedKFold
    from sklearn.preprocessing import StandardScaler
    from sklearn.pipeline import Pipeline
    from keras.layers.convolutional import Conv2D
    from keras.layers.convolutional import MaxPooling2D
    from keras.utils import np_utils
    from sklearn.metrics import confusion_matrix
    from sklearn.model_selection import train_test_split
    import ROOT
    import matplotlib.pyplot as plt
    import numpy as np
    import random
    from keras.callbacks import Callback
    import math
    from sklearn.preprocessing import StandardScaler
    import pandas as pd
    import seaborn as sn
    import os
    import joblib
    from keras.utils import np_utils
    from sklearn.datasets import load_iris
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.metrics import confusion_matrix, precision_recall_curve, roc_auc_score, roc_curve, accuracy_score
    import matplotlib.transforms as mtransforms
    from keras.utils.vis_utils import plot_model
    import matplotlib.image as mpimg
    from sklearn.metrics import roc_curve, roc_auc_score
    from PIDclass import *
    import pickle
    import xgboost as xgb
    from hipe4ml.model_handler import ModelHandler
    from matplotlib.ticker import NullFormatter, MaxNLocator
    from itertools import compress, product
    
    from matplotlib.colors import ListedColormap
    
    from sklearn.preprocessing import StandardScaler
    
    from sklearn.neural_network import MLPClassifier
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.svm import SVC
    from sklearn.gaussian_process import GaussianProcessClassifier
    from sklearn.gaussian_process.kernels import RBF
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
    from sklearn.naive_bayes import GaussianNB
    from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
    
    
    ######################################Load DATA CLASSES####################################
    
    # Reload = False
    # TrainVar = TrainVar = ['TRDe1','TRDe2','TRDe3','TRDe4'] #'NoTrdHit'
    # if Reload == False:
    #     Like = PID( test_size = 0.5, electrons=50000,  hadrons=50000,train_var = TrainVar, cut = [['NoTrdHit',4,99]])
    #     with open('PID.pkl', 'wb') as file:
    #         pickle.dump(Like, file)
    # if Reload == True:
    #     with open('PID.pkl', 'rb') as file:
    #         Like = pickle.load(file)
        
    
    # train_test_data = [pd.DataFrame(Like.X_train, columns=TrainVar)  ,Like.y_train, pd.DataFrame(Like.X_test, columns=TrainVar)  ,Like.y_test ]
    # np.save("Y",np.array(Like.y_test) ) 
    # y_predict_Likelihood = np.array(list(Like.X_testTotal))[:,list(np.load("PID_BIB.npy")).index('Likelihood')] #here the data is already squeezed between 0 and 1
    # np.save("y_predict_Likelihood" ,np.array(y_predict_Likelihood) ) 
    
        
    
    
    
    
    ###########################################################################################
    ###RUN Models#############
    
    
    def runANN():
        ann_model = Sequential([
            Dense(18, input_shape=(len(TrainVar),), activation="sigmoid"),
            Dense(9, activation="relu"),
            Dense(2, activation='softmax')
        ])
    
        ann_model.compile(SGD(lr = .05), "binary_crossentropy", metrics=["accuracy"])
        ann_model.summary()
        run_history = ann_model.fit(np.array(list(Like.X_train)),np_utils.to_categorical(Like.y_train), validation_data=(np.array(list(Like.X_test)), np_utils.to_categorical(Like.y_test)), epochs=30, batch_size=30)
        joblib.dump(ann_model, "./RichANN.joblib")
        y_predict_ANN = ann_model.predict(np.array(list(Like.X_test)))[:,1]
        np.save("y_predict_ANN",np.array(y_predict_ANN) ) 
        return ann_model, run_history 
    
    
    
    def Run_RandomForest():
        rf_model = RandomForestClassifier(n_estimators=500, verbose=True, n_jobs = -1)
        rf_model.fit(Like.X_train[:10000], Like.y_train[:10000])
        joblib.dump(rf_model, "./RichRFClassifier.joblib")
        y_predict_rf = rf_model.predict_proba(Like.X_test)[:,1]
        np.save("y_predict_rf",np.array(y_predict_rf) ) 
        return rf_model
    
    
    def Run_GraadientBoost():
        model_clf = xgb.XGBClassifier()
        model_hdl = ModelHandler(model_clf, TrainVar)
    
    
        hyper_pars_ranges = {'n_estimators': (200, 1000), 'max_depth': (
            2, 4), 'learning_rate': (0.01, 0.1)}
        model_hdl.optimize_params_optuna(train_test_data, hyper_pars_ranges, cross_val_scoring='roc_auc', timeout=120,
                                         n_jobs=-1, n_trials=100, direction='maximize')
    
        model_hdl.train_test_model(train_test_data)
        y_predict_gb = model_hdl.predict(train_test_data[2], False)
        joblib.dump(model_hdl.model, "./RichGradientBoostingClassifier.joblib")
        np.save("y_predict_gb",np.array(y_predict_gb) ) 
        return model_hdl.model
    
       
    def Run_Nearest():
        clf = KNeighborsClassifier(20)
        clf.fit(Like.X_train, Like.y_train)
        y_predict = clf.predict_proba(Like.X_test)[:,1]
        np.save("y_predict_nn",np.array(y_predict)) 
    
    
    def Run_SVC():
        clf = SVC(gamma=2, C=1, verbose=True)
        clf.fit(Like.X_train, Like.y_train)
        y_predict = clf.predict_proba(Like.X_test)[:,1]
        np.save("y_predict_svc",np.array(y_predict)) 
        
    
    def Run_MLP():
        clf = MLPClassifier(alpha=1, max_iter=1000, verbose=True)
        clf.fit(Like.X_train, Like.y_train)
        y_predict = clf.predict_proba(Like.X_test)[:,1]
        np.save("y_predict_mlp",np.array(y_predict)) 
        
    def Run_GNB():
        clf = GaussianNB()
        clf.fit(Like.X_train, Like.y_train)
        y_predict = clf.predict_proba(Like.X_test)[:,1]
        np.save("y_predict_gnb",np.array(y_predict)) 
    
    def Run_QDA():
        clf = QuadraticDiscriminantAnalysis()
        clf.fit(Like.X_train, Like.y_train)
        y_predict = clf.predict_proba(Like.X_test)[:,1]
        np.save("y_predict_qda",np.array(y_predict)) 
    
    # GaussianProcessClassifier(1.0 * RBF(1.0)),
    
    # MLPClassifier(alpha=1, max_iter=1000),
    # AdaBoostClassifier(),
    # GaussianNB(),
    # QuadraticDiscriminantAnalysis()
    
    
    
    
    ######################################################################
    
    
    
    
       
    def PlotALL():
    
        fig, axs = plt.subplot_mosaic([['a)']], constrained_layout=True, figsize=(4,4))
       
        for proba, labelz in  zip([y_predict_ANN, y_predict_rf, y_predict_gb, -y_predict_RICHANN, y_predict_nn, y_predict_gnb, y_predict_mlp , y_predict_qda],['ANN','Random Forest','XGBoost','ANN in cbmroot','Nearest nabours', "GaussianNB", 'MLP', 'QDA']):
            PionSupression, ElectronEfficency, _ = roc_curve(Y, proba)
            axs['a)'].plot(PionSupression,ElectronEfficency,label = labelz, lw = 2)
           
            
        axs['a)'].set_xlabel("Pion Impurity")
        axs['a)'].set_ylabel("Electron Efficiency")
        axs['a)'].set_ylim([0,1])
        axs['a)'].set_xlim([0,1])
        axs['a)'].grid()
        axs['a)'].legend(loc='lower right')
        plt.savefig("ROCsRichAll.pdf")
        plt.show()
    
    
    
       
    def PlotElectronEffvsPionSupQuality():
        xlim = 0.05
        fig, axs = plt.subplot_mosaic([['a)','b)']], constrained_layout=True, figsize=(9,4))
    
      
       
        for proba, labelz in  zip([ y_predict_rf, y_predict_gb, y_predict_Likelihood],['Random Forest','XGBoost','Likelihood']):
            PionSupression, ElectronEfficency, _ = roc_curve(Y, proba)
            axs['a)'].plot(PionSupression,ElectronEfficency,label = labelz)
            axs['b)'].plot(PionSupression,ElectronEfficency)
            QualityList = [Quality(e,p) for e,p in zip(ElectronEfficency,PionSupression)]
            MaxIndx = QualityList.index(max(QualityList))
            axs['b)'].scatter([PionSupression[MaxIndx]],[ElectronEfficency[MaxIndx]] ,label = "max. Significance in " + labelz)
            print('the Best cut vaule is s=',MaxIndx*0.01)
            print('The Best Electron Efficiency is',ElectronEfficency[MaxIndx])
            print('With an Pion suppression of',PionSupression[MaxIndx])
       
       
        x = np.linspace(0, xlim, 100)
        y = np.linspace(0, 1, 100)
        z = np.array([Quality(e,p) for e in y for p in x])
        Z = z.reshape(100, 100)
        X1, Y1 = np.meshgrid(x, y)
        c = axs['b)'].pcolormesh(X1, Y1, Z,alpha=0.5)
        cbar = fig.colorbar(c, ax=axs['b)'])
        cbar.set_label('Significance', rotation=270 , labelpad=15)
        
        
        axs['a)'].set_xlabel("Hadron Impurity")
        axs['a)'].set_ylabel("Electron Efficiency")
        axs['a)'].set_ylim([0,1])
        axs['a)'].set_xlim([0,1])
        axs['a)'].grid()
        axs['a)'].legend(loc='lower right')
        axs['b)'].legend(loc='lower right')
        axs['b)'].set_xlabel("Hadron Impurity")
        axs['b)'].set_ylabel("Electron Efficiency")
        axs['b)'].set_ylim([0,1])
        axs['b)'].set_xlim([0,xlim])
        axs['b)'].grid()
        plt.savefig("ROCsTRD1.pdf")
        plt.show()
    
    
    
    
    def PlotReport():
        fig, axs = plt.subplot_mosaic([['a)']], constrained_layout=True, figsize=(5,3))
       
        for proba, labelz in  zip([y_predict_ANN, y_predict_rf, y_predict_gb, -y_predict_RICHANN],['ANN','Random Forest','GradientBoost','ANN implemented in cbmroot']):
            PionSupression, ElectronEfficency, _ = roc_curve(Y, proba)
            axs['a)'].plot(PionSupression,ElectronEfficency,label = labelz)
          
           
        axs['a)'].set_xlabel("Pion Impurity")
        axs['a)'].set_ylabel("Electron Efficiency")
        axs['a)'].set_ylim([0,1])
        axs['a)'].set_xlim([0,1])
        axs['a)'].grid()
        axs['a)'].legend(loc='lower right')
       
        plt.savefig("ROCReport.pdf")
        plt.show()
    
    
    
    def PlotANNoutputDesitys():
        fig, axs = plt.subplot_mosaic([['a)','b)']], constrained_layout=True, figsize=(9,4))
    
    
        for predict,label in  zip([y_predict_Likelihood, y_predict_rf, y_predict_gb ],['Likelihood','Random Forest','XGBoost']):
            pions = [predict[i] for i in range(len(Y)) if Y[i]==0]
            electrons =  [predict[i] for i in range(len(Y)) if Y[i]==1]
            axs['b)'].hist(pions, 80, density=True, histtype='step', facecolor='g',
                       alpha=0.75, label = label,linewidth=2)
            axs['a)'].hist(electrons, 80, density=True, histtype='step', facecolor='g',
                       alpha=0.75, label= label, linewidth=2)
        axs['b)'].set_title("Pions")
        axs['a)'].set_title("Electrons")
        axs['a)'].set_xlabel("Electron Probability")
        axs['b)'].set_xlabel("Electron Probability")
        axs['a)'].set_ylabel("Denesty")
        axs['a)'].grid()
        axs['b)'].grid()
        plt.legend()
        plt.savefig("DensTRD.pdf")
        plt.show()
    
    
    
    def PlotEtinne126():   
        p = train_test_data[2]['P']
        y = y_predict_gb
        tf = train_test_data[3].astype(np.bool)
        fig, axs = plt.subplot_mosaic([['a)', 'b)' ,'c)']], constrained_layout=True, figsize=(9,3.3))
        nullfmt = NullFormatter()
        axs['c)'].yaxis.set_major_formatter(nullfmt)
        axs['b)'].yaxis.set_major_formatter(nullfmt)
        axs['a)'].locator_params(axis='x', nbins=4)
        axs['a)'].set_ylim([0,1])
        H, xedges, yedges = np.histogram2d(list(compress(p, tf)), list(compress(y, tf)), normed=1 ,range = [[0, 10], [0, 1]], bins=50)
        H = H.T
        H = H / H.max(axis=0)
        axs['a)'].pcolormesh(xedges,yedges,H)
        axs['a)'].set_xlabel('Momentum in GeV/c')
        axs['a)'].set_ylabel("XGBoost Output")
        axs['a)'].set_title("Electrons")
        axs['b)'].set_ylim([0,1])
        H, xedges, yedges = np.histogram2d(list(compress(p, ~tf)),  list(compress(y, ~tf)) , normed=1 ,range = [[0, 10], [0, 1]], bins=50)
        H = H.T
        H = H / H.max(axis=0)
        axs['b)'].pcolormesh(xedges,yedges,H)
        axs['b)'].set_xlabel('Momentum in GeV/c')
        axs['b)'].set_title("Pions")
        
        axs['c)'].hist(list(compress(y, tf)), 80,range=[0, 1],density=True, histtype='step', facecolor='g',
                   alpha=0.75, label = "Electrons", orientation='horizontal',linewidth=2)
        axs['c)'].hist( list(compress(y, ~tf)) , 80,  range=[0, 1],density=True, histtype='step', facecolor='g',
                   alpha=0.75, label = "Pions", orientation='horizontal',linewidth=2)
        axs['c)'].set_xlabel('Density')
        
        axs['c)'].legend(loc='center right')
        axs['c)'].set_ylim([0,1])
        axs['c)'].set_xlim([0,30])
        plt.savefig("Etienne126RF.pdf")
        plt.show()
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    def Plot_History():
        fig, axs = plt.subplot_mosaic([['a)','b)']], constrained_layout=True, figsize=(9,4))
    
        for label, ax in axs.items():
            # label physical distance in and down:
            trans = mtransforms.ScaledTranslation(10/72, -5/72, fig.dpi_scale_trans)
            ax.text(0.0, 1.0, label, transform=ax.transAxes + trans,
                    fontsize=12, verticalalignment='top', fontfamily='serif',
                    bbox=dict(facecolor='0.7', edgecolor='none', pad=6.0))
            
        axs['a)'].plot(run_history.history["loss"],'r', marker='.', label="Train Loss")
        axs['a)'].plot(run_history.history["val_loss"],'b', marker='.', label="Validation Loss")
        axs['a)'].set_xlabel("Epochs")
        axs['a)'].set_ylabel('loss')
        axs['a)'].legend(loc='upper right')
        plot_model(ann_model, to_file='model_plot.png', show_shapes=True, show_layer_names=True)
        axs['b)'].imshow(mpimg.imread('model_plot.png'))
        axs['b)'].set_axis_off()
        plt.savefig("RichHist.pdf")
        plt.show()
        
    
    ##########################Main######################
    
    Quality = lambda e,p: e**2/np.sqrt((13.5*p+1*e)**2) if (e!=0 or p!=0) else 0
    
    # rf_model = Run_RandomForest()
    # clf = Run_GraadientBoost()
    # ann_model, run_history = runANN()
    # Run_Nearest()
    # Run_QDA()
    # Run_MLP()
    # Run_GNB()
    
    Y = np.load("Y.npy") 
    y_predict_gnb = np.load("y_predict_gnb.npy") 
    y_predict_mlp = np.load("y_predict_mlp.npy") 
    y_predict_qda = np.load("y_predict_qda.npy") 
    y_predict_nn = np.load("y_predict_nn.npy") 
    y_predict_rf = np.load("y_predict_rf.npy") 
    y_predict_gb = np.load("y_predict_gb.npy") 
    y_predict_ANN = np.load("y_predict_ANN.npy") 
    y_predict_Likelihood= np.load("y_predict_Likelihood.npy" ) 
    
    
    PlotElectronEffvsPionSupQuality()
    PlotANNoutputDesitys()
    PlotEtinne126()
    Plot_History()
    
    PlotReport()