File indexing completed on 2026-04-05 08:09:07
0001
0002 import pandas
0003 from pandas.tools.plotting import scatter_matrix
0004 import matplotlib.pyplot as plt
0005 from sklearn import model_selection
0006 from sklearn.metrics import classification_report
0007 from sklearn.metrics import confusion_matrix
0008 from sklearn.metrics import accuracy_score
0009 from sklearn.linear_model import LogisticRegression, RidgeClassifier
0010 from sklearn.tree import DecisionTreeClassifier
0011 from sklearn.neighbors import KNeighborsClassifier, RadiusNeighborsClassifier
0012 from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
0013 from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
0014 from sklearn.neural_network import MLPClassifier
0015 from sklearn.naive_bayes import GaussianNB, MultinomialNB, BernoulliNB
0016 from sklearn.gaussian_process import GaussianProcessClassifier
0017 from sklearn.gaussian_process.kernels import RBF
0018 from sklearn.svm import SVC
0019 from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, ExtraTreesClassifier
0020 from sklearn.cluster import SpectralClustering
0021 from sklearn.mixture import GaussianMixture
0022 from sklearn.cluster import KMeans
0023 from sklearn.tree import DecisionTreeClassifier, ExtraTreeClassifier
0024 from sklearn.calibration import CalibratedClassifierCV
0025 from pandas.tools.plotting import andrews_curves
0026 from pandas.tools.plotting import parallel_coordinates
0027 from pandas.tools.plotting import radviz
0028 import seaborn as sns
0029 import numpy as np
0030
0031
0032
0033
# Input CSV consumed by the rest of the script (path is relative to the
# current working directory).
path = "./data/JetSummary_p250_e20_1000events_r05.csv"

# Column labels assigned to the CSV fields, in file order.  Because the
# labels are passed explicitly, pandas reads the file as having no header
# row, so the first line of the file is treated as data.
names = [
    'n_track',
    'charge_tot',
    'eta',
    'vertex',
    'class',
]
dataset = pandas.read_csv(path, names=names)
0038
0039
0040
0041
0042
0043
0044
0045
0046
0047
0048
0049
0050
0051
0052
0053
0054
0055
0056
0057
0058
0059
0060
0061
0062
0063
0064
0065
0066
0067
# Continue with the plain array form of the table.
array = dataset.values

# The first four columns are the model inputs; column index 4 (the fifth
# column, labelled 'class' when the CSV is read) is the target.
X = array[:, :4]
Y = array[:, 4]

# Fraction of rows held out for validation, and the seed that makes the
# random split repeatable.
# NOTE(review): 0.60 leaves only 40% of the rows for training -- confirm
# this ratio is intentional.
validation_size = 0.60
seed = 7
(X_train, X_validation,
 Y_train, Y_validation) = model_selection.train_test_split(
    X,
    Y,
    test_size=validation_size,
    random_state=seed,
)

# Debug aid: glue inputs and targets of each partition back together as
# one table.  The training table and its dtype are printed; the validation
# table is only built here, not printed.
ding = np.column_stack([X_train, Y_train])
print(ding)
print(ding.dtype)
dong = np.column_stack([X_validation, Y_validation])
0081
0082
0083
0084
0085
0086
0087
# Settings shared by the cross-validation comparison of the candidates.
seed = 7
scoring = 'accuracy'

# Candidate classifiers as (short label, unfitted estimator) pairs, in the
# order in which they are evaluated and reported.
#
# The AdaBoost entries deliberately do not spell out `base_estimator=None`,
# `algorithm='SAMME.R'` or `random_state=None`.  Those were the defaults in
# the scikit-learn releases this script was written for, so leaving them
# out changes nothing there, while newer releases no longer accept the
# `base_estimator` keyword or the 'SAMME.R' algorithm name.
models = [
    ('LR', LogisticRegression()),
    ('LDA', LinearDiscriminantAnalysis()),
    ('KNN', KNeighborsClassifier()),
    ('CART', DecisionTreeClassifier()),
    ('GNB', GaussianNB()),
    # Linear-kernel SVM with a small C (strong regularisation).
    ('SVMlin', SVC(kernel="linear", C=0.025)),
    ('RFC', RandomForestClassifier()),
    ('ADA', AdaBoostClassifier(n_estimators=50, learning_rate=1.0)),
    # Same booster with twice the estimators and half the learning rate.
    ('ADA3', AdaBoostClassifier(n_estimators=100, learning_rate=0.5)),
]
0107
0108
0109
0110
0111
0112
0113
0114
0115
0116
# Score every candidate with 10-fold cross-validation on the training split
# and print "<label>: <mean score> (<std of scores>)" for each one.
#
# `results` collects the per-fold score arrays and `names` the matching
# labels, both in the order of `models`.
# NOTE: `names` is rebound here; before this point it held the CSV column
# labels.
results = []
names = []

# The splitter is built once, outside the loop, because it does not depend
# on the model.  `random_state` is intentionally not passed: KFold only
# uses it when `shuffle=True`, and recent scikit-learn releases raise a
# ValueError if it is set while shuffling is off.  Without shuffling the
# folds are consecutive slices of the training rows, exactly as before.
kfold = model_selection.KFold(n_splits=10)

for name, model in models:
    cv_results = model_selection.cross_val_score(
        model, X_train, Y_train, cv=kfold, scoring=scoring)
    results.append(cv_results)
    names.append(name)
    msg = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std())
    print(msg)
0126
0127
0128
0129
0130
0131
0132
0133
0134
# Train an AdaBoost classifier with default settings on the training split
# (fit() hands back the fitted estimator), then predict the held-out rows.
ada = AdaBoostClassifier().fit(X_train, Y_train)
predictions = ada.predict(X_validation)

# Print, in this order: overall accuracy, the confusion matrix, and the
# per-class classification report for the validation predictions.
for report in (accuracy_score, confusion_matrix, classification_report):
    print(report(Y_validation, predictions))

# Display any open matplotlib figures.
# NOTE(review): no figure is created in the visible part of the script --
# confirm whether this call is still needed.
plt.show()