-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathGaussian_model.py
More file actions
114 lines (76 loc) · 2.77 KB
/
Gaussian_model.py
File metadata and controls
114 lines (76 loc) · 2.77 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
from sklearn import datasets
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.decomposition import PCA
from sympy.stats import covariance
import matplotlib.pyplot as plt
def TrainGaussian(X_train, y_train):
    """Estimate one Gaussian (mean vector and covariance) per class label.

    Args:
        X_train: Array-like of shape (n_samples, n_features).
        y_train: Array-like of n_samples labels, aligned with the rows of
            ``X_train``.

    Returns:
        A tuple ``(labels, mean, covariance)`` where ``labels`` is the sorted
        array of unique labels (``np.unique`` order) and ``mean`` /
        ``covariance`` are lists holding, for each label in that order, the
        per-feature mean and the ``np.cov`` estimate of that class's samples.
    """
    # Coerce to ndarrays so the boolean mask below is element-wise: with a
    # plain list, ``y_train == lb`` collapses to a single scalar ``False``.
    X_train = np.asarray(X_train)
    y_train = np.asarray(y_train)

    labels = np.unique(y_train)
    mean = []
    covariance = []
    for lb in labels:
        # Transpose so that rows are features, the layout np.cov expects.
        label_data = X_train[y_train == lb].T
        mean.append(np.mean(label_data, axis=1))
        covariance.append(np.cov(label_data))
    return labels, mean, covariance
def log_multivariate_pdf(x, mean, cov):
    """Return the log-density of a multivariate normal at each row of ``x``.

    Computes ``-0.5 * ((x - mean) @ inv(cov) @ (x - mean).T
    + d * log(2 * pi) + log(det(cov)))`` row by row.

    Args:
        x: Array-like of shape (n_samples, d); a single 1-D sample of length
            d is treated as one row.
        mean: Array-like of length d (broadcast against the rows of ``x``).
        cov: Array-like of shape (d, d); a scalar is treated as a 1x1 matrix.

    Returns:
        1-D array of length n_samples with the log-density of each row.
        Entries are NaN when ``det(cov)`` is not positive.

    Raises:
        numpy.linalg.LinAlgError: If ``cov`` is singular.
    """
    x = np.atleast_2d(np.asarray(x, dtype=float))
    cov = np.atleast_2d(np.asarray(cov, dtype=float))
    centered = x - mean
    d = x.shape[1]

    # Solve cov @ z = centered.T instead of forming the explicit inverse; the
    # quadratic form centered @ inv(cov) @ centered.T is the same either way.
    solved = np.linalg.solve(cov, centered.T).T
    mahalanobis = np.sum(solved * centered, axis=1)

    # slogdet yields log|det| directly, so the determinant never has to be
    # represented as a float (it overflows/underflows quickly as d grows).
    sign, log_abs_det = np.linalg.slogdet(cov)
    log_det = log_abs_det if sign > 0 else np.nan

    return -0.5 * (mahalanobis + d * np.log(2 * np.pi) + log_det)
def TestGaussian(X_test, mean, covariance, labels):
    """Score every test row under each class Gaussian and pick the best class.

    Args:
        X_test: 2-D array whose rows are the samples to classify.
        mean: Sequence of per-class mean vectors, indexed like ``labels``.
        covariance: Sequence of per-class covariance matrices, indexed like
            ``labels``.
        labels: Sequence of class labels.

    Returns:
        A tuple ``(y_pred, likelihoods)``: ``y_pred`` is a list with the label
        of the highest-scoring class for each row, and ``likelihoods`` is an
        array of shape (n_rows, n_labels) whose column ``k`` holds the values
        returned by ``log_multivariate_pdf`` for class ``k``.
    """
    n_classes = len(labels)
    likelihoods = np.zeros((X_test.shape[0], n_classes))

    # One column of scores per class.
    for k in range(n_classes):
        likelihoods[:, k] = log_multivariate_pdf(X_test, mean[k], covariance[k])

    # argmax along each row selects the class with the largest score.
    best_class = np.argmax(likelihoods, axis=1)
    y_pred = [labels[k] for k in best_class]
    return y_pred, likelihoods
def roc_curve(y_true, y_proba):
    """Compute ROC points for binary labels at every distinct score.

    A sample is predicted positive at threshold ``t`` when its score is
    ``>= t``. Labels equal to 1 count as positives and labels equal to 0 as
    negatives; any other label value is ignored.

    Args:
        y_true: Array-like of 0/1 labels.
        y_proba: Array-like of scores aligned with ``y_true`` (higher means
            more likely positive).

    Returns:
        A tuple ``(FPR, TPR, thresholds)`` of equal-length 1-D arrays, with
        ``thresholds`` holding the unique scores in descending order. No
        extra (0, 0) starting point is prepended. ``TPR`` is all NaN when
        there are no positives, and ``FPR`` is all NaN when there are no
        negatives.
    """
    y_true = np.asarray(y_true).ravel()
    y_proba = np.asarray(y_proba).ravel()

    is_pos = (y_true == 1)
    is_neg = (y_true == 0)
    P = np.sum(is_pos)
    N = np.sum(is_neg)

    # Group samples by distinct score (ascending), counting positives and
    # negatives per score. This replaces re-thresholding every sample for
    # every threshold with a single sort plus a cumulative sum.
    scores, inverse = np.unique(y_proba, return_inverse=True)
    pos_at_score = np.bincount(inverse, weights=is_pos.astype(float),
                               minlength=scores.size)
    neg_at_score = np.bincount(inverse, weights=is_neg.astype(float),
                               minlength=scores.size)

    # Walking thresholds from high to low, the samples with score >= t are
    # exactly those accumulated so far.
    thresholds = scores[::-1]
    TP = np.cumsum(pos_at_score[::-1])
    FP = np.cumsum(neg_at_score[::-1])

    # A rate with an empty denominator is undefined; report NaN explicitly
    # rather than relying on a 0/0 division warning.
    TPR = TP / P if P else np.full(thresholds.size, np.nan)
    FPR = FP / N if N else np.full(thresholds.size, np.nan)
    return FPR, TPR, thresholds
# Load MNIST as NumPy arrays and hold out 30% of the samples for testing.
mnist = fetch_openml('mnist_784', version=1, as_frame=False)
X, y = mnist.data, mnist.target
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=10
)

# Fit PCA on the training split only, then project both splits onto the
# same 40 components.
pca = PCA(n_components=40)
X_train = pca.fit_transform(X_train)
X_test = pca.transform(X_test)

# Fit one Gaussian per class and classify the held-out rows.
class_labels, mio, cov = TrainGaussian(X_train, y_train)
y_pred, liklihoods = TestGaussian(X_test, mio, cov, class_labels)

accuracy = np.mean(y_pred == y_test)
print(f'Accuracy: {accuracy * 100:.2f}%')
print(f'Error Rate: {(1 - accuracy) * 100:.2f}%')

# One-vs-rest ROC curve per class, laid out on a 2x5 grid of subplots.
fig, axes = plt.subplots(2, 5, figsize=(18, 8))
axes = axes.ravel()
for i, label in enumerate(class_labels):
    # Binary target for this class; its score column is the ranking signal.
    y_true_class = (y_test == label).astype(int)
    y_proba = liklihoods[:, i]
    fpr, tpr, threshold = roc_curve(y_true_class, y_proba)

    ax = axes[i]
    ax.plot(fpr, tpr, label=f'Class {label}')
    ax.set_title(f'ROC - Class {label}')
    ax.set_xlabel('FPR')
    ax.set_ylabel('TPR')
    ax.legend()

plt.tight_layout()
plt.savefig('Gaussian_Roc_plots')
plt.show()