Gaussian-Models/Gaussian_model.py at master · Youssef-Bahaa/Gaussian-Models · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
from sklearn import datasets
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.decomposition import PCA
from sympy.stats import covariance
import matplotlib.pyplot as plt

def TrainGaussian(X_train,y_train):
    """Fit one Gaussian (mean vector and covariance matrix) per class label.

    Args:
        X_train: 2-D array of training samples, one sample per row.
        y_train: 1-D array of class labels aligned with the rows of X_train.

    Returns:
        A tuple ``(labels, mean, covariance)``. ``labels`` is the array of
        distinct labels as returned by ``np.unique``; ``mean`` and
        ``covariance`` are lists holding, in the same order as ``labels``,
        each class's per-feature mean and its ``np.cov`` estimate.
    """
    labels = np.unique(y_train)

    def class_stats(lb):
        # Transpose so features run along rows and samples along columns,
        # which is the layout np.cov expects by default.
        samples = X_train[y_train == lb].T
        return np.mean(samples, axis=1), np.cov(samples)

    stats = [class_stats(lb) for lb in labels]
    mean = [mu for mu, _ in stats]
    covariance = [sigma for _, sigma in stats]
    return labels, mean, covariance


def log_multivariate_pdf(x,mean,cov):
    """Evaluate the log-density of a multivariate normal at each row of x.

    Computes, per sample,
    ``-0.5 * ((x - mean)^T cov^-1 (x - mean) + d*log(2*pi) + log(det(cov)))``.

    Args:
        x: Samples of shape (n_samples, d). A single 1-D sample of length d
            is also accepted and treated as one row.
        mean: Mean vector of length d.
        cov: Covariance matrix of shape (d, d).

    Returns:
        1-D array with one log-density per sample. The entries are NaN when
        the determinant of ``cov`` is not positive.

    Raises:
        numpy.linalg.LinAlgError: If the linear solver finds ``cov`` singular.
    """
    x = np.atleast_2d(x)
    x_m = x - mean
    d = x.shape[1]

    # slogdet yields log|det| directly. Computing det() and then log()
    # underflows to 0 (or overflows to inf) for high-dimensional or badly
    # scaled covariances, which turns every log-density into +/-inf.
    sign, log_abs_det = np.linalg.slogdet(cov)
    log_det = log_abs_det if sign > 0 else np.nan

    # Solve the linear system rather than forming the explicit inverse;
    # the quadratic form x_m^T cov^-1 x_m is the same, with less round-off.
    solved = np.linalg.solve(cov, x_m.T).T
    first_part = np.sum(solved * x_m,axis=1)
    second_part = d * np.log(2 * np.pi) + log_det

    log_prob = -0.5 * (first_part + second_part)

    return log_prob


def TestGaussian(X_test,mean,covariance,labels):
    """Assign each test row to the class whose Gaussian scores it highest.

    Args:
        X_test: 2-D array of samples, one per row.
        mean: Sequence of per-class mean vectors, ordered like ``labels``.
        covariance: Sequence of per-class covariance matrices, ordered like
            ``labels``.
        labels: Indexable collection of class labels.

    Returns:
        A tuple ``(y_pred, likelihoods)``. ``y_pred`` is a list holding the
        predicted label of every row; ``likelihoods`` is an
        (n_samples, n_classes) array whose column k holds the values returned
        by ``log_multivariate_pdf`` for class k.
    """
    n_classes = len(labels)
    likelihoods = np.zeros((X_test.shape[0], n_classes))
    for k in range(n_classes):
        likelihoods[:, k] = log_multivariate_pdf(X_test, mean[k], covariance[k])

    # The winning class for a row is the column with the largest score.
    best = np.argmax(likelihoods, axis=1)
    y_pred = [labels[k] for k in best]
    return y_pred, likelihoods


def roc_curve(y_true,y_proba):
    """Compute ROC points for binary labels at every distinct score threshold.

    A sample is predicted positive when its score is >= the threshold.

    Args:
        y_true: Array-like of labels. Entries equal to 1 count as positives
            and entries equal to 0 as negatives; any other value is ignored.
        y_proba: Array-like of scores with the same shape as ``y_true``.
            Only the ordering of the scores matters, so values such as
            log-likelihoods can be used.

    Returns:
        A tuple ``(FPR, TPR, thresholds)``. ``thresholds`` holds the distinct
        scores in descending order; ``FPR[i]`` and ``TPR[i]`` are the false-
        and true-positive rates at ``thresholds[i]``. A rate is NaN at every
        threshold when its denominator (the number of negatives or positives,
        respectively) is zero.

    Raises:
        ValueError: If ``y_true`` and ``y_proba`` have different shapes.
    """
    y_true = np.asarray(y_true)
    y_proba = np.asarray(y_proba)
    if y_true.shape != y_proba.shape:
        raise ValueError(
            f'y_true and y_proba must have the same shape, '
            f'got {y_true.shape} and {y_proba.shape}'
        )

    # np.unique returns the distinct scores sorted ascending; reverse them.
    thresholds = np.unique(y_proba)[::-1]

    pos_scores = np.sort(y_proba[y_true == 1])
    neg_scores = np.sort(y_proba[y_true == 0])
    P = pos_scores.size
    N = neg_scores.size

    # searchsorted(..., side='left') counts the scores strictly below each
    # threshold, so the remainder is the number of scores >= threshold. This
    # replaces a full pass over the data per threshold with one sort.
    TP = P - np.searchsorted(pos_scores, thresholds, side='left')
    FP = N - np.searchsorted(neg_scores, thresholds, side='left')

    # A rate with an empty denominator is undefined: report NaN explicitly
    # instead of relying on a 0/0 division and its RuntimeWarning.
    TPR = TP / P if P else np.full(thresholds.shape, np.nan)
    FPR = FP / N if N else np.full(thresholds.shape, np.nan)

    return FPR,TPR,thresholds


# Fetch the 'mnist_784' data set from OpenML (as_frame=False requests arrays
# rather than a DataFrame).
mnist = fetch_openml('mnist_784', version=1, as_frame=False)
X = mnist.data
y = mnist.target

# Hold out 30% of the samples for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=10
)

# Reduce to 40 principal components. The projection is fitted on the training
# split only and then applied unchanged to the test split.
pca = PCA(n_components=40)
X_train = pca.fit_transform(X_train)
X_test = pca.transform(X_test)

# Fit one Gaussian per class, then score and classify the held-out samples.
class_labels, mio, cov = TrainGaussian(X_train, y_train)
y_pred, liklihoods = TestGaussian(X_test, mio, cov, class_labels)

accuracy = np.mean(y_pred == y_test)

print(f'Accuracy: {accuracy *  100:.2f}%')
print(f'Error Rate: {(1 - accuracy) * 100:.2f}%')


# One ROC panel per class, laid out on a 2x5 grid and flattened so the panels
# can be indexed by class position.
fig, axes = plt.subplots(2, 5, figsize=(18, 8))
axes = axes.ravel()

for i, label in enumerate(class_labels):
    # One-vs-rest: the current class is the positive label, and its column
    # of scores from TestGaussian is used as the ranking score.
    y_true_class = (y_test == label).astype(int)
    y_proba = liklihoods[:, i]
    fpr, tpr, threshold = roc_curve(y_true_class, y_proba)

    ax = axes[i]
    ax.plot(fpr, tpr, label=f'Class {label}')
    ax.set_title(f'ROC - Class {label}')
    ax.set_xlabel('FPR')
    ax.set_ylabel('TPR')
    ax.legend()

plt.tight_layout()
plt.savefig('Gaussian_Roc_plots')
plt.show()