# -*- mode: Python; coding: utf-8 -*-
import math
from collections import defaultdict

import numpy as np

from classifier import Classifier


class MaxEnt(Classifier):

    def __init__(self, model=None):
        # Set the weights attribute before the base constructor runs,
        # since the model property setter writes to it.
        self.labelsToWeights = None
        self.NUM_ITERATIONS = 10  # Fixed number of iterations of SGD
        super().__init__(model=model)

    def get_model(self):
        return self.labelsToWeights

    def set_model(self, model):
        self.labelsToWeights = model

    model = property(get_model, set_model)

    def train(self, instances, dev_instances=None):
        self.train_sgd(instances, dev_instances, 0.001, 30)

    # Trains this classifier using mini-batch stochastic gradient descent
    # with a fixed learning rate and a fixed number of iterations.
    def train_sgd(self, train_instances, dev_instances, learning_rate, batch_size):
        self.labelsToWeights = self.initializeWeights(train_instances)
        for _ in range(self.NUM_ITERATIONS):
            for i in range(0, len(train_instances), batch_size):
                batch = train_instances[i:i + batch_size]
                gradient = self.gradient(batch)
                # Gradient ascent on the log-likelihood, one weight vector
                # per label.
                for label in self.labelsToWeights:
                    self.labelsToWeights[label] += learning_rate * gradient[label]
            # Report progress on the held-out set once per pass, if one
            # was provided.
            if dev_instances:
                print('negLogLikelihood =', self.negLogLikelihood(dev_instances))

    # Classifies the given instance as the label with the highest posterior
    # probability under the current model.
    def classify(self, instance):
        posteriors = {}
        for label in self.labelsToWeights:
            posteriors[label] = self.posterior(label, instance.features())
        return max(posteriors, key=posteriors.get)

    # Initializes model parameter weights to zero
    def initializeWeights(self, train_instances):
        labels = {}
        numFeatures = len(train_instances[0].features())
        for instance in train_instances:
            if instance.label not in labels:
                labels[instance.label] = np.zeros(numFeatures)
        return labels

    # Returns the posterior probability P(label | featureVec) under the
    # current model: a softmax over the per-label scores w . featureVec.
    def posterior(self, label, featureVec):
        # Shift every score by the maximum before exponentiating so that
        # exp cannot overflow when the dot products grow large.
        maxScore = max(np.dot(w, featureVec) for w in self.labelsToWeights.values())
        dotProds = {}
        for l, w in self.labelsToWeights.items():
            dotProds[l] = expDotProd(w, featureVec, maxScore)
        return dotProds[label] / sum(dotProds.values())

    # Returns the observed counts for each feature in the passed mini-batch
    def observedCounts(self, instances):
        observedCounts = defaultdict(lambda: np.zeros(len(instances[0].features())))
        for instance in instances:
            observedCounts[instance.label] += instance.features()
        return observedCounts

    # Returns the expected model counts (the right-hand side of the gradient
    # difference) given a mini-batch of instances.
    def expectedModelCounts(self, instances):
        expectedCounts = defaultdict(lambda: np.zeros(len(instances[0].features())))
        for instance in instances:
            for label in self.labelsToWeights:
                posterior = self.posterior(label, instance.features())
                expectedCounts[label] += instance.features() * posterior
        return expectedCounts

    # Computes the gradient of the log-likelihood over the given instances:
    # for each label, observed feature counts minus expected model counts.
    def gradient(self, instances):
        expected = self.expectedModelCounts(instances)
        observed = self.observedCounts(instances)
        gradient = defaultdict(lambda: np.zeros(len(instances[0].features())))
        for label in self.labelsToWeights:
            gradient[label] = observed[label] - expected[label]
        return gradient

    # Computes the negative log-likelihood over a set of instances
    def negLogLikelihood(self, instances):
        return -sum(math.log(self.posterior(instance.label, instance.features()))
                    for instance in instances)

    # Computes the accuracy of the classifier over a set of instances
    def accuracy(self, instances):
        return sum(instance.label == self.classify(instance)
                   for instance in instances) / len(instances)


# Returns e^((w . x) - shift); the optional shift guards exp against
# overflow and leaves the softmax in posterior() unchanged, since the
# same shift is applied to every label's score.
def expDotProd(w, x, shift=0.0):
    return math.exp(np.dot(w, x) - shift)
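

# --- Usage sketch (illustrative only) ---
# A minimal example of how this classifier might be driven. It assumes the
# instance interface implied by the code above: each instance exposes a
# `label` attribute and a `features()` method returning a fixed-length
# numpy vector. ToyInstance and the data below are hypothetical stand-ins,
# not part of the accompanying classifier module.
if __name__ == '__main__':
    class ToyInstance:
        # Hypothetical stand-in for the real Instance class.
        def __init__(self, label, feature_vector):
            self.label = label
            self._features = np.array(feature_vector, dtype=float)

        def features(self):
            return self._features

    # Two tiny, linearly separable classes over three binary features.
    train = [ToyInstance('pos', [1, 1, 0]), ToyInstance('neg', [0, 1, 1])] * 20
    dev = [ToyInstance('pos', [1, 0, 0]), ToyInstance('neg', [0, 0, 1])]

    maxent = MaxEnt()
    maxent.train(train, dev)  # prints dev negLogLikelihood once per pass
    print('dev accuracy =', maxent.accuracy(dev))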