icoxfog417 · icoxfog417 · Mar 18, 2019 · Jan 20, 2019 · Jan 20, 2019 · Jan 20, 2019
diff --git a/DP/planner.py b/DP/planner.py
@@ -100,9 +100,9 @@ def estimate_by_policy(self, gamma, threshold):
                         r += action_prob * prob * \
                              (reward + gamma * V[next_state])
                     expected_rewards.append(r)
-                max_reward = max(expected_rewards)
-                delta = max(delta, abs(max_reward - V[s]))
-                V[s] = max_reward
+                value = sum(expected_rewards)
+                delta = max(delta, abs(value - V[s]))
+                V[s] = value
             if delta < threshold:
                 break
 

diff --git a/IRL/planner.py b/IRL/planner.py
@@ -74,7 +74,7 @@ def initialize(self):
         self.policy = np.ones((self.env.observation_space.n,
                                self.env.action_space.n))
         # First, take each action uniformly.
-        self.polidy = self.policy / self.env.action_space.n
+        self.policy = self.policy / self.env.action_space.n
 
     def policy_to_q(self, V, gamma):
         Q = np.zeros((self.env.observation_space.n,
@@ -108,9 +108,9 @@ def estimate_by_policy(self, gamma, threshold):
                         reward += action_prob * p * \
                                   (r + gamma * V[n_s] * (not done))
                     expected_rewards.append(reward)
-                max_reward = max(expected_rewards)
-                delta = max(delta, abs(max_reward - V[s]))
-                V[s] = max_reward
+                value = sum(expected_rewards)
+                delta = max(delta, abs(value - V[s]))
+                V[s] = value
 
             if delta < threshold or count > self._limit_count:
                 break