From 5d1e34e56bb0a4d48cd5595c48ce222fc9b43b75 Mon Sep 17 00:00:00 2001
From: Nicolas Carion
Date: Sun, 14 Jan 2018 14:43:54 +0100
Subject: [PATCH 1/5] fix to new pytorch syntax

---
 rlpytorch/model_interface.py | 2 +-
 rlpytorch/model_loader.py    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/rlpytorch/model_interface.py b/rlpytorch/model_interface.py
index de2fa7e1..cf15a250 100644
--- a/rlpytorch/model_interface.py
+++ b/rlpytorch/model_interface.py
@@ -95,7 +95,7 @@ def add_model(self, key, model, copy=False, cuda=False, gpu_id=None, opt=False,
         self.models[key] = model.clone() if copy else model
         if cuda:
             if gpu_id is not None:
-                self.models[key].cuda(device_id=gpu_id)
+                self.models[key].cuda(device=gpu_id)
             else:
                 self.models[key].cuda()

diff --git a/rlpytorch/model_loader.py b/rlpytorch/model_loader.py
index 560c1537..4f1c240d 100644
--- a/rlpytorch/model_loader.py
+++ b/rlpytorch/model_loader.py
@@ -92,7 +92,7 @@ def load_model(self, params):
             sys.exit(1)

         if args.gpu is not None and args.gpu >= 0:
-            model.cuda(device_id=args.gpu)
+            model.cuda(device=args.gpu)

         return model

From fdad6f73f0990e6932ba558fc7ce42d9d23b8fa0 Mon Sep 17 00:00:00 2001
From: Nicolas Carion
Date: Thu, 29 Mar 2018 13:57:00 -0600
Subject: [PATCH 2/5] Use new no_grad syntax

---
 rlpytorch/model_base.py      | 2 +-
 rlpytorch/trainer/trainer.py | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/rlpytorch/model_base.py b/rlpytorch/model_base.py
index cb519ee1..12c47da8 100644
--- a/rlpytorch/model_base.py
+++ b/rlpytorch/model_base.py
@@ -55,7 +55,7 @@ def _var(self, x):
             Variable for x
         '''
         if not isinstance(x, Variable):
-            return Variable(x, volatile=self.volatile)
+            return Variable(x)
         else:
             return x

diff --git a/rlpytorch/trainer/trainer.py b/rlpytorch/trainer/trainer.py
index 8f2b4241..c965e6b5 100644
--- a/rlpytorch/trainer/trainer.py
+++ b/rlpytorch/trainer/trainer.py
@@ -6,6 +6,7 @@

 import os
 import sys
+import torch
 sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', '..', 'elf'))
 import utils_elf
 from ..args_provider import ArgsProvider
@@ -69,9 +70,8 @@ def actor(self, batch):
         # actor model.
         m = self.mi[self.actor_name]
-        m.set_volatile(True)
-        state_curr = m.forward(batch.hist(0))
-        m.set_volatile(False)
+        with torch.no_grad():
+            state_curr = m.forward(batch.hist(0))

         if self.sampler is not None:
             reply_msg = self.sampler.sample(state_curr)
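For reference, the pattern that patches 1 and 2 migrate to looks roughly like the sketch below. This is a minimal, self-contained illustration of the public PyTorch 0.4 API, not code from this repository; the model and tensor shapes are placeholders.

    import torch
    import torch.nn as nn

    model = nn.Linear(10, 2)              # stand-in for any nn.Module
    if torch.cuda.is_available():
        model.cuda(device=0)              # 0.4 keyword is `device`, not `device_id`

    x = torch.randn(4, 10)
    if torch.cuda.is_available():
        x = x.cuda(device=0)

    with torch.no_grad():                 # replaces Variable(x, volatile=True)
        out = model(x)                    # no autograd graph is recorded here

Under 0.4 the `volatile` flag no longer has any effect, so wrapping inference in `torch.no_grad()` is what actually prevents graph construction during the actor step.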
From 378ac8eb562ed7e7516b272cf7b2c3fa83f0a376 Mon Sep 17 00:00:00 2001
From: Nicolas Carion
Date: Wed, 9 May 2018 07:23:40 -0600
Subject: [PATCH 3/5] fix stats for new pytorch

---
 rlpytorch/stats/stats.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/rlpytorch/stats/stats.py b/rlpytorch/stats/stats.py
index b73e6f0c..7ffe0602 100644
--- a/rlpytorch/stats/stats.py
+++ b/rlpytorch/stats/stats.py
@@ -77,9 +77,9 @@ def feed_batch(self, batch, hist_idx=0):
         last_r = batch["last_r"][hist_idx]

         for batch_idx, (id, last_terminal) in enumerate(zip(ids, last_terminals)):
-            self.feed(id, last_r[batch_idx])
-            if last_terminal:
-                self.terminal(id)
+            self.feed(id.item(), last_r[batch_idx].item())
+            if last_terminal.item() == 1:
+                self.terminal(id.item())

 class RewardCount(EvalCount):
     ''' Class to accumulate rewards achieved'''

From 5ee4e20643bf3394b41f7db3256ec4bfb1901171 Mon Sep 17 00:00:00 2001
From: Nicolas Carion
Date: Tue, 3 Jul 2018 14:46:28 +0200
Subject: [PATCH 4/5] pytorch 0.4 requires stricter size match

---
 elf/utils_elf.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/elf/utils_elf.py b/elf/utils_elf.py
index d3e30027..1508ee39 100644
--- a/elf/utils_elf.py
+++ b/elf/utils_elf.py
@@ -152,7 +152,7 @@ def copy_from(self, src, batch_key=""):
                 elif isinstance(v, (int, float)):
                     bk.fill_(v)
                 else:
-                    bk[:] = v
+                    bk[:] = v.view_as(bk[:])
             else:
                 raise ValueError("Batch[%s]: \"%s\" in reply is missing in batch specification" % (batch_key, k))
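Patches 3 and 4 track two PyTorch 0.4 behaviour changes: indexing a tensor now yields a 0-dim tensor rather than a Python number (hence `.item()` in the stats code), and, as the commit message notes, element-wise copies check sizes more strictly (hence `view_as` in `copy_from`). A rough standalone sketch, with made-up shapes chosen only for illustration:

    import torch

    t = torch.tensor([3.0, 4.0])
    first = t[0].item()          # 3.0 as a Python float; t[0] alone is a 0-dim tensor

    dst = torch.zeros(4, 1)
    src = torch.tensor([0.0, 1.0, 2.0, 3.0])   # shape (4,) vs. dst's (4, 1)
    dst[:] = src.view_as(dst)    # reshape to the destination's shape before copying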
From b37f4d6d6b9044774331f12aee1652d5448bb6eb Mon Sep 17 00:00:00 2001
From: Nicolas Carion
Date: Tue, 3 Jul 2018 14:49:33 +0200
Subject: [PATCH 5/5] more use of .item() where appropriate

---
 rlpytorch/methods/actor_critic.py      | 2 +-
 rlpytorch/methods/discounted_reward.py | 6 +++---
 rlpytorch/methods/policy_gradient.py   | 8 ++++----
 rlpytorch/methods/value_matcher.py     | 4 ++--
 4 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/rlpytorch/methods/actor_critic.py b/rlpytorch/methods/actor_critic.py
index c5fb86a6..245cf496 100644
--- a/rlpytorch/methods/actor_critic.py
+++ b/rlpytorch/methods/actor_critic.py
@@ -72,5 +72,5 @@ def update(self, mi, batch, stats):
         err = add_err(err, policy_err)
         err = add_err(err, self.value_matcher.feed({ value_node: V, "target" : R}, stats))

-        stats["cost"].feed(err.data[0] / (T - 1))
+        stats["cost"].feed(err.item() / (T - 1))
         err.backward()

diff --git a/rlpytorch/methods/discounted_reward.py b/rlpytorch/methods/discounted_reward.py
index 2e1d2909..1b4c8a56 100644
--- a/rlpytorch/methods/discounted_reward.py
+++ b/rlpytorch/methods/discounted_reward.py
@@ -24,7 +24,7 @@ def __init__(self):
     def setR(self, R, stats):
         ''' Set rewards and feed to stats'''
         self.R = R
-        stats["init_reward"].feed(R.mean())
+        stats["init_reward"].feed(R.mean().item())

     def feed(self, batch, stats):
         '''
@@ -49,7 +49,7 @@ def feed(self, batch, stats):
             if terminal:
                 self.R[i] = r[i]

-        stats["reward"].feed(r.mean())
-        stats["acc_reward"].feed(self.R.mean())
+        stats["reward"].feed(r.mean().item())
+        stats["acc_reward"].feed(self.R.mean().item())

         return self.R

diff --git a/rlpytorch/methods/policy_gradient.py b/rlpytorch/methods/policy_gradient.py
index 1c50ae60..f5a74927 100644
--- a/rlpytorch/methods/policy_gradient.py
+++ b/rlpytorch/methods/policy_gradient.py
@@ -149,14 +149,14 @@ def feed(self, Q, pi_s, actions, stats, old_pi_s=dict()):
             entropy_err = add_err(entropy_err, errs["entropy_err"])
             log_pi_s.append(errs["logpi"])

-            stats["nll_" + pi_node].feed(errs["policy_err"].data[0])
-            stats["entropy_" + pi_node].feed(errs["entropy_err"].data[0])
+            stats["nll_" + pi_node].feed(errs["policy_err"].item())
+            stats["entropy_" + pi_node].feed(errs["entropy_err"].item())

         for log_pi in log_pi_s:
             self._reg_backward(log_pi, Variable(pg_weights))

         if len(args.policy_action_nodes) > 1:
-            stats["total_nll"].feed(policy_err.data[0])
-            stats["total_entropy"].feed(entropy_err.data[0])
+            stats["total_nll"].feed(policy_err.item())
+            stats["total_entropy"].feed(entropy_err.item())

         return policy_err + entropy_err * args.entropy_ratio

diff --git a/rlpytorch/methods/value_matcher.py b/rlpytorch/methods/value_matcher.py
index 81028ae6..09e48e7d 100644
--- a/rlpytorch/methods/value_matcher.py
+++ b/rlpytorch/methods/value_matcher.py
@@ -68,7 +68,7 @@ def feed(self, batch, stats):
         V = batch[self.value_node]
         value_err = self.value_loss(V, Variable(batch["target"]))
         self._reg_backward(V)
-        stats["predicted_" + self.value_node].feed(V.data[0])
-        stats[self.value_node + "_err"].feed(value_err.data[0])
+        stats["predicted_" + self.value_node].feed(V[0].item())
+        stats[self.value_node + "_err"].feed(value_err.item())

         return value_err
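Patch 5 applies the same idea to scalar statistics: with the Variable/Tensor merge in 0.4, loss values are 0-dimensional tensors, so the old `loss.data[0]` indexing is replaced by `loss.item()`. A minimal sketch (the loss function and shapes are arbitrary placeholders, not taken from this repository):

    import torch
    import torch.nn.functional as F

    pred = torch.randn(8, requires_grad=True)
    target = torch.zeros(8)

    loss = F.mse_loss(pred, target)   # 0-dim tensor under PyTorch >= 0.4
    value = loss.item()               # read out a plain Python float for logging
    loss.backward()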