System information
- OS Platform and Distribution (e.g., Linux Ubuntu 16.04): Steropes
- Ray installed from (source or binary): pip install -U
- Ray version: nightly
- Python version: 3.7
- Exact command to reproduce: pass an integer value of `entropy_coeff` into `run()` with PPO (minimal sketch below)
Describe the problem
Passing an integer value for `entropy_coeff` (e.g. `0` rather than `0.0`) into PPO crashes during policy initialization: the coefficient ends up in an `int32` variable, and multiplying it with the `float32` entropy tensor in the surrogate loss raises a `TypeError`.
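A minimal reproduction sketch (the environment name `CartPole-v0` is my choice for illustration, not from the original report; assumes a nightly Ray install):

```python
import ray
from ray import tune

ray.init()

# Passing `entropy_coeff` as a Python int (instead of a float) is enough
# to trigger the int32/float32 Mul error while PPO builds its loss.
tune.run(
    "PPO",
    config={
        "env": "CartPole-v0",
        "entropy_coeff": 0,  # int -> the entropy_coeff variable is created as int32
    },
)
```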
Source code / logs
```
2019-09-11 00:11:50,889 ERROR trial_runner.py:552 -- Error processing event.
Traceback (most recent call last):
  File "/data/ashwineep/anaconda3/lib/python3.7/site-packages/ray/tune/trial_runner.py", line 498, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/data/ashwineep/anaconda3/lib/python3.7/site-packages/ray/tune/ray_trial_executor.py", line 347, in fetch_result
    result = ray.get(trial_future[0])
  File "/data/ashwineep/anaconda3/lib/python3.7/site-packages/ray/worker.py", line 2340, in get
    raise value
ray.exceptions.RayTaskError: ray_PPO:train() (pid=11050, host=steropes)
  File "/data/ashwineep/anaconda3/lib/python3.7/site-packages/tensorflow/python/framework/op_def_library.py", line 527, in _apply_op_helper
    preferred_dtype=default_dtype)
  File "/data/ashwineep/anaconda3/lib/python3.7/site-packages/tensorflow/python/framework/ops.py", line 1224, in internal_convert_to_tensor
    ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
  File "/data/ashwineep/anaconda3/lib/python3.7/site-packages/tensorflow/python/framework/ops.py", line 1018, in _TensorTensorConversionFunction
    (dtype.name, t.dtype.name, str(t)))
ValueError: Tensor conversion requested dtype int32 for Tensor with dtype float32: 'Tensor("default_policy/Sum_5:0", shape=(?,), dtype=float32)'

During handling of the above exception, another exception occurred:

ray_PPO:train() (pid=11050, host=steropes)
  File "/data/ashwineep/anaconda3/lib/python3.7/site-packages/ray/rllib/agents/trainer_template.py", line 90, in __init__
    Trainer.__init__(self, config, env, logger_creator)
  File "/data/ashwineep/anaconda3/lib/python3.7/site-packages/ray/rllib/agents/trainer.py", line 366, in __init__
    Trainable.__init__(self, config, logger_creator)
  File "/data/ashwineep/anaconda3/lib/python3.7/site-packages/ray/tune/trainable.py", line 99, in __init__
    self._setup(copy.deepcopy(self.config))
  File "/data/ashwineep/anaconda3/lib/python3.7/site-packages/ray/rllib/agents/trainer.py", line 486, in _setup
    self._init(self.config, self.env_creator)
  File "/data/ashwineep/anaconda3/lib/python3.7/site-packages/ray/rllib/agents/trainer_template.py", line 109, in _init
    self.config["num_workers"])
  File "/data/ashwineep/anaconda3/lib/python3.7/site-packages/ray/rllib/agents/trainer.py", line 531, in _make_workers
    logdir=self.logdir)
  File "/data/ashwineep/anaconda3/lib/python3.7/site-packages/ray/rllib/evaluation/worker_set.py", line 64, in __init__
    RolloutWorker, env_creator, policy, 0, self._local_config)
  File "/data/ashwineep/anaconda3/lib/python3.7/site-packages/ray/rllib/evaluation/worker_set.py", line 220, in _make_worker
    _fake_sampler=config.get("_fake_sampler", False))
  File "/data/ashwineep/anaconda3/lib/python3.7/site-packages/ray/rllib/evaluation/rollout_worker.py", line 348, in __init__
    self._build_policy_map(policy_dict, policy_config)
  File "/data/ashwineep/anaconda3/lib/python3.7/site-packages/ray/rllib/evaluation/rollout_worker.py", line 762, in _build_policy_map
    policy_map[name] = cls(obs_space, act_space, merged_conf)
  File "/data/ashwineep/anaconda3/lib/python3.7/site-packages/ray/rllib/policy/tf_policy_template.py", line 143, in __init__
    obs_include_prev_action_reward=obs_include_prev_action_reward)
  File "/data/ashwineep/anaconda3/lib/python3.7/site-packages/ray/rllib/policy/dynamic_tf_policy.py", line 196, in __init__
    self._initialize_loss()
  File "/data/ashwineep/anaconda3/lib/python3.7/site-packages/ray/rllib/policy/dynamic_tf_policy.py", line 337, in _initialize_loss
    loss = self._do_loss_init(train_batch)
  File "/data/ashwineep/anaconda3/lib/python3.7/site-packages/ray/rllib/policy/dynamic_tf_policy.py", line 349, in _do_loss_init
    loss = self._loss_fn(self, self.model, self._dist_class, train_batch)
  File "/data/ashwineep/anaconda3/lib/python3.7/site-packages/ray/rllib/agents/ppo/ppo_policy.py", line 146, in ppo_surrogate_loss
    model_config=policy.config["model"])
  File "/data/ashwineep/anaconda3/lib/python3.7/site-packages/ray/rllib/agents/ppo/ppo_policy.py", line 106, in __init__
    vf_loss_coeff * vf_loss - entropy_coeff * curr_entropy)
  File "/data/ashwineep/anaconda3/lib/python3.7/site-packages/tensorflow/python/ops/variables.py", line 1045, in _run_op
    return tensor_oper(a.value(), *args, **kwargs)
  File "/data/ashwineep/anaconda3/lib/python3.7/site-packages/tensorflow/python/ops/math_ops.py", line 884, in binary_op_wrapper
    return func(x, y, name=name)
  File "/data/ashwineep/anaconda3/lib/python3.7/site-packages/tensorflow/python/ops/math_ops.py", line 1180, in _mul_dispatch
    return gen_math_ops.mul(x, y, name=name)
  File "/data/ashwineep/anaconda3/lib/python3.7/site-packages/tensorflow/python/ops/gen_math_ops.py", line 6490, in mul
    "Mul", x=x, y=y, name=name)
  File "/data/ashwineep/anaconda3/lib/python3.7/site-packages/tensorflow/python/framework/op_def_library.py", line 563, in _apply_op_helper
    inferred_from[input_arg.type_attr]))
TypeError: Input 'y' of 'Mul' Op has type float32 that does not match type int32 of argument 'x'.
```
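For reference, the failing op can be reproduced in isolation. The traceback goes through `variables.py`'s `_run_op`, so `entropy_coeff` is held in a `tf.Variable`; this sketch (mine, not from the report, assuming TF 1.x graph mode) mimics the multiplication in `ppo_policy.py` line 106:

```python
import tensorflow as tf

# An integer initial value makes the variable int32, like an int entropy_coeff.
entropy_coeff = tf.Variable(0)                      # dtype int32
curr_entropy = tf.placeholder(tf.float32, [None])   # the entropy term is float32

# Same failure as `entropy_coeff * curr_entropy` in ppo_surrogate_loss:
# TypeError: Input 'y' of 'Mul' Op has type float32 that does not match
# type int32 of argument 'x'.
loss = entropy_coeff * curr_entropy
```

Passing the coefficient as a float (e.g. `"entropy_coeff": 0.0`) works around the crash; casting the config value to float before the variable is created would presumably fix this on the RLlib side.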