=== CODE
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.autograd.functions import Multinomial
from torch.autograd.functions import Bernoulli
# Minimal reproduction script: draw one sample from a softmax policy through
# the stochastic Multinomial function, attach a reward to that sample with
# reinforce(), then backpropagate from the sample.

# Linear layer mapping 5 input features to 2 outputs.
l=nn.Linear(5,2)
# Softmax applied to the linear outputs; the result is used below as the
# sampling probabilities.
s=nn.Softmax()
# NOTE(review): the argument is presumably the number of samples to draw --
# confirm against the torch version this was run with.
m=Multinomial(1)
# One random input row of 5 features (tensor of size 1x5), tracked for gradients.
x=Variable(torch.randn(1,5),requires_grad=True)
# Given the 1x5 input and the 5->2 layer this should be a 1x2 (2-D) tensor.
probabilities=s(l(x))
# Stochastic sample drawn from `probabilities`.
y=m(probabilities)
print(y)
# A random 1x1 tensor is registered as the reward for the sampled output.
y.reinforce(torch.randn(1,1))
# NOTE(review): this is the call that raises the reported RuntimeError. The
# traceback ends in `probs.index_select(0, samples)` inside the library's
# stochastic backward, with the message "Index is supposed to be a vector".
# Presumably the 2-D (batch-of-one) probabilities/samples built above are
# what that code path cannot handle -- confirm against the installed torch
# source before relying on this explanation.
y.backward()
==== ERROR
Traceback (most recent call last):
  File "rl/tutorials/tmp.py", line 16, in <module>
    y.backward()
  File "/home/denoyer/anaconda3/lib/python3.5/site-packages/torch/autograd/variable.py", line 90, in backward
    self._execution_engine.run_backward(self, gradient, retain_variables)
  File "/home/denoyer/anaconda3/lib/python3.5/site-packages/torch/autograd/stochastic_function.py", line 14, in _do_backward
    result = super(StochasticFunction, self)._do_backward((self.reward,), retain_variables)
  File "/home/denoyer/anaconda3/lib/python3.5/site-packages/torch/autograd/functions/stochastic.py", line 23, in backward
    output_probs = probs.index_select(0, samples)
RuntimeError: Index is supposed to be a vector at /tmp/pip-jim5mxb2-build/torch/lib/TH/generic/THTensorMath.c:136