Hi,
awd-lstm implementation doesn't work after upgrading to 1.4.0. Here is how weight drop class looks like:
class WeightDrop(torch.nn.Module):
def __init__(self, module, weights, dropout=0, variational=False):
super(WeightDrop, self).__init__()
self.module = module
self.weights = weights
self.dropout = dropout
self.variational = variational
self._setup()
def widget_demagnetizer_y2k_edition(*args, **kwargs):
return
def _setup(self):
# Terrible temporary solution to an issue regarding compacting weights re: CUDNN RNN
if issubclass(type(self.module), torch.nn.RNNBase):
self.module.flatten_parameters = self.widget_demagnetizer_y2k_edition
for name_w in self.weights:
print('Applying weight drop of {} to {}'.format(self.dropout, name_w))
w = getattr(self.module, name_w)
del self.module._parameters[name_w]
self.module.register_parameter(name_w + '_raw', Parameter(w.data))
def _setweights(self):
for name_w in self.weights:
raw_w = getattr(self.module, name_w + '_raw')
w = None
if self.variational:
mask = torch.ones(raw_w.size(0), 1)
if raw_w.is_cuda: mask = mask.cuda()
mask = torch.nn.functional.dropout(mask, p=self.dropout, training=True)
w = mask.expand_as(raw_w) * raw_w
else:
w = torch.nn.functional.dropout(raw_w, p=self.dropout, training=self.training)
setattr(self.module, name_w, w)
def forward(self, *args):
self._setweights()
return self.module.forward(*args)
what model's init looks like:
class RNNModel(nn.Module):
def __init__(self, ninp, nhid, history_size, label_size, nlayers=1, dropout=0.5, dropouth=0.5, dropouti=0.2, dropoute=0.1, wdrop=0.7, tie_weights=False):
super(RNNModel, self).__init__()
self.ninp = ninp
self.nhid = nhid
self.nlayers = nlayers
self.rnns = [torch.nn.LSTM(ninp if l == 0 else nhid, nhid if l != nlayers - 1 else (ninp if tie_weights else nhid), 1, dropout=0) for l in range(nlayers)]
if wdrop:
self.rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop) for rnn in self.rnns]
self.rnns = torch.nn.ModuleList(self.rnns)
self.decoder = nn.Linear(nhid, label_size)
What should I do to update my code? I can't figure it out myself, thanks.
edit:
To reproduce, the code below is enough and I discovered it only happens when using .to(device), .cpu() or .cuda()
encoder = RNNModel(5, 128, 120, 2, 1).cuda()
Environment
- PyTorch Version (e.g., 1.0): 1.4.0
- OS (e.g., Linux): Windows 10
- How you installed PyTorch (conda, pip, source): pip
- Build command you used (if compiling from source):
- Python version: 3.7
- CUDA/cuDNN version: 10.1
- GPU models and configuration:
- Any other relevant information:
cc @ezyang @gchanan @zou3519
Hi,
awd-lstm implementation doesn't work after upgrading to 1.4.0. Here is how weight drop class looks like:
what model's init looks like:
What should I do to update my code? I can't figure it out myself, thanks.
edit:
To reproduce, the code below is enough and I discovered it only happens when using .to(device), .cpu() or .cuda()
Environment
cc @ezyang @gchanan @zou3519