
Pytorch 1.4.0 weight drop - 'LSTM' object has no attribute 'weight_hh_l0' #32346

@realiti4

Description


Hi,

My AWD-LSTM implementation stopped working after upgrading to 1.4.0. Here is what the weight-drop class looks like:

import torch
from torch.nn import Parameter


class WeightDrop(torch.nn.Module):
    def __init__(self, module, weights, dropout=0, variational=False):
        super(WeightDrop, self).__init__()
        self.module = module
        self.weights = weights
        self.dropout = dropout
        self.variational = variational
        self._setup()

    def widget_demagnetizer_y2k_edition(*args, **kwargs):
        # No-op stand-in for flatten_parameters (installed in _setup below).
        return

    def _setup(self):
        # Terrible temporary solution to an issue regarding compacting weights re: CUDNN RNN
        if issubclass(type(self.module), torch.nn.RNNBase):
            self.module.flatten_parameters = self.widget_demagnetizer_y2k_edition

        for name_w in self.weights:
            print('Applying weight drop of {} to {}'.format(self.dropout, name_w))
            w = getattr(self.module, name_w)
            del self.module._parameters[name_w]
            self.module.register_parameter(name_w + '_raw', Parameter(w.data))

    def _setweights(self):
        # Recompute the dropped versions of the raw weights and re-attach
        # them to the wrapped module under their original names.
        for name_w in self.weights:
            raw_w = getattr(self.module, name_w + '_raw')
            w = None
            if self.variational:
                # Variational: one mask entry per row, broadcast across the row.
                mask = torch.ones(raw_w.size(0), 1)

                if raw_w.is_cuda: mask = mask.cuda()
                mask = torch.nn.functional.dropout(mask, p=self.dropout, training=True)
                w = mask.expand_as(raw_w) * raw_w
            else:
                w = torch.nn.functional.dropout(raw_w, p=self.dropout, training=self.training)
            setattr(self.module, name_w, w)

    def forward(self, *args):
        self._setweights()        
        return self.module.forward(*args)
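
For reference, the wrapper is used by naming which of the wrapped module's weights should get dropout. A minimal usage sketch, with illustrative sizes:

import torch

lstm = torch.nn.LSTM(10, 20)
dropped = WeightDrop(lstm, ['weight_hh_l0'], dropout=0.5)
x = torch.randn(7, 1, 10)   # (seq_len, batch, input_size)
out, hidden = dropped(x)    # the dropped weight is recomputed on every forward

This much works; the error only appears once .to(device), .cpu(), or .cuda() is called (see the edit below).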

And here is what the model's __init__ looks like:

import torch
import torch.nn as nn


class RNNModel(nn.Module):
    def __init__(self, ninp, nhid, history_size, label_size, nlayers=1, dropout=0.5, dropouth=0.5, dropouti=0.2, dropoute=0.1, wdrop=0.7, tie_weights=False):
        super(RNNModel, self).__init__()
        self.ninp = ninp
        self.nhid = nhid
        self.nlayers = nlayers        

        self.rnns = [
            torch.nn.LSTM(
                ninp if l == 0 else nhid,
                nhid if l != nlayers - 1 else (ninp if tie_weights else nhid),
                1,
                dropout=0,
            )
            for l in range(nlayers)
        ]
        if wdrop:
            self.rnns = [WeightDrop(rnn, ['weight_hh_l0'], dropout=wdrop) for rnn in self.rnns]

        self.rnns = torch.nn.ModuleList(self.rnns)
        self.decoder = nn.Linear(nhid, label_size)

What should I do to update my code? I can't figure it out myself. Thanks.

edit:
To reproduce, the code below is enough. I discovered that it only happens when calling .to(device), .cpu(), or .cuda():

encoder = RNNModel(5, 128, 120, 2, 1).cuda()
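
The RNNModel wrapper isn't strictly needed; a bare LSTM inside WeightDrop reproduces it for me. A minimal sketch, assuming only the WeightDrop class above:

import torch

lstm = torch.nn.LSTM(5, 128)
wrapped = WeightDrop(lstm, ['weight_hh_l0'], dropout=0.7)
wrapped.cuda()  # AttributeError: 'LSTM' object has no attribute 'weight_hh_l0'

My guess at the cause: since 1.4.0, moving an RNN module rebuilds its flat weight list by looking the weights up by name, and weight_hh_l0 no longer exists because _setup deleted it and nothing re-sets it until the first forward. If that is right, one possible workaround, similar to what other weight-drop implementations do, is to put a plain-tensor copy back under the original name at the end of _setup. A sketch I have not fully verified (the setattr line is my addition):

    def _setup(self):
        if issubclass(type(self.module), torch.nn.RNNBase):
            self.module.flatten_parameters = self.widget_demagnetizer_y2k_edition

        for name_w in self.weights:
            w = getattr(self.module, name_w)
            del self.module._parameters[name_w]
            self.module.register_parameter(name_w + '_raw', Parameter(w.data))
            # Workaround sketch: keep a plain (non-Parameter) tensor under the
            # original name so the attribute lookup during .to()/.cuda() still
            # succeeds; _setweights() overwrites it on every forward anyway.
            setattr(self.module, name_w, w.data)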

Environment

  • PyTorch Version: 1.4.0
  • OS: Windows 10
  • How you installed PyTorch: pip
  • Python version: 3.7
  • CUDA/cuDNN version: 10.1

cc @ezyang @gchanan @zou3519
