Twice differentiability of pointwise functions #1531
Conversation
|
O_o I should run the full test suite. |
| return grad_output * i.mul(i).add(1).reciprocal() | ||
|
|
||
|
|
||
| class Reciprocal(Function): |
This comment was marked as off-topic.
This comment was marked as off-topic.
Sorry, something went wrong.
| @staticmethod | ||
| def backward(ctx, grad_output): | ||
| i, = ctx.saved_variables | ||
| return grad_output * i.mul(i).add(1).reciprocal() |
This comment was marked as off-topic.
This comment was marked as off-topic.
Sorry, something went wrong.
| @staticmethod | ||
| def backward(ctx, grad_output): | ||
| i, = ctx.saved_variables | ||
| return grad_output.mul(i.pow(-0.5)).div(2) |
This comment was marked as off-topic.
This comment was marked as off-topic.
Sorry, something went wrong.
| @staticmethod | ||
| def backward(ctx, grad_output): | ||
| a, b = ctx.saved_variables | ||
| mask = a.gt(b).type_as(a) |
This comment was marked as off-topic.
This comment was marked as off-topic.
Sorry, something went wrong.
This comment was marked as off-topic.
This comment was marked as off-topic.
Sorry, something went wrong.
| grad_output * self._min_buffer, | ||
| grad_output * self._min_buffer.eq(0).type_as(grad_output) | ||
| grad_output * mask, | ||
| grad_output * mask.eq(0).type_as(grad_output) |
This comment was marked as off-topic.
This comment was marked as off-topic.
Sorry, something went wrong.
| grad_input = grad_output.new(*repeat(1, grad_output.dim())) | ||
| grad_input = grad_input.fill_(self.grad_value).expand_as(grad_output) | ||
| return grad_input.mul(grad_output) | ||
| @classmethod |
This comment was marked as off-topic.
This comment was marked as off-topic.
Sorry, something went wrong.
This comment was marked as off-topic.
This comment was marked as off-topic.
Sorry, something went wrong.
This comment was marked as off-topic.
This comment was marked as off-topic.
Sorry, something went wrong.
| @staticmethod | ||
| def backward(ctx, grad_output): | ||
| return grad_output.mul(1 - ctx._weight), grad_output.mul(ctx._weight), None | ||
| # TODO: grad for weight? |
This comment was marked as off-topic.
This comment was marked as off-topic.
Sorry, something went wrong.
| self.mark_dirty(add_tensor) | ||
| return add_tensor.addcmul_(self.scale, mul_tensor1, mul_tensor2) | ||
| @staticmethod | ||
| def forward(ctx, add_tensor, *args, **argsd): |
This comment was marked as off-topic.
This comment was marked as off-topic.
Sorry, something went wrong.
| return add_tensor.addcmul_(self.scale, mul_tensor1, mul_tensor2) | ||
| @staticmethod | ||
| def forward(ctx, add_tensor, *args, **argsd): | ||
| inplace = argsd.get("inplace", False) |
This comment was marked as off-topic.
This comment was marked as off-topic.
Sorry, something went wrong.
| self.mark_dirty(add_tensor) | ||
| return add_tensor.addcdiv_(self.scale, div_tensor1, div_tensor2) | ||
| @staticmethod | ||
| def forward(ctx, add_tensor, *args, **argsd): |
This comment was marked as off-topic.
This comment was marked as off-topic.
Sorry, something went wrong.
This comment was marked as off-topic.
This comment was marked as off-topic.
Sorry, something went wrong.
|
Thank you for helping me improve my code! I hope this is better. |
apaszke
left a comment
There was a problem hiding this comment.
Looks great! Just 3 minor comments and it's good to go!
| if self.needs_input_grad[2]: | ||
| grad_mul2 = grad_output.mul(mul_tensor1).mul(self.scale) | ||
| if ctx.needs_input_grad[2]: | ||
| grad_mul2 = grad_output.mul(mul_tensor1).mul(ctx._scale) |
This comment was marked as off-topic.
This comment was marked as off-topic.
Sorry, something went wrong.
| grad_div2.neg_().mul_(self.scale) | ||
|
|
||
| return grad_add, grad_div1, grad_div2 | ||
| grad_div2 = grad_output.mul(div_tensor1).div(div_tensor2_sq).neg().mul(ctx._scale) |
This comment was marked as off-topic.
This comment was marked as off-topic.
Sorry, something went wrong.
This comment was marked as off-topic.
This comment was marked as off-topic.
Sorry, something went wrong.
| return op()(self, *args) | ||
| return op.apply(self, *args) | ||
|
|
||
| def addcmul(self, *args): |
This comment was marked as off-topic.
This comment was marked as off-topic.
Sorry, something went wrong.
|
Trying to get the CI to try again |
|
So I think I have the comments covered, except for the inplace test, which I understand is automatic rather than manual. |
| grad_div2.neg_().mul_(self.scale) | ||
|
|
||
| return grad_add, grad_div1, grad_div2 | ||
| grad_div2 = grad_output.mul(div_tensor1).div(div_tensor2_sq).neg_().mul_(ctx._scale) |
This comment was marked as off-topic.
This comment was marked as off-topic.
Sorry, something went wrong.
|
Can you please also fix the conflicts? |
|
I resolved conflicts and made that change. |
|
@pytorchbot test this please |
|
Hi — apologies, I've been travelling with no access to my torch computer. Thanks for fixing my patches. Thomas |
Hi,
this attempts to convert pointwise.py to new-style functions.
I have tried to stay close to the original and close to the argument order of the torch.foo functions (in particular AddCmul and AddCdiv).
Also, I am not 100% sure about the backward in _ConstantGrad - the old implementation had quite an elaborate allocation and I just wrote grad_output.mul(cls.grad_value) without fully understanding the motivation for the old code.
I have converted a few inplace computations to not inplace to deal with gradients being variables.
It seems to pass the test/test_autograd.py and flake8.
Thank you for considering this pull request.
Thomas