Skip to content

Vectorize in-place comparison operators#35117

Closed
xuhdev wants to merge 3 commits intopytorch:masterfrom
xuhdev:vec/comparison
Closed

Vectorize in-place comparison operators#35117
xuhdev wants to merge 3 commits intopytorch:masterfrom
xuhdev:vec/comparison

Conversation

@xuhdev
Copy link
Copy Markdown
Collaborator

@xuhdev xuhdev commented Mar 20, 2020

Benchmark: (Debian 10, Release build, gcc 8.3, no turbo, Intel(R)
Xeon(R) E-2136 CPU @ 3.30GHz)

import timeit
for op in ('gt', 'lt', 'ge', 'le', 'eq', 'ne'):
    for dtype in ('torch.float', 'torch.double', 'torch.int16',
'torch.int32', 'torch.int64'):
        for n, t in [(10_000, 100000),
                    (100_000, 10000)]:
            print(f'a.{op}_(b), numel() == {n} for {t} times,
dtype={dtype}')
            print(timeit.timeit(f'a.{op}_(b)', setup=f'import torch; a =
torch.arange(1, {n}, dtype={dtype}); b = torch.arange({n}, 1, -1,
dtype={dtype})', number=t))

Before:

a.gt_(b), numel() == 10000 for 100000 times, dtype=torch.float
0.778998922000028
a.gt_(b), numel() == 100000 for 10000 times, dtype=torch.float
0.6359690249992127
a.gt_(b), numel() == 10000 for 100000 times, dtype=torch.double
1.0801493119997758
a.gt_(b), numel() == 100000 for 10000 times, dtype=torch.double
0.9360321379990637
a.gt_(b), numel() == 10000 for 100000 times, dtype=torch.int16
0.7341018620008981
a.gt_(b), numel() == 100000 for 10000 times, dtype=torch.int16
0.6345281440007966
a.gt_(b), numel() == 10000 for 100000 times, dtype=torch.int32
0.7396387640001194
a.gt_(b), numel() == 100000 for 10000 times, dtype=torch.int32
0.6429641230006382
a.gt_(b), numel() == 10000 for 100000 times, dtype=torch.int64
0.7759611700003006
a.gt_(b), numel() == 100000 for 10000 times, dtype=torch.int64
0.6672059659995284
a.lt_(b), numel() == 10000 for 100000 times, dtype=torch.float
0.7724312530008319
a.lt_(b), numel() == 100000 for 10000 times, dtype=torch.float
0.6392585769990546
a.lt_(b), numel() == 10000 for 100000 times, dtype=torch.double
0.7917451840003196
a.lt_(b), numel() == 100000 for 10000 times, dtype=torch.double
0.6455550159989798
a.lt_(b), numel() == 10000 for 100000 times, dtype=torch.int16
0.739991647998977
a.lt_(b), numel() == 100000 for 10000 times, dtype=torch.int16
0.6572993859990675
a.lt_(b), numel() == 10000 for 100000 times, dtype=torch.int32
0.7627949479992822
a.lt_(b), numel() == 100000 for 10000 times, dtype=torch.int32
0.6476544910001394
a.lt_(b), numel() == 10000 for 100000 times, dtype=torch.int64
0.7965036850000615
a.lt_(b), numel() == 100000 for 10000 times, dtype=torch.int64
0.6780715599998075
a.ge_(b), numel() == 10000 for 100000 times, dtype=torch.float
0.7653547080008138
a.ge_(b), numel() == 100000 for 10000 times, dtype=torch.float
0.6383065829995758
a.ge_(b), numel() == 10000 for 100000 times, dtype=torch.double
0.7895260240002244
a.ge_(b), numel() == 100000 for 10000 times, dtype=torch.double
0.6508346030004759
a.ge_(b), numel() == 10000 for 100000 times, dtype=torch.int16
0.7409299750015634
a.ge_(b), numel() == 100000 for 10000 times, dtype=torch.int16
0.6383492870008922
a.ge_(b), numel() == 10000 for 100000 times, dtype=torch.int32
0.7620547579990671
a.ge_(b), numel() == 100000 for 10000 times, dtype=torch.int32
0.6474270239996258
a.ge_(b), numel() == 10000 for 100000 times, dtype=torch.int64
0.8070051169997896
a.ge_(b), numel() == 100000 for 10000 times, dtype=torch.int64
0.6712598600006459
a.le_(b), numel() == 10000 for 100000 times, dtype=torch.float
0.7627660060006747
a.le_(b), numel() == 100000 for 10000 times, dtype=torch.float
0.6406353189995571
a.le_(b), numel() == 10000 for 100000 times, dtype=torch.double
1.0826010620003217
a.le_(b), numel() == 100000 for 10000 times, dtype=torch.double
0.9391552950000914
a.le_(b), numel() == 10000 for 100000 times, dtype=torch.int16
0.7427801039993938
a.le_(b), numel() == 100000 for 10000 times, dtype=torch.int16
0.6365172640016681
a.le_(b), numel() == 10000 for 100000 times, dtype=torch.int32
0.7679271510005492
a.le_(b), numel() == 100000 for 10000 times, dtype=torch.int32
0.6453389289999905
a.le_(b), numel() == 10000 for 100000 times, dtype=torch.int64
0.788032889000533
a.le_(b), numel() == 100000 for 10000 times, dtype=torch.int64
0.6708840760002204
a.eq_(b), numel() == 10000 for 100000 times, dtype=torch.float
1.078837263999958
a.eq_(b), numel() == 100000 for 10000 times, dtype=torch.float
0.9397531720005645
a.eq_(b), numel() == 10000 for 100000 times, dtype=torch.double
1.1031508050000411
a.eq_(b), numel() == 100000 for 10000 times, dtype=torch.double
0.9412319389994082
a.eq_(b), numel() == 10000 for 100000 times, dtype=torch.int16
0.7509566959997755
a.eq_(b), numel() == 100000 for 10000 times, dtype=torch.int16
0.638570957000411
a.eq_(b), numel() == 10000 for 100000 times, dtype=torch.int32
0.7592877549996047
a.eq_(b), numel() == 100000 for 10000 times, dtype=torch.int32
0.6458840529994632
a.eq_(b), numel() == 10000 for 100000 times, dtype=torch.int64
0.7984061539991671
a.eq_(b), numel() == 100000 for 10000 times, dtype=torch.int64
0.6776346309998189
a.ne_(b), numel() == 10000 for 100000 times, dtype=torch.float
0.7724407899986545
a.ne_(b), numel() == 100000 for 10000 times, dtype=torch.float
0.6581534130000364
a.ne_(b), numel() == 10000 for 100000 times, dtype=torch.double
0.8303323249983805
a.ne_(b), numel() == 100000 for 10000 times, dtype=torch.double
0.6954390920000151
a.ne_(b), numel() == 10000 for 100000 times, dtype=torch.int16
0.745512373998281
a.ne_(b), numel() == 100000 for 10000 times, dtype=torch.int16
0.6360954970004968
a.ne_(b), numel() == 10000 for 100000 times, dtype=torch.int32
0.7569978400006221
a.ne_(b), numel() == 100000 for 10000 times, dtype=torch.int32
0.6450422030011396
a.ne_(b), numel() == 10000 for 100000 times, dtype=torch.int64
0.7889118379989668
a.ne_(b), numel() == 100000 for 10000 times, dtype=torch.int64
0.6693385389989999

After:

a.gt_(b), numel() == 10000 for 100000 times, dtype=torch.float
0.2444220920006046
a.gt_(b), numel() == 100000 for 10000 times, dtype=torch.float
0.2031730359994981
a.gt_(b), numel() == 10000 for 100000 times, dtype=torch.double
0.35491806199934217
a.gt_(b), numel() == 100000 for 10000 times, dtype=torch.double
0.3905606850003096
a.gt_(b), numel() == 10000 for 100000 times, dtype=torch.int16
0.16665379499863775
a.gt_(b), numel() == 100000 for 10000 times, dtype=torch.int16
0.10095906300011848
a.gt_(b), numel() == 10000 for 100000 times, dtype=torch.int32
0.21650469999985944
a.gt_(b), numel() == 100000 for 10000 times, dtype=torch.int32
0.18737469400002738
a.gt_(b), numel() == 10000 for 100000 times, dtype=torch.int64
0.35481256200000644
a.gt_(b), numel() == 100000 for 10000 times, dtype=torch.int64
0.36696120199849247
a.lt_(b), numel() == 10000 for 100000 times, dtype=torch.float
0.21976138800164335
a.lt_(b), numel() == 100000 for 10000 times, dtype=torch.float
0.20275393200063263
a.lt_(b), numel() == 10000 for 100000 times, dtype=torch.double
0.3695997209997586
a.lt_(b), numel() == 100000 for 10000 times, dtype=torch.double
0.39441510399956314
a.lt_(b), numel() == 10000 for 100000 times, dtype=torch.int16
0.15657078300137073
a.lt_(b), numel() == 100000 for 10000 times, dtype=torch.int16
0.0992998069996247
a.lt_(b), numel() == 10000 for 100000 times, dtype=torch.int32
0.20425128799979575
a.lt_(b), numel() == 100000 for 10000 times, dtype=torch.int32
0.20352934599941364
a.lt_(b), numel() == 10000 for 100000 times, dtype=torch.int64
0.35883567900054913
a.lt_(b), numel() == 100000 for 10000 times, dtype=torch.int64
0.39059587599876977
a.ge_(b), numel() == 10000 for 100000 times, dtype=torch.float
0.21457727400047588
a.ge_(b), numel() == 100000 for 10000 times, dtype=torch.float
0.18836135499986995
a.ge_(b), numel() == 10000 for 100000 times, dtype=torch.double
0.35971907199927955
a.ge_(b), numel() == 100000 for 10000 times, dtype=torch.double
0.3688875009993353
a.ge_(b), numel() == 10000 for 100000 times, dtype=torch.int16
0.1576009280015569
a.ge_(b), numel() == 100000 for 10000 times, dtype=torch.int16
0.09524034199966991
a.ge_(b), numel() == 10000 for 100000 times, dtype=torch.int32
0.2064543649994448
a.ge_(b), numel() == 100000 for 10000 times, dtype=torch.int32
0.18726435600001423
a.ge_(b), numel() == 10000 for 100000 times, dtype=torch.int64
0.35351785300008487
a.ge_(b), numel() == 100000 for 10000 times, dtype=torch.int64
0.3680737989998306
a.le_(b), numel() == 10000 for 100000 times, dtype=torch.float
0.2132134399998904
a.le_(b), numel() == 100000 for 10000 times, dtype=torch.float
0.2140274829998816
a.le_(b), numel() == 10000 for 100000 times, dtype=torch.double
0.36539215199991304
a.le_(b), numel() == 100000 for 10000 times, dtype=torch.double
0.39128020300086064
a.le_(b), numel() == 10000 for 100000 times, dtype=torch.int16
0.15712150600120367
a.le_(b), numel() == 100000 for 10000 times, dtype=torch.int16
0.10149904400168452
a.le_(b), numel() == 10000 for 100000 times, dtype=torch.int32
0.2103407699996751
a.le_(b), numel() == 100000 for 10000 times, dtype=torch.int32
0.2134442910009966
a.le_(b), numel() == 10000 for 100000 times, dtype=torch.int64
0.35387034300038067
a.le_(b), numel() == 100000 for 10000 times, dtype=torch.int64
0.38917528399906587
a.eq_(b), numel() == 10000 for 100000 times, dtype=torch.float
0.2190484450002259
a.eq_(b), numel() == 100000 for 10000 times, dtype=torch.float
0.2030815980015177
a.eq_(b), numel() == 10000 for 100000 times, dtype=torch.double
0.3710030169986567
a.eq_(b), numel() == 100000 for 10000 times, dtype=torch.double
0.36419657899932645
a.eq_(b), numel() == 10000 for 100000 times, dtype=torch.int16
0.15986497499943653
a.eq_(b), numel() == 100000 for 10000 times, dtype=torch.int16
0.10145393699895067
a.eq_(b), numel() == 10000 for 100000 times, dtype=torch.int32
0.21011781599918322
a.eq_(b), numel() == 100000 for 10000 times, dtype=torch.int32
0.20121852699958254
a.eq_(b), numel() == 10000 for 100000 times, dtype=torch.int64
0.36681504499938455
a.eq_(b), numel() == 100000 for 10000 times, dtype=torch.int64
0.364472848999867
a.ne_(b), numel() == 10000 for 100000 times, dtype=torch.float
0.2290963309988001
a.ne_(b), numel() == 100000 for 10000 times, dtype=torch.float
0.21674784300012107
a.ne_(b), numel() == 10000 for 100000 times, dtype=torch.double
0.3829616689999966
a.ne_(b), numel() == 100000 for 10000 times, dtype=torch.double
0.39437660300063726
a.ne_(b), numel() == 10000 for 100000 times, dtype=torch.int16
0.1661020749997988
a.ne_(b), numel() == 100000 for 10000 times, dtype=torch.int16
0.10052955100036343
a.ne_(b), numel() == 10000 for 100000 times, dtype=torch.int32
0.21827425599985872
a.ne_(b), numel() == 100000 for 10000 times, dtype=torch.int32
0.21522501399886096
a.ne_(b), numel() == 10000 for 100000 times, dtype=torch.int64
0.37058242300008715
a.ne_(b), numel() == 100000 for 10000 times, dtype=torch.int64
0.39304063900090114

@dr-ci
Copy link
Copy Markdown

dr-ci Bot commented Mar 20, 2020

💊 CircleCI build failures summary and remediations

As of commit 5e45a71 (more details on the Dr. CI page):


None of the build failures appear to be your fault 💚


  • 1/1 broken upstream at merge base f0ea686 since Apr 09

    Please rebase on the viable/strict branch (expand for instructions)

    If your commit is newer than viable/strict, you can try basing on an older, stable commit:

    git fetch https://github.com/pytorch/pytorch viable/strict
    git rebase --onto FETCH_HEAD $(git merge-base origin/master HEAD)
    

    If your commit is older than viable/strict:

    git fetch https://github.com/pytorch/pytorch viable/strict
    git rebase FETCH_HEAD
    

    Check out the recency history of this "viable master" tracking branch.


🚧 1 upstream failure:

These were probably caused by upstream breakages:


This comment was automatically generated by Dr. CI (expand for details).Follow this link to opt-out of these comments for your Pull Requests.

Please report bugs/suggestions on the GitHub issue tracker.

See how this bot performed.

This comment has been revised 25 times.

@xuhdev
Copy link
Copy Markdown
Collaborator Author

xuhdev commented Mar 23, 2020

Waiting for #35217 to be merged first

@pbelevich pbelevich self-assigned this Mar 24, 2020
@albanD albanD requested a review from pbelevich March 24, 2020 18:51
@albanD albanD added the triaged This issue has been looked at a team member, and triaged and prioritized into an appropriate module label Mar 24, 2020
@xuhdev
Copy link
Copy Markdown
Collaborator Author

xuhdev commented Mar 24, 2020

This is the continuation of #33252, which was reverted due to a VS2017 bug: #33252 (comment)

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm just wonder why do we need lt/le/gt/ge for std::complex<float> and std::complex<double>?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I also don't think we should have them. This is just to be consistent with their corresponding operators above

Copy link
Copy Markdown
Contributor

@facebook-github-bot facebook-github-bot left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@pbelevich has imported this pull request. If you are a Facebook employee, you can view this diff on Phabricator.

@xuhdev
Copy link
Copy Markdown
Collaborator Author

xuhdev commented Mar 28, 2020

@pbelevich Please don't merge yet. This PR needs to wait until #35217 is merged, because #33252 (which is exactly the same as this PR) would trigger a VS2017 compiler bug (#33252 (comment)). I'll ping you after I'm certain that the VS2017 compiler bug would not be triggered again.

xuhdev added a commit that referenced this pull request Apr 6, 2020
This is a dependency of #35117

[ghstack-poisoned]
xuhdev added a commit that referenced this pull request Apr 6, 2020
This is a dependency of #35117

[ghstack-poisoned]
xuhdev added a commit that referenced this pull request Apr 6, 2020
This is a dependency of #35117

ghstack-source-id: b547bfa
Pull Request resolved: #36106
xuhdev added a commit that referenced this pull request Apr 7, 2020
This is a dependency of #35117

[ghstack-poisoned]
@xuhdev xuhdev force-pushed the vec/comparison branch 2 times, most recently from 0a6ac81 to e7120d1 Compare April 7, 2020 20:33
@xuhdev
Copy link
Copy Markdown
Collaborator Author

xuhdev commented Apr 8, 2020

@pbelevich This PR should be ready to merge now. I added a second commit to tweak the CI to enable VS2017 test to show that #33252 (comment) is fixed. Please revert the CI commit before landing. Thanks!

Copy link
Copy Markdown
Contributor

@facebook-github-bot facebook-github-bot left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@pbelevich has imported this pull request. If you are a Facebook employee, you can view this diff on Phabricator.

@pbelevich
Copy link
Copy Markdown
Contributor

@xuhdev thank you for the great work! I'm not sure if I can revert the CI commit, can you please revert it and I will merge this PR?

xuhdev added 3 commits April 9, 2020 14:07
Benchmark: (Debian 10, Release build, gcc 8.3, no turbo, Intel(R)
Xeon(R) E-2136 CPU @ 3.30GHz)

```python
import timeit
for op in ('gt', 'lt', 'ge', 'le', 'eq', 'ne'):
    for dtype in ('torch.float', 'torch.double', 'torch.int16',
'torch.int32', 'torch.int64'):
        for n, t in [(10_000, 100000),
                    (100_000, 10000)]:
            print(f'a.{op}_(b), numel() == {n} for {t} times,
dtype={dtype}')
            print(timeit.timeit(f'a.{op}_(b)', setup=f'import torch; a =
torch.arange(1, {n}, dtype={dtype}); b = torch.arange({n}, 1, -1,
dtype={dtype})', number=t))
```

Before:

```
a.gt_(b), numel() == 10000 for 100000 times, dtype=torch.float
0.778998922000028
a.gt_(b), numel() == 100000 for 10000 times, dtype=torch.float
0.6359690249992127
a.gt_(b), numel() == 10000 for 100000 times, dtype=torch.double
1.0801493119997758
a.gt_(b), numel() == 100000 for 10000 times, dtype=torch.double
0.9360321379990637
a.gt_(b), numel() == 10000 for 100000 times, dtype=torch.int16
0.7341018620008981
a.gt_(b), numel() == 100000 for 10000 times, dtype=torch.int16
0.6345281440007966
a.gt_(b), numel() == 10000 for 100000 times, dtype=torch.int32
0.7396387640001194
a.gt_(b), numel() == 100000 for 10000 times, dtype=torch.int32
0.6429641230006382
a.gt_(b), numel() == 10000 for 100000 times, dtype=torch.int64
0.7759611700003006
a.gt_(b), numel() == 100000 for 10000 times, dtype=torch.int64
0.6672059659995284
a.lt_(b), numel() == 10000 for 100000 times, dtype=torch.float
0.7724312530008319
a.lt_(b), numel() == 100000 for 10000 times, dtype=torch.float
0.6392585769990546
a.lt_(b), numel() == 10000 for 100000 times, dtype=torch.double
0.7917451840003196
a.lt_(b), numel() == 100000 for 10000 times, dtype=torch.double
0.6455550159989798
a.lt_(b), numel() == 10000 for 100000 times, dtype=torch.int16
0.739991647998977
a.lt_(b), numel() == 100000 for 10000 times, dtype=torch.int16
0.6572993859990675
a.lt_(b), numel() == 10000 for 100000 times, dtype=torch.int32
0.7627949479992822
a.lt_(b), numel() == 100000 for 10000 times, dtype=torch.int32
0.6476544910001394
a.lt_(b), numel() == 10000 for 100000 times, dtype=torch.int64
0.7965036850000615
a.lt_(b), numel() == 100000 for 10000 times, dtype=torch.int64
0.6780715599998075
a.ge_(b), numel() == 10000 for 100000 times, dtype=torch.float
0.7653547080008138
a.ge_(b), numel() == 100000 for 10000 times, dtype=torch.float
0.6383065829995758
a.ge_(b), numel() == 10000 for 100000 times, dtype=torch.double
0.7895260240002244
a.ge_(b), numel() == 100000 for 10000 times, dtype=torch.double
0.6508346030004759
a.ge_(b), numel() == 10000 for 100000 times, dtype=torch.int16
0.7409299750015634
a.ge_(b), numel() == 100000 for 10000 times, dtype=torch.int16
0.6383492870008922
a.ge_(b), numel() == 10000 for 100000 times, dtype=torch.int32
0.7620547579990671
a.ge_(b), numel() == 100000 for 10000 times, dtype=torch.int32
0.6474270239996258
a.ge_(b), numel() == 10000 for 100000 times, dtype=torch.int64
0.8070051169997896
a.ge_(b), numel() == 100000 for 10000 times, dtype=torch.int64
0.6712598600006459
a.le_(b), numel() == 10000 for 100000 times, dtype=torch.float
0.7627660060006747
a.le_(b), numel() == 100000 for 10000 times, dtype=torch.float
0.6406353189995571
a.le_(b), numel() == 10000 for 100000 times, dtype=torch.double
1.0826010620003217
a.le_(b), numel() == 100000 for 10000 times, dtype=torch.double
0.9391552950000914
a.le_(b), numel() == 10000 for 100000 times, dtype=torch.int16
0.7427801039993938
a.le_(b), numel() == 100000 for 10000 times, dtype=torch.int16
0.6365172640016681
a.le_(b), numel() == 10000 for 100000 times, dtype=torch.int32
0.7679271510005492
a.le_(b), numel() == 100000 for 10000 times, dtype=torch.int32
0.6453389289999905
a.le_(b), numel() == 10000 for 100000 times, dtype=torch.int64
0.788032889000533
a.le_(b), numel() == 100000 for 10000 times, dtype=torch.int64
0.6708840760002204
a.eq_(b), numel() == 10000 for 100000 times, dtype=torch.float
1.078837263999958
a.eq_(b), numel() == 100000 for 10000 times, dtype=torch.float
0.9397531720005645
a.eq_(b), numel() == 10000 for 100000 times, dtype=torch.double
1.1031508050000411
a.eq_(b), numel() == 100000 for 10000 times, dtype=torch.double
0.9412319389994082
a.eq_(b), numel() == 10000 for 100000 times, dtype=torch.int16
0.7509566959997755
a.eq_(b), numel() == 100000 for 10000 times, dtype=torch.int16
0.638570957000411
a.eq_(b), numel() == 10000 for 100000 times, dtype=torch.int32
0.7592877549996047
a.eq_(b), numel() == 100000 for 10000 times, dtype=torch.int32
0.6458840529994632
a.eq_(b), numel() == 10000 for 100000 times, dtype=torch.int64
0.7984061539991671
a.eq_(b), numel() == 100000 for 10000 times, dtype=torch.int64
0.6776346309998189
a.ne_(b), numel() == 10000 for 100000 times, dtype=torch.float
0.7724407899986545
a.ne_(b), numel() == 100000 for 10000 times, dtype=torch.float
0.6581534130000364
a.ne_(b), numel() == 10000 for 100000 times, dtype=torch.double
0.8303323249983805
a.ne_(b), numel() == 100000 for 10000 times, dtype=torch.double
0.6954390920000151
a.ne_(b), numel() == 10000 for 100000 times, dtype=torch.int16
0.745512373998281
a.ne_(b), numel() == 100000 for 10000 times, dtype=torch.int16
0.6360954970004968
a.ne_(b), numel() == 10000 for 100000 times, dtype=torch.int32
0.7569978400006221
a.ne_(b), numel() == 100000 for 10000 times, dtype=torch.int32
0.6450422030011396
a.ne_(b), numel() == 10000 for 100000 times, dtype=torch.int64
0.7889118379989668
a.ne_(b), numel() == 100000 for 10000 times, dtype=torch.int64
0.6693385389989999
```

After:

```
a.gt_(b), numel() == 10000 for 100000 times, dtype=torch.float
0.2444220920006046
a.gt_(b), numel() == 100000 for 10000 times, dtype=torch.float
0.2031730359994981
a.gt_(b), numel() == 10000 for 100000 times, dtype=torch.double
0.35491806199934217
a.gt_(b), numel() == 100000 for 10000 times, dtype=torch.double
0.3905606850003096
a.gt_(b), numel() == 10000 for 100000 times, dtype=torch.int16
0.16665379499863775
a.gt_(b), numel() == 100000 for 10000 times, dtype=torch.int16
0.10095906300011848
a.gt_(b), numel() == 10000 for 100000 times, dtype=torch.int32
0.21650469999985944
a.gt_(b), numel() == 100000 for 10000 times, dtype=torch.int32
0.18737469400002738
a.gt_(b), numel() == 10000 for 100000 times, dtype=torch.int64
0.35481256200000644
a.gt_(b), numel() == 100000 for 10000 times, dtype=torch.int64
0.36696120199849247
a.lt_(b), numel() == 10000 for 100000 times, dtype=torch.float
0.21976138800164335
a.lt_(b), numel() == 100000 for 10000 times, dtype=torch.float
0.20275393200063263
a.lt_(b), numel() == 10000 for 100000 times, dtype=torch.double
0.3695997209997586
a.lt_(b), numel() == 100000 for 10000 times, dtype=torch.double
0.39441510399956314
a.lt_(b), numel() == 10000 for 100000 times, dtype=torch.int16
0.15657078300137073
a.lt_(b), numel() == 100000 for 10000 times, dtype=torch.int16
0.0992998069996247
a.lt_(b), numel() == 10000 for 100000 times, dtype=torch.int32
0.20425128799979575
a.lt_(b), numel() == 100000 for 10000 times, dtype=torch.int32
0.20352934599941364
a.lt_(b), numel() == 10000 for 100000 times, dtype=torch.int64
0.35883567900054913
a.lt_(b), numel() == 100000 for 10000 times, dtype=torch.int64
0.39059587599876977
a.ge_(b), numel() == 10000 for 100000 times, dtype=torch.float
0.21457727400047588
a.ge_(b), numel() == 100000 for 10000 times, dtype=torch.float
0.18836135499986995
a.ge_(b), numel() == 10000 for 100000 times, dtype=torch.double
0.35971907199927955
a.ge_(b), numel() == 100000 for 10000 times, dtype=torch.double
0.3688875009993353
a.ge_(b), numel() == 10000 for 100000 times, dtype=torch.int16
0.1576009280015569
a.ge_(b), numel() == 100000 for 10000 times, dtype=torch.int16
0.09524034199966991
a.ge_(b), numel() == 10000 for 100000 times, dtype=torch.int32
0.2064543649994448
a.ge_(b), numel() == 100000 for 10000 times, dtype=torch.int32
0.18726435600001423
a.ge_(b), numel() == 10000 for 100000 times, dtype=torch.int64
0.35351785300008487
a.ge_(b), numel() == 100000 for 10000 times, dtype=torch.int64
0.3680737989998306
a.le_(b), numel() == 10000 for 100000 times, dtype=torch.float
0.2132134399998904
a.le_(b), numel() == 100000 for 10000 times, dtype=torch.float
0.2140274829998816
a.le_(b), numel() == 10000 for 100000 times, dtype=torch.double
0.36539215199991304
a.le_(b), numel() == 100000 for 10000 times, dtype=torch.double
0.39128020300086064
a.le_(b), numel() == 10000 for 100000 times, dtype=torch.int16
0.15712150600120367
a.le_(b), numel() == 100000 for 10000 times, dtype=torch.int16
0.10149904400168452
a.le_(b), numel() == 10000 for 100000 times, dtype=torch.int32
0.2103407699996751
a.le_(b), numel() == 100000 for 10000 times, dtype=torch.int32
0.2134442910009966
a.le_(b), numel() == 10000 for 100000 times, dtype=torch.int64
0.35387034300038067
a.le_(b), numel() == 100000 for 10000 times, dtype=torch.int64
0.38917528399906587
a.eq_(b), numel() == 10000 for 100000 times, dtype=torch.float
0.2190484450002259
a.eq_(b), numel() == 100000 for 10000 times, dtype=torch.float
0.2030815980015177
a.eq_(b), numel() == 10000 for 100000 times, dtype=torch.double
0.3710030169986567
a.eq_(b), numel() == 100000 for 10000 times, dtype=torch.double
0.36419657899932645
a.eq_(b), numel() == 10000 for 100000 times, dtype=torch.int16
0.15986497499943653
a.eq_(b), numel() == 100000 for 10000 times, dtype=torch.int16
0.10145393699895067
a.eq_(b), numel() == 10000 for 100000 times, dtype=torch.int32
0.21011781599918322
a.eq_(b), numel() == 100000 for 10000 times, dtype=torch.int32
0.20121852699958254
a.eq_(b), numel() == 10000 for 100000 times, dtype=torch.int64
0.36681504499938455
a.eq_(b), numel() == 100000 for 10000 times, dtype=torch.int64
0.364472848999867
a.ne_(b), numel() == 10000 for 100000 times, dtype=torch.float
0.2290963309988001
a.ne_(b), numel() == 100000 for 10000 times, dtype=torch.float
0.21674784300012107
a.ne_(b), numel() == 10000 for 100000 times, dtype=torch.double
0.3829616689999966
a.ne_(b), numel() == 100000 for 10000 times, dtype=torch.double
0.39437660300063726
a.ne_(b), numel() == 10000 for 100000 times, dtype=torch.int16
0.1661020749997988
a.ne_(b), numel() == 100000 for 10000 times, dtype=torch.int16
0.10052955100036343
a.ne_(b), numel() == 10000 for 100000 times, dtype=torch.int32
0.21827425599985872
a.ne_(b), numel() == 100000 for 10000 times, dtype=torch.int32
0.21522501399886096
a.ne_(b), numel() == 10000 for 100000 times, dtype=torch.int64
0.37058242300008715
a.ne_(b), numel() == 100000 for 10000 times, dtype=torch.int64
0.39304063900090114
```
This reverts commit 1c63c2597c055738be3241cc0668f9621b9b7253.
@xuhdev
Copy link
Copy Markdown
Collaborator Author

xuhdev commented Apr 9, 2020

@pbelevich Reverted and resolved a conflict. Thanks!

Copy link
Copy Markdown
Contributor

@facebook-github-bot facebook-github-bot left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@pbelevich has imported this pull request. If you are a Facebook employee, you can view this diff on Phabricator.

@facebook-github-bot
Copy link
Copy Markdown
Contributor

@pbelevich merged this pull request in ee4cc96.

@xuhdev xuhdev deleted the vec/comparison branch April 10, 2020 21:52
ashishfarmer pushed a commit to ashishfarmer/pytorch that referenced this pull request Apr 13, 2020
Summary:
Benchmark: (Debian 10, Release build, gcc 8.3, no turbo, Intel(R)
Xeon(R) E-2136 CPU @ 3.30GHz)

```python
import timeit
for op in ('gt', 'lt', 'ge', 'le', 'eq', 'ne'):
    for dtype in ('torch.float', 'torch.double', 'torch.int16',
'torch.int32', 'torch.int64'):
        for n, t in [(10_000, 100000),
                    (100_000, 10000)]:
            print(f'a.{op}_(b), numel() == {n} for {t} times,
dtype={dtype}')
            print(timeit.timeit(f'a.{op}_(b)', setup=f'import torch; a =
torch.arange(1, {n}, dtype={dtype}); b = torch.arange({n}, 1, -1,
dtype={dtype})', number=t))
```

Before:

```
a.gt_(b), numel() == 10000 for 100000 times, dtype=torch.float
0.778998922000028
a.gt_(b), numel() == 100000 for 10000 times, dtype=torch.float
0.6359690249992127
a.gt_(b), numel() == 10000 for 100000 times, dtype=torch.double
1.0801493119997758
a.gt_(b), numel() == 100000 for 10000 times, dtype=torch.double
0.9360321379990637
a.gt_(b), numel() == 10000 for 100000 times, dtype=torch.int16
0.7341018620008981
a.gt_(b), numel() == 100000 for 10000 times, dtype=torch.int16
0.6345281440007966
a.gt_(b), numel() == 10000 for 100000 times, dtype=torch.int32
0.7396387640001194
a.gt_(b), numel() == 100000 for 10000 times, dtype=torch.int32
0.6429641230006382
a.gt_(b), numel() == 10000 for 100000 times, dtype=torch.int64
0.7759611700003006
a.gt_(b), numel() == 100000 for 10000 times, dtype=torch.int64
0.6672059659995284
a.lt_(b), numel() == 10000 for 100000 times, dtype=torch.float
0.7724312530008319
a.lt_(b), numel() == 100000 for 10000 times, dtype=torch.float
0.6392585769990546
a.lt_(b), numel() == 10000 for 100000 times, dtype=torch.double
0.7917451840003196
a.lt_(b), numel() == 100000 for 10000 times, dtype=torch.double
0.6455550159989798
a.lt_(b), numel() == 10000 for 100000 times, dtype=torch.int16
0.739991647998977
a.lt_(b), numel() == 100000 for 10000 times, dtype=torch.int16
0.6572993859990675
a.lt_(b), numel() == 10000 for 100000 times, dtype=torch.int32
0.7627949479992822
a.lt_(b), numel() == 100000 for 10000 times, dtype=torch.int32
0.6476544910001394
a.lt_(b), numel() == 10000 for 100000 times, dtype=torch.int64
0.7965036850000615
a.lt_(b), numel() == 100000 for 10000 times, dtype=torch.int64
0.6780715599998075
a.ge_(b), numel() == 10000 for 100000 times, dtype=torch.float
0.7653547080008138
a.ge_(b), numel() == 100000 for 10000 times, dtype=torch.float
0.6383065829995758
a.ge_(b), numel() == 10000 for 100000 times, dtype=torch.double
0.7895260240002244
a.ge_(b), numel() == 100000 for 10000 times, dtype=torch.double
0.6508346030004759
a.ge_(b), numel() == 10000 for 100000 times, dtype=torch.int16
0.7409299750015634
a.ge_(b), numel() == 100000 for 10000 times, dtype=torch.int16
0.6383492870008922
a.ge_(b), numel() == 10000 for 100000 times, dtype=torch.int32
0.7620547579990671
a.ge_(b), numel() == 100000 for 10000 times, dtype=torch.int32
0.6474270239996258
a.ge_(b), numel() == 10000 for 100000 times, dtype=torch.int64
0.8070051169997896
a.ge_(b), numel() == 100000 for 10000 times, dtype=torch.int64
0.6712598600006459
a.le_(b), numel() == 10000 for 100000 times, dtype=torch.float
0.7627660060006747
a.le_(b), numel() == 100000 for 10000 times, dtype=torch.float
0.6406353189995571
a.le_(b), numel() == 10000 for 100000 times, dtype=torch.double
1.0826010620003217
a.le_(b), numel() == 100000 for 10000 times, dtype=torch.double
0.9391552950000914
a.le_(b), numel() == 10000 for 100000 times, dtype=torch.int16
0.7427801039993938
a.le_(b), numel() == 100000 for 10000 times, dtype=torch.int16
0.6365172640016681
a.le_(b), numel() == 10000 for 100000 times, dtype=torch.int32
0.7679271510005492
a.le_(b), numel() == 100000 for 10000 times, dtype=torch.int32
0.6453389289999905
a.le_(b), numel() == 10000 for 100000 times, dtype=torch.int64
0.788032889000533
a.le_(b), numel() == 100000 for 10000 times, dtype=torch.int64
0.6708840760002204
a.eq_(b), numel() == 10000 for 100000 times, dtype=torch.float
1.078837263999958
a.eq_(b), numel() == 100000 for 10000 times, dtype=torch.float
0.9397531720005645
a.eq_(b), numel() == 10000 for 100000 times, dtype=torch.double
1.1031508050000411
a.eq_(b), numel() == 100000 for 10000 times, dtype=torch.double
0.9412319389994082
a.eq_(b), numel() == 10000 for 100000 times, dtype=torch.int16
0.7509566959997755
a.eq_(b), numel() == 100000 for 10000 times, dtype=torch.int16
0.638570957000411
a.eq_(b), numel() == 10000 for 100000 times, dtype=torch.int32
0.7592877549996047
a.eq_(b), numel() == 100000 for 10000 times, dtype=torch.int32
0.6458840529994632
a.eq_(b), numel() == 10000 for 100000 times, dtype=torch.int64
0.7984061539991671
a.eq_(b), numel() == 100000 for 10000 times, dtype=torch.int64
0.6776346309998189
a.ne_(b), numel() == 10000 for 100000 times, dtype=torch.float
0.7724407899986545
a.ne_(b), numel() == 100000 for 10000 times, dtype=torch.float
0.6581534130000364
a.ne_(b), numel() == 10000 for 100000 times, dtype=torch.double
0.8303323249983805
a.ne_(b), numel() == 100000 for 10000 times, dtype=torch.double
0.6954390920000151
a.ne_(b), numel() == 10000 for 100000 times, dtype=torch.int16
0.745512373998281
a.ne_(b), numel() == 100000 for 10000 times, dtype=torch.int16
0.6360954970004968
a.ne_(b), numel() == 10000 for 100000 times, dtype=torch.int32
0.7569978400006221
a.ne_(b), numel() == 100000 for 10000 times, dtype=torch.int32
0.6450422030011396
a.ne_(b), numel() == 10000 for 100000 times, dtype=torch.int64
0.7889118379989668
a.ne_(b), numel() == 100000 for 10000 times, dtype=torch.int64
0.6693385389989999
```

After:

```
a.gt_(b), numel() == 10000 for 100000 times, dtype=torch.float
0.2444220920006046
a.gt_(b), numel() == 100000 for 10000 times, dtype=torch.float
0.2031730359994981
a.gt_(b), numel() == 10000 for 100000 times, dtype=torch.double
0.35491806199934217
a.gt_(b), numel() == 100000 for 10000 times, dtype=torch.double
0.3905606850003096
a.gt_(b), numel() == 10000 for 100000 times, dtype=torch.int16
0.16665379499863775
a.gt_(b), numel() == 100000 for 10000 times, dtype=torch.int16
0.10095906300011848
a.gt_(b), numel() == 10000 for 100000 times, dtype=torch.int32
0.21650469999985944
a.gt_(b), numel() == 100000 for 10000 times, dtype=torch.int32
0.18737469400002738
a.gt_(b), numel() == 10000 for 100000 times, dtype=torch.int64
0.35481256200000644
a.gt_(b), numel() == 100000 for 10000 times, dtype=torch.int64
0.36696120199849247
a.lt_(b), numel() == 10000 for 100000 times, dtype=torch.float
0.21976138800164335
a.lt_(b), numel() == 100000 for 10000 times, dtype=torch.float
0.20275393200063263
a.lt_(b), numel() == 10000 for 100000 times, dtype=torch.double
0.3695997209997586
a.lt_(b), numel() == 100000 for 10000 times, dtype=torch.double
0.39441510399956314
a.lt_(b), numel() == 10000 for 100000 times, dtype=torch.int16
0.15657078300137073
a.lt_(b), numel() == 100000 for 10000 times, dtype=torch.int16
0.0992998069996247
a.lt_(b), numel() == 10000 for 100000 times, dtype=torch.int32
0.20425128799979575
a.lt_(b), numel() == 100000 for 10000 times, dtype=torch.int32
0.20352934599941364
a.lt_(b), numel() == 10000 for 100000 times, dtype=torch.int64
0.35883567900054913
a.lt_(b), numel() == 100000 for 10000 times, dtype=torch.int64
0.39059587599876977
a.ge_(b), numel() == 10000 for 100000 times, dtype=torch.float
0.21457727400047588
a.ge_(b), numel() == 100000 for 10000 times, dtype=torch.float
0.18836135499986995
a.ge_(b), numel() == 10000 for 100000 times, dtype=torch.double
0.35971907199927955
a.ge_(b), numel() == 100000 for 10000 times, dtype=torch.double
0.3688875009993353
a.ge_(b), numel() == 10000 for 100000 times, dtype=torch.int16
0.1576009280015569
a.ge_(b), numel() == 100000 for 10000 times, dtype=torch.int16
0.09524034199966991
a.ge_(b), numel() == 10000 for 100000 times, dtype=torch.int32
0.2064543649994448
a.ge_(b), numel() == 100000 for 10000 times, dtype=torch.int32
0.18726435600001423
a.ge_(b), numel() == 10000 for 100000 times, dtype=torch.int64
0.35351785300008487
a.ge_(b), numel() == 100000 for 10000 times, dtype=torch.int64
0.3680737989998306
a.le_(b), numel() == 10000 for 100000 times, dtype=torch.float
0.2132134399998904
a.le_(b), numel() == 100000 for 10000 times, dtype=torch.float
0.2140274829998816
a.le_(b), numel() == 10000 for 100000 times, dtype=torch.double
0.36539215199991304
a.le_(b), numel() == 100000 for 10000 times, dtype=torch.double
0.39128020300086064
a.le_(b), numel() == 10000 for 100000 times, dtype=torch.int16
0.15712150600120367
a.le_(b), numel() == 100000 for 10000 times, dtype=torch.int16
0.10149904400168452
a.le_(b), numel() == 10000 for 100000 times, dtype=torch.int32
0.2103407699996751
a.le_(b), numel() == 100000 for 10000 times, dtype=torch.int32
0.2134442910009966
a.le_(b), numel() == 10000 for 100000 times, dtype=torch.int64
0.35387034300038067
a.le_(b), numel() == 100000 for 10000 times, dtype=torch.int64
0.38917528399906587
a.eq_(b), numel() == 10000 for 100000 times, dtype=torch.float
0.2190484450002259
a.eq_(b), numel() == 100000 for 10000 times, dtype=torch.float
0.2030815980015177
a.eq_(b), numel() == 10000 for 100000 times, dtype=torch.double
0.3710030169986567
a.eq_(b), numel() == 100000 for 10000 times, dtype=torch.double
0.36419657899932645
a.eq_(b), numel() == 10000 for 100000 times, dtype=torch.int16
0.15986497499943653
a.eq_(b), numel() == 100000 for 10000 times, dtype=torch.int16
0.10145393699895067
a.eq_(b), numel() == 10000 for 100000 times, dtype=torch.int32
0.21011781599918322
a.eq_(b), numel() == 100000 for 10000 times, dtype=torch.int32
0.20121852699958254
a.eq_(b), numel() == 10000 for 100000 times, dtype=torch.int64
0.36681504499938455
a.eq_(b), numel() == 100000 for 10000 times, dtype=torch.int64
0.364472848999867
a.ne_(b), numel() == 10000 for 100000 times, dtype=torch.float
0.2290963309988001
a.ne_(b), numel() == 100000 for 10000 times, dtype=torch.float
0.21674784300012107
a.ne_(b), numel() == 10000 for 100000 times, dtype=torch.double
0.3829616689999966
a.ne_(b), numel() == 100000 for 10000 times, dtype=torch.double
0.39437660300063726
a.ne_(b), numel() == 10000 for 100000 times, dtype=torch.int16
0.1661020749997988
a.ne_(b), numel() == 100000 for 10000 times, dtype=torch.int16
0.10052955100036343
a.ne_(b), numel() == 10000 for 100000 times, dtype=torch.int32
0.21827425599985872
a.ne_(b), numel() == 100000 for 10000 times, dtype=torch.int32
0.21522501399886096
a.ne_(b), numel() == 10000 for 100000 times, dtype=torch.int64
0.37058242300008715
a.ne_(b), numel() == 100000 for 10000 times, dtype=torch.int64
0.39304063900090114
```
Pull Request resolved: pytorch#35117

Differential Revision: D20721181

Pulled By: pbelevich

fbshipit-source-id: 4e38cc0f42393483db91b2dc53ffc507f91ed904
laurentdupin pushed a commit to laurentdupin/pytorch that referenced this pull request Apr 24, 2026
Summary:
Benchmark: (Debian 10, Release build, gcc 8.3, no turbo, Intel(R)
Xeon(R) E-2136 CPU @ 3.30GHz)

```python
import timeit
for op in ('gt', 'lt', 'ge', 'le', 'eq', 'ne'):
    for dtype in ('torch.float', 'torch.double', 'torch.int16',
'torch.int32', 'torch.int64'):
        for n, t in [(10_000, 100000),
                    (100_000, 10000)]:
            print(f'a.{op}_(b), numel() == {n} for {t} times,
dtype={dtype}')
            print(timeit.timeit(f'a.{op}_(b)', setup=f'import torch; a =
torch.arange(1, {n}, dtype={dtype}); b = torch.arange({n}, 1, -1,
dtype={dtype})', number=t))
```

Before:

```
a.gt_(b), numel() == 10000 for 100000 times, dtype=torch.float
0.778998922000028
a.gt_(b), numel() == 100000 for 10000 times, dtype=torch.float
0.6359690249992127
a.gt_(b), numel() == 10000 for 100000 times, dtype=torch.double
1.0801493119997758
a.gt_(b), numel() == 100000 for 10000 times, dtype=torch.double
0.9360321379990637
a.gt_(b), numel() == 10000 for 100000 times, dtype=torch.int16
0.7341018620008981
a.gt_(b), numel() == 100000 for 10000 times, dtype=torch.int16
0.6345281440007966
a.gt_(b), numel() == 10000 for 100000 times, dtype=torch.int32
0.7396387640001194
a.gt_(b), numel() == 100000 for 10000 times, dtype=torch.int32
0.6429641230006382
a.gt_(b), numel() == 10000 for 100000 times, dtype=torch.int64
0.7759611700003006
a.gt_(b), numel() == 100000 for 10000 times, dtype=torch.int64
0.6672059659995284
a.lt_(b), numel() == 10000 for 100000 times, dtype=torch.float
0.7724312530008319
a.lt_(b), numel() == 100000 for 10000 times, dtype=torch.float
0.6392585769990546
a.lt_(b), numel() == 10000 for 100000 times, dtype=torch.double
0.7917451840003196
a.lt_(b), numel() == 100000 for 10000 times, dtype=torch.double
0.6455550159989798
a.lt_(b), numel() == 10000 for 100000 times, dtype=torch.int16
0.739991647998977
a.lt_(b), numel() == 100000 for 10000 times, dtype=torch.int16
0.6572993859990675
a.lt_(b), numel() == 10000 for 100000 times, dtype=torch.int32
0.7627949479992822
a.lt_(b), numel() == 100000 for 10000 times, dtype=torch.int32
0.6476544910001394
a.lt_(b), numel() == 10000 for 100000 times, dtype=torch.int64
0.7965036850000615
a.lt_(b), numel() == 100000 for 10000 times, dtype=torch.int64
0.6780715599998075
a.ge_(b), numel() == 10000 for 100000 times, dtype=torch.float
0.7653547080008138
a.ge_(b), numel() == 100000 for 10000 times, dtype=torch.float
0.6383065829995758
a.ge_(b), numel() == 10000 for 100000 times, dtype=torch.double
0.7895260240002244
a.ge_(b), numel() == 100000 for 10000 times, dtype=torch.double
0.6508346030004759
a.ge_(b), numel() == 10000 for 100000 times, dtype=torch.int16
0.7409299750015634
a.ge_(b), numel() == 100000 for 10000 times, dtype=torch.int16
0.6383492870008922
a.ge_(b), numel() == 10000 for 100000 times, dtype=torch.int32
0.7620547579990671
a.ge_(b), numel() == 100000 for 10000 times, dtype=torch.int32
0.6474270239996258
a.ge_(b), numel() == 10000 for 100000 times, dtype=torch.int64
0.8070051169997896
a.ge_(b), numel() == 100000 for 10000 times, dtype=torch.int64
0.6712598600006459
a.le_(b), numel() == 10000 for 100000 times, dtype=torch.float
0.7627660060006747
a.le_(b), numel() == 100000 for 10000 times, dtype=torch.float
0.6406353189995571
a.le_(b), numel() == 10000 for 100000 times, dtype=torch.double
1.0826010620003217
a.le_(b), numel() == 100000 for 10000 times, dtype=torch.double
0.9391552950000914
a.le_(b), numel() == 10000 for 100000 times, dtype=torch.int16
0.7427801039993938
a.le_(b), numel() == 100000 for 10000 times, dtype=torch.int16
0.6365172640016681
a.le_(b), numel() == 10000 for 100000 times, dtype=torch.int32
0.7679271510005492
a.le_(b), numel() == 100000 for 10000 times, dtype=torch.int32
0.6453389289999905
a.le_(b), numel() == 10000 for 100000 times, dtype=torch.int64
0.788032889000533
a.le_(b), numel() == 100000 for 10000 times, dtype=torch.int64
0.6708840760002204
a.eq_(b), numel() == 10000 for 100000 times, dtype=torch.float
1.078837263999958
a.eq_(b), numel() == 100000 for 10000 times, dtype=torch.float
0.9397531720005645
a.eq_(b), numel() == 10000 for 100000 times, dtype=torch.double
1.1031508050000411
a.eq_(b), numel() == 100000 for 10000 times, dtype=torch.double
0.9412319389994082
a.eq_(b), numel() == 10000 for 100000 times, dtype=torch.int16
0.7509566959997755
a.eq_(b), numel() == 100000 for 10000 times, dtype=torch.int16
0.638570957000411
a.eq_(b), numel() == 10000 for 100000 times, dtype=torch.int32
0.7592877549996047
a.eq_(b), numel() == 100000 for 10000 times, dtype=torch.int32
0.6458840529994632
a.eq_(b), numel() == 10000 for 100000 times, dtype=torch.int64
0.7984061539991671
a.eq_(b), numel() == 100000 for 10000 times, dtype=torch.int64
0.6776346309998189
a.ne_(b), numel() == 10000 for 100000 times, dtype=torch.float
0.7724407899986545
a.ne_(b), numel() == 100000 for 10000 times, dtype=torch.float
0.6581534130000364
a.ne_(b), numel() == 10000 for 100000 times, dtype=torch.double
0.8303323249983805
a.ne_(b), numel() == 100000 for 10000 times, dtype=torch.double
0.6954390920000151
a.ne_(b), numel() == 10000 for 100000 times, dtype=torch.int16
0.745512373998281
a.ne_(b), numel() == 100000 for 10000 times, dtype=torch.int16
0.6360954970004968
a.ne_(b), numel() == 10000 for 100000 times, dtype=torch.int32
0.7569978400006221
a.ne_(b), numel() == 100000 for 10000 times, dtype=torch.int32
0.6450422030011396
a.ne_(b), numel() == 10000 for 100000 times, dtype=torch.int64
0.7889118379989668
a.ne_(b), numel() == 100000 for 10000 times, dtype=torch.int64
0.6693385389989999
```

After:

```
a.gt_(b), numel() == 10000 for 100000 times, dtype=torch.float
0.2444220920006046
a.gt_(b), numel() == 100000 for 10000 times, dtype=torch.float
0.2031730359994981
a.gt_(b), numel() == 10000 for 100000 times, dtype=torch.double
0.35491806199934217
a.gt_(b), numel() == 100000 for 10000 times, dtype=torch.double
0.3905606850003096
a.gt_(b), numel() == 10000 for 100000 times, dtype=torch.int16
0.16665379499863775
a.gt_(b), numel() == 100000 for 10000 times, dtype=torch.int16
0.10095906300011848
a.gt_(b), numel() == 10000 for 100000 times, dtype=torch.int32
0.21650469999985944
a.gt_(b), numel() == 100000 for 10000 times, dtype=torch.int32
0.18737469400002738
a.gt_(b), numel() == 10000 for 100000 times, dtype=torch.int64
0.35481256200000644
a.gt_(b), numel() == 100000 for 10000 times, dtype=torch.int64
0.36696120199849247
a.lt_(b), numel() == 10000 for 100000 times, dtype=torch.float
0.21976138800164335
a.lt_(b), numel() == 100000 for 10000 times, dtype=torch.float
0.20275393200063263
a.lt_(b), numel() == 10000 for 100000 times, dtype=torch.double
0.3695997209997586
a.lt_(b), numel() == 100000 for 10000 times, dtype=torch.double
0.39441510399956314
a.lt_(b), numel() == 10000 for 100000 times, dtype=torch.int16
0.15657078300137073
a.lt_(b), numel() == 100000 for 10000 times, dtype=torch.int16
0.0992998069996247
a.lt_(b), numel() == 10000 for 100000 times, dtype=torch.int32
0.20425128799979575
a.lt_(b), numel() == 100000 for 10000 times, dtype=torch.int32
0.20352934599941364
a.lt_(b), numel() == 10000 for 100000 times, dtype=torch.int64
0.35883567900054913
a.lt_(b), numel() == 100000 for 10000 times, dtype=torch.int64
0.39059587599876977
a.ge_(b), numel() == 10000 for 100000 times, dtype=torch.float
0.21457727400047588
a.ge_(b), numel() == 100000 for 10000 times, dtype=torch.float
0.18836135499986995
a.ge_(b), numel() == 10000 for 100000 times, dtype=torch.double
0.35971907199927955
a.ge_(b), numel() == 100000 for 10000 times, dtype=torch.double
0.3688875009993353
a.ge_(b), numel() == 10000 for 100000 times, dtype=torch.int16
0.1576009280015569
a.ge_(b), numel() == 100000 for 10000 times, dtype=torch.int16
0.09524034199966991
a.ge_(b), numel() == 10000 for 100000 times, dtype=torch.int32
0.2064543649994448
a.ge_(b), numel() == 100000 for 10000 times, dtype=torch.int32
0.18726435600001423
a.ge_(b), numel() == 10000 for 100000 times, dtype=torch.int64
0.35351785300008487
a.ge_(b), numel() == 100000 for 10000 times, dtype=torch.int64
0.3680737989998306
a.le_(b), numel() == 10000 for 100000 times, dtype=torch.float
0.2132134399998904
a.le_(b), numel() == 100000 for 10000 times, dtype=torch.float
0.2140274829998816
a.le_(b), numel() == 10000 for 100000 times, dtype=torch.double
0.36539215199991304
a.le_(b), numel() == 100000 for 10000 times, dtype=torch.double
0.39128020300086064
a.le_(b), numel() == 10000 for 100000 times, dtype=torch.int16
0.15712150600120367
a.le_(b), numel() == 100000 for 10000 times, dtype=torch.int16
0.10149904400168452
a.le_(b), numel() == 10000 for 100000 times, dtype=torch.int32
0.2103407699996751
a.le_(b), numel() == 100000 for 10000 times, dtype=torch.int32
0.2134442910009966
a.le_(b), numel() == 10000 for 100000 times, dtype=torch.int64
0.35387034300038067
a.le_(b), numel() == 100000 for 10000 times, dtype=torch.int64
0.38917528399906587
a.eq_(b), numel() == 10000 for 100000 times, dtype=torch.float
0.2190484450002259
a.eq_(b), numel() == 100000 for 10000 times, dtype=torch.float
0.2030815980015177
a.eq_(b), numel() == 10000 for 100000 times, dtype=torch.double
0.3710030169986567
a.eq_(b), numel() == 100000 for 10000 times, dtype=torch.double
0.36419657899932645
a.eq_(b), numel() == 10000 for 100000 times, dtype=torch.int16
0.15986497499943653
a.eq_(b), numel() == 100000 for 10000 times, dtype=torch.int16
0.10145393699895067
a.eq_(b), numel() == 10000 for 100000 times, dtype=torch.int32
0.21011781599918322
a.eq_(b), numel() == 100000 for 10000 times, dtype=torch.int32
0.20121852699958254
a.eq_(b), numel() == 10000 for 100000 times, dtype=torch.int64
0.36681504499938455
a.eq_(b), numel() == 100000 for 10000 times, dtype=torch.int64
0.364472848999867
a.ne_(b), numel() == 10000 for 100000 times, dtype=torch.float
0.2290963309988001
a.ne_(b), numel() == 100000 for 10000 times, dtype=torch.float
0.21674784300012107
a.ne_(b), numel() == 10000 for 100000 times, dtype=torch.double
0.3829616689999966
a.ne_(b), numel() == 100000 for 10000 times, dtype=torch.double
0.39437660300063726
a.ne_(b), numel() == 10000 for 100000 times, dtype=torch.int16
0.1661020749997988
a.ne_(b), numel() == 100000 for 10000 times, dtype=torch.int16
0.10052955100036343
a.ne_(b), numel() == 10000 for 100000 times, dtype=torch.int32
0.21827425599985872
a.ne_(b), numel() == 100000 for 10000 times, dtype=torch.int32
0.21522501399886096
a.ne_(b), numel() == 10000 for 100000 times, dtype=torch.int64
0.37058242300008715
a.ne_(b), numel() == 100000 for 10000 times, dtype=torch.int64
0.39304063900090114
```
Pull Request resolved: pytorch#35117

Differential Revision: D20721181

Pulled By: pbelevich

fbshipit-source-id: 4e38cc0f42393483db91b2dc53ffc507f91ed904
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

Merged open source triaged This issue has been looked at a team member, and triaged and prioritized into an appropriate module

Projects

None yet

Development

Successfully merging this pull request may close these issues.

6 participants