"""
Generate a TorchBench test report from a file containing the PR body.
Currently, it only supports running tests on the specified model names.

Testing environment:
- Intel Xeon 8259CL @ 2.50 GHz, 24 cores, with Turbo and HT disabled
- Nvidia Tesla T4
- Nvidia Driver 450.51.06
- Python 3.7
- CUDA 10.2
"""
# Known issues:
# 1. Does not reuse the build artifact in other CI workflows
# 2. CI jobs are serialized because there is only one worker
import os
import sys
import pathlib
import argparse
import subprocess

from typing import List

CUDA_VERSION = "cu102"
PYTHON_VERSION = "3.7"
TORCHBENCH_CONFIG_NAME = "config.yaml"
MAGIC_PREFIX = "RUN_TORCHBENCH:"
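# A PR opts into benchmarking by adding a line of this form to its body
# (the model names here are placeholders, not a real model list):
#   RUN_TORCHBENCH: <model_name_1>, <model_name_2>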
ABTEST_CONFIG_TEMPLATE = """# This config is automatically generated by run_torchbench.py
start: {control}
end: {treatment}
threshold: 100
direction: decrease
timeout: 60
tests:"""
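# For example, gen_abtest_config("<base-sha>", "<head-sha>", ["<model>"]) below
# renders this template as (SHAs and model name are placeholders):
#   start: <base-sha>
#   end: <head-sha>
#   threshold: 100
#   direction: decrease
#   timeout: 60
#   tests:
#    - <model>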

def gen_abtest_config(control: str, treatment: str, models: List[str]) -> str:
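    """Generate the YAML A/B test config used by TorchBench bisection.

    `control` and `treatment` are the base and head commit hashes; each entry
    in `models` becomes an item under the `tests:` key.
    """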
    config = ABTEST_CONFIG_TEMPLATE.format(control=control, treatment=treatment)
    for model in models:
        config = f"{config}\n - {model}"
    return config + "\n"

def deploy_torchbench_config(output_dir: str, config: str) -> None:
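    """Write the generated config to TORCHBENCH_CONFIG_NAME inside output_dir."""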
    # Create the test directory (including missing parents) if needed
    pathlib.Path(output_dir).mkdir(parents=True, exist_ok=True)
    # TorchBench config file name
    config_path = os.path.join(output_dir, TORCHBENCH_CONFIG_NAME)
    with open(config_path, "w") as fp:
        fp.write(config)

def extract_models_from_pr(torchbench_path: str, prbody_file: str) -> List[str]:
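    """Parse the model list from the first MAGIC_PREFIX line in the PR body.

    Returns an empty list if no magic line is found, or if any specified model
    does not exist in the TorchBench suite.
    """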
    model_list = []
    with open(prbody_file, "r") as pf:
        lines = (line.strip() for line in pf.read().splitlines())
        magic_lines = [line for line in lines if line.startswith(MAGIC_PREFIX)]
        if magic_lines:
            # Only the first magic line will be respected.
            model_list = [m.strip() for m in magic_lines[0][len(MAGIC_PREFIX):].split(",")]
    # Sanity check: make sure all the user-specified models exist in the TorchBench repository
    full_model_list = os.listdir(os.path.join(torchbench_path, "torchbenchmark", "models"))
    for m in model_list:
        if m not in full_model_list:
            print(f"The model {m} you specified does not exist in the TorchBench suite. Please double-check.")
            return []
    return model_list

def run_torchbench(pytorch_path: str, torchbench_path: str, output_dir: str) -> None:
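    """Run TorchBench's bisection.py against the deployed config and wait for it to finish."""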
    # Copy the system environment so that we do not override it
    env = dict(os.environ)
    command = ["python", "bisection.py", "--work-dir", output_dir,
               "--pytorch-src", pytorch_path, "--torchbench-src", torchbench_path,
               "--config", os.path.join(output_dir, TORCHBENCH_CONFIG_NAME),
               "--output", os.path.join(output_dir, "result.txt")]
    subprocess.check_call(command, cwd=torchbench_path, env=env)

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Run TorchBench tests based on a PR')
    parser.add_argument('--pr-num', required=True, type=str, help="The Pull Request number")
    parser.add_argument('--pr-base-sha', required=True, type=str, help="The Pull Request base hash")
    parser.add_argument('--pr-head-sha', required=True, type=str, help="The Pull Request head hash")
    parser.add_argument('--pr-body', required=True, type=str, help="The file that contains the body of the Pull Request")
    parser.add_argument('--pytorch-path', required=True, type=str, help="Path to the pytorch repository")
    parser.add_argument('--torchbench-path', required=True, type=str, help="Path to the TorchBench repository")
    args = parser.parse_args()

    output_dir: str = os.path.join(os.environ["HOME"], ".torchbench", "bisection", f"pr{args.pr_num}")
    # Identify the specified models and verify the input
    models = extract_models_from_pr(args.torchbench_path, args.pr_body)
    if not models:
        print("Cannot parse the model filter from the PR body. Currently, only an explicit model allow-list is supported.")
        sys.exit(1)
    print(f"Ready to run TorchBench with the specified models. Results will be saved in the directory: {output_dir}.")
    # Run TorchBench with the generated config
    torchbench_config = gen_abtest_config(args.pr_base_sha, args.pr_head_sha, models)
    deploy_torchbench_config(output_dir, torchbench_config)
    run_torchbench(pytorch_path=args.pytorch_path, torchbench_path=args.torchbench_path, output_dir=output_dir)
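
# Example invocation (all values are illustrative placeholders):
#   python run_torchbench.py --pr-num 12345 \
#       --pr-base-sha <base-sha> --pr-head-sha <head-sha> \
#       --pr-body /tmp/pr_body.txt \
#       --pytorch-path /path/to/pytorch --torchbench-path /path/to/benchmark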