Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion test/cpp/lazy/test_ir_util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ namespace lazy {
class IrUtilNode : public Node {
public:
explicit IrUtilNode()
: Node(OpKind(), /* num_outputs */ 1, /* hash_seed */ Hash("")) {}
: Node(OpKind(), /* num_outputs */ 1, /* hash_seed */ Hash(0)) {}
~IrUtilNode() override = default;

void AddOperand(Value v) {
Expand Down
1 change: 1 addition & 0 deletions tools/build_variables.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,7 @@ core_sources_full = core_sources_full_mobile + [

lazy_tensor_core_sources = [
"torch/csrc/lazy/backend/backend_device.cpp",
"torch/csrc/lazy/backend/lowering_context.cpp",
"torch/csrc/lazy/core/config.cpp",
"torch/csrc/lazy/core/hash.cpp",
"torch/csrc/lazy/core/ir.cpp",
Expand Down
61 changes: 61 additions & 0 deletions torch/csrc/lazy/backend/backend_data.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
#pragma once

#include <cstring>
#include <torch/csrc/lazy/core/shape.h>
#include <torch/csrc/lazy/backend/backend_device.h>

namespace torch {
namespace lazy {

// Represents (tensor) data stored on a backend device in its native format.
class TORCH_API BackendData {
 public:
  // Used by the lazy graph executor to tag info onto BackendData objects;
  // backends derive from Info to attach their own per-datum state.
  struct Info {
    virtual ~Info() = default;
  };

  // Opaque identifier for the underlying backend buffer.
  using Handle = int64_t;

  BackendData(BackendDevice device, Shape shape)
      : device_(std::move(device)), shape_(std::move(shape)) {}

  virtual ~BackendData() = default;

  // Device this data lives on.
  const BackendDevice& device() const {
    return device_;
  }

  // Logical shape of the stored tensor.
  const Shape& shape() const {
    return shape_;
  }

  // Returns the attached backend tag, or nullptr if none has been set.
  Info* info() const {
    return info_.get();
  }

  // Installs `info` and returns the previously attached Info (possibly null).
  std::shared_ptr<Info> SetInfo(std::shared_ptr<Info> info) {
    std::swap(info, info_);
    return info;
  }

  // Returns the backend-specific handle for this data.
  virtual Handle GetHandle() = 0;

  // Makes this object refer to the same payload as `data`
  // (backend-defined semantics).
  virtual void Assign(const BackendData& data) = 0;

  // True if this object is backed by an actual device allocation
  // (as opposed to being a placeholder).
  virtual bool HasValue() const = 0;

 private:
  BackendDevice device_;
  Shape shape_;
  std::shared_ptr<Info> info_;
};

using BackendDataPtr = std::shared_ptr<BackendData>;

} // namespace lazy
} // namespace torch
129 changes: 129 additions & 0 deletions torch/csrc/lazy/backend/backend_interface.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
#pragma once

#include <atomic>
#include <ATen/Tensor.h>
#include <torch/csrc/lazy/backend/backend_data.h>
#include <torch/csrc/lazy/backend/backend_device.h>
#include <torch/csrc/lazy/backend/lowering_context.h>
#include <torch/csrc/lazy/core/shape.h>

namespace torch {
namespace lazy {

/**
* Work in progress- don't treat this as a stable interface yet!
*/
/**
 * Work in progress- don't treat this as a stable interface yet!
 */
class TORCH_API BackendImplInterface {
 public:
  /**
   * Initialization/Teardown
   * */

  // No-op by default. Allows custom functionality to be exposed through
  // extension bindings.
  virtual void InitializeAtenBindings() const {}

  // Hook for the backend to release resources before process exit.
  virtual void PrepareToExit() const = 0;

  /**
   * Configuration
   * */

  // Seeds the backend's random number generator(s).
  virtual void SetRngSeed(size_t seed) const = 0;

  /**
   * Data Transfer
   * */

  // Transfers `tensor` to `device`, producing backend-native data with the
  // given logical `shape`.
  virtual BackendDataPtr MakeComputationDataFromTensor(
      const at::Tensor& tensor, const Shape& shape,
      const BackendDevice& device) const = 0;

  // Creates an empty placeholder data object on `device` (no value yet);
  // see BackendData::HasValue().
  virtual BackendDataPtr CreateDataPlaceholder(
      const BackendDevice& device, const Shape& shape) const = 0;

  // Transfers backend data back into an at::Tensor. `logical_scalar_type`,
  // if set, is presumably the dtype the caller expects the returned tensor
  // to have — confirm against backend implementations.
  virtual at::Tensor MakeTensorFromComputationData(
      const BackendDataPtr data,
      c10::optional<at::ScalarType> logical_scalar_type) const = 0;

  /**
   * Lowering, Compilation, Execution
   * */

  // Creates a LoweringContext pre-seeded with `post_order` and
  // `emit_status` (see LoweringContext for their meaning).
  virtual std::unique_ptr<LoweringContext> CreateLoweringContext(
      const std::string& name, BackendDevice device,
      c10::ArrayRef<torch::lazy::Node*> post_order,
      Util::EmissionMap emit_status) const = 0;

  // Creates an empty LoweringContext for `device`.
  virtual std::unique_ptr<LoweringContext> CreateLoweringContext(
      const std::string& name, BackendDevice device) const = 0;

  // TODO(whc) need to keep this?
  virtual std::vector<std::string> GetCompilationDevices(
      const std::string& device, c10::ArrayRef<std::string> devices) const = 0;

  // Compiles a batch of computations; returns the compiled results.
  virtual std::vector<ComputationPtr> Compile(
      std::vector<ComputationPtr> instances) const = 0;

  // Runs a compiled `computation` with `arguments` on `device`, returning
  // one BackendDataPtr per computation output.
  virtual std::vector<BackendDataPtr> ExecuteComputation(
      Computation& computation, c10::ArrayRef<BackendDataPtr> arguments,
      const BackendDevice& device) const = 0;

  /**
   * Device Configuration
   * */

  // Set or get the default device type.
  // For backends used with virtual c10:: Devices, this configures what real
  // device type the backend should use, and matters if the backend supports
  // more than one type of real device.
  virtual std::shared_ptr<BackendDeviceType> GetDefaultDeviceType() const = 0;
  virtual void SetDefaultDeviceType(std::string) = 0;

  // Specify which aten device should be used for eager fallback
  // may change depending on current 'Default' DeviceType
  virtual at::DeviceType EagerFallbackDeviceType() const = 0;

  // Query all available backend devices
  virtual std::vector<BackendDevice> GetBackendDevices() const = 0;

  // Map a particular c10:: device to a concrete backend device
  // Note:: c10:: devices may be virtual or concrete. xla:: and lazy:: are
  // virtual devices, meaning they may map to a gpu, tpu, etc. behind the
  // scenes. In the future, non-virtual c10:: devices may also use lazy tensors
  // through a mode, in which case these APIs should still work, but should be
  // identity mappings.
  virtual BackendDevice GetBackendDevice(c10::Device device) const = 0;

  // TODO(whc)
  // Additional APIs expected for supporting distributed training, to be
  // designed

  /**
   * Debug/Metrics
   * */

  // virtual std::map<std::string, Metric> GetMetrics() const = 0;

  // virtual MemoryInfo GetMemoryInfo(const std::string& device) = 0;

  // Returns a human-readable text form of `computation` (backend IR dump).
  virtual std::string GetComputationBackendText(
      const ComputationPtr computation) const = 0;
};

extern std::atomic<const BackendImplInterface*> backend_impl_registry;

// Registration hook for a lazy-tensor backend: constructing a
// BackendRegistrar is expected to publish `backend_impl_interface` into
// backend_impl_registry (definition lives elsewhere; see getBackend()).
// The constructor is marked explicit to prevent accidental implicit
// conversion from a raw BackendImplInterface pointer.
class TORCH_API BackendRegistrar {
 public:
  explicit BackendRegistrar(const BackendImplInterface* backend_impl_interface);
};

// Returns the currently registered backend implementation.
// CHECK-fails (aborts) if no backend has registered itself via
// BackendRegistrar before this is called.
inline const BackendImplInterface* TORCH_API getBackend() {
  auto p = backend_impl_registry.load();
  CHECK(p) << "Lazy tensor backend not registered.";
  return p;
}

} // lazy
} // torch
19 changes: 19 additions & 0 deletions torch/csrc/lazy/backend/lowering_context.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#include <torch/csrc/lazy/backend/lowering_context.h>

namespace torch {
namespace lazy {

// Constructs an empty lowering context targeting `device`. The `name`
// parameter identifies the computation for backend subclasses; the base
// implementation does not use it, so it is commented out to avoid an
// unused-parameter warning.
LoweringContext::LoweringContext(
    const std::string& /* name */, BackendDevice device)
    : device_(std::move(device)) {}

// Constructs a lowering context targeting `device`, carrying over an
// existing emission map. `name` and `post_order` are consumed only by
// backend subclasses, not by this base class, so their names are commented
// out to avoid unused-parameter warnings.
LoweringContext::LoweringContext(
    const std::string& /* name */, BackendDevice device,
    c10::ArrayRef<torch::lazy::Node*> /* post_order */,
    Util::EmissionMap emit_status)
    : device_(std::move(device)), emit_status_(std::move(emit_status)) {}

// Retrieves the vector holding all the tensors associated with the parameter
// instructions which have been created so far (populated by subclasses).
const std::vector<BackendDataPtr>& LoweringContext::GetParametersData() const {
  return parameters_;
}

} // namespace lazy
} // namespace torch
85 changes: 85 additions & 0 deletions torch/csrc/lazy/backend/lowering_context.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
#pragma once

#include <memory>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#include <torch/csrc/lazy/backend/backend_data.h>
#include <torch/csrc/lazy/backend/backend_device.h>
#include <torch/csrc/lazy/core/ir.h>
#include <torch/csrc/lazy/core/ir_util.h>

namespace torch {
namespace lazy {

// Abstract handle to a backend-compiled computation. Backends subclass this
// to wrap their native executable, exposing parameter/result metadata to
// the lazy core.
class TORCH_API Computation {
 public:
  // Number of input parameters the computation expects.
  virtual int parameters_size() const = 0;

  // Shapes of the input parameters.
  virtual const std::vector<Shape>& parameter_shapes() const = 0;

  // Names of the input parameters.
  virtual const std::vector<std::string>& parameter_names() const = 0;

  // Shape of the computation result (may describe a tuple; see
  // LoweringContext::GetResultShape / AddResult).
  virtual const Shape& result_shape() const = 0;

  virtual ~Computation() = default;
};

using ComputationPtr = std::shared_ptr<Computation>;

// Keeps track of the code generation state.
// Keeps track of the code generation state while lowering a sequence of IR
// nodes into a backend Computation. Backends subclass this and implement
// the pure-virtual lowering hooks.
// (Also fixes a stray semicolon after the inline device() definition.)
class TORCH_API LoweringContext {
 public:
  LoweringContext(const std::string& name, BackendDevice device);
  LoweringContext(const std::string& name, BackendDevice device,
                  c10::ArrayRef<torch::lazy::Node*> post_order,
                  Util::EmissionMap emit_status);

  virtual ~LoweringContext() = default;

  // Factory functions; the active backend supplies the concrete type
  // (see BackendImplInterface::CreateLoweringContext).
  static std::unique_ptr<LoweringContext> Create(
      const std::string& name, BackendDevice device,
      c10::ArrayRef<torch::lazy::Node*> post_order,
      Util::EmissionMap emit_status);

  static std::unique_ptr<LoweringContext> Create(const std::string& name,
                                                 BackendDevice device);

  // Device the lowered computation targets.
  const BackendDevice& device() const { return device_; }

  // Retrieves the vector holding all the tensors associated with the
  // parameter instructions which have been created.
  const std::vector<BackendDataPtr>& GetParametersData() const;

  // Get the shape of the result tuple component, given by index.
  virtual Shape GetResultShape(size_t index) const = 0;

  // Adds the given output as a component of the result tuple and returns its
  // assigned position within the tuple.
  virtual size_t AddResult(const torch::lazy::Output& output) = 0;

  // Associates the given output with the input parameter of the given index
  // and shape. Only used for the operator-by-operator execution, mostly for
  // debugging purposes.
  virtual void AddParameter(const torch::lazy::Output& output, size_t index,
                            const Shape& shape, const std::string& name) = 0;

  // Build the computation capturing all the operations created with the
  // embedded builder (returned by the builder() API).
  virtual ComputationPtr Build() = 0;

  // Number of nodes already emitted into this context.
  size_t GetEmittedNodeCount() const { return emit_status_.size(); }

 protected:
  BackendDevice device_;
  std::vector<BackendDataPtr> parameters_;
  std::vector<size_t> parameter_sequence_;
  Util::EmissionMap emit_status_;
};

} // namespace lazy
} // namespace torch
3 changes: 3 additions & 0 deletions torch/csrc/lazy/core/hash.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,9 @@ static inline hash_t Hash(const std::string& value) {
return DataHash(value.data(), value.size());
}

// Hashes the bytes viewed by a c10::string_view.
// Taken by value rather than const&: string_view is a small, trivially
// copyable view type, so pass-by-value is the idiomatic calling convention
// and avoids an unnecessary indirection. Source-compatible with all callers.
static inline hash_t Hash(c10::string_view value) {
  return DataHash(value.data(), value.size());
}
// Taken from glibc's implementation of hashing optionals,
// we want to include a contribution to the hash to distinguish
// cases where one or another option was null, but we hope it doesn't
Expand Down
2 changes: 1 addition & 1 deletion torch/csrc/lazy/core/ir_metadata.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ struct SourceLocation {
int line = -1;
};

// Writes a short summary of the given source-location `frames` to `stream`
// (presumably the innermost user frame — confirm in ir_metadata.cpp).
// Declaration only; TORCH_API exports it for use outside this library.
TORCH_API void EmitShortFrameInfo(
    std::ostream& stream,
    const std::vector<SourceLocation>& frames);

Expand Down