Skip to content

[PyTorch] Store Tensor explicitly in IValue #48824

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 13 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
rebase, add a few more tests on "[PyTorch] Store Tensor explicitly in…
… IValue"

Enables the following diff, which will make toTensor() return
`const Tensor&` and allow callers to avoid refcounting overhead.

Differential Revision: [D25324617](https://our.internmc.facebook.com/intern/diff/D25324617/)

[ghstack-poisoned]
  • Loading branch information
swolchok committed Dec 18, 2020
commit d774458f5f0233a353f27fcb408e0369fdf92fc7
10 changes: 6 additions & 4 deletions aten/src/ATen/core/ivalue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ bool IValue::ptrEqual(const IValue& lhs, const IValue& rhs) {
TORCH_INTERNAL_ASSERT(lhs.is_intrusive_ptr);
TORCH_INTERNAL_ASSERT(rhs.is_intrusive_ptr);
return lhs.tag == rhs.tag &&
lhs.payload.as_intrusive_ptr == rhs.payload.as_intrusive_ptr;
lhs.payload.u.as_intrusive_ptr == rhs.payload.u.as_intrusive_ptr;
}

IValue IValue::equals(const IValue& rhs) const {
Expand Down Expand Up @@ -307,15 +307,17 @@ size_t IValue::hash(const IValue& v) {
case Tag::None:
return 0;
case Tag::Bool:
return c10::get_hash(v.payload.as_bool);
return c10::get_hash(v.payload.u.as_bool);
case Tag::Double:
return c10::get_hash(v.payload.as_double);
return c10::get_hash(v.payload.u.as_double);
case Tag::Tensor:
// Tensor __hash__ is equivalent to `id()`, so take the pointer value of
// the tensor to emulate it
return c10::get_hash(v.payload.as_tensor.unsafeGetTensorImpl());
case Tag::Storage:
return c10::get_hash(v.payload.u.as_int);
case Tag::Int:
return c10::get_hash(v.payload.as_int);
return c10::get_hash(v.payload.u.as_int);
case Tag::String:
return c10::get_hash(v.toStringRef());
case Tag::Tuple:
Expand Down
116 changes: 57 additions & 59 deletions aten/src/ATen/core/ivalue.h
Original file line number Diff line number Diff line change
Expand Up @@ -165,15 +165,13 @@ struct Capsule {
struct CAFFE2_API IValue final {
IValue(const IValue& rhs)
: IValue(rhs.payload, rhs.tag, rhs.is_intrusive_ptr) {
if (is_intrusive_ptr && payload.as_intrusive_ptr != c10::UndefinedTensorImpl::singleton()) {
c10::raw::intrusive_ptr::incref(payload.as_intrusive_ptr);
if (is_intrusive_ptr && payload.u.as_intrusive_ptr != c10::UndefinedTensorImpl::singleton()) {
c10::raw::intrusive_ptr::incref(payload.u.as_intrusive_ptr);
}
}

IValue(IValue&& rhs) noexcept : tag(rhs.tag), is_intrusive_ptr(rhs.is_intrusive_ptr) {
moveFrom(std::move(rhs));
rhs.tag = Tag::None;
rhs.is_intrusive_ptr = false;
}

/// @private [doxygen private]
Expand Down Expand Up @@ -290,7 +288,7 @@ struct CAFFE2_API IValue final {
}

// Other types can be compared by their ptr value
return this->payload.as_intrusive_ptr == rhs.payload.as_intrusive_ptr;
return this->payload.u.as_intrusive_ptr == rhs.payload.u.as_intrusive_ptr;
}

/// @private [doxygen private]
Expand All @@ -299,10 +297,10 @@ struct CAFFE2_API IValue final {
return 1;
}

if (payload.as_intrusive_ptr == c10::UndefinedTensorImpl::singleton()) {
if (payload.u.as_intrusive_ptr == c10::UndefinedTensorImpl::singleton()) {
return 0;
}
return c10::raw::intrusive_ptr::use_count(payload.as_intrusive_ptr);
return c10::raw::intrusive_ptr::use_count(payload.u.as_intrusive_ptr);
}

/// @private [doxygen private]
Expand All @@ -320,10 +318,7 @@ struct CAFFE2_API IValue final {
// make this abundantly clear.
//
// payload.as_tensor.~Tensor();
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wclass-memaccess"
memcpy(&payload, &rhs.payload, sizeof(payload));
#pragma GCC diagnostic pop
copyNontensorPayload(rhs.payload, rhs.tag);
new (&rhs.payload.as_tensor) at::Tensor(std::move(t));
} else if (rhs.isTensor()) {
rhs.swap(*this);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is potentially slow because it needs to do the isTensor checks again (depending on how smart the compiler is with inlining this and proving that the extra branches are never executed). Not sure if it is relevant in practice, but if you want to optimize it, you could move lines 332 to 335 into their own helper function, e.g. swapWithTensor(lhs, rhs), and call it from both the isTensor() and the rhs.isTensor() case.

Expand Down Expand Up @@ -357,7 +352,7 @@ struct CAFFE2_API IValue final {
// This is not an optional optimization: our incref call
// *will not* do the right thing when called on an
// undefined tensor.
payload.as_intrusive_ptr = s.unsafeReleaseStorageImpl();
payload.u.as_intrusive_ptr = null_to_undefined_tensor(s.unsafeReleaseStorageImpl());
}
bool isStorage() const {
return Tag::Storage == tag;
Expand All @@ -377,7 +372,7 @@ struct CAFFE2_API IValue final {
: tag(Tag::Blob), is_intrusive_ptr(true) {
// TODO (after Tensor merge) If we pass in a Blob holding a Tensor, extract
// and store it as a Tensor instead.
payload.as_intrusive_ptr = null_to_undefined_tensor(blob.release());
payload.u.as_intrusive_ptr = null_to_undefined_tensor(blob.release());
}

/// @private [doxygen private]
Expand Down Expand Up @@ -433,14 +428,14 @@ struct CAFFE2_API IValue final {

// Double
IValue(double d) : tag(Tag::Double), is_intrusive_ptr(false) {
payload.as_double = d;
payload.u.as_double = d;
}
// Returns true when this IValue's tag is Tag::Double.
bool isDouble() const {
return Tag::Double == tag;
}
double toDouble() const {
AT_ASSERT(isDouble());
return payload.as_double;
return payload.u.as_double;
}

// Future
Expand Down Expand Up @@ -469,7 +464,7 @@ struct CAFFE2_API IValue final {

// Int
IValue(int64_t i) : tag(Tag::Int), is_intrusive_ptr(false) {
payload.as_int = i;
payload.u.as_int = i;
}

// allow you to pass literals (3, 4) without ambiguity
Expand All @@ -481,7 +476,7 @@ struct CAFFE2_API IValue final {

int64_t toInt() const {
AT_ASSERT(isInt());
return payload.as_int;
return payload.u.as_int;
}

// Bool
Expand All @@ -490,17 +485,17 @@ struct CAFFE2_API IValue final {
// Initializing the entire payload stops valgrind from reporting
// "jump or move depends on uninitialised value" in IValue copy constructor
// See https://github.com/pytorch/pytorch/issues/37117
payload.as_int = b;
payload.u.as_int = b;
#else
payload.as_bool = b;
payload.u.as_bool = b;
#endif
}
// Returns true when this IValue's tag is Tag::Bool.
bool isBool() const {
return Tag::Bool == tag;
}
bool toBool() const {
AT_ASSERT(isBool());
return payload.as_bool;
return payload.u.as_bool;
}

// IntList
Expand Down Expand Up @@ -652,21 +647,21 @@ struct CAFFE2_API IValue final {

// Device
IValue(c10::Device d) : tag(Tag::Device), is_intrusive_ptr(false) {
payload.as_device.type = d.type();
payload.as_device.index = d.index();
payload.u.as_device.type = d.type();
payload.u.as_device.index = d.index();
}
// Returns true when this IValue's tag is Tag::Device.
bool isDevice() const {
return Tag::Device == tag;
}
c10::Device toDevice() const {
AT_ASSERT(isDevice());
return c10::Device(payload.as_device.type, payload.as_device.index);
return c10::Device(payload.u.as_device.type, payload.u.as_device.index);
}

//Stream
IValue(c10::Stream stream)
: tag(Tag::Stream), is_intrusive_ptr(false) {
payload.as_int = stream.pack();
payload.u.as_int = stream.pack();
}
c10::Stream toStream() &&;
c10::Stream toStream() const &;
Expand Down Expand Up @@ -695,7 +690,7 @@ struct CAFFE2_API IValue final {

// QScheme
IValue(at::QScheme qscheme) : tag(Tag::Int), is_intrusive_ptr(false) {
payload.as_int = static_cast<int64_t>(qscheme);
payload.u.as_int = static_cast<int64_t>(qscheme);
}

at::QScheme toQScheme() const {
Expand All @@ -716,7 +711,7 @@ struct CAFFE2_API IValue final {
// This is not an optional optimization: our incref call
// *will not* do the right thing when called on an
// undefined generator.
payload.as_intrusive_ptr = null_to_undefined_tensor(g.unsafeReleaseGeneratorImpl());
payload.u.as_intrusive_ptr = null_to_undefined_tensor(g.unsafeReleaseGeneratorImpl());
}
bool isGenerator() const {
return Tag::Generator == tag;
Expand Down Expand Up @@ -792,7 +787,7 @@ struct CAFFE2_API IValue final {
const void* internalToPointer() const {
TORCH_INTERNAL_ASSERT(
isPtrType(), "Can only call internalToPointer() for pointer types");
return payload.as_intrusive_ptr != c10::UndefinedTensorImpl::singleton() ? payload.as_intrusive_ptr : nullptr;
return payload.u.as_intrusive_ptr != c10::UndefinedTensorImpl::singleton() ? payload.u.as_intrusive_ptr : nullptr;
}

TypePtr type() const;
Expand All @@ -806,7 +801,7 @@ struct CAFFE2_API IValue final {
}
// If it is not a Tensor, then two mutable IValues alias each other only
// if they are the same pointer.
return val.payload.as_int;
return val.payload.u.as_int;
}
};

Expand Down Expand Up @@ -868,7 +863,7 @@ struct CAFFE2_API IValue final {
// the compiler to generate the same code for each case. It is
// surprisingly difficult to get this right.
if (isTensor() || is_intrusive_ptr) {
c10::intrusive_ptr_target* p = isTensor() ? payload.as_tensor.unsafeGetTensorImpl() : payload.as_intrusive_ptr;
c10::intrusive_ptr_target* p = isTensor() ? payload.as_tensor.unsafeGetTensorImpl() : payload.u.as_intrusive_ptr;
c10::intrusive_ptr<intrusive_ptr_target, c10::UndefinedTensorImpl>::reclaim(p);
// No need to make this destructor call!
// payload.as_tensor.~Tensor();
Expand All @@ -888,51 +883,56 @@ struct CAFFE2_API IValue final {
//
// rhs.payload.as_tensor.~Tensor();
} else {
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wclass-memaccess"
memcpy(&payload, &rhs.payload, sizeof(payload));
#pragma GCC diagnostic pop
copyNontensorPayload(rhs.payload, rhs.tag);
}
tag = rhs.tag;
is_intrusive_ptr = rhs.is_intrusive_ptr;
rhs.clearToNone();
}

void clearToNone() noexcept {
payload.as_int = 0;
payload.u.as_int = 0;
tag = Tag::None;
is_intrusive_ptr = false;
}

union Payload {
int64_t as_int;
double as_double;
bool as_bool;
// Invariant: never nullptr; null state is represented as
// c10::UndefinedTensorImpl::singleton() for consistency of
// representation with Tensor.
c10::intrusive_ptr_target* as_intrusive_ptr;
// We use a nested union here so that we can make the copy easy
// and efficient in the non-tensor (i.e., trivially copyable)
// case. Specifically, we do not have to do a switch-on-tag to
// figure out which union member to assign; we can just use
// TriviallyCopyablePayload::operator=.
union TriviallyCopyablePayload {
TriviallyCopyablePayload() : as_int(0) {}
int64_t as_int;
double as_double;
bool as_bool;
// Invariant: never nullptr; null state is represented as
// c10::UndefinedTensorImpl::singleton() for consistency of
// representation with Tensor.
c10::intrusive_ptr_target* as_intrusive_ptr;
struct {
DeviceType type;
DeviceIndex index;
} as_device;
} u;
at::Tensor as_tensor;
struct {
DeviceType type;
DeviceIndex index;
} as_device;

Payload() : as_int(0) {}
Payload() : u() {}
~Payload() {}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any reason you're user-defining the destructor? `= default` should do the trick and would not make the destructor user-defined; alternatively, just keep it omitted as before.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unions with non-POD types in them are a pain. The destructor cannot be defined by default -- do you run ~Tensor() or not? So, we have to define it to do nothing.

};

IValue(const Payload& p, Tag t, bool i) : tag(t), is_intrusive_ptr(i) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Even the largest Payload should be only 64 bits, and Payload has trivial copy/move constructors, so I would assume passing by value is better. Is passing by reference here related to the Itanium ABI thing you posted about?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

> Payload has trivial copy/move constructors

Not with Tensor in it -- do you run the Tensor copy/move constructors or not? It's not copyable.

if (isTensor()) {
new (&payload.as_tensor) at::Tensor(p.as_tensor);
} else {
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wclass-memaccess"
memcpy(&payload, &p, sizeof(payload));
#pragma GCC diagnostic pop
copyNontensorPayload(p, t);
}
}

// Copies the nested trivially-copyable union `u` from `from` into this
// object's payload with a single assignment; the `as_tensor` member is not
// touched. As the name indicates, this is meant for the case where `from`
// does not hold a Tensor (callers invoke it from their non-tensor branch).
// NOTE(review): the tag parameter `t` is not used in this body.
void copyNontensorPayload(const Payload& from, Tag t) noexcept {
payload.u = from.u;
}

Payload payload;
Tag tag;
bool is_intrusive_ptr;
Expand All @@ -957,8 +957,8 @@ struct CAFFE2_API WeakIValue final {
payload.as_intrusive_ptr = rhs.unsafeToTensorImpl();
is_intrusive_ptr = true;
} else {
static_assert(sizeof(payload) == sizeof(rhs.payload), "IValue and WeakIValue payload sizes don't match!");
memcpy(&payload, &rhs.payload, sizeof(payload));
static_assert(sizeof(payload) == sizeof(rhs.payload.u), "WeakIValue payload is out of sync");
memcpy(&payload, &rhs.payload.u, sizeof(payload));
}
if (is_intrusive_ptr) {
if (payload.as_intrusive_ptr != c10::UndefinedTensorImpl::singleton()) {
Expand Down Expand Up @@ -996,10 +996,8 @@ struct CAFFE2_API WeakIValue final {
IValue lock() const {
if (!is_intrusive_ptr) {
IValue::Payload newPayload;
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wclass-memaccess"
memcpy(&newPayload, &payload, sizeof(newPayload));
#pragma GCC diagnostic pop
static_assert(sizeof(payload) == sizeof(newPayload.u), "WeakIValue payload is out of sync");
memcpy(&newPayload.u, &payload, sizeof(payload));
return IValue(newPayload, tag, false);
}
if (IValue::Tag::Tensor == tag) {
Expand All @@ -1018,9 +1016,9 @@ struct CAFFE2_API WeakIValue final {
? nullptr
: payload.as_intrusive_ptr);
IValue::Payload pl;
pl.as_intrusive_ptr = temp.lock().release();
pl.u.as_intrusive_ptr = temp.lock().release();
temp.release();
if (!pl.as_intrusive_ptr) {
if (!pl.u.as_intrusive_ptr) {
return IValue();
} else {
return IValue(pl, tag, true);
Expand Down
Loading
You are viewing a condensed version of this merge commit. You can view the full changes here.