[PyTorch] Store Tensor explicitly in IValue #48824
Changes from 1 commit
… IValue" Enables following diff, which will make toTensor() return `const Tensor&` and allow callers to avoid refcounting overhead. Differential Revision: [D25324617](https://our.internmc.facebook.com/intern/diff/D25324617/) [ghstack-poisoned]
```diff
@@ -165,15 +165,13 @@ struct Capsule {
 struct CAFFE2_API IValue final {
   IValue(const IValue& rhs)
       : IValue(rhs.payload, rhs.tag, rhs.is_intrusive_ptr) {
-    if (is_intrusive_ptr && payload.as_intrusive_ptr != c10::UndefinedTensorImpl::singleton()) {
-      c10::raw::intrusive_ptr::incref(payload.as_intrusive_ptr);
+    if (is_intrusive_ptr && payload.u.as_intrusive_ptr != c10::UndefinedTensorImpl::singleton()) {
+      c10::raw::intrusive_ptr::incref(payload.u.as_intrusive_ptr);
     }
   }

   IValue(IValue&& rhs) noexcept : tag(rhs.tag), is_intrusive_ptr(rhs.is_intrusive_ptr) {
     moveFrom(std::move(rhs));
-    rhs.tag = Tag::None;
-    rhs.is_intrusive_ptr = false;
   }

   /// @private [doxygen private]
```
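The copy and move constructors above follow the manual intrusive-refcounting pattern: copy the raw pointer bits, then incref unless the pointer is the statically allocated "undefined" sentinel; a move just steals the pointer and resets the source. A reduced sketch of that pattern (stand-in `Target`/`Handle` types, an assumption rather than the real c10 machinery):

```cpp
// Reduced sketch of the copy/move pattern (stand-in types, not the
// PyTorch source). Copying bumps the refcount unless the pointer is a
// statically-allocated sentinel; moving steals the pointer and resets
// the source to the sentinel so no count changes hands.
#include <atomic>
#include <utility>

struct Target {
  std::atomic<int> refcount{1};
};

Target* sentinel() {           // plays the UndefinedTensorImpl role
  static Target s;
  return &s;
}

struct Handle {
  Target* p = sentinel();

  Handle() = default;
  Handle(const Handle& rhs) : p(rhs.p) {
    if (p != sentinel()) p->refcount.fetch_add(1); // incref real objects only
  }
  Handle(Handle&& rhs) noexcept : p(rhs.p) {
    rhs.p = sentinel();        // like moveFrom + clearToNone
  }
  Handle& operator=(const Handle&) = delete; // assignment elided for brevity
  Handle& operator=(Handle&&) = delete;
  ~Handle() {
    if (p != sentinel() && p->refcount.fetch_sub(1) == 1) delete p;
  }
};

int main() {
  Handle a;
  a.p = new Target;            // refcount 1
  Handle b = a;                // copy: refcount 2
  Handle c = std::move(a);     // move: refcount still 2, a now sentinel
  return 0;
}
```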
```diff
@@ -290,7 +288,7 @@ struct CAFFE2_API IValue final {
   }

   // Other types can be compared by their ptr value
-  return this->payload.as_intrusive_ptr == rhs.payload.as_intrusive_ptr;
+  return this->payload.u.as_intrusive_ptr == rhs.payload.u.as_intrusive_ptr;
 }

 /// @private [doxygen private]
```
```diff
@@ -299,10 +297,10 @@ struct CAFFE2_API IValue final {
       return 1;
     }

-    if (payload.as_intrusive_ptr == c10::UndefinedTensorImpl::singleton()) {
+    if (payload.u.as_intrusive_ptr == c10::UndefinedTensorImpl::singleton()) {
       return 0;
     }
-    return c10::raw::intrusive_ptr::use_count(payload.as_intrusive_ptr);
+    return c10::raw::intrusive_ptr::use_count(payload.u.as_intrusive_ptr);
   }

   /// @private [doxygen private]
```
```diff
@@ -320,10 +318,7 @@ struct CAFFE2_API IValue final {
       // make this abundantly clear.
       //
       // payload.as_tensor.~Tensor();
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wclass-memaccess"
-      memcpy(&payload, &rhs.payload, sizeof(payload));
-#pragma GCC diagnostic pop
+      copyNontensorPayload(rhs.payload, rhs.tag);
       new (&rhs.payload.as_tensor) at::Tensor(std::move(t));
     } else if (rhs.isTensor()) {
       rhs.swap(*this);
```
```diff
@@ -357,7 +352,7 @@ struct CAFFE2_API IValue final {
     // This is not an optional optimization: our incref call
     // *will not* do the right thing when called on an
     // undefined tensor.
-    payload.as_intrusive_ptr = s.unsafeReleaseStorageImpl();
+    payload.u.as_intrusive_ptr = null_to_undefined_tensor(s.unsafeReleaseStorageImpl());
   }
   bool isStorage() const {
     return Tag::Storage == tag;
```
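`null_to_undefined_tensor` is not defined in this hunk; given the invariant documented on the payload union (the pointer member is never nullptr), a plausible minimal form is shown below. This is an assumption, not necessarily the exact PyTorch source, and it relies on the c10 headers that ivalue.h already includes:

```cpp
// Sketch only: maps a null intrusive_ptr_target* to the shared
// "undefined tensor" sentinel, so payload.u.as_intrusive_ptr is
// never nullptr, keeping the documented invariant.
static c10::intrusive_ptr_target* null_to_undefined_tensor(
    c10::intrusive_ptr_target* p) {
  return p ? p
           : static_cast<c10::intrusive_ptr_target*>(
                 c10::UndefinedTensorImpl::singleton());
}
```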
```diff
@@ -377,7 +372,7 @@ struct CAFFE2_API IValue final {
       : tag(Tag::Blob), is_intrusive_ptr(true) {
     // TODO (after Tensor merge) If we pass in a Blob holding a Tensor, extract
     // and store it as a Tensor instead.
-    payload.as_intrusive_ptr = null_to_undefined_tensor(blob.release());
+    payload.u.as_intrusive_ptr = null_to_undefined_tensor(blob.release());
   }

   /// @private [doxygen private]
```
```diff
@@ -433,14 +428,14 @@ struct CAFFE2_API IValue final {

   // Double
   IValue(double d) : tag(Tag::Double), is_intrusive_ptr(false) {
-    payload.as_double = d;
+    payload.u.as_double = d;
   }
   bool isDouble() const {
     return Tag::Double == tag;
   }
   double toDouble() const {
     AT_ASSERT(isDouble());
-    return payload.as_double;
+    return payload.u.as_double;
   }

   // Future
```
```diff
@@ -469,7 +464,7 @@ struct CAFFE2_API IValue final {

   // Int
   IValue(int64_t i) : tag(Tag::Int), is_intrusive_ptr(false) {
-    payload.as_int = i;
+    payload.u.as_int = i;
   }

   // allow you to pass literals (3, 4) without ambiguity
```
```diff
@@ -481,7 +476,7 @@ struct CAFFE2_API IValue final {

   int64_t toInt() const {
     AT_ASSERT(isInt());
-    return payload.as_int;
+    return payload.u.as_int;
   }

   // Bool
```
```diff
@@ -490,17 +485,17 @@ struct CAFFE2_API IValue final {
     // Initializing entire payload stops valgrind's from reporting
     // "jump or move depends on uninitialised value" in IValue copy constructor
     // See https://github.com/pytorch/pytorch/issues/37117
-    payload.as_int = b;
+    payload.u.as_int = b;
 #else
-    payload.as_bool = b;
+    payload.u.as_bool = b;
 #endif
   }
   bool isBool() const {
     return Tag::Bool == tag;
   }
   bool toBool() const {
     AT_ASSERT(isBool());
-    return payload.as_bool;
+    return payload.u.as_bool;
   }

   // IntList
```
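The valgrind comment above is worth unpacking: writing only the one-byte `as_bool` member leaves the remaining bytes of the eight-byte payload word uninitialized, and the whole-payload copy in the copy constructor then reads them. A minimal reproduction of the effect (simplified union, an assumption rather than the real Payload):

```cpp
// Sketch (assumption: simplified stand-in types, not the PyTorch source).
// Writing only the bool member leaves the other 7 bytes of the 8-byte
// union uninitialized; a whole-union copy then reads those bytes, which
// is what valgrind flags. Writing through as_int initializes all 8 bytes.
#include <cstdint>
#include <cstring>

union Payload {
  int64_t as_int;
  bool as_bool;
};

int main() {
  Payload a;
  a.as_bool = true;               // bytes 1..7 of `a` stay uninitialized
  Payload b;
  std::memcpy(&b, &a, sizeof(b)); // copies uninitialized bytes: valgrind warns
  a.as_int = true;                // writes the full 8 bytes instead
  std::memcpy(&b, &a, sizeof(b)); // now every byte read is initialized
  return 0;
}
```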
```diff
@@ -652,21 +647,21 @@ struct CAFFE2_API IValue final {

   // Device
   IValue(c10::Device d) : tag(Tag::Device), is_intrusive_ptr(false) {
-    payload.as_device.type = d.type();
-    payload.as_device.index = d.index();
+    payload.u.as_device.type = d.type();
+    payload.u.as_device.index = d.index();
   }
   bool isDevice() const {
     return Tag::Device == tag;
   }
   c10::Device toDevice() const {
     AT_ASSERT(isDevice());
-    return c10::Device(payload.as_device.type, payload.as_device.index);
+    return c10::Device(payload.u.as_device.type, payload.u.as_device.index);
   }

   //Stream
   IValue(c10::Stream stream)
       : tag(Tag::Stream), is_intrusive_ptr(false) {
-    payload.as_int = stream.pack();
+    payload.u.as_int = stream.pack();
   }
   c10::Stream toStream() &&;
   c10::Stream toStream() const &;
```
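`stream.pack()` squeezes a stream's identifying fields into a single integer so that it fits the `as_int` slot. The exact c10 layout is not shown here; the general bit-packing pattern looks like this (hypothetical field widths and a hypothetical `PackedStream` type, not c10's actual encoding):

```cpp
// Illustrative only: hypothetical field widths, not c10's actual layout.
#include <cassert>
#include <cstdint>

struct PackedStream {
  uint8_t device_type;
  uint8_t device_index;
  uint32_t stream_id;
};

// Pack the three fields into one 64-bit value so it can live in as_int.
int64_t pack(PackedStream s) {
  uint64_t bits = (uint64_t(s.device_type) << 40) |
                  (uint64_t(s.device_index) << 32) |
                  uint64_t(s.stream_id);
  return int64_t(bits);
}

PackedStream unpack(int64_t packed) {
  uint64_t bits = uint64_t(packed);
  return {uint8_t(bits >> 40), uint8_t(bits >> 32), uint32_t(bits)};
}

int main() {
  PackedStream s{1, 3, 42};
  PackedStream r = unpack(pack(s));
  assert(r.device_type == 1 && r.device_index == 3 && r.stream_id == 42);
  return 0;
}
```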
```diff
@@ -695,7 +690,7 @@ struct CAFFE2_API IValue final {

   // QScheme
   IValue(at::QScheme qscheme) : tag(Tag::Int), is_intrusive_ptr(false) {
-    payload.as_int = static_cast<int64_t>(qscheme);
+    payload.u.as_int = static_cast<int64_t>(qscheme);
   }

   at::QScheme toQScheme() const {
```
```diff
@@ -716,7 +711,7 @@ struct CAFFE2_API IValue final {
     // This is not an optional optimization: our incref call
     // *will not* do the right thing when called on an
     // undefined generator.
-    payload.as_intrusive_ptr = null_to_undefined_tensor(g.unsafeReleaseGeneratorImpl());
+    payload.u.as_intrusive_ptr = null_to_undefined_tensor(g.unsafeReleaseGeneratorImpl());
   }
   bool isGenerator() const {
     return Tag::Generator == tag;
```
```diff
@@ -792,7 +787,7 @@ struct CAFFE2_API IValue final {
   const void* internalToPointer() const {
     TORCH_INTERNAL_ASSERT(
         isPtrType(), "Can only call internalToPointer() for pointer types");
-    return payload.as_intrusive_ptr != c10::UndefinedTensorImpl::singleton() ? payload.as_intrusive_ptr : nullptr;
+    return payload.u.as_intrusive_ptr != c10::UndefinedTensorImpl::singleton() ? payload.u.as_intrusive_ptr : nullptr;
   }

   TypePtr type() const;
```
```diff
@@ -806,7 +801,7 @@ struct CAFFE2_API IValue final {
     }
     // If it is not a Tensor, then two mutable IValues alias each other only
    // if they are the same pointer.
-    return val.payload.as_int;
+    return val.payload.u.as_int;
   }
 };
```
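The aliasing check above uses the integer view of the payload as an identity key: two pointer-backed values alias exactly when their payload bits, i.e. their pointers, are equal. A toy version of that idea (stand-in types; note it leans on the same union type-punning extension that the real payload relies on, which GCC and Clang support):

```cpp
// Sketch (assumption: simplified stand-in, not the PyTorch source).
// For pointer-backed values, identity is the pointer itself, so the
// union's integer view of the payload works as a hash key: two values
// alias iff their payload bits (their pointers) are equal.
#include <cstdint>
#include <unordered_set>

struct Obj {};

union Payload {
  int64_t as_int;
  Obj* as_ptr;
};

int64_t identityKey(Payload p) {
  return p.as_int; // reads the pointer's bits as the identity key
}

int main() {
  Obj o;
  Payload a; a.as_ptr = &o;
  Payload b; b.as_ptr = &o;
  std::unordered_set<int64_t> seen;
  seen.insert(identityKey(a));
  // Same pointer => same key => recognized as an alias.
  return seen.count(identityKey(b)) == 1 ? 0 : 1;
}
```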
```diff
@@ -868,7 +863,7 @@ struct CAFFE2_API IValue final {
     // the compiler to generate the same code for each case. It is
     // surprisingly difficult to get this right.
     if (isTensor() || is_intrusive_ptr) {
-      c10::intrusive_ptr_target* p = isTensor() ? payload.as_tensor.unsafeGetTensorImpl() : payload.as_intrusive_ptr;
+      c10::intrusive_ptr_target* p = isTensor() ? payload.as_tensor.unsafeGetTensorImpl() : payload.u.as_intrusive_ptr;
       c10::intrusive_ptr<intrusive_ptr_target, c10::UndefinedTensorImpl>::reclaim(p);
       // No need to make this destructor call!
       // payload.as_tensor.~Tensor();
```
```diff
@@ -888,51 +883,56 @@ struct CAFFE2_API IValue final {
       //
       // rhs.payload.as_tensor.~Tensor();
     } else {
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wclass-memaccess"
-      memcpy(&payload, &rhs.payload, sizeof(payload));
-#pragma GCC diagnostic pop
+      copyNontensorPayload(rhs.payload, rhs.tag);
     }
     tag = rhs.tag;
     is_intrusive_ptr = rhs.is_intrusive_ptr;
+    rhs.clearToNone();
   }

   void clearToNone() noexcept {
-    payload.as_int = 0;
+    payload.u.as_int = 0;
     tag = Tag::None;
     is_intrusive_ptr = false;
   }

   union Payload {
-    int64_t as_int;
-    double as_double;
-    bool as_bool;
-    // Invariant: never nullptr; null state is represented as
-    // c10::UndefinedTensorImpl::singleton() for consistency of
-    // representation with Tensor.
-    c10::intrusive_ptr_target* as_intrusive_ptr;
+    // We use a nested union here so that we can make the copy easy
+    // and efficient in the non-tensor (i.e., trivially copyable)
+    // case. Specifically, we do not have to do a switch-on-tag to
+    // figure out which union member to assign; we can just use
+    // TriviallyCopyablePayload::operator=.
+    union TriviallyCopyablePayload {
+      TriviallyCopyablePayload() : as_int(0) {}
+      int64_t as_int;
+      double as_double;
+      bool as_bool;
+      // Invariant: never nullptr; null state is represented as
+      // c10::UndefinedTensorImpl::singleton() for consistency of
+      // representation with Tensor.
+      c10::intrusive_ptr_target* as_intrusive_ptr;
+      struct {
+        DeviceType type;
+        DeviceIndex index;
+      } as_device;
+    } u;
     at::Tensor as_tensor;
-    struct {
-      DeviceType type;
-      DeviceIndex index;
-    } as_device;

-    Payload() : as_int(0) {}
+    Payload() : u() {}
     ~Payload() {}
   };
```

Review comment on `~Payload() {}`: Any reason you're user-defining the destructor?

Reply: Unions with non-POD types in them are a pain. The destructor cannot be defaulted -- do you run `~Tensor()` or not? So we have to define it to do nothing.
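A minimal illustration of the point made in the reply, with a stand-in `std::string` playing the role of `at::Tensor`: a union with a non-trivially-destructible member has no usable implicit destructor, because the compiler cannot know which member is active, so the owner must provide an empty destructor and manage lifetimes with placement new and explicit destructor calls:

```cpp
// Minimal sketch of the problem (stand-in types, not the PyTorch
// source). A union with a non-trivially-destructible member gets a
// deleted destructor; the enclosing type must manage the active
// member's lifetime itself.
#include <cstdint>
#include <new>
#include <string>

union U {
  int64_t as_int;
  std::string as_string; // non-POD member deletes U's default dtor
  U() : as_int(0) {}
  ~U() {} // must be user-provided and do nothing; the owner decides
};

int main() {
  U u;
  new (&u.as_string) std::string("hello"); // activate the non-POD member
  u.as_string.~basic_string();             // owner runs the dtor explicitly
  return 0;
}
```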
```diff
   IValue(const Payload& p, Tag t, bool i) : tag(t), is_intrusive_ptr(i) {
     if (isTensor()) {
       new (&payload.as_tensor) at::Tensor(p.as_tensor);
     } else {
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wclass-memaccess"
-      memcpy(&payload, &p, sizeof(payload));
-#pragma GCC diagnostic pop
+      copyNontensorPayload(p, t);
     }
   }
+
+  void copyNontensorPayload(const Payload& from, Tag t) noexcept {
+    payload.u = from.u;
+  }

   Payload payload;
   Tag tag;
   bool is_intrusive_ptr;
```

Review comment on `IValue(const Payload& p, Tag t, bool i)`: even the largest Payload should be 64 bits only, and Payload has trivial copy/move constructors, so I would assume passing by value is better. Is passing by reference here related to the Itanium ABI thing you posted about?

Reply: Not with …
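The truncated reply presumably points at the `at::Tensor` member: once the union contains a Tensor, `Payload` no longer has trivial copy/move constructors, and under the Itanium C++ ABI such a type is passed by invisible reference even when written as pass-by-value. A small illustration of the trait change (stand-in types, an assumption rather than the real Payload):

```cpp
// Illustration (stand-in types): adding a member with a non-trivial
// copy constructor/destructor makes the union non-trivially copyable,
// so pass-by-value no longer means "pass in a register" under the
// Itanium C++ ABI; such a type is passed by invisible reference.
#include <cstdint>
#include <memory>
#include <type_traits>

union Trivial {
  int64_t as_int;
  double as_double;
};

struct NonTrivialMember {
  std::shared_ptr<int> p; // non-trivial copy/dtor, like at::Tensor
};

union WithNonTrivial {
  int64_t as_int;
  NonTrivialMember as_obj;
  WithNonTrivial() : as_int(0) {}
  ~WithNonTrivial() {}
};

static_assert(std::is_trivially_copyable<Trivial>::value,
              "plain scalar union stays trivially copyable");
static_assert(!std::is_trivially_copyable<WithNonTrivial>::value,
              "a non-trivial member removes trivial copyability");

int main() { return 0; }
```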
```diff
@@ -957,8 +957,8 @@ struct CAFFE2_API WeakIValue final {
       payload.as_intrusive_ptr = rhs.unsafeToTensorImpl();
       is_intrusive_ptr = true;
     } else {
-      static_assert(sizeof(payload) == sizeof(rhs.payload), "IValue and WeakIValue payload sizes don't match!");
-      memcpy(&payload, &rhs.payload, sizeof(payload));
+      static_assert(sizeof(payload) == sizeof(rhs.payload.u), "WeakIValue payload is out of sync");
+      memcpy(&payload, &rhs.payload.u, sizeof(payload));
     }
     if (is_intrusive_ptr) {
       if (payload.as_intrusive_ptr != c10::UndefinedTensorImpl::singleton()) {
@@ -996,10 +996,8 @@ struct CAFFE2_API WeakIValue final {
   IValue lock() const {
     if (!is_intrusive_ptr) {
       IValue::Payload newPayload;
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wclass-memaccess"
-      memcpy(&newPayload, &payload, sizeof(newPayload));
-#pragma GCC diagnostic pop
+      static_assert(sizeof(payload) == sizeof(newPayload.u), "WeakIValue payload is out of sync");
+      memcpy(&newPayload.u, &payload, sizeof(payload));
       return IValue(newPayload, tag, false);
     }
     if (IValue::Tag::Tensor == tag) {
@@ -1018,9 +1016,9 @@ struct CAFFE2_API WeakIValue final {
             ? nullptr
             : payload.as_intrusive_ptr);
     IValue::Payload pl;
-    pl.as_intrusive_ptr = temp.lock().release();
+    pl.u.as_intrusive_ptr = temp.lock().release();
     temp.release();
-    if (!pl.as_intrusive_ptr) {
+    if (!pl.u.as_intrusive_ptr) {
       return IValue();
     } else {
       return IValue(pl, tag, true);
```
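`lock()` follows the standard weak-reference promotion pattern: try to obtain a strong reference, and return an empty value if the object is already gone. The same shape expressed with `std::weak_ptr` (an analogy, not the `c10::weak_intrusive_ptr` implementation):

```cpp
// Analogy only: std::weak_ptr instead of c10::weak_intrusive_ptr.
#include <cassert>
#include <memory>

std::shared_ptr<int> lock_or_empty(const std::weak_ptr<int>& weak) {
  // lock() atomically promotes to a strong reference, or returns
  // an empty shared_ptr if the object has already been destroyed.
  if (auto strong = weak.lock()) {
    return strong;
  }
  return nullptr; // corresponds to returning IValue() above
}

int main() {
  auto sp = std::make_shared<int>(7);
  std::weak_ptr<int> wp = sp;
  assert(lock_or_empty(wp) != nullptr); // still alive
  sp.reset();                           // drop the last strong ref
  assert(lock_or_empty(wp) == nullptr); // promotion now fails
  return 0;
}
```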
Review comment: this is potentially slow because it needs to do the `isTensor` checks again (depending on how smart the compiler is with inlining this and proving that the extra branches are never executed). Not sure if relevant in practice, but if you want to optimize it, you could just move lines 332 to 335 into their own subfunction `swapWithTensor(lhs, rhs)` or something like that, and call it from both the `isTensor()` and `rhs.isTensor()` cases.
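A sketch of the refactor the reviewer is suggesting, using a hypothetical `swapWithString` helper and a stand-in `std::string` member in place of `at::Tensor`; the point is that the "exactly one side holds the non-trivial member" case is hoisted into one function that both branches call, so the tag checks run once:

```cpp
// Sketch of the reviewer's suggestion (hypothetical helper, simplified
// stand-in types, not the PyTorch source).
#include <new>
#include <string>
#include <utility>

struct Slot {
  bool has_str = false;
  union {
    long as_int;
    std::string as_str;
  };
  Slot() : as_int(0) {}
  ~Slot() { if (has_str) as_str.~basic_string(); }
};

// Precondition: lhs holds the string, rhs holds the trivial payload.
void swapWithString(Slot& lhs, Slot& rhs) {
  std::string t = std::move(lhs.as_str);
  lhs.as_str.~basic_string();
  lhs.as_int = rhs.as_int;               // copy the trivial payload over
  new (&rhs.as_str) std::string(std::move(t));
  std::swap(lhs.has_str, rhs.has_str);
}

void swap(Slot& a, Slot& b) {
  if (a.has_str && !b.has_str) {
    swapWithString(a, b);                // both branches share one body
  } else if (b.has_str && !a.has_str) {
    swapWithString(b, a);
  } // both-trivial and both-string cases elided for brevity
}

int main() {
  Slot a, b;
  new (&a.as_str) std::string("x");
  a.has_str = true;
  b.as_int = 5;
  swap(a, b);
  return (b.has_str && b.as_str == "x" && a.as_int == 5) ? 0 : 1;
}
```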