@@ -163,23 +163,34 @@ struct CAFFE2_API IValue final {
       c10::raw::intrusive_ptr::incref(payload.as_intrusive_ptr);
     }
   }
-  IValue(IValue&& rhs) noexcept : IValue() {
-    swap(rhs);
+  IValue(IValue&& rhs) noexcept : tag(rhs.tag), is_intrusive_ptr(rhs.is_intrusive_ptr) {
+    moveFrom(std::move(rhs));
+    rhs.tag = Tag::None;
+    rhs.is_intrusive_ptr = false;
   }
+
   /// @private [doxygen private]
   ~IValue() {
-    if (is_intrusive_ptr) {
-      c10::raw::intrusive_ptr::decref(payload.as_intrusive_ptr);
-    }
+    destroy();
   }
-  IValue& operator=(IValue&& rhs) & noexcept {
-    IValue(std::move(rhs)).swap(*this); // this also sets rhs to None
+
+  // Always-inline for performance -- this gets called frequently
+  // inside the core of the static runtime.
+  C10_ALWAYS_INLINE IValue& operator=(IValue&& rhs) & noexcept {
+    if (&rhs == this) {
+      return *this;
+    }
+
+    destroy();
+    moveFrom(std::move(rhs));
     return *this;
   }
+
   IValue& operator=(IValue const& rhs) & {
     IValue(rhs).swap(*this);
     return *this;
   }
+
   void dump() const;
 
   /**
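The rewritten move assignment destroys the current contents before adopting rhs's, so it needs the explicit self-move guard that the old construct-and-swap idiom got for free, and it avoids materializing a temporary IValue on the hot path that the "Always-inline for performance" comment is about. A minimal standalone sketch of that destroy-then-move-from shape, using an illustrative Box class that is not part of this patch:

#include <utility>

class Box {
 public:
  Box() = default;
  explicit Box(int v) : p_(new int(v)) {}
  Box(const Box&) = delete;
  Box& operator=(const Box&) = delete;
  Box(Box&& rhs) noexcept : p_(rhs.p_) { rhs.p_ = nullptr; }
  Box& operator=(Box&& rhs) & noexcept {
    if (&rhs == this) {
      return *this;  // without this guard, destroy() below would free our own payload
    }
    destroy();         // analogous to IValue::destroy()
    p_ = rhs.p_;       // analogous to moveFrom(std::move(rhs))
    rhs.p_ = nullptr;  // analogous to rhs.clearToNone()
    return *this;
  }
  ~Box() { destroy(); }

 private:
  void destroy() { delete p_; p_ = nullptr; }
  int* p_ = nullptr;
};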
@@ -288,7 +299,27 @@ struct CAFFE2_API IValue final {
 
   /// @private [doxygen private]
   void swap(IValue& rhs) noexcept {
-    std::swap(payload, rhs.payload);
+    if (isTensor() && rhs.isTensor()) {
+      std::swap(payload.as_tensor, rhs.payload.as_tensor);
+    } else if (isTensor()) {
+      at::Tensor t = std::move(payload.as_tensor);
+      // As far as I can tell, omitting the usual explicit destructor call
+      // is not UB in and of itself, and it's a slight perf win. The
+      // destructor is a no-op, because the moved-from Tensor is
+      // effectively an intrusive_ptr in the null state, so we don't need
+      // the behavior for correctness reasons either. Leaving this
+      // explanatory comment, including commented-out destructor call, to
+      // make this abundantly clear.
+      //
+      // payload.as_tensor.~Tensor();
+      memcpy(&payload, &rhs.payload, sizeof(payload));
+      new (&rhs.payload.as_tensor) at::Tensor(std::move(t));
+    } else if (rhs.isTensor()) {
+      rhs.swap(*this);
+      return;
+    } else {
+      std::swap(reinterpret_cast<char (&)[sizeof(payload)]>(*&payload), reinterpret_cast<char (&)[sizeof(payload)]>(*&rhs.payload));
+    }
     std::swap(is_intrusive_ptr, rhs.is_intrusive_ptr);
     std::swap(tag, rhs.tag);
   }
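Because the payload union can now hold an at::Tensor, swap() can no longer byte-swap the payload unconditionally: when exactly one side holds a Tensor it has to be moved out, the trivially copyable bytes copied across, and the Tensor reconstructed in the other payload with placement new. A reduced sketch of that mixed case, with std::string standing in for at::Tensor and illustrative names throughout:

#include <cstdint>
#include <cstring>
#include <new>
#include <string>
#include <utility>

// Illustrative stand-in for IValue's payload: one non-trivial alternative
// (std::string in place of at::Tensor) plus a trivial one.
struct Value {
  union Payload {
    int64_t as_int;
    std::string as_str;
    Payload() : as_int(0) {}
    ~Payload() {}  // the enclosing Value destroys whichever member is active
  };
  enum class Tag { Int, Str } tag = Tag::Int;
  Payload payload;

  Value() = default;
  explicit Value(std::string s) : tag(Tag::Str) {
    new (&payload.as_str) std::string(std::move(s));
  }
  Value(const Value&) = delete;
  Value& operator=(const Value&) = delete;
  ~Value() {
    if (tag == Tag::Str) {
      payload.as_str.~basic_string();
    }
  }

  void swap(Value& rhs) noexcept {
    if (tag == Tag::Str && rhs.tag == Tag::Str) {
      std::swap(payload.as_str, rhs.payload.as_str);
    } else if (tag == Tag::Str) {
      std::string s = std::move(payload.as_str);
      // Unlike the Tensor case in the hunk above, a moved-from std::string's
      // destructor is not guaranteed to be a no-op, so destroy it explicitly.
      payload.as_str.~basic_string();
      std::memcpy(&payload, &rhs.payload, sizeof(payload));
      new (&rhs.payload.as_str) std::string(std::move(s));
    } else if (rhs.tag == Tag::Str) {
      rhs.swap(*this);
      return;
    } else {
      std::swap(payload.as_int, rhs.payload.as_int);
    }
    std::swap(tag, rhs.tag);
  }
};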
@@ -297,21 +328,16 @@ struct CAFFE2_API IValue final {
   // While some of these accessors could be generated through templates,
   // we prefer to write them manually for clarity
 
-  IValue(at::Tensor t) : tag(Tag::Tensor), is_intrusive_ptr(t.defined()) {
-    // Note: the undefined tensor is not refcounted, so while it
-    // is tagged as a tensor, is_intrusive_ptr is set to false.
-    // This is not an optional optimization: our incref call
-    // *will not* do the right thing when called on an
-    // undefined tensor.
-    payload.as_intrusive_ptr = t.unsafeReleaseTensorImpl();
+  IValue(at::Tensor t) : tag(Tag::Tensor), is_intrusive_ptr(false) {
+    new (&payload.as_tensor) at::Tensor(std::move(t));
   }
   bool isTensor() const {
     return Tag::Tensor == tag;
   }
   at::Tensor toTensor() &&;
   at::Tensor toTensor() const&;
   at::TensorImpl* unsafeToTensorImpl() const {
-    return static_cast<at::TensorImpl*>(payload.as_intrusive_ptr);
+    return payload.as_tensor.unsafeGetTensorImpl();
   }
 
   const IValue& toIValue() const {
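With the constructor above, an IValue now holds the at::Tensor inline (placement-new into payload.as_tensor, is_intrusive_ptr left false because the Tensor manages its own refcount) rather than a released TensorImpl*; from a caller's point of view the round trip is unchanged. A small usage sketch, assuming a libtorch build (at::ones is just a convenient way to get a defined tensor):

#include <ATen/ATen.h>
#include <ATen/core/ivalue.h>
#include <utility>

int main() {
  at::Tensor t = at::ones({2, 3});
  c10::IValue v(std::move(t));     // placement-constructs payload.as_tensor
  bool tagged = v.isTensor();      // Tag::Tensor, even though is_intrusive_ptr is false
  at::Tensor back = v.toTensor();  // const& overload: copies the inline Tensor (refcount bump)
  return (tagged && back.defined()) ? 0 : 1;
}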
@@ -565,7 +591,7 @@ struct CAFFE2_API IValue final {
   c10::intrusive_ptr<ivalue::EnumHolder> toEnumHolder() const&;
 
   // None
-  IValue() : payload{0}, tag(Tag::None), is_intrusive_ptr(false) {}
+  IValue() : tag(Tag::None), is_intrusive_ptr(false) {}
   bool isNone() const {
     return Tag::None == tag;
   }
@@ -815,7 +841,35 @@ struct CAFFE2_API IValue final {
       class NullType = c10::detail::intrusive_target_default_null_type<T>>
   c10::intrusive_ptr<T, NullType> toIntrusivePtr() const;
 
-  void clearToNone() {
+  void destroy() {
+    if (isTensor()) {
+      payload.as_tensor.~Tensor();
+    } else if (is_intrusive_ptr) {
+      c10::raw::intrusive_ptr::decref(payload.as_intrusive_ptr);
+    }
+  }
+
+  C10_ALWAYS_INLINE void moveFrom(IValue&& rhs) noexcept {
+    if (rhs.isTensor()) {
+      new (&payload.as_tensor) at::Tensor(std::move(rhs.payload.as_tensor));
+      // As far as I can tell, omitting the usual explicit destructor call
+      // is not UB in and of itself, and it's a slight perf win. The
+      // destructor is a no-op, because the moved-from Tensor is
+      // effectively an intrusive_ptr in the null state, so we don't need
+      // the behavior for correctness reasons either. Leaving this
+      // explanatory comment, including commented-out destructor call, to
+      // make this abundantly clear.
+      //
+      // rhs.payload.as_tensor.~Tensor();
+    } else {
+      memcpy(&payload, &rhs.payload, sizeof(payload));
+    }
+    tag = rhs.tag;
+    is_intrusive_ptr = rhs.is_intrusive_ptr;
+    rhs.clearToNone();
+  }
+
+  void clearToNone() noexcept {
     payload.as_int = 0;
     tag = Tag::None;
     is_intrusive_ptr = false;
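The comment repeated in swap() and moveFrom() leans on one property: a moved-from at::Tensor behaves like a null intrusive_ptr, so its destructor does no work, and skipping the explicit ~Tensor() call after the contents have been moved out only drops a dead branch. A standalone sketch of that property with an illustrative refcounted Handle type (not at::Tensor or c10::intrusive_ptr):

#include <utility>

struct Handle {
  explicit Handle(int v) : refcount(new int(1)), value(v) {}
  Handle(Handle&& rhs) noexcept : refcount(rhs.refcount), value(rhs.value) {
    rhs.refcount = nullptr;  // the source becomes the "null" handle
  }
  ~Handle() {
    if (refcount != nullptr && --*refcount == 0) {
      delete refcount;       // a null (moved-from) handle skips all of this
    }
  }
  Handle(const Handle&) = delete;
  Handle& operator=(const Handle&) = delete;
  Handle& operator=(Handle&&) = delete;
  int* refcount;
  int value;
};

int main() {
  Handle a(42);
  Handle b(std::move(a));
  // When a goes out of scope its destructor runs but does nothing --
  // exactly the situation the moveFrom()/swap() comments describe.
  return b.value == 42 ? 0 : 1;
}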
@@ -826,13 +880,23 @@ struct CAFFE2_API IValue final {
     double as_double;
     bool as_bool;
     c10::intrusive_ptr_target* as_intrusive_ptr;
+    at::Tensor as_tensor;
     struct {
       DeviceType type;
       DeviceIndex index;
     } as_device;
+
+    Payload() : as_int(0) {}
+    ~Payload() {}
   };
 
-  IValue(Payload p, Tag t, bool i) : payload(p), tag(t), is_intrusive_ptr(i) {}
+  IValue(const Payload& p, Tag t, bool i) : tag(t), is_intrusive_ptr(i) {
+    if (isTensor()) {
+      new (&payload.as_tensor) at::Tensor(p.as_tensor);
+    } else {
+      memcpy(&payload, &p, sizeof(payload));
+    }
+  }
 
   Payload payload;
   Tag tag;
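Payload needs the user-declared default constructor and empty destructor because adding a member with a non-trivial constructor and destructor (at::Tensor) deletes the union's implicit ones; the enclosing IValue then decides, via the tag, which member to construct and destroy. A compile-time sketch of that rule, with std::string standing in for at::Tensor:

#include <cstdint>
#include <string>
#include <type_traits>

union Bare {
  int64_t as_int;
  std::string as_str;   // non-trivial member
};
union Managed {
  int64_t as_int;
  std::string as_str;
  Managed() : as_int(0) {}
  ~Managed() {}         // the containing class destroys the active member itself
};

static_assert(!std::is_default_constructible<Bare>::value,
              "implicit default ctor is deleted");
static_assert(!std::is_destructible<Bare>::value,
              "implicit dtor is deleted");
static_assert(std::is_default_constructible<Managed>::value, "");
static_assert(std::is_destructible<Managed>::value, "");

int main() { return 0; }

The same reasoning explains why the private IValue(Payload, Tag, bool) constructor changes to take const Payload&: with at::Tensor as a member the union is no longer copyable or trivially copyable, so it cannot be passed by value and blindly assigned; the new overload copy-constructs the Tensor when the tag says so and memcpys the bytes otherwise.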
@@ -852,9 +916,14 @@ struct CAFFE2_API WeakIValue final {
     }
   }
   WeakIValue(const IValue& rhs)
-      : payload(rhs.payload),
-        tag(rhs.tag),
+      : tag(rhs.tag),
         is_intrusive_ptr(rhs.is_intrusive_ptr) {
+    if (rhs.isTensor()) {
+      payload.as_intrusive_ptr = rhs.unsafeToTensorImpl();
+    } else {
+      static_assert(sizeof(payload) == sizeof(rhs.payload), "IValue and WeakIValue payload sizes don't match!");
+      memcpy(&payload, &rhs.payload, sizeof(payload));
+    }
     if (is_intrusive_ptr) {
       c10::raw::weak_intrusive_ptr::incref(payload.as_intrusive_ptr);
     }
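WeakIValue deliberately does not gain an as_tensor member, presumably because a weak reference must not keep the Tensor alive, so for tensors it stores the TensorImpl* and for everything else it copies the raw payload bytes; the static_assert pins the assumption that the two unions stay the same size. A tiny sketch of that guarded byte copy, with illustrative unions rather than the real payload types:

#include <cstdint>
#include <cstring>

union Src { int64_t as_int; double as_double; void* as_ptr; };
union Dst { int64_t as_int; double as_double; void* as_ptr; };

int main() {
  Src src;
  src.as_int = 7;
  Dst dst;
  static_assert(sizeof(dst) == sizeof(src), "payload sizes must match for memcpy");
  std::memcpy(&dst, &src, sizeof(dst));  // only sound while the layouts agree
  return dst.as_int == 7 ? 0 : 1;
}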
@@ -888,17 +957,28 @@ struct CAFFE2_API WeakIValue final {
 
   IValue lock() const {
     if (!is_intrusive_ptr) {
-      return IValue(payload, tag, false);
+      IValue::Payload newPayload;
+      memcpy(&newPayload, &payload, sizeof(newPayload));
+      return IValue(newPayload, tag, false);
     }
     auto temp = c10::weak_intrusive_ptr<c10::intrusive_ptr_target>::reclaim(
         payload.as_intrusive_ptr);
-    IValue::Payload pl;
-    pl.as_intrusive_ptr = temp.lock().release();
-    temp.release();
-    if (!pl.as_intrusive_ptr) {
-      return IValue();
+    if (IValue::Tag::Tensor == tag) {
+      auto ip = temp.lock().release();
+      if (!ip) {
+        return IValue();
+      } else {
+        return IValue(std::move(ip));
+      }
     } else {
-      return IValue(pl, tag, true);
+      IValue::Payload pl;
+      pl.as_intrusive_ptr = temp.lock().release();
+      temp.release();
+      if (!pl.as_intrusive_ptr) {
+        return IValue();
+      } else {
+        return IValue(pl, tag, true);
+      }
     }
   }
 
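lock() keeps the usual promote-or-fail shape: try to upgrade the weak reference, and fall back to None if the target is gone, now with a separate branch for the Tensor tag, since a Tensor payload can no longer be rebuilt from a raw payload holding an intrusive_ptr_target*. For the general shape only, a hedged analogy using std::weak_ptr (c10::weak_intrusive_ptr is PyTorch's intrusive-refcount counterpart; the function below is illustrative, not part of the patch):

#include <memory>

std::shared_ptr<int> lock_or_none(const std::weak_ptr<int>& weak) {
  if (auto strong = weak.lock()) {
    return strong;   // like returning a live IValue
  }
  return nullptr;    // like returning IValue() / None
}

int main() {
  auto owner = std::make_shared<int>(1);
  std::weak_ptr<int> w = owner;
  bool alive = lock_or_none(w) != nullptr;    // true while owner exists
  owner.reset();
  bool expired = lock_or_none(w) == nullptr;  // true after the strong ref is gone
  return (alive && expired) ? 0 : 1;
}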
@@ -928,7 +1008,17 @@ struct CAFFE2_API WeakIValue final {
   }
 
  private:
-  IValue::Payload payload;
+  union Payload {
+    int64_t as_int;
+    double as_double;
+    bool as_bool;
+    c10::intrusive_ptr_target* as_intrusive_ptr;
+    struct {
+      DeviceType type;
+      DeviceIndex index;
+    } as_device;
+  };
+  Payload payload;
   IValue::Tag tag;
   bool is_intrusive_ptr;
 };
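End-to-end, the weak path is meant to keep round-tripping: constructing a WeakIValue from a Tensor-holding IValue records the underlying TensorImpl, and lock() hands back a live IValue while strong references remain. A usage sketch, assuming a libtorch build:

#include <ATen/ATen.h>
#include <ATen/core/ivalue.h>

int main() {
  c10::IValue strong(at::ones({2}));
  c10::WeakIValue weak(strong);        // records the underlying TensorImpl
  c10::IValue relocked = weak.lock();  // the Tensor is still alive here, so not None
  return relocked.isTensor() ? 0 : 1;
}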