Skip to content

Commit df5b469

Browse files
swolchokfacebook-github-bot
authored andcommitted
[Pytorch] Specialize guts of c10::optional for 32-bit scalars (#47015)
Summary: Pull Request resolved: #47015 c10::optional has non-trivial copy and move operations always. This change specializes it for 32-bit scalars so that it has trivial copy and move operations in that case. Ideally, we would instead rely on P0602 "variant and optional should propagate copy/move triviality" and use `std::optional` (or implement that functionality ourselves). We can't use `std::optional` because we are stuck with C++14. Implementing the full P0602 ourselves would add even more complexity. We could do it, but this should be a helpful first step. ghstack-source-id: 115886743 Test Plan: Collect Callgrind instruction counts for `torch.empty(())`. Data: Make empty c10-ful (#46092): ``` <torch.utils.benchmark.utils.valgrind_wrapper.timer_interface.CallgrindStats object at 0x7ffaed1128e0> torch.empty(()) All Noisy symbols removed Instructions: 648005 632899 Baseline: 4144 3736 100 runs per measurement, 1 thread ``` This diff atop #46092: ``` <torch.utils.benchmark.utils.valgrind_wrapper.timer_interface.CallgrindStats object at 0x7f943f1dc8e0> torch.empty(()) All Noisy symbols removed Instructions: 602347 591005 Baseline: 4106 3736 100 runs per measurement, 1 thread ``` (6.6% improvement vs #46092) Pass optionals by const reference (#46598) ``` <torch.utils.benchmark.utils.valgrind_wrapper.timer_interface.CallgrindStats object at 0x7f1abb3988e0> torch.empty(()) All Noisy symbols removed Instructions: 601349 590005 Baseline: 4162 3736 100 runs per measurement, 1 thread ``` (6.8% improvement vs #46092) This diff atop #46598 (i.e., both together) ``` <torch.utils.benchmark.utils.valgrind_wrapper.timer_interface.CallgrindStats object at 0x7f9577c22850> torch.empty(()) All Noisy symbols removed Instructions: 596095 582451 Baseline: 4162 3736 100 runs per measurement, 1 thread Warning: PyTorch was not built with debug symbols. Source information may be limited. Rebuild with REL_WITH_DEB_INFO=1 for more detailed results. ``` (another 1.3% savings!) #46598 outperformed this change slightly, and combining the two leads to further benefits. I guess we should do both! (Though I still don't understand why passing optionals that should fit in a register by const reference would help...) Reviewed By: smessmer Differential Revision: D24552280 fbshipit-source-id: 4d93bfcffafebd8c01559398513fa6b9db959d11
1 parent 0edc6a3 commit df5b469

File tree

2 files changed

+177
-42
lines changed

2 files changed

+177
-42
lines changed

c10/util/Optional.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,6 @@
11
#include <c10/util/Optional.h>
2+
3+
#include <type_traits>
4+
5+
static_assert(C10_IS_TRIVIALLY_COPYABLE(c10::optional<int>), "c10::optional<int> should be trivially copyable");
6+
static_assert(C10_IS_TRIVIALLY_COPYABLE(c10::optional<bool>), "c10::optional<bool> should be trivially copyable");

c10/util/Optional.h

Lines changed: 172 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#ifndef C10_UTIL_OPTIONAL_H_
2626
#define C10_UTIL_OPTIONAL_H_
2727

28+
#include <c10/macros/Macros.h>
2829
#include <c10/util/in_place.h>
2930

3031
#include <cassert>
@@ -184,8 +185,22 @@ struct optional_base {
184185

185186
constexpr optional_base() noexcept : init_(false), storage_(trivial_init){};
186187

188+
explicit constexpr optional_base(const optional_base<T>& v) : init_(v.init_), storage_(trivial_init) {
189+
if (init_) {
190+
::new (dataptr()) T(v.storage_.value_);
191+
}
192+
}
193+
187194
explicit constexpr optional_base(const T& v) : init_(true), storage_(v) {}
188195

196+
explicit constexpr optional_base(optional_base<T>&& v) noexcept(
197+
std::is_nothrow_move_constructible<T>::value)
198+
: init_(v.init_), storage_(trivial_init) {
199+
if (init_) {
200+
::new (dataptr()) T(std::move(v.storage_.value_));
201+
}
202+
}
203+
189204
explicit constexpr optional_base(T&& v)
190205
: init_(true), storage_(constexpr_move(v)) {}
191206

@@ -203,10 +218,52 @@ struct optional_base {
203218
Args&&... args)
204219
: init_(true), storage_(il, std::forward<Args>(args)...) {}
205220

221+
optional_base& operator=(const optional_base& rhs) {
222+
if (init_ && !rhs.init_) {
223+
clear();
224+
} else if (!init_ && rhs.init_) {
225+
init_ = true;
226+
::new (dataptr()) T(rhs.storage_.value_);
227+
} else if (init_ && rhs.init_) {
228+
storage_.value_ = rhs.storage_.value_;
229+
}
230+
return *this;
231+
}
232+
233+
optional_base& operator=(optional_base&& rhs) noexcept(
234+
std::is_nothrow_move_assignable<T>::value &&
235+
std::is_nothrow_move_constructible<T>::value) {
236+
if (init_ && !rhs.init_) {
237+
clear();
238+
} else if (!init_ && rhs.init_) {
239+
init_ = true;
240+
::new (dataptr()) T(std::move(rhs.storage_.value_));
241+
} else if (init_ && rhs.init_) {
242+
storage_.value_ = std::move(rhs.storage_.value_);
243+
}
244+
return *this;
245+
}
246+
206247
~optional_base() {
207248
if (init_)
208249
storage_.value_.T::~T();
209250
}
251+
252+
private:
253+
typename std::remove_const<T>::type* dataptr() {
254+
return std::addressof(storage_.value_);
255+
}
256+
257+
constexpr const T* dataptr() const {
258+
return detail_::static_addressof(storage_.value_);
259+
}
260+
261+
void clear() noexcept {
262+
if (init_) {
263+
dataptr()->~T();
264+
}
265+
init_ = false;
266+
}
210267
};
211268

212269
template <class T>
@@ -217,6 +274,20 @@ struct constexpr_optional_base {
217274
constexpr constexpr_optional_base() noexcept
218275
: init_(false), storage_(trivial_init){};
219276

277+
explicit constexpr constexpr_optional_base(const constexpr_optional_base<T>& v) : init_(v.init_), storage_(trivial_init) {
278+
if (init_) {
279+
::new (dataptr()) T(v.storage_.value_);
280+
}
281+
}
282+
283+
explicit constexpr constexpr_optional_base(constexpr_optional_base<T>&& v) noexcept(
284+
std::is_nothrow_move_constructible<T>::value)
285+
: init_(v.init_), storage_(trivial_init) {
286+
if (init_) {
287+
::new (dataptr()) T(std::move(v.storage_.value_));
288+
}
289+
}
290+
220291
explicit constexpr constexpr_optional_base(const T& v)
221292
: init_(true), storage_(v) {}
222293

@@ -238,23 +309,112 @@ struct constexpr_optional_base {
238309
: init_(true), storage_(il, std::forward<Args>(args)...) {}
239310

240311
~constexpr_optional_base() = default;
312+
313+
constexpr_optional_base& operator=(const constexpr_optional_base& rhs) {
314+
if (init_ && !rhs.init_) {
315+
clear();
316+
} else if (!init_ && rhs.init_) {
317+
init_ = true;
318+
::new (dataptr()) T(rhs.storage_.value_);
319+
} else if (init_ && rhs.init_) {
320+
storage_.value_ = rhs.storage_.value_;
321+
}
322+
return *this;
323+
}
324+
325+
constexpr_optional_base& operator=(constexpr_optional_base&& rhs) noexcept(
326+
std::is_nothrow_move_assignable<T>::value &&
327+
std::is_nothrow_move_constructible<T>::value) {
328+
if (init_ && !rhs.init_) {
329+
clear();
330+
} else if (!init_ && rhs.init_) {
331+
init_ = true;
332+
::new (dataptr()) T(std::move(rhs.storage_.value_));
333+
} else if (init_ && rhs.init_) {
334+
storage_.value_ = std::move(rhs.storage_.value_);
335+
}
336+
return *this;
337+
}
338+
339+
private:
340+
typename std::remove_const<T>::type* dataptr() {
341+
return std::addressof(storage_.value_);
342+
}
343+
344+
constexpr const T* dataptr() const {
345+
return detail_::static_addressof(storage_.value_);
346+
}
347+
348+
void clear() noexcept {
349+
init_ = false;
350+
}
351+
};
352+
353+
// HACK: Optimization for trivially copyable types. The mainline
354+
// implementation fails to have trivial copy/move operations in these
355+
// cases, and we care about them, so just implement that directly.
356+
template <class T>
357+
struct trivially_copyable_optimization_optional_base {
358+
bool init_;
359+
constexpr_storage_t<T> storage_;
360+
361+
constexpr trivially_copyable_optimization_optional_base() noexcept
362+
: init_(false), storage_(trivial_init) {}
363+
364+
explicit constexpr trivially_copyable_optimization_optional_base(const T& v)
365+
: init_(true), storage_(v) {}
366+
367+
explicit constexpr trivially_copyable_optimization_optional_base(T&& v)
368+
: init_(true), storage_(constexpr_move(v)) {}
369+
370+
template <class... Args>
371+
explicit constexpr trivially_copyable_optimization_optional_base(in_place_t, Args&&... args)
372+
: init_(true), storage_(constexpr_forward<Args>(args)...) {}
373+
374+
template <
375+
class U,
376+
class... Args,
377+
TR2_OPTIONAL_REQUIRES(std::is_constructible<T, std::initializer_list<U>>)>
378+
constexpr explicit trivially_copyable_optimization_optional_base(
379+
in_place_t,
380+
std::initializer_list<U> il,
381+
Args&&... args)
382+
: init_(true), storage_(il, std::forward<Args>(args)...) {}
383+
384+
~trivially_copyable_optimization_optional_base() = default;
241385
};
242386

243387
template <class T>
244388
using OptionalBase = typename std::conditional<
245-
std::is_trivially_destructible<T>::value, // if possible
246-
constexpr_optional_base<typename std::remove_const<
247-
T>::type>, // use base with trivial destructor
248-
optional_base<typename std::remove_const<T>::type>>::type;
389+
std::is_trivially_destructible<T>::value &&
390+
C10_IS_TRIVIALLY_COPYABLE(T) &&
391+
// Avoid using is_trivially_copy_{constructible,assignable}
392+
// because old GCC versions don't support them. Also,
393+
// is_trivially_copyable seems not to do what I expect, so check
394+
// trivially_copyable_optimization_optional_base directly.
395+
std::is_copy_constructible<trivially_copyable_optimization_optional_base<T>>::value &&
396+
std::is_copy_assignable<trivially_copyable_optimization_optional_base<T>>::value,
397+
trivially_copyable_optimization_optional_base<T>,
398+
typename std::conditional<
399+
std::is_trivially_destructible<T>::value, // if possible
400+
constexpr_optional_base<typename std::remove_const<
401+
T>::type>, // use base with trivial destructor
402+
optional_base<typename std::remove_const<T>::type>>::type>::type;
249403

250404
template <class T>
251405
class optional : private OptionalBase<T> {
252406
template <class U> // re-declaration for nvcc on Windows.
253407
using OptionalBase = typename std::conditional<
254-
std::is_trivially_destructible<U>::value, // if possible
255-
constexpr_optional_base<typename std::remove_const<
256-
U>::type>, // use base with trivial destructor
257-
optional_base<typename std::remove_const<U>::type>>::type;
408+
std::is_trivially_destructible<U>::value &&
409+
C10_IS_TRIVIALLY_COPYABLE(U) &&
410+
std::is_copy_constructible<trivially_copyable_optimization_optional_base<U>>::value &&
411+
std::is_copy_assignable<trivially_copyable_optimization_optional_base<U>>::value,
412+
trivially_copyable_optimization_optional_base<U>,
413+
typename std::conditional<
414+
std::is_trivially_destructible<U>::value, // if possible
415+
constexpr_optional_base<typename std::remove_const<
416+
U>::type>, // use base with trivial destructor
417+
optional_base<typename std::remove_const<U>::type>>::type>::type;
258418

259419
static_assert(
260420
!std::is_same<typename std::decay<T>::type, nullopt_t>::value,
@@ -312,21 +472,9 @@ class optional : private OptionalBase<T> {
312472
constexpr optional() noexcept : OptionalBase<T>(){};
313473
constexpr optional(nullopt_t) noexcept : OptionalBase<T>(){};
314474

315-
optional(const optional& rhs) : OptionalBase<T>() {
316-
if (rhs.initialized()) {
317-
::new (static_cast<void*>(dataptr())) T(*rhs);
318-
OptionalBase<T>::init_ = true;
319-
}
320-
}
475+
optional(const optional& rhs) = default;
321476

322-
optional(optional&& rhs) noexcept(
323-
std::is_nothrow_move_constructible<T>::value)
324-
: OptionalBase<T>() {
325-
if (rhs.initialized()) {
326-
::new (static_cast<void*>(dataptr())) T(std::move(*rhs));
327-
OptionalBase<T>::init_ = true;
328-
}
329-
}
477+
optional(optional&& rhs) = default;
330478

331479
// see https://github.com/akrzemi1/Optional/issues/16
332480
// and https://en.cppreference.com/w/cpp/utility/optional/optional,
@@ -380,27 +528,9 @@ class optional : private OptionalBase<T> {
380528
return *this;
381529
}
382530

383-
optional& operator=(const optional& rhs) {
384-
if (initialized() == true && rhs.initialized() == false)
385-
clear();
386-
else if (initialized() == false && rhs.initialized() == true)
387-
initialize(*rhs);
388-
else if (initialized() == true && rhs.initialized() == true)
389-
contained_val() = *rhs;
390-
return *this;
391-
}
531+
optional& operator=(const optional& rhs) = default;
392532

393-
optional& operator=(optional&& rhs) noexcept(
394-
std::is_nothrow_move_assignable<T>::value&&
395-
std::is_nothrow_move_constructible<T>::value) {
396-
if (initialized() == true && rhs.initialized() == false)
397-
clear();
398-
else if (initialized() == false && rhs.initialized() == true)
399-
initialize(std::move(*rhs));
400-
else if (initialized() == true && rhs.initialized() == true)
401-
contained_val() = std::move(*rhs);
402-
return *this;
403-
}
533+
optional& operator=(optional&& rhs) = default;
404534

405535
template<class U = T>
406536
auto operator=(U&& v) -> typename std::enable_if<

0 commit comments

Comments
 (0)