Add compile-time promote_types template

swolchok · facebook-github-bot · commit 8c8563f626b9 · 2024-04-30T16:51:46.000-07:00
Summary: Types can now be promoted at compile time via the new promote_types template. (I also had to add the missing BFloat16 check to promoteTypes; I believe that without it a BFloat16 argument would have caused a buffer overflow.)

Reviewed By: kimishpatel, manuelcandales

Differential Revision: D56643045

fbshipit-source-id: cd522e50f59ce838bba06c796e47a1d16ac55b22
diff --git a/runtime/core/exec_aten/util/genScalarTypeTable.py b/runtime/core/exec_aten/util/genScalarTypeTable.py
@@ -0,0 +1,24 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+indexToType = ["U1", "I1", "I2", "I4", "I8", "F2", "F4", "F8", "C2", "C4", "C8", "B1"]
+promoteTypesLookup = [  # [row][col] = promoted type; rows and cols follow indexToType
+    ["U1", "I2", "I2", "I4", "I8", "F2", "F4", "F8", "C2", "C4", "C8", "U1"],  # U1
+    ["I2", "I1", "I2", "I4", "I8", "F2", "F4", "F8", "C2", "C4", "C8", "I1"],  # I1
+    ["I2", "I2", "I2", "I4", "I8", "F2", "F4", "F8", "C2", "C4", "C8", "I2"],  # I2
+    ["I4", "I4", "I4", "I4", "I8", "F2", "F4", "F8", "C2", "C4", "C8", "I4"],  # I4
+    ["I8", "I8", "I8", "I8", "I8", "F2", "F4", "F8", "C2", "C4", "C8", "I8"],  # I8
+    ["F2", "F2", "F2", "F2", "F2", "F2", "F4", "F8", "C2", "C4", "C8", "F2"],  # F2
+    ["F4", "F4", "F4", "F4", "F4", "F4", "F4", "F8", "C4", "C4", "C8", "F4"],  # F4
+    ["F8", "F8", "F8", "F8", "F8", "F8", "F8", "F8", "C8", "C8", "C8", "F8"],  # F8
+    ["C2", "C2", "C2", "C2", "C2", "C2", "C4", "C8", "C2", "C4", "C8", "C2"],  # C2
+    ["C4", "C4", "C4", "C4", "C4", "C4", "C4", "C8", "C4", "C4", "C8", "C4"],  # C4
+    ["C8", "C8", "C8", "C8", "C8", "C8", "C8", "C8", "C8", "C8", "C8", "C8"],  # C8
+    ["U1", "I1", "I2", "I4", "I8", "F2", "F4", "F8", "C2", "C4", "C8", "B1"],  # B1
+]
+for rowType, promotedRow in zip(indexToType, promoteTypesLookup):
+    for colType, promoted in zip(indexToType, promotedRow):
+        print(f"TABLE_ENTRY({rowType}, {colType}, {promoted});")
diff --git a/runtime/core/exec_aten/util/scalar_type_util.h b/runtime/core/exec_aten/util/scalar_type_util.h
@@ -387,14 +387,19 @@ inline bool isComplexType(exec_aten::ScalarType t) {
       t == exec_aten::ScalarType::ComplexDouble);
 }
 
-inline bool isBitsType(exec_aten::ScalarType t) {
+constexpr bool isBitsType(exec_aten::ScalarType t) {
   return t == exec_aten::ScalarType::Bits1x8 ||
       t == exec_aten::ScalarType::Bits2x4 ||
       t == exec_aten::ScalarType::Bits4x2 ||
       t == exec_aten::ScalarType::Bits8 || t == exec_aten::ScalarType::Bits16;
 }
 
-inline bool isQIntType(exec_aten::ScalarType t) {
+/// Trait form of isBitsType(): value is isBitsType() of T's ScalarType.
+template <typename T>
+struct is_bits_type : std::integral_constant<
+    bool, isBitsType(CppTypeToScalarType<T>::value)> {};
+
+constexpr bool isQIntType(exec_aten::ScalarType t) {
   // Don't forget to extend this when adding new QInt types
   return t == exec_aten::ScalarType::QInt8 ||
       t == exec_aten::ScalarType::QUInt8 ||
@@ -403,6 +408,11 @@ inline bool isQIntType(exec_aten::ScalarType t) {
       t == exec_aten::ScalarType::QUInt2x4;
 }
 
+/// Trait form of isQIntType(): value is isQIntType() of T's ScalarType.
+template <typename T>
+struct is_qint_type : std::integral_constant<
+    bool, isQIntType(CppTypeToScalarType<T>::value)> {};
+
 inline exec_aten::ScalarType toQIntType(exec_aten::ScalarType t) {
   switch (t) {
     case exec_aten::ScalarType::Byte:
@@ -550,6 +560,225 @@ To convert(From val) {
   return static_cast<To>(val);
 }
 
+namespace internal {
+// Declared only: every usable (T1, T2) pair is supplied by a specialization.
+template <typename T1, typename T2>
+struct promote_types_lookup;
+
+// A type promoted with itself is unchanged.
+template <typename Same>
+struct promote_types_lookup<Same, Same> { using type = Same; };
+
+// Short aliases for the C++ types behind the dtypes used in the table below.
+template <exec_aten::ScalarType S>
+using cpp_type_for = typename ScalarTypeToCppType<S>::type;
+using U1 = cpp_type_for<exec_aten::ScalarType::Byte>;
+using I1 = cpp_type_for<exec_aten::ScalarType::Char>;
+using I2 = cpp_type_for<exec_aten::ScalarType::Short>;
+using I4 = cpp_type_for<exec_aten::ScalarType::Int>;
+using I8 = cpp_type_for<exec_aten::ScalarType::Long>;
+using F2 = cpp_type_for<exec_aten::ScalarType::Half>;
+using F4 = cpp_type_for<exec_aten::ScalarType::Float>;
+using F8 = cpp_type_for<exec_aten::ScalarType::Double>;
+using C2 = cpp_type_for<exec_aten::ScalarType::ComplexHalf>;
+using C4 = cpp_type_for<exec_aten::ScalarType::ComplexFloat>;
+using C8 = cpp_type_for<exec_aten::ScalarType::ComplexDouble>;
+using B1 = cpp_type_for<exec_aten::ScalarType::Bool>;
+
+// Specializes promote_types_lookup<lhs, rhs> so that its `type` is promoted.
+#define TABLE_ENTRY(lhs, rhs, promoted)               \
+  template <> struct promote_types_lookup<lhs, rhs> { \
+    using type = promoted;                            \
+  }
+
+/* promote_types_lookup is a compile-time-accessible version of the
+ * table in promoteTypes below; we cannot make promoteTypes constexpr
+ * and use it directly because we are on C++11 and thus don't have
+ * C++14 relaxed constexpr. The below series of entries is generated
+ * by genScalarTypeTable.py. */
+TABLE_ENTRY(U1, U1, U1);
+TABLE_ENTRY(U1, I1, I2);
+TABLE_ENTRY(U1, I2, I2);
+TABLE_ENTRY(U1, I4, I4);
+TABLE_ENTRY(U1, I8, I8);
+TABLE_ENTRY(U1, F2, F2);
+TABLE_ENTRY(U1, F4, F4);
+TABLE_ENTRY(U1, F8, F8);
+TABLE_ENTRY(U1, C2, C2);
+TABLE_ENTRY(U1, C4, C4);
+TABLE_ENTRY(U1, C8, C8);
+TABLE_ENTRY(U1, B1, U1);
+TABLE_ENTRY(I1, U1, I2);
+TABLE_ENTRY(I1, I1, I1);
+TABLE_ENTRY(I1, I2, I2);
+TABLE_ENTRY(I1, I4, I4);
+TABLE_ENTRY(I1, I8, I8);
+TABLE_ENTRY(I1, F2, F2);
+TABLE_ENTRY(I1, F4, F4);
+TABLE_ENTRY(I1, F8, F8);
+TABLE_ENTRY(I1, C2, C2);
+TABLE_ENTRY(I1, C4, C4);
+TABLE_ENTRY(I1, C8, C8);
+TABLE_ENTRY(I1, B1, I1);
+TABLE_ENTRY(I2, U1, I2);
+TABLE_ENTRY(I2, I1, I2);
+TABLE_ENTRY(I2, I2, I2);
+TABLE_ENTRY(I2, I4, I4);
+TABLE_ENTRY(I2, I8, I8);
+TABLE_ENTRY(I2, F2, F2);
+TABLE_ENTRY(I2, F4, F4);
+TABLE_ENTRY(I2, F8, F8);
+TABLE_ENTRY(I2, C2, C2);
+TABLE_ENTRY(I2, C4, C4);
+TABLE_ENTRY(I2, C8, C8);
+TABLE_ENTRY(I2, B1, I2);
+TABLE_ENTRY(I4, U1, I4);
+TABLE_ENTRY(I4, I1, I4);
+TABLE_ENTRY(I4, I2, I4);
+TABLE_ENTRY(I4, I4, I4);
+TABLE_ENTRY(I4, I8, I8);
+TABLE_ENTRY(I4, F2, F2);
+TABLE_ENTRY(I4, F4, F4);
+TABLE_ENTRY(I4, F8, F8);
+TABLE_ENTRY(I4, C2, C2);
+TABLE_ENTRY(I4, C4, C4);
+TABLE_ENTRY(I4, C8, C8);
+TABLE_ENTRY(I4, B1, I4);
+TABLE_ENTRY(I8, U1, I8);
+TABLE_ENTRY(I8, I1, I8);
+TABLE_ENTRY(I8, I2, I8);
+TABLE_ENTRY(I8, I4, I8);
+TABLE_ENTRY(I8, I8, I8);
+TABLE_ENTRY(I8, F2, F2);
+TABLE_ENTRY(I8, F4, F4);
+TABLE_ENTRY(I8, F8, F8);
+TABLE_ENTRY(I8, C2, C2);
+TABLE_ENTRY(I8, C4, C4);
+TABLE_ENTRY(I8, C8, C8);
+TABLE_ENTRY(I8, B1, I8);
+TABLE_ENTRY(F2, U1, F2);
+TABLE_ENTRY(F2, I1, F2);
+TABLE_ENTRY(F2, I2, F2);
+TABLE_ENTRY(F2, I4, F2);
+TABLE_ENTRY(F2, I8, F2);
+TABLE_ENTRY(F2, F2, F2);
+TABLE_ENTRY(F2, F4, F4);
+TABLE_ENTRY(F2, F8, F8);
+TABLE_ENTRY(F2, C2, C2);
+TABLE_ENTRY(F2, C4, C4);
+TABLE_ENTRY(F2, C8, C8);
+TABLE_ENTRY(F2, B1, F2);
+TABLE_ENTRY(F4, U1, F4);
+TABLE_ENTRY(F4, I1, F4);
+TABLE_ENTRY(F4, I2, F4);
+TABLE_ENTRY(F4, I4, F4);
+TABLE_ENTRY(F4, I8, F4);
+TABLE_ENTRY(F4, F2, F4);
+TABLE_ENTRY(F4, F4, F4);
+TABLE_ENTRY(F4, F8, F8);
+TABLE_ENTRY(F4, C2, C4);
+TABLE_ENTRY(F4, C4, C4);
+TABLE_ENTRY(F4, C8, C8);
+TABLE_ENTRY(F4, B1, F4);
+TABLE_ENTRY(F8, U1, F8);
+TABLE_ENTRY(F8, I1, F8);
+TABLE_ENTRY(F8, I2, F8);
+TABLE_ENTRY(F8, I4, F8);
+TABLE_ENTRY(F8, I8, F8);
+TABLE_ENTRY(F8, F2, F8);
+TABLE_ENTRY(F8, F4, F8);
+TABLE_ENTRY(F8, F8, F8);
+TABLE_ENTRY(F8, C2, C8);
+TABLE_ENTRY(F8, C4, C8);
+TABLE_ENTRY(F8, C8, C8);
+TABLE_ENTRY(F8, B1, F8);
+TABLE_ENTRY(C2, U1, C2);
+TABLE_ENTRY(C2, I1, C2);
+TABLE_ENTRY(C2, I2, C2);
+TABLE_ENTRY(C2, I4, C2);
+TABLE_ENTRY(C2, I8, C2);
+TABLE_ENTRY(C2, F2, C2);
+TABLE_ENTRY(C2, F4, C4);
+TABLE_ENTRY(C2, F8, C8);
+TABLE_ENTRY(C2, C2, C2);
+TABLE_ENTRY(C2, C4, C4);
+TABLE_ENTRY(C2, C8, C8);
+TABLE_ENTRY(C2, B1, C2);
+TABLE_ENTRY(C4, U1, C4);
+TABLE_ENTRY(C4, I1, C4);
+TABLE_ENTRY(C4, I2, C4);
+TABLE_ENTRY(C4, I4, C4);
+TABLE_ENTRY(C4, I8, C4);
+TABLE_ENTRY(C4, F2, C4);
+TABLE_ENTRY(C4, F4, C4);
+TABLE_ENTRY(C4, F8, C8);
+TABLE_ENTRY(C4, C2, C4);
+TABLE_ENTRY(C4, C4, C4);
+TABLE_ENTRY(C4, C8, C8);
+TABLE_ENTRY(C4, B1, C4);
+TABLE_ENTRY(C8, U1, C8);
+TABLE_ENTRY(C8, I1, C8);
+TABLE_ENTRY(C8, I2, C8);
+TABLE_ENTRY(C8, I4, C8);
+TABLE_ENTRY(C8, I8, C8);
+TABLE_ENTRY(C8, F2, C8);
+TABLE_ENTRY(C8, F4, C8);
+TABLE_ENTRY(C8, F8, C8);
+TABLE_ENTRY(C8, C2, C8);
+TABLE_ENTRY(C8, C4, C8);
+TABLE_ENTRY(C8, C8, C8);
+TABLE_ENTRY(C8, B1, C8);
+TABLE_ENTRY(B1, U1, U1);
+TABLE_ENTRY(B1, I1, I1);
+TABLE_ENTRY(B1, I2, I2);
+TABLE_ENTRY(B1, I4, I4);
+TABLE_ENTRY(B1, I8, I8);
+TABLE_ENTRY(B1, F2, F2);
+TABLE_ENTRY(B1, F4, F4);
+TABLE_ENTRY(B1, F8, F8);
+TABLE_ENTRY(B1, C2, C2);
+TABLE_ENTRY(B1, C4, C4);
+TABLE_ENTRY(B1, C8, C8);
+TABLE_ENTRY(B1, B1, B1);
+#undef TABLE_ENTRY // table-building helper only; don't leak it to includers
+} // namespace internal
+
+/// Compile-time counterpart of promoteTypes(): promote_types<T1, T2>::type is
+/// the C++ type that T1 and T2 promote to.
+template <typename T1, typename T2, bool half_to_float = false>
+struct promote_types {
+ private:
+  template <exec_aten::ScalarType S>
+  using cpp_t = typename ScalarTypeToCppType<S>::type;
+  using half_type = cpp_t<exec_aten::ScalarType::Half>;
+  using float_type = cpp_t<exec_aten::ScalarType::Float>;
+  using bfloat16_type = cpp_t<exec_aten::ScalarType::BFloat16>;
+
+  // qint/bits dtypes may only be "promoted" with themselves.
+  static_assert(
+      std::is_same<T1, T2>::value ||
+          !(is_qint_type<T1>::value || is_qint_type<T2>::value),
+      "promote_types not valid for quantized dtypes");
+  static_assert(
+      std::is_same<T1, T2>::value ||
+          !(is_bits_type<T1>::value || is_bits_type<T2>::value),
+      "promote_types not valid for bits dtypes");
+  // BFloat16 is rejected unconditionally, even when T1 == T2.
+  static_assert(
+      !(std::is_same<T1, bfloat16_type>::value ||
+        std::is_same<T2, bfloat16_type>::value),
+      "promote_types not valid for BFloat16");
+
+  using looked_up = typename internal::promote_types_lookup<T1, T2>::type;
+
+ public:
+  // A Half result becomes Float only when half_to_float is requested.
+  using type = typename std::conditional<
+      half_to_float && std::is_same<looked_up, half_type>::value,
+      float_type,
+      looked_up>::type;
+};
+
 /**
  * Implements type promotion rules that are consistent with ATen behaviour,
  * which in turn is consistent with NumPy's promote_types.
@@ -589,6 +818,10 @@ inline exec_aten::ScalarType promoteTypes(
     ET_CHECK_MSG(false, "promoteTypes not valid for bits dtypes");
   }
 
+  ET_CHECK_MSG(
+      a != exec_aten::ScalarType::BFloat16 &&
+          b != exec_aten::ScalarType::BFloat16,
+      "promoteTypes not valid for BFloat16");
   // 12 types are handled by this function, see the constexpr definitions above
   const int NUM_PROMOTE_TYPES = 12;
 
diff --git a/runtime/core/exec_aten/util/test/scalar_type_util_test.cpp b/runtime/core/exec_aten/util/test/scalar_type_util_test.cpp
@@ -162,3 +162,56 @@ TEST(ScalarTypeUtilTest, promoteTypesTest) {
       promoteTypes(ScalarType::Char, ScalarType::Bool) == ScalarType::Char);
   ET_CHECK(promoteTypes(ScalarType::Bool, ScalarType::Int) == ScalarType::Int);
 }
+
+// Intended to mirror the static_asserts inside promote_types<T1, T2>.
+template <typename T1, typename T2>
+struct promote_types_is_valid : std::integral_constant<
+          bool,
+          !(std::is_same<T1, torch::executor::BFloat16>::value ||
+            std::is_same<T2, torch::executor::BFloat16>::value) &&
+              (std::is_same<T1, T2>::value ||
+               !(torch::executor::is_qint_type<T1>::value ||
+                 torch::executor::is_qint_type<T2>::value ||
+                 torch::executor::is_bits_type<T1>::value ||
+                 torch::executor::is_bits_type<T2>::value))> {};
+
+template <typename T1, bool half_to_float>
+struct CompileTimePromoteTypesTestCase {
+  static void testAll() {
+#define CALL_TEST_ONE(cpp_type, scalar_type) \
+  testOne<cpp_type, promote_types_is_valid<T1, cpp_type>::value>();
+    ET_FORALL_SCALAR_TYPES(CALL_TEST_ONE)
+#undef CALL_TEST_ONE
+  }
+
+  // Valid pair: promote_types must agree with the runtime promoteTypes().
+  template <
+      typename T2,
+      bool valid,
+      typename std::enable_if<valid, bool>::type = true>
+  static void testOne() {
+    using Promoted =
+        typename torch::executor::promote_types<T1, T2, half_to_float>::type;
+    const auto lhs = torch::executor::CppTypeToScalarType<T1>::value;
+    const auto rhs = torch::executor::CppTypeToScalarType<T2>::value;
+    const auto expected = promoteTypes(lhs, rhs, half_to_float);
+    const auto actual = torch::executor::CppTypeToScalarType<Promoted>::value;
+    EXPECT_EQ(actual, expected)
+        << "promoting " << static_cast<int>(lhs) << " to "
+        << static_cast<int>(rhs);
+  }
+
+  template <
+      typename T2,
+      bool valid,
+      typename std::enable_if<!valid, bool>::type = true>
+  static void testOne() {} // Invalid pair: nothing to check.
+};
+
+TEST(ScalarTypeUtilTest, compileTypePromoteTypesTest) {
+#define INSTANTIATE_TYPE_TEST(cpp_type, scalar_type)           \
+  CompileTimePromoteTypesTestCase<cpp_type, false>::testAll(); \
+  CompileTimePromoteTypesTestCase<cpp_type, true>::testAll();
+  ET_FORALL_SCALAR_TYPES(INSTANTIATE_TYPE_TEST); // each T1, both half_to_float
+#undef INSTANTIATE_TYPE_TEST // test-local helper, like CALL_TEST_ONE
+}