20#define CATCH_CONFIG_ENABLE_BENCHMARKING
// Call interface shared by the whole hierarchy: a const, noexcept,
// pure-virtual call operator returning unsigned. Each derived type must
// supply its own override.
31 virtual auto operator()()
const noexcept ->
unsigned = 0;
// Copy and move construction are defaulted; the move constructor is
// additionally declared noexcept.
33 Base(
const Base&) =
default;
34 Base(Base&&)
noexcept =
default;
// Virtual destructor: derived objects in this file are owned through Base
// pointers (e.g. std::unique_ptr<Base>), so destruction must dispatch to the
// dynamic type.
35 virtual ~Base() =
default;
39 [[maybe_unused]]
constexpr static inline std::size_t max_derived_size
43struct thrower final : Base {
// Override of Base's call operator: returns the stored `member` unchanged.
44 auto operator()()
const noexcept ->
unsigned override {
return member; }
48 thrower(
unsigned i = 0)
// Copy construction is defaulted. Copy assignment, by contrast, is
// user-written for this type and calls check_throw on the source value
// before assigning it.
50 thrower(
const thrower&) =
default;
52 auto operator=(
const thrower& other) & -> thrower& {
53 check_throw(other.member);
54 member = other.member;
59 static auto check_throw(
unsigned v) ->
void {
// Pointer to the noexcept function that operator() dispatches to. The
// function receives a pointer to the fptr object and returns unsigned.
68 auto (*p)(
const fptr*)
noexcept ->
unsigned;
// Calls the stored function pointer `p`, passing `this` as its argument, and
// returns whatever it returns.
69 auto operator()()
const noexcept ->
unsigned {
return p(
this); }
72# define MAKE_DERIVED2(Number, Expr) \
73 auto fptr_d##Number(const fptr* p) noexcept->unsigned { \
74 const auto x = p->member; \
77 struct Derived##Number final : Base { \
78 auto operator()() const noexcept -> unsigned override { \
82 Derived##Number(unsigned i) \
// Extra level of indirection: the arguments are fully macro-expanded here
// before MAKE_DERIVED2 receives them, so passing __COUNTER__ yields a number
// for MAKE_DERIVED2's `##Number` token pasting rather than the literal text
// `__COUNTER__` (operands of ## are not expanded in the macro that pastes).
85# define MAKE_DERIVED(Number, Expr) MAKE_DERIVED2(Number, Expr)
144MAKE_DERIVED(__COUNTER__, (x << 16) + (big_number & 0x0000FFFF));
150MAKE_DERIVED(__COUNTER__, (x * 14 + (big_number >> 16)));
151MAKE_DERIVED(__COUNTER__, (x * 14 - (big_number >> 16)));
152MAKE_DERIVED(__COUNTER__, (x * 14 ^ (big_number >> 16)));
154using variant_type = std::variant<
155 Derived0, Derived1, Derived2, Derived3, Derived4, Derived5, Derived6,
156 Derived7, Derived8, Derived9, Derived10, Derived11, Derived12, Derived13,
157 Derived14, Derived15, Derived16, Derived17, Derived18, Derived19, Derived20,
158 Derived21, Derived22, Derived23, Derived24, Derived25, Derived26, Derived27,
159 Derived28, Derived29, Derived30, Derived31, Derived32, Derived33, Derived34,
160 Derived35, Derived36, Derived37, Derived38, Derived39, Derived40, Derived41,
161 Derived42, Derived43, Derived44, Derived45, Derived46, Derived47, Derived48,
162 Derived49, Derived50, Derived51, Derived52, Derived53, Derived54, Derived55,
163 Derived56, Derived57, Derived58, Derived59, Derived60, Derived61, Derived62,
165using variant_type_throws = std::variant<
166 Derived0, Derived1, Derived2, Derived3, Derived4, Derived5, Derived6,
167 Derived7, Derived8, Derived9, Derived10, Derived11, Derived12, Derived13,
168 Derived14, Derived15, Derived16, Derived17, Derived18, Derived19, Derived20,
169 Derived21, Derived22, Derived23, Derived24, Derived25, Derived26, Derived27,
170 Derived28, Derived29, Derived30, Derived31, Derived32, Derived33, Derived34,
171 Derived35, Derived36, Derived37, Derived38, Derived39, Derived40, Derived41,
172 Derived42, Derived43, Derived44, Derived45, Derived46, Derived47, Derived48,
173 Derived49, Derived50, Derived51, Derived52, Derived53, Derived54, Derived55,
174 Derived56, Derived57, Derived58, Derived59, Derived60, Derived61, Derived62,
176constexpr std::array<
decltype(&fptr_d0), 64> fptrs{
177 fptr_d0, fptr_d1, fptr_d2, fptr_d3, fptr_d4, fptr_d5, fptr_d6,
178 fptr_d7, fptr_d8, fptr_d9, fptr_d10, fptr_d11, fptr_d12, fptr_d13,
179 fptr_d14, fptr_d15, fptr_d16, fptr_d17, fptr_d18, fptr_d19, fptr_d20,
180 fptr_d21, fptr_d22, fptr_d23, fptr_d24, fptr_d25, fptr_d26, fptr_d27,
181 fptr_d28, fptr_d29, fptr_d30, fptr_d31, fptr_d32, fptr_d33, fptr_d34,
182 fptr_d35, fptr_d36, fptr_d37, fptr_d38, fptr_d39, fptr_d40, fptr_d41,
183 fptr_d42, fptr_d43, fptr_d44, fptr_d45, fptr_d46, fptr_d47, fptr_d48,
184 fptr_d49, fptr_d50, fptr_d51, fptr_d52, fptr_d53, fptr_d54, fptr_d55,
185 fptr_d56, fptr_d57, fptr_d58, fptr_d59, fptr_d60, fptr_d61, fptr_d62,
189template <
unsigned N = 4u>
190auto make_fptr(
unsigned v)
noexcept -> fptr {
193 return {v, &fptr_d1};
195 return {v, &fptr_d2};
197 return {v, &fptr_d3};
199 return {v, &fptr_d4};
201 __builtin_unreachable();
203 return {v, fptrs[v % N]};
206template <std::size_t Num, std::size_t... Is,
typename T,
typename F>
207auto do_push_elem(std::index_sequence<Is...>, T& d,
unsigned v, F f) {
208 const auto i = v % Num;
215template <std::
size_t Num,
typename T,
typename F>
216auto push_elem(T& d,
unsigned v, F f) {
217 return do_push_elem<Num>(std::make_index_sequence<Num>{}, d, v, f);
220template <
typename... Ts>
221auto baseline_generic(std::vector<Ts>... args) {
224 [](
unsigned accum,
const Ts& x) { return accum + x(); })
231 const auto start = std::chrono::steady_clock::now();
233 constexpr unsigned count = 1000;
235 constexpr unsigned count = 100;
240 std::cout <<
"Sanitizers active\n";
243 std::vector<std::pair<unsigned, std::string_view>> reproducibility_test;
245 Base,
sizeof(Derived1),
248 auto push_checksum = [&](
unsigned s, std::string_view name) {
249 auto begin = reproducibility_test.begin();
250 auto end = reproducibility_test.end();
253 [&](
const auto& p) {
return p.second == name; })
255 reproducibility_test.emplace_back(s, name);
326 std::vector<Derived0> d1;
327 std::vector<Derived1> d2;
328 std::vector<Derived2> d3;
329 std::vector<Derived3> d4;
332 auto v =
static_cast<unsigned>(h(i));
348 BENCHMARK_ADVANCED(
"baseline")(Catch::Benchmark::Chronometer meter) {
351 for (
const auto& x : d1) {
354 for (
const auto& x : d2) {
357 for (
const auto& x : d3) {
360 for (
const auto& x : d4) {
365 push_checksum(accum,
"baseline");
367 BENCHMARK_ADVANCED(
"baseline_generic")
368 (Catch::Benchmark::Chronometer meter) {
370 meter.measure([&] { accum = baseline_generic(d1, d2, d3, d4); });
371 push_checksum(accum,
"baseline_generic");
374 BENCHMARK_ADVANCED(
"raw pointer")(Catch::Benchmark::Chronometer meter) {
375 std::vector<Base*> d;
378 auto v =
static_cast<unsigned>(h(i));
381 d.push_back(
new Derived0(v));
384 d.push_back(
new Derived1(v));
387 d.push_back(
new Derived2(v));
390 d.push_back(
new Derived3(v));
401 push_checksum(accum,
"raw pointer");
406 BENCHMARK_ADVANCED(
"unique_ptr")
407 (Catch::Benchmark::Chronometer meter) {
408 std::vector<std::unique_ptr<Base>> d;
414 d.push_back(std::make_unique<Derived0>(v));
417 d.push_back(std::make_unique<Derived1>(v));
420 d.push_back(std::make_unique<Derived2>(v));
423 d.push_back(std::make_unique<Derived3>(v));
429 for (
const auto& x : d) {
434 push_checksum(accum,
"unique pointer");
436 BENCHMARK_ADVANCED(
"poly_obj")(Catch::Benchmark::Chronometer meter) {
437 std::vector<poly_t> d;
443 d.push_back(poly_t::make<Derived0>(v));
446 d.push_back(poly_t::make<Derived1>(v));
449 d.push_back(poly_t::make<Derived2>(v));
452 d.push_back(poly_t::make<Derived3>(v));
458 for (
const auto& x : d) {
463 push_checksum(accum,
"poly_obj");
465 BENCHMARK_ADVANCED(
"function pointer")(Catch::Benchmark::Chronometer meter) {
469 auto v =
static_cast<unsigned>(h(i));
470 d.push_back(make_fptr(v));
480 push_checksum(accum,
"raw pointer");
482 BENCHMARK_ADVANCED(
"function pointer (wrapped)")
483 (Catch::Benchmark::Chronometer meter) {
487 auto v =
static_cast<unsigned>(h(i));
488 d.push_back(make_fptr(v));
498 push_checksum(accum,
"raw pointer");
500 BENCHMARK_ADVANCED(
"std::function")(Catch::Benchmark::Chronometer meter) {
501 std::vector<std::function<unsigned()>> d;
504 auto v =
static_cast<unsigned>(h(i));
507 d.emplace_back(Derived0(v));
510 d.emplace_back(Derived1(v));
513 d.emplace_back(Derived2(v));
516 d.emplace_back(Derived3(v));
522 for (
const auto& x : d) {
527 push_checksum(accum,
"std::function");
530 std::vector<std::variant<Derived0, Derived1, Derived2, Derived3>> d;
533 auto v =
static_cast<unsigned>(h(i));
536 d.emplace_back(Derived0(v));
539 d.emplace_back(Derived1(v));
542 d.emplace_back(Derived2(v));
545 d.emplace_back(Derived3(v));
549 BENCHMARK_ADVANCED(
"std::visit(v, f)")
550 (Catch::Benchmark::Chronometer meter) {
553 for (
const auto& x : d) {
554 accum +=
std::visit([](
const auto& v) {
return v(); }, x);
558 push_checksum(accum,
"std::visit(v, f)");
560 BENCHMARK_ADVANCED(
"kblib::visit(v, f...)")
561 (Catch::Benchmark::Chronometer meter) {
564 for (
const auto& x : d) {
565 accum +=
kblib::visit(x, [](
const auto& v) {
return v(); });
569 push_checksum(accum,
"kblib::visit(v, f...)");
571 BENCHMARK_ADVANCED(
"kblib::visit(v)(f...)")
572 (Catch::Benchmark::Chronometer meter) {
575 for (
const auto& x : d) {
576 accum +=
kblib::visit(x)([](
const auto& v) {
return v(); });
580 push_checksum(accum,
"kblib::visit(v)(f...)");
582 BENCHMARK_ADVANCED(
"visit_indexed(v, f...)")
583 (Catch::Benchmark::Chronometer meter) {
586 for (
const auto& x : d) {
588 x, [](
auto,
const auto& v) {
return v(); });
592 push_checksum(accum,
"visit_indexed(v, f...)");
594 BENCHMARK_ADVANCED(
"kblib::visit2(v, f...)")
595 (Catch::Benchmark::Chronometer meter) {
598 for (
const auto& x : d) {
599 accum +=
kblib::visit2(x, [](
const auto& v) {
return v(); });
603 push_checksum(accum,
"kblib::visit2(v, f...)");
605 BENCHMARK_ADVANCED(
"kblib::visit2_nop(v, f...)")
606 (Catch::Benchmark::Chronometer meter) {
609 for (
const auto& x : d) {
614 push_checksum(accum,
"kblib::visit2_nop(v, f...)");
616 BENCHMARK_ADVANCED(
"std::get_if")
617 (Catch::Benchmark::Chronometer meter) {
620 for (
const auto& x : d) {
621 if (
auto*
const p = std::get_if<0>(&x)) {
623 }
else if (
auto*
const p = std::get_if<1>(&x)) {
625 }
else if (
auto*
const p = std::get_if<2>(&x)) {
627 }
else if (
auto*
const p = std::get_if<3>(&x)) {
633 push_checksum(accum,
"std::get_if");
635 BENCHMARK_ADVANCED(
"switch (v.index())")
636 (Catch::Benchmark::Chronometer meter) {
639 for (
const auto& x : d) {
642 accum += std::get<0>(x)();
645 accum += std::get<1>(x)();
648 accum += std::get<2>(x)();
651 accum += std::get<3>(x)();
658 push_checksum(accum,
"switch (v.index())");
664 BENCHMARK_ADVANCED(
"raw pointer, ch")
665 (Catch::Benchmark::Chronometer meter) {
666 std::vector<Base*> d;
669 auto v =
static_cast<unsigned>(h(i));
672 d.emplace_back(
new Derived0(v));
675 d.emplace_back(
new Derived1(v));
678 d.emplace_back(
new Derived2(v));
681 d.emplace_back(
new Derived3(v));
685 d.push_back(
new thrower(v));
687 d.push_back(
nullptr);
705 BENCHMARK_ADVANCED(
"unique_ptr, ch")
706 (Catch::Benchmark::Chronometer meter) {
707 std::vector<std::unique_ptr<Base>> d;
713 d.push_back(std::make_unique<Derived0>(v));
716 d.push_back(std::make_unique<Derived1>(v));
719 d.push_back(std::make_unique<Derived2>(v));
722 d.push_back(std::make_unique<Derived3>(v));
726 d.push_back(std::make_unique<thrower>(v));
728 d.push_back(
nullptr);
735 for (
const auto& x : d) {
743 BENCHMARK_ADVANCED(
"poly_obj, ch")(Catch::Benchmark::Chronometer meter) {
744 std::vector<poly_t> d;
750 d.push_back(poly_t::make<Derived0>(v));
753 d.push_back(poly_t::make<Derived1>(v));
756 d.push_back(poly_t::make<Derived2>(v));
759 d.push_back(poly_t::make<Derived3>(v));
763 d.push_back(poly_t::make<thrower>(v));
765 d.emplace_back(
nullptr);
772 for (
const auto& x : d) {
780 std::vector<std::function<unsigned()>> df;
781 std::vector<std::variant<Derived0, Derived1, Derived2, Derived3, thrower>>
785 auto v =
static_cast<unsigned>(h(i));
788 df.emplace_back(Derived0(v));
789 d.emplace_back(Derived0(v));
792 df.emplace_back(Derived1(v));
793 d.emplace_back(Derived1(v));
796 df.emplace_back(Derived2(v));
797 d.emplace_back(Derived2(v));
800 df.emplace_back(Derived3(v));
801 d.emplace_back(Derived3(v));
806 auto& b = df.emplace_back(thrower());
813 auto& b = d.emplace_back(std::in_place_type_t<thrower>{}, 0);
819 CHECK(d.size() == df.size());
820 CHECK(df.size() == count);
821 CHECK(d.size() == count);
822 BENCHMARK_ADVANCED(
"std::function, ch")
823 (Catch::Benchmark::Chronometer meter) {
826 for (
const auto& x : df) {
834 BENCHMARK_ADVANCED(
"kblib::visit2_nop(v, f...), ch")
835 (Catch::Benchmark::Chronometer meter) {
838 for (
const auto& x : d) {
844 BENCHMARK_ADVANCED(
"std::get_if, ch")
845 (Catch::Benchmark::Chronometer meter) {
848 for (
const auto& x : d) {
849 if (
auto*
const p = std::get_if<0>(&x)) {
851 }
else if (
auto*
const p = std::get_if<1>(&x)) {
853 }
else if (
auto*
const p = std::get_if<2>(&x)) {
855 }
else if (
auto*
const p = std::get_if<3>(&x)) {
857 }
else if (
auto*
const p = std::get_if<4>(&x)) {
864 BENCHMARK_ADVANCED(
"switch (v.index()), ch")
865 (Catch::Benchmark::Chronometer meter) {
868 for (
const auto& x : d) {
871 accum += std::get<0>(x)();
874 accum += std::get<1>(x)();
877 accum += std::get<2>(x)();
880 accum += std::get<3>(x)();
883 accum += std::get<4>(x)();
891 BENCHMARK_ADVANCED(
"std::function, ex")
892 (Catch::Benchmark::Chronometer meter) {
895 for (
const auto& x : df) {
898 }
catch (
const std::bad_function_call&) {
904 BENCHMARK_ADVANCED(
"std::visit(v, f), ex")
905 (Catch::Benchmark::Chronometer meter) {
908 for (
const auto& x : d) {
910 accum +=
std::visit([](
const auto& v) {
return v(); }, x);
911 }
catch (
const std::bad_variant_access&) {
917 BENCHMARK_ADVANCED(
"kblib::visit(v, f...), ex")
918 (Catch::Benchmark::Chronometer meter) {
921 for (
const auto& x : d) {
923 accum +=
kblib::visit(x, [](
const auto& v) {
return v(); });
924 }
catch (
const std::bad_variant_access&) {
930 BENCHMARK_ADVANCED(
"kblib::visit(v)(f...), ex")
931 (Catch::Benchmark::Chronometer meter) {
934 for (
const auto& x : d) {
936 accum +=
kblib::visit(x)([](
const auto& v) {
return v(); });
937 }
catch (
const std::bad_variant_access&) {
943 BENCHMARK_ADVANCED(
"visit_indexed(v, f...), ex")
944 (Catch::Benchmark::Chronometer meter) {
947 for (
const auto& x : d) {
950 x, [](
auto,
const auto& v) {
return v(); });
951 }
catch (
const std::bad_variant_access&) {
957 BENCHMARK_ADVANCED(
"kblib::visit2(v, f...), ex")
958 (Catch::Benchmark::Chronometer meter) {
961 for (
const auto& x : d) {
963 accum +=
kblib::visit2(x, [](
const auto& v) {
return v(); });
964 }
catch (
const std::bad_variant_access&) {
971 auto end = std::chrono::steady_clock::now();
972 std::chrono::duration<float, std::ratio<1, 1>> time = end - start;
973 std::cout <<
"\n\nProfiling took " << time.count() <<
" seconds\n";
976 unsigned expected_value = reproducibility_test[0].first;
978 std::cout <<
"Checksum: " << expected_value;
980 for (
auto i :
kblib::range(std::size_t(1), reproducibility_test.size())) {
981 const auto& run = reproducibility_test[i];
982 if (run.first != expected_value) {
983 WARN(i <<
": " << run.second <<
": " << run.first
984 <<
" != " << expected_value);
986 REQUIRE(run.first == expected_value);
Inline polymorphic object. Generally mimics the interfaces of std::optional and std::variant.
constexpr auto find_if(ForwardIt begin, EndIt end, UnaryPredicate &&pred) noexcept(noexcept(kblib::invoke(pred, *begin))) -> ForwardIt
Finds the first value in range [begin, end) for which pred returns true. If not found,...
constexpr auto range(Value min, Value max, Delta step=0) -> range_t< Value, Delta >
Constructs a range from beginning, end, and step amount. The range is half-open, that is, min is in the range but max is not.
constexpr auto visit_indexed(Variant &&variant, Fs &&... fs) -> decltype(auto)
Visit a variant, but pass the index (as an integral_constant) to the visitor. This allows for a visit...
constexpr auto visit2_nop(V &&v, F &&f, Fs &&... fs) -> void
constexpr auto accumulate(InputIt first, InputIt last, T init) -> T
A constexpr version of std::accumulate.
constexpr auto visit2(V &&v, F &&f, Fs &&... fs) -> decltype(auto)
constexpr auto visit(V &&v, F &&f, Fs &&... fs) -> decltype(auto)
Wraps std::visit to provide an interface taking one variant and any number of functors providing an overload set.
Provides poly_obj, which enables polymorphism to be used without unnecessary per-object dynamic allocations.
The primary template has to exist, but not be constructible, in order to be compatible with std::hash...
The prime to use for the FNVa hash algorithm, as a type trait.
poly_obj_traits is a traits class template which abstracts the allowed operations on a polymorphic type.
TEST_CASE("poly_obj performance(4_old)")
#define MAKE_DERIVED(Number, Expr)