kblib 0.2.3
General utilities library for modern C++
visitation_benchmarks.cpp
Go to the documentation of this file.
1/* *****************************************************************************
2 * %{QMAKE_PROJECT_NAME}
3 * Copyright (c) %YEAR% killerbee
4 *
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <https://www.gnu.org/licenses/>.
17 * ****************************************************************************/
18#include "kblib/poly_obj.h"
19
20#define CATCH_CONFIG_ENABLE_BENCHMARKING
21#include "catch.hpp"
22
23//#define FAST_TEST
24
25#if KBLIB_USE_CXX17
26
27# ifndef FAST_TEST
28
29namespace {
/// Abstract interface shared by every polymorphic benchmark payload: a
/// nullary, non-throwing call operator that yields an unsigned value.
struct Base {
    Base() = default;
    Base(const Base&) = default;
    Base(Base&&) noexcept = default;
    virtual ~Base() = default;

    /// Produces this object's contribution to the benchmark checksum.
    virtual unsigned operator()() const noexcept = 0;

    // Upper bound on the payload size of derived types (one unsigned).
    // Per the original author's note this constant is consumed from outside
    // the struct; because everything lives in a .cpp file the compiler cannot
    // see that use, hence [[maybe_unused]].
    [[maybe_unused]] static constexpr std::size_t max_derived_size
        = sizeof(unsigned);
};
42
43struct thrower final : Base {
44 auto operator()() const noexcept -> unsigned override { return member; }
45
46 unsigned member;
47
48 thrower(unsigned i = 0)
49 : member(i) {}
50 thrower(const thrower&) = default;
51
52 auto operator=(const thrower& other) & -> thrower& {
53 check_throw(other.member);
54 member = other.member;
55 return *this;
56 }
57
58 private:
59 static auto check_throw(unsigned v) -> void {
60 if ((v & 7u) == 0) {
61 throw 0;
62 }
63 }
64};
65
/// Hand-rolled "virtual call": an unsigned payload plus a pointer to the
/// non-throwing function that interprets it.
struct fptr {
    unsigned member;                      // payload handed to the callback
    unsigned (*p)(const fptr*) noexcept;  // behaviour, receives this object

    /// Invokes the stored callback on this object.
    unsigned operator()() const noexcept { return (*p)(this); }
};
71
// MAKE_DERIVED2(Number, Expr) emits the same computation in two shapes:
//   * fptr_d<Number>  - a free function reading `x` from an fptr's payload;
//   * Derived<Number> - a final subclass of Base holding `x` as a member.
// In both, Expr is evaluated with `x` naming the unsigned input and its
// result is returned as unsigned.
// The expansion deliberately ends without a semicolon so that call sites
// supply it.
# define MAKE_DERIVED2(Number, Expr)                                        \
    unsigned fptr_d##Number(const fptr* p) noexcept {                        \
        const auto x = p->member;                                            \
        return Expr;                                                         \
    }                                                                        \
    struct Derived##Number final : Base {                                    \
        Derived##Number(unsigned i)                                          \
            : x(i) {}                                                        \
        unsigned operator()() const noexcept override { return Expr; }       \
        unsigned x;                                                          \
    }
// Extra level of indirection so that a macro argument such as __COUNTER__ is
// expanded before it is token-pasted by MAKE_DERIVED2.
# define MAKE_DERIVED(Number, Expr) MAKE_DERIVED2(Number, Expr)
86
// Constant mixed into the later Expr bodies: the value of
// kblib::fnv::fnv_prime<unsigned>, i.e. the FNV hash prime for unsigned.
87constexpr auto big_number = kblib::fnv::fnv_prime<unsigned>::value;
88
// Generates the 64 benchmark payloads. Every invocation consumes one value of
// __COUNTER__, and the aliases and table that follow name Derived0..Derived63
// and fptr_d0..fptr_d63 explicitly, so the order and count of these lines
// must not change.
// NOTE(review): __COUNTER__ is a compiler extension, and the generated names
// only start at 0 if nothing earlier in this translation unit (including the
// headers above) has expanded it - confirm this holds for the build.
// NOTE(review): several expressions divide or take a remainder by x or by
// (x << 16) (e.g. big_number / x); those are undefined for inputs that make
// the divisor zero.
89MAKE_DERIVED(__COUNTER__, x);
90MAKE_DERIVED(__COUNTER__, x * 2);
91MAKE_DERIVED(__COUNTER__, x / 2);
92MAKE_DERIVED(__COUNTER__, ~x);
93MAKE_DERIVED(__COUNTER__, x + 42);
94MAKE_DERIVED(__COUNTER__, x * 2 + 42);
95MAKE_DERIVED(__COUNTER__, -x);
96MAKE_DERIVED(__COUNTER__, x - 42);
97MAKE_DERIVED(__COUNTER__, 42 - x);
98MAKE_DERIVED(__COUNTER__, (x * x));
99MAKE_DERIVED(__COUNTER__, (x * x * 2));
100MAKE_DERIVED(__COUNTER__, -x - 42);
101MAKE_DERIVED(__COUNTER__, x & 42);
102MAKE_DERIVED(__COUNTER__, x | 42);
103MAKE_DERIVED(__COUNTER__, x << 2);
104MAKE_DERIVED(__COUNTER__, x >> 2);
105MAKE_DERIVED(__COUNTER__, ~x << 2);
106MAKE_DERIVED(__COUNTER__, ~x >> 2);
107MAKE_DERIVED(__COUNTER__, (x >> 8) | (x << 24));
108MAKE_DERIVED(__COUNTER__, (x >> 16) | (x << 16));
109MAKE_DERIVED(__COUNTER__, (x >> 24) | (x << 8));
110MAKE_DERIVED(__COUNTER__, (~x >> 8) | (~x << 24));
111MAKE_DERIVED(__COUNTER__, (~x >> 16) | (~x << 16));
112MAKE_DERIVED(__COUNTER__, (~x >> 24) | (~x << 8));
113MAKE_DERIVED(__COUNTER__, (static_cast<void>(x), 42));
114MAKE_DERIVED(__COUNTER__, (x >> 16) | (~x << 16));
115MAKE_DERIVED(__COUNTER__, (~x >> 16) | (x << 16));
116MAKE_DERIVED(__COUNTER__, x ^ 42);
117MAKE_DERIVED(__COUNTER__, x ^ ((x >> 16) | (x << 16)));
118MAKE_DERIVED(__COUNTER__, -~x);
119MAKE_DERIVED(__COUNTER__, ~-x);
120MAKE_DERIVED(__COUNTER__, x * 42);
121MAKE_DERIVED(__COUNTER__, -x * 42);
122MAKE_DERIVED(__COUNTER__, -x / 42);
123MAKE_DERIVED(__COUNTER__, x % 42);
124MAKE_DERIVED(__COUNTER__, -(x % 42));
125MAKE_DERIVED(__COUNTER__, (x * big_number));
126MAKE_DERIVED(__COUNTER__, x ^ big_number);
127MAKE_DERIVED(__COUNTER__, x + big_number);
128MAKE_DERIVED(__COUNTER__, x - big_number);
129MAKE_DERIVED(__COUNTER__, big_number - x);
130MAKE_DERIVED(__COUNTER__, x / big_number);
131MAKE_DERIVED(__COUNTER__, x % big_number);
132MAKE_DERIVED(__COUNTER__, big_number / x);
133MAKE_DERIVED(__COUNTER__, big_number % x);
134MAKE_DERIVED(__COUNTER__, (void(x), big_number));
135MAKE_DERIVED(__COUNTER__, x + (big_number >> 16));
136MAKE_DERIVED(__COUNTER__, x + (big_number >> 24));
137MAKE_DERIVED(__COUNTER__, (x << 16) + big_number);
138MAKE_DERIVED(__COUNTER__, (x << 16) - big_number);
139MAKE_DERIVED(__COUNTER__, big_number - (x << 16));
140MAKE_DERIVED(__COUNTER__, (x << 16) ^ big_number);
141MAKE_DERIVED(__COUNTER__, (x << 16) / big_number);
142MAKE_DERIVED(__COUNTER__, big_number / (x << 16));
143MAKE_DERIVED(__COUNTER__, (x << 16) & big_number);
144MAKE_DERIVED(__COUNTER__, (x << 16) + (big_number & 0x0000FFFF));
145MAKE_DERIVED(__COUNTER__, (x << 16) % big_number);
146MAKE_DERIVED(__COUNTER__, big_number % (x << 16));
147MAKE_DERIVED(__COUNTER__, x * 42 + big_number);
148MAKE_DERIVED(__COUNTER__, (x * (big_number % 42)));
149MAKE_DERIVED(__COUNTER__, (x * (-big_number % 42)));
150MAKE_DERIVED(__COUNTER__, (x * 14 + (big_number >> 16)));
151MAKE_DERIVED(__COUNTER__, (x * 14 - (big_number >> 16)));
152MAKE_DERIVED(__COUNTER__, (x * 14 ^ (big_number >> 16)));
153
// Variant over all 64 generated payload types, Derived0 through Derived63, in
// index order (alternative K is DerivedK).
154using variant_type = std::variant<
155 Derived0, Derived1, Derived2, Derived3, Derived4, Derived5, Derived6,
156 Derived7, Derived8, Derived9, Derived10, Derived11, Derived12, Derived13,
157 Derived14, Derived15, Derived16, Derived17, Derived18, Derived19, Derived20,
158 Derived21, Derived22, Derived23, Derived24, Derived25, Derived26, Derived27,
159 Derived28, Derived29, Derived30, Derived31, Derived32, Derived33, Derived34,
160 Derived35, Derived36, Derived37, Derived38, Derived39, Derived40, Derived41,
161 Derived42, Derived43, Derived44, Derived45, Derived46, Derived47, Derived48,
162 Derived49, Derived50, Derived51, Derived52, Derived53, Derived54, Derived55,
163 Derived56, Derived57, Derived58, Derived59, Derived60, Derived61, Derived62,
164 Derived63>;
// Same 64 alternatives with `thrower` appended as alternative 64, the one
// alternative whose copy-assignment may throw.
165using variant_type_throws = std::variant<
166 Derived0, Derived1, Derived2, Derived3, Derived4, Derived5, Derived6,
167 Derived7, Derived8, Derived9, Derived10, Derived11, Derived12, Derived13,
168 Derived14, Derived15, Derived16, Derived17, Derived18, Derived19, Derived20,
169 Derived21, Derived22, Derived23, Derived24, Derived25, Derived26, Derived27,
170 Derived28, Derived29, Derived30, Derived31, Derived32, Derived33, Derived34,
171 Derived35, Derived36, Derived37, Derived38, Derived39, Derived40, Derived41,
172 Derived42, Derived43, Derived44, Derived45, Derived46, Derived47, Derived48,
173 Derived49, Derived50, Derived51, Derived52, Derived53, Derived54, Derived55,
174 Derived56, Derived57, Derived58, Derived59, Derived60, Derived61, Derived62,
175 Derived63, thrower>;
// Compile-time table of the 64 generated free functions: fptrs[K] is fptr_dK,
// the function-pointer counterpart of DerivedK. All entries share the type of
// &fptr_d0. Within the visible listing its only reader is the final return
// statement of make_fptr.
176constexpr std::array<decltype(&fptr_d0), 64> fptrs{
177 fptr_d0, fptr_d1, fptr_d2, fptr_d3, fptr_d4, fptr_d5, fptr_d6,
178 fptr_d7, fptr_d8, fptr_d9, fptr_d10, fptr_d11, fptr_d12, fptr_d13,
179 fptr_d14, fptr_d15, fptr_d16, fptr_d17, fptr_d18, fptr_d19, fptr_d20,
180 fptr_d21, fptr_d22, fptr_d23, fptr_d24, fptr_d25, fptr_d26, fptr_d27,
181 fptr_d28, fptr_d29, fptr_d30, fptr_d31, fptr_d32, fptr_d33, fptr_d34,
182 fptr_d35, fptr_d36, fptr_d37, fptr_d38, fptr_d39, fptr_d40, fptr_d41,
183 fptr_d42, fptr_d43, fptr_d44, fptr_d45, fptr_d46, fptr_d47, fptr_d48,
184 fptr_d49, fptr_d50, fptr_d51, fptr_d52, fptr_d53, fptr_d54, fptr_d55,
185 fptr_d56, fptr_d57, fptr_d58, fptr_d59, fptr_d60, fptr_d61, fptr_d62,
186 fptr_d63,
187};
188
189template <unsigned N = 4u>
190auto make_fptr(unsigned v) noexcept -> fptr {
191 switch (v % N) {
192 case 0:
193 return {v, &fptr_d1};
194 case 1:
195 return {v, &fptr_d2};
196 case 2:
197 return {v, &fptr_d3};
198 case 3:
199 return {v, &fptr_d4};
200 default:
201 __builtin_unreachable();
202 }
203 return {v, fptrs[v % N]};
204}
205
206template <std::size_t Num, std::size_t... Is, typename T, typename F>
207auto do_push_elem(std::index_sequence<Is...>, T& d, unsigned v, F f) {
208 const auto i = v % Num;
209 ((i == Is
210 and (d.emplace_back(f(kblib::constant<std::size_t, Is>{}, v)), true))
211 or ...);
212 return;
213}
214
215template <std::size_t Num, typename T, typename F>
216auto push_elem(T& d, unsigned v, F f) {
217 return do_push_elem<Num>(std::make_index_sequence<Num>{}, d, v, f);
218};
219
/// Sums `x()` over every element of every vector passed in.
///
/// The vectors are taken by const reference. They used to be taken by value,
/// which copied each vector on every call; the caller invokes this inside the
/// timed region, so the copies were being measured as part of the baseline.
///
/// @tparam Ts element types; each must be callable as `x()` on a const
///            object, with a result that can be added to an unsigned.
/// @param args vectors to fold over; none is modified.
/// @return the unsigned sum (wrapping on overflow), 0 when called with no
///         vectors or only empty ones.
template <typename... Ts>
auto baseline_generic(const std::vector<Ts>&... args) {
    // One accumulate per vector, combined with a fold over '+'. The generic
    // lambda keeps the parameter pack out of the lambda body.
    return (std::accumulate(
                args.begin(), args.end(), 0u,
                [](unsigned accum, const auto& x) { return accum + x(); })
            + ... + 0u);
}
227
228} // namespace
229
230TEST_CASE("poly_obj performance(4_old)") {
231 const auto start = std::chrono::steady_clock::now();
232# ifdef NDEBUG
233 constexpr unsigned count = 1000;
234# else
235 constexpr unsigned count = 100;
236# endif
237
238# ifdef SANITIZERS
239# define STR(s) # s
240 std::cout << "Sanitizers active\n";
241# endif
242
243 std::vector<std::pair<unsigned, std::string_view>> reproducibility_test;
244 using poly_t = kblib::poly_obj<
245 Base, sizeof(Derived1),
247
248 auto push_checksum = [&](unsigned s, std::string_view name) {
249 auto begin = reproducibility_test.begin();
250 auto end = reproducibility_test.end();
251 // Take only the first result for each run type
252 if (std::find_if(begin, end,
253 [&](const auto& p) { return p.second == name; })
254 == end) {
255 reproducibility_test.emplace_back(s, name);
256 }
257 };
258
259 /*
260 * Release / Debug, all times in microseconds:
261 *
262 * Four separate contiguous arrays
263 * baseline: 5.377 / 37.549
264 * Polymorphism, always valid
265 * raw pointer: 10.470 / 51.205
266 * unique_ptr: 11.826 / 70.346
267 * poly_obj: 14.644 / 79.075
268 * Type erasure, always valid
269 * std::function: 15.247 / 106.562
270 * std::visit(v, f): 27.375 / 238.055
271 * kblib::visit(v, f...): 37.308 / 258.965
272 * kblib::visit(v)(f...): 37.253 / 288.355
273 * visit_indexed: 30.203 / 208.894
274 * kblib::visit2: 8.407 / 211.649
275 * kblib::visit2_nop: 8.650 / 222.502
276 * std::get_if: 8.580 / 158.164
277 * switch (v.index): 8.984 / 170.739
278 * Polymorphism, checking for invalid
279 * raw pointer, ch: 15.859 / 56.917
280 * unique_ptr, ch: 18.344 / 108.046
281 * poly_obj, ch: 18.290 / 96.168
282 * Type erasure, checking for invalid
283 * std::function, ch: 25.200 / 122.038
284 * kblib::visit2_nop, ch: 12.327 / 322.764
285 * std::get_if, ch: 12.386 / 232.006
286 * switch(v.index()), ch: 16.817 / 203.752
287 * Type erasure, exceptions for invalid
288 * std::function, ex: 23.758 / 126.152
289 * std::visit(v, f), ex: 40.646 / 310.900
290 * kblib::visit(v, f...), ex: 44.016 / 328.484
291 * kblib::visit(v)(f...), ex: 46.345 / 377.681
292 * visit_indexed, ex: 40.066 / 261.261
293 * kblib::visit2, ex: 12.353 / 315.509
294 *
295 * Overall test runtime: 80s / 286s
296 *
297 * Conclusions:
298 *
299 * In release builds, visit_indexed, std::function, and std::visit are very
300 * slow, contiguous access is very fast, everything else is comparable to
301 * each other, except that exceptions are, of course, very slow.
302 *
303 * In debug builds, type erasure seems to be extremely slow, enough even to
304 * mostly drown out the exceptions, but the fastest is std::function by far.
305 * visit2 is a bit faster in release builds, but poly_obj isn't much slower
306 * and is much much faster than it in debug. In every instance, poly_obj
307 * compares favorably to unique_ptr, which is good, because it is meant to
308 * replace it in some uses.
309 *
310 * std::get_if and switch are predictably the fastest way of accessing a
311 * variant in debug, but in release are tied with visit2. I do not think
312 * that the messy and verbose code is justified by the debug performance
313 * gain compared to the other options.
314 *
315 * If you want to use exceptions for invalid objects, kblib::visit2 is the
316 * fastest way, followed by std::function. The others are equally twice as
317 * slow.
318 *
319 * I cannot tell why visit_indexed is so much slower than visit2 in this
320 * test. Their code is very similar. It must be the additional parameter
321 * causing delays in setting up the call.
322 *
323 */
324
325 {
326 std::vector<Derived0> d1;
327 std::vector<Derived1> d2;
328 std::vector<Derived2> d3;
329 std::vector<Derived3> d4;
331 for (auto i : kblib::range(count)) {
332 auto v = static_cast<unsigned>(h(i));
333 switch (v % 4) {
334 case 0:
335 d1.emplace_back(v);
336 break;
337 case 1:
338 d2.emplace_back(v);
339 break;
340 case 2:
341 d3.emplace_back(v);
342 break;
343 case 3:
344 d4.emplace_back(v);
345 }
346 }
347
348 BENCHMARK_ADVANCED("baseline")(Catch::Benchmark::Chronometer meter) {
349 unsigned accum{};
350 meter.measure([&] {
351 for (const auto& x : d1) {
352 accum += x();
353 }
354 for (const auto& x : d2) {
355 accum += x();
356 }
357 for (const auto& x : d3) {
358 accum += x();
359 }
360 for (const auto& x : d4) {
361 accum += x();
362 }
363 return accum;
364 });
365 push_checksum(accum, "baseline");
366 };
367 BENCHMARK_ADVANCED("baseline_generic")
368 (Catch::Benchmark::Chronometer meter) {
369 unsigned accum{};
370 meter.measure([&] { accum = baseline_generic(d1, d2, d3, d4); });
371 push_checksum(accum, "baseline_generic");
372 };
373 }
374 BENCHMARK_ADVANCED("raw pointer")(Catch::Benchmark::Chronometer meter) {
375 std::vector<Base*> d;
377 for (auto i : kblib::range(count)) {
378 auto v = static_cast<unsigned>(h(i));
379 switch (v % 4) {
380 case 0:
381 d.push_back(new Derived0(v));
382 break;
383 case 1:
384 d.push_back(new Derived1(v));
385 break;
386 case 2:
387 d.push_back(new Derived2(v));
388 break;
389 case 3:
390 d.push_back(new Derived3(v));
391 }
392 }
393
394 unsigned accum{};
395 meter.measure([&] {
396 for (auto x : d) {
397 accum += (*x)();
398 }
399 return accum;
400 });
401 push_checksum(accum, "raw pointer");
402 for (auto x : d) {
403 delete x;
404 }
405 };
406 BENCHMARK_ADVANCED("unique_ptr")
407 (Catch::Benchmark::Chronometer meter) {
408 std::vector<std::unique_ptr<Base>> d;
410 for (auto i : kblib::range(count)) {
411 auto v = h(i);
412 switch (v % 4) {
413 case 0:
414 d.push_back(std::make_unique<Derived0>(v));
415 break;
416 case 1:
417 d.push_back(std::make_unique<Derived1>(v));
418 break;
419 case 2:
420 d.push_back(std::make_unique<Derived2>(v));
421 break;
422 case 3:
423 d.push_back(std::make_unique<Derived3>(v));
424 }
425 }
426
427 unsigned accum{};
428 meter.measure([&] {
429 for (const auto& x : d) {
430 accum += (*x)();
431 }
432 return accum;
433 });
434 push_checksum(accum, "unique pointer");
435 };
436 BENCHMARK_ADVANCED("poly_obj")(Catch::Benchmark::Chronometer meter) {
437 std::vector<poly_t> d;
439 for (auto i : kblib::range(count)) {
440 auto v = h(i);
441 switch (v % 4) {
442 case 0:
443 d.push_back(poly_t::make<Derived0>(v));
444 break;
445 case 1:
446 d.push_back(poly_t::make<Derived1>(v));
447 break;
448 case 2:
449 d.push_back(poly_t::make<Derived2>(v));
450 break;
451 case 3:
452 d.push_back(poly_t::make<Derived3>(v));
453 }
454 }
455
456 unsigned accum{};
457 meter.measure([&] {
458 for (const auto& x : d) {
459 accum += x();
460 }
461 return accum;
462 });
463 push_checksum(accum, "poly_obj");
464 };
465 BENCHMARK_ADVANCED("function pointer")(Catch::Benchmark::Chronometer meter) {
466 std::vector<fptr> d;
468 for (auto i : kblib::range(count)) {
469 auto v = static_cast<unsigned>(h(i));
470 d.push_back(make_fptr(v));
471 }
472
473 unsigned accum{};
474 meter.measure([&] {
475 for (auto x : d) {
476 accum += x.p(&x);
477 }
478 return accum;
479 });
480 push_checksum(accum, "raw pointer");
481 };
482 BENCHMARK_ADVANCED("function pointer (wrapped)")
483 (Catch::Benchmark::Chronometer meter) {
484 std::vector<fptr> d;
486 for (auto i : kblib::range(count)) {
487 auto v = static_cast<unsigned>(h(i));
488 d.push_back(make_fptr(v));
489 }
490
491 unsigned accum{};
492 meter.measure([&] {
493 for (auto x : d) {
494 accum += x();
495 }
496 return accum;
497 });
498 push_checksum(accum, "raw pointer");
499 };
500 BENCHMARK_ADVANCED("std::function")(Catch::Benchmark::Chronometer meter) {
501 std::vector<std::function<unsigned()>> d;
503 for (auto i : kblib::range(count)) {
504 auto v = static_cast<unsigned>(h(i));
505 switch (v % 4) {
506 case 0:
507 d.emplace_back(Derived0(v));
508 break;
509 case 1:
510 d.emplace_back(Derived1(v));
511 break;
512 case 2:
513 d.emplace_back(Derived2(v));
514 break;
515 case 3:
516 d.emplace_back(Derived3(v));
517 }
518 }
519
520 unsigned accum{};
521 meter.measure([&] {
522 for (const auto& x : d) {
523 accum += x();
524 }
525 return accum;
526 });
527 push_checksum(accum, "std::function");
528 };
529 {
530 std::vector<std::variant<Derived0, Derived1, Derived2, Derived3>> d;
532 for (auto i : kblib::range(count)) {
533 auto v = static_cast<unsigned>(h(i));
534 switch (v % 4) {
535 case 0:
536 d.emplace_back(Derived0(v));
537 break;
538 case 1:
539 d.emplace_back(Derived1(v));
540 break;
541 case 2:
542 d.emplace_back(Derived2(v));
543 break;
544 case 3:
545 d.emplace_back(Derived3(v));
546 break;
547 }
548 }
549 BENCHMARK_ADVANCED("std::visit(v, f)")
550 (Catch::Benchmark::Chronometer meter) {
551 unsigned accum{};
552 meter.measure([&] {
553 for (const auto& x : d) {
554 accum += std::visit([](const auto& v) { return v(); }, x);
555 }
556 return accum;
557 });
558 push_checksum(accum, "std::visit(v, f)");
559 };
560 BENCHMARK_ADVANCED("kblib::visit(v, f...)")
561 (Catch::Benchmark::Chronometer meter) {
562 unsigned accum{};
563 meter.measure([&] {
564 for (const auto& x : d) {
565 accum += kblib::visit(x, [](const auto& v) { return v(); });
566 }
567 return accum;
568 });
569 push_checksum(accum, "kblib::visit(v, f...)");
570 };
571 BENCHMARK_ADVANCED("kblib::visit(v)(f...)")
572 (Catch::Benchmark::Chronometer meter) {
573 unsigned accum{};
574 meter.measure([&] {
575 for (const auto& x : d) {
576 accum += kblib::visit(x)([](const auto& v) { return v(); });
577 }
578 return accum;
579 });
580 push_checksum(accum, "kblib::visit(v)(f...)");
581 };
582 BENCHMARK_ADVANCED("visit_indexed(v, f...)")
583 (Catch::Benchmark::Chronometer meter) {
584 unsigned accum{};
585 meter.measure([&] {
586 for (const auto& x : d) {
587 accum += kblib::visit_indexed(
588 x, [](auto, const auto& v) { return v(); });
589 }
590 return accum;
591 });
592 push_checksum(accum, "visit_indexed(v, f...)");
593 };
594 BENCHMARK_ADVANCED("kblib::visit2(v, f...)")
595 (Catch::Benchmark::Chronometer meter) {
596 unsigned accum{};
597 meter.measure([&] {
598 for (const auto& x : d) {
599 accum += kblib::visit2(x, [](const auto& v) { return v(); });
600 }
601 return accum;
602 });
603 push_checksum(accum, "kblib::visit2(v, f...)");
604 };
605 BENCHMARK_ADVANCED("kblib::visit2_nop(v, f...)")
606 (Catch::Benchmark::Chronometer meter) {
607 unsigned accum{};
608 meter.measure([&] {
609 for (const auto& x : d) {
610 kblib::visit2_nop(x, [&](const auto& v) { accum += v(); });
611 }
612 return accum;
613 });
614 push_checksum(accum, "kblib::visit2_nop(v, f...)");
615 };
616 BENCHMARK_ADVANCED("std::get_if")
617 (Catch::Benchmark::Chronometer meter) {
618 unsigned accum{};
619 meter.measure([&] {
620 for (const auto& x : d) {
621 if (auto* const p = std::get_if<0>(&x)) {
622 accum += (*p)();
623 } else if (auto* const p = std::get_if<1>(&x)) {
624 accum += (*p)();
625 } else if (auto* const p = std::get_if<2>(&x)) {
626 accum += (*p)();
627 } else if (auto* const p = std::get_if<3>(&x)) {
628 accum += (*p)();
629 }
630 }
631 return accum;
632 });
633 push_checksum(accum, "std::get_if");
634 };
635 BENCHMARK_ADVANCED("switch (v.index())")
636 (Catch::Benchmark::Chronometer meter) {
637 unsigned accum{};
638 meter.measure([&] {
639 for (const auto& x : d) {
640 switch (x.index()) {
641 case 0:
642 accum += std::get<0>(x)();
643 break;
644 case 1:
645 accum += std::get<1>(x)();
646 break;
647 case 2:
648 accum += std::get<2>(x)();
649 break;
650 case 3:
651 accum += std::get<3>(x)();
652 break;
653 default:;
654 }
655 }
656 return accum;
657 });
658 push_checksum(accum, "switch (v.index())");
659 };
660 }
661
662 // Test speed when some objects are invalid
663 {
664 BENCHMARK_ADVANCED("raw pointer, ch")
665 (Catch::Benchmark::Chronometer meter) {
666 std::vector<Base*> d;
668 for (auto i : kblib::range(count)) {
669 auto v = static_cast<unsigned>(h(i));
670 switch (v % 5) {
671 case 0:
672 d.emplace_back(new Derived0(v));
673 break;
674 case 1:
675 d.emplace_back(new Derived1(v));
676 break;
677 case 2:
678 d.emplace_back(new Derived2(v));
679 break;
680 case 3:
681 d.emplace_back(new Derived3(v));
682 break;
683 case 4:
684 try {
685 d.push_back(new thrower(v));
686 } catch (int) {
687 d.push_back(nullptr);
688 }
689 }
690 }
691
692 unsigned accum{};
693 meter.measure([&] {
694 for (auto x : d) {
695 if (x) {
696 accum += (*x)();
697 }
698 }
699 return accum;
700 });
701 for (auto x : d) {
702 delete x;
703 }
704 };
705 BENCHMARK_ADVANCED("unique_ptr, ch")
706 (Catch::Benchmark::Chronometer meter) {
707 std::vector<std::unique_ptr<Base>> d;
709 for (auto i : kblib::range(count)) {
710 auto v = h(i);
711 switch (v % 4) {
712 case 0:
713 d.push_back(std::make_unique<Derived0>(v));
714 break;
715 case 1:
716 d.push_back(std::make_unique<Derived1>(v));
717 break;
718 case 2:
719 d.push_back(std::make_unique<Derived2>(v));
720 break;
721 case 3:
722 d.push_back(std::make_unique<Derived3>(v));
723 break;
724 case 4:
725 try {
726 d.push_back(std::make_unique<thrower>(v));
727 } catch (int) {
728 d.push_back(nullptr);
729 }
730 }
731 }
732
733 unsigned accum{};
734 meter.measure([&] {
735 for (const auto& x : d) {
736 if (x) {
737 accum += (*x)();
738 }
739 }
740 return accum;
741 });
742 };
743 BENCHMARK_ADVANCED("poly_obj, ch")(Catch::Benchmark::Chronometer meter) {
744 std::vector<poly_t> d;
746 for (auto i : kblib::range(count)) {
747 auto v = h(i);
748 switch (v % 5) {
749 case 0:
750 d.push_back(poly_t::make<Derived0>(v));
751 break;
752 case 1:
753 d.push_back(poly_t::make<Derived1>(v));
754 break;
755 case 2:
756 d.push_back(poly_t::make<Derived2>(v));
757 break;
758 case 3:
759 d.push_back(poly_t::make<Derived3>(v));
760 break;
761 case 4:
762 try {
763 d.push_back(poly_t::make<thrower>(v));
764 } catch (int) {
765 d.emplace_back(nullptr);
766 }
767 }
768 }
769
770 unsigned accum{};
771 meter.measure([&] {
772 for (const auto& x : d) {
773 if (x) {
774 accum += x();
775 }
776 }
777 return accum;
778 });
779 };
780 std::vector<std::function<unsigned()>> df;
781 std::vector<std::variant<Derived0, Derived1, Derived2, Derived3, thrower>>
782 d;
784 for (auto i : kblib::range(count)) {
785 auto v = static_cast<unsigned>(h(i));
786 switch (v % 5) {
787 case 0:
788 df.emplace_back(Derived0(v));
789 d.emplace_back(Derived0(v));
790 break;
791 case 1:
792 df.emplace_back(Derived1(v));
793 d.emplace_back(Derived1(v));
794 break;
795 case 2:
796 df.emplace_back(Derived2(v));
797 d.emplace_back(Derived2(v));
798 break;
799 case 3:
800 df.emplace_back(Derived3(v));
801 d.emplace_back(Derived3(v));
802 break;
803 // Test speed of exception throwing and catching
804 case 4:
805 try {
806 auto& b = df.emplace_back(thrower());
807 b = thrower(v);
808 } catch (int) {
809 }
810 // These have to be done in separate try blocks because otherwise
811 // df would throw and d wouldn't get pushed
812 try {
813 auto& b = d.emplace_back(std::in_place_type_t<thrower>{}, 0);
814 b = thrower(v);
815 } catch (int) {
816 }
817 }
818 }
819 CHECK(d.size() == df.size());
820 CHECK(df.size() == count);
821 CHECK(d.size() == count);
822 BENCHMARK_ADVANCED("std::function, ch")
823 (Catch::Benchmark::Chronometer meter) {
824 unsigned accum{};
825 meter.measure([&] {
826 for (const auto& x : df) {
827 if (x) {
828 accum += x();
829 }
830 }
831 return accum;
832 });
833 };
834 BENCHMARK_ADVANCED("kblib::visit2_nop(v, f...), ch")
835 (Catch::Benchmark::Chronometer meter) {
836 unsigned accum{};
837 meter.measure([&] {
838 for (const auto& x : d) {
839 kblib::visit2_nop(x, [&](const auto& v) { accum += v(); });
840 }
841 return accum;
842 });
843 };
844 BENCHMARK_ADVANCED("std::get_if, ch")
845 (Catch::Benchmark::Chronometer meter) {
846 unsigned accum{};
847 meter.measure([&] {
848 for (const auto& x : d) {
849 if (auto* const p = std::get_if<0>(&x)) {
850 accum += (*p)();
851 } else if (auto* const p = std::get_if<1>(&x)) {
852 accum += (*p)();
853 } else if (auto* const p = std::get_if<2>(&x)) {
854 accum += (*p)();
855 } else if (auto* const p = std::get_if<3>(&x)) {
856 accum += (*p)();
857 } else if (auto* const p = std::get_if<4>(&x)) {
858 accum += (*p)();
859 }
860 }
861 return accum;
862 });
863 };
864 BENCHMARK_ADVANCED("switch (v.index()), ch")
865 (Catch::Benchmark::Chronometer meter) {
866 unsigned accum{};
867 meter.measure([&] {
868 for (const auto& x : d) {
869 switch (x.index()) {
870 case 0:
871 accum += std::get<0>(x)();
872 break;
873 case 1:
874 accum += std::get<1>(x)();
875 break;
876 case 2:
877 accum += std::get<2>(x)();
878 break;
879 case 3:
880 accum += std::get<3>(x)();
881 break;
882 case 4:
883 accum += std::get<4>(x)();
884 break;
885 default:;
886 }
887 }
888 return accum;
889 });
890 };
891 BENCHMARK_ADVANCED("std::function, ex")
892 (Catch::Benchmark::Chronometer meter) {
893 unsigned accum{};
894 meter.measure([&] {
895 for (const auto& x : df) {
896 try {
897 accum += x();
898 } catch (const std::bad_function_call&) {
899 }
900 }
901 return accum;
902 });
903 };
904 BENCHMARK_ADVANCED("std::visit(v, f), ex")
905 (Catch::Benchmark::Chronometer meter) {
906 unsigned accum{};
907 meter.measure([&] {
908 for (const auto& x : d) {
909 try {
910 accum += std::visit([](const auto& v) { return v(); }, x);
911 } catch (const std::bad_variant_access&) {
912 }
913 }
914 return accum;
915 });
916 };
917 BENCHMARK_ADVANCED("kblib::visit(v, f...), ex")
918 (Catch::Benchmark::Chronometer meter) {
919 unsigned accum{};
920 meter.measure([&] {
921 for (const auto& x : d) {
922 try {
923 accum += kblib::visit(x, [](const auto& v) { return v(); });
924 } catch (const std::bad_variant_access&) {
925 }
926 }
927 return accum;
928 });
929 };
930 BENCHMARK_ADVANCED("kblib::visit(v)(f...), ex")
931 (Catch::Benchmark::Chronometer meter) {
932 unsigned accum{};
933 meter.measure([&] {
934 for (const auto& x : d) {
935 try {
936 accum += kblib::visit(x)([](const auto& v) { return v(); });
937 } catch (const std::bad_variant_access&) {
938 }
939 }
940 return accum;
941 });
942 };
943 BENCHMARK_ADVANCED("visit_indexed(v, f...), ex")
944 (Catch::Benchmark::Chronometer meter) {
945 unsigned accum{};
946 meter.measure([&] {
947 for (const auto& x : d) {
948 try {
949 accum += kblib::visit_indexed(
950 x, [](auto, const auto& v) { return v(); });
951 } catch (const std::bad_variant_access&) {
952 }
953 }
954 return accum;
955 });
956 };
957 BENCHMARK_ADVANCED("kblib::visit2(v, f...), ex")
958 (Catch::Benchmark::Chronometer meter) {
959 unsigned accum{};
960 meter.measure([&] {
961 for (const auto& x : d) {
962 try {
963 accum += kblib::visit2(x, [](const auto& v) { return v(); });
964 } catch (const std::bad_variant_access&) {
965 }
966 }
967 return accum;
968 });
969 };
970 }
971 auto end = std::chrono::steady_clock::now();
972 std::chrono::duration<float, std::ratio<1, 1>> time = end - start;
973 std::cout << "\n\nProfiling took " << time.count() << " seconds\n";
974
975 // All runs should produce identical results
976 unsigned expected_value = reproducibility_test[0].first;
977
978 std::cout << "Checksum: " << expected_value;
979
980 for (auto i : kblib::range(std::size_t(1), reproducibility_test.size())) {
981 const auto& run = reproducibility_test[i];
982 if (run.first != expected_value) {
983 WARN(i << ": " << run.second << ": " << run.first
984 << " != " << expected_value);
985 }
986 REQUIRE(run.first == expected_value);
987 }
988}
989
990# endif // not defined(FAST_TEST)
991
992#endif // KBLIB_USE_CXX17
Inline polymorphic object. Generally mimics the interfaces of std::optional and std::variant.
Definition: poly_obj.h:481
constexpr auto find_if(ForwardIt begin, EndIt end, UnaryPredicate &&pred) noexcept(noexcept(kblib::invoke(pred, *begin))) -> ForwardIt
Finds the first value in range [begin, end) for which pred returns true. If not found,...
Definition: algorithm.h:327
constexpr auto range(Value min, Value max, Delta step=0) -> range_t< Value, Delta >
Constructs a range from beginning, end, and step amount. The range is half-open, that is min is in th...
Definition: iterators.h:621
constexpr auto visit_indexed(Variant &&variant, Fs &&... fs) -> decltype(auto)
Visit a variant, but pass the index (as an integral_constant) to the visitor. This allows for a visit...
Definition: variant.h:176
constexpr auto visit2_nop(V &&v, F &&f, Fs &&... fs) -> void
Definition: variant.h:306
constexpr auto accumulate(InputIt first, InputIt last, T init) -> T
A constexpr version of std::accumulate.
Definition: algorithm.h:162
constexpr auto visit2(V &&v, F &&f, Fs &&... fs) -> decltype(auto)
Definition: variant.h:295
constexpr auto visit(V &&v, F &&f, Fs &&... fs) -> decltype(auto)
Wraps std::visit to provide an interface taking one variant and any number of functors providing an o...
Definition: variant.h:221
Provides poly_obj, which enables polymorphism to be used without unnecessary per-object dynamic alloc...
The primary template has to exist, but not be constructible, in order to be compatible with std::hash...
Definition: hash.h:334
The prime to use for the FNVa hash algorithm, as a type trait.
Definition: hash.h:108
poly_obj_traits is a traits class template which abstracts the allowed operations on a polymorphic ty...
Definition: poly_obj.h:374
TEST_CASE("poly_obj performance(4_old)")
#define MAKE_DERIVED(Number, Expr)