diff --git a/benchmark/histogram_iteration.cpp b/benchmark/histogram_iteration.cpp index 3aa1d3dc..b7594693 100644 --- a/benchmark/histogram_iteration.cpp +++ b/benchmark/histogram_iteration.cpp @@ -100,7 +100,7 @@ template static void Indexed(benchmark::State& state, Tag, d1, coverage cov) { auto h = make_histogram(Tag(), d1(), state.range(0)); for (auto _ : state) { - for (auto x : indexed(h, cov)) { + for (auto&& x : indexed(h, cov)) { benchmark::DoNotOptimize(*x); benchmark::DoNotOptimize(x.index()); } @@ -111,7 +111,7 @@ template static void Indexed(benchmark::State& state, Tag, d2, coverage cov) { auto h = make_histogram(Tag(), d2(), state.range(0)); for (auto _ : state) { - for (auto x : indexed(h, cov)) { + for (auto&& x : indexed(h, cov)) { benchmark::DoNotOptimize(*x); benchmark::DoNotOptimize(x.index(0)); benchmark::DoNotOptimize(x.index(1)); @@ -123,7 +123,7 @@ template static void Indexed(benchmark::State& state, Tag, d3, coverage cov) { auto h = make_histogram(Tag(), d3(), state.range(0)); for (auto _ : state) { - for (auto x : indexed(h, cov)) { + for (auto&& x : indexed(h, cov)) { benchmark::DoNotOptimize(*x); benchmark::DoNotOptimize(x.index(0)); benchmark::DoNotOptimize(x.index(1)); @@ -134,8 +134,8 @@ static void Indexed(benchmark::State& state, Tag, d3, coverage cov) { #define BENCH(Type, Tag, Dim, Cov) \ BENCHMARK_CAPTURE(Type, (Tag, Dim, Cov), Tag{}, Dim_t{}, coverage::Cov) \ - ->RangeMultiplier(2) \ - ->Range(4, 128) + ->RangeMultiplier(4) \ + ->Range(4, 256) BENCH(Naive, tuple, 1, inner); BENCH(Indexed, tuple, 1, inner); diff --git a/doc/doxygen_postprocessing.py b/doc/doxygen_postprocessing.py index 65bcde47..edfbc2f7 100644 --- a/doc/doxygen_postprocessing.py +++ b/doc/doxygen_postprocessing.py @@ -5,7 +5,7 @@ import re def log(*args): - print("PP:", *args) + print("post-processing:", *args) def select(condition, *tags): @@ -74,7 +74,8 @@ for item in select(lambda x:True, "class", "struct", "function"): parent = parent_map[item] 
log("removing undocumented", item.tag, item.get("name"), "from", parent.tag, parent.get("name")) - parent_map[item].remove(item) + if item in parent_map[item]: + parent_map[item].remove(item) elif purpose.text.strip().lower() == "implementation detail": log("replacing", item.tag, item.get("name"), "with unspecified typedef") name = item.get("name") @@ -85,6 +86,8 @@ for item in select(lambda x:True, "class", "struct", "function"): type.append(unspecified) item.append(type) +parent_map = {c:p for p in tree.iter() for c in p} + # hide methods and constructors explicitly declared as "implementation detail" for item in select(is_detail, "constructor", "method"): name = item.get("name") diff --git a/doc/iteration_performance.dat b/doc/iteration_performance.dat index ee5723aa..1e98e18a 100644 --- a/doc/iteration_performance.dat +++ b/doc/iteration_performance.dat @@ -1,111 +1,668 @@ -------------------------------------------------------------------------------------- -Benchmark Time CPU Iterations -------------------------------------------------------------------------------------- -Naive/(tuple, 1, inner)/4 5.96 ns 5.96 ns 107227613 -Naive/(tuple, 1, inner)/8 12.8 ns 12.8 ns 57237710 -Naive/(tuple, 1, inner)/16 29.5 ns 29.5 ns 23650894 -Naive/(tuple, 1, inner)/32 68.7 ns 68.7 ns 10197442 -Naive/(tuple, 1, inner)/64 168 ns 168 ns 4188998 -Naive/(tuple, 1, inner)/128 321 ns 321 ns 2160592 -Indexed/(tuple, 1, inner)/4 2.14 ns 2.14 ns 350938105 -Indexed/(tuple, 1, inner)/8 4.72 ns 4.72 ns 154159405 -Indexed/(tuple, 1, inner)/16 7.86 ns 7.86 ns 92269707 -Indexed/(tuple, 1, inner)/32 25.1 ns 25.1 ns 27919367 -Indexed/(tuple, 1, inner)/64 30.4 ns 30.4 ns 22619898 -Indexed/(tuple, 1, inner)/128 53.2 ns 53.2 ns 12929879 -Naive/(vector, 1, inner)/4 13.0 ns 13.0 ns 52795441 -Naive/(vector, 1, inner)/8 26.3 ns 26.3 ns 27237634 -Naive/(vector, 1, inner)/16 51.9 ns 51.9 ns 13736742 -Naive/(vector, 1, inner)/32 103 ns 103 ns 6787482 -Naive/(vector, 1, inner)/64 215 ns 215 ns 3198797 
-Naive/(vector, 1, inner)/128 452 ns 452 ns 1577023 -Indexed/(vector, 1, inner)/4 8.97 ns 8.97 ns 79977567 -Indexed/(vector, 1, inner)/8 13.6 ns 13.6 ns 53950668 -Indexed/(vector, 1, inner)/16 23.3 ns 23.3 ns 29860386 -Indexed/(vector, 1, inner)/32 78.7 ns 78.7 ns 8672481 -Indexed/(vector, 1, inner)/64 143 ns 143 ns 4817357 -Indexed/(vector, 1, inner)/128 280 ns 280 ns 2559447 -Naive/(vector_of_variant, 1, inner)/4 51.8 ns 51.8 ns 13135495 -Naive/(vector_of_variant, 1, inner)/8 110 ns 110 ns 6483190 -Naive/(vector_of_variant, 1, inner)/16 214 ns 214 ns 3332363 -Naive/(vector_of_variant, 1, inner)/32 408 ns 408 ns 1688299 -Naive/(vector_of_variant, 1, inner)/64 795 ns 795 ns 884442 -Naive/(vector_of_variant, 1, inner)/128 1596 ns 1596 ns 440538 -Indexed/(vector_of_variant, 1, inner)/4 17.2 ns 17.2 ns 42335022 -Indexed/(vector_of_variant, 1, inner)/8 21.9 ns 21.9 ns 30641988 -Indexed/(vector_of_variant, 1, inner)/16 34.2 ns 34.2 ns 20488332 -Indexed/(vector_of_variant, 1, inner)/32 83.0 ns 83.0 ns 8404939 -Indexed/(vector_of_variant, 1, inner)/64 153 ns 153 ns 4584970 -Indexed/(vector_of_variant, 1, inner)/128 286 ns 286 ns 2439548 -Naive/(tuple, 2, inner)/4 49.0 ns 49.0 ns 14277246 -Naive/(tuple, 2, inner)/8 188 ns 188 ns 3728378 -Naive/(tuple, 2, inner)/16 735 ns 735 ns 952961 -Naive/(tuple, 2, inner)/32 2924 ns 2924 ns 238582 -Naive/(tuple, 2, inner)/64 11916 ns 11916 ns 58984 -Naive/(tuple, 2, inner)/128 47203 ns 47199 ns 15002 -Indexed/(tuple, 2, inner)/4 35.7 ns 35.7 ns 19501880 -Indexed/(tuple, 2, inner)/8 103 ns 103 ns 6813330 -Indexed/(tuple, 2, inner)/16 427 ns 427 ns 1654014 -Indexed/(tuple, 2, inner)/32 1736 ns 1736 ns 405975 -Indexed/(tuple, 2, inner)/64 10312 ns 10312 ns 68880 -Indexed/(tuple, 2, inner)/128 39495 ns 39493 ns 17730 -Naive/(vector, 2, inner)/4 47.2 ns 47.2 ns 13581227 -Naive/(vector, 2, inner)/8 193 ns 193 ns 3624511 -Naive/(vector, 2, inner)/16 865 ns 865 ns 811691 -Naive/(vector, 2, inner)/32 3120 ns 3120 ns 224241 -Naive/(vector, 2, 
inner)/64 12238 ns 12237 ns 59350 -Naive/(vector, 2, inner)/128 46191 ns 46189 ns 14793 -Indexed/(vector, 2, inner)/4 33.3 ns 33.3 ns 21130965 -Indexed/(vector, 2, inner)/8 118 ns 118 ns 5964793 -Indexed/(vector, 2, inner)/16 434 ns 434 ns 1696498 -Indexed/(vector, 2, inner)/32 2740 ns 2740 ns 249654 -Indexed/(vector, 2, inner)/64 10509 ns 10505 ns 67846 -Indexed/(vector, 2, inner)/128 40151 ns 40150 ns 16918 -Naive/(vector_of_variant, 2, inner)/4 453 ns 452 ns 1552650 -Naive/(vector_of_variant, 2, inner)/8 1733 ns 1733 ns 383013 -Naive/(vector_of_variant, 2, inner)/16 6836 ns 6835 ns 104279 -Naive/(vector_of_variant, 2, inner)/32 26360 ns 26360 ns 26654 -Naive/(vector_of_variant, 2, inner)/64 104132 ns 104130 ns 6682 -Naive/(vector_of_variant, 2, inner)/128 418688 ns 418682 ns 1682 -Indexed/(vector_of_variant, 2, inner)/4 48.0 ns 48.0 ns 15128777 -Indexed/(vector_of_variant, 2, inner)/8 148 ns 148 ns 4496692 -Indexed/(vector_of_variant, 2, inner)/16 439 ns 439 ns 1595670 -Indexed/(vector_of_variant, 2, inner)/32 2680 ns 2680 ns 261458 -Indexed/(vector_of_variant, 2, inner)/64 10159 ns 10159 ns 68852 -Indexed/(vector_of_variant, 2, inner)/128 39420 ns 39418 ns 17752 -Naive/(tuple, 3, inner)/4 485 ns 485 ns 1441541 -Naive/(tuple, 3, inner)/8 3805 ns 3805 ns 184095 -Naive/(tuple, 3, inner)/16 29768 ns 29766 ns 23576 -Naive/(tuple, 3, inner)/32 235860 ns 235852 ns 2963 -Naive/(tuple, 3, inner)/64 1905665 ns 1905589 ns 368 -Naive/(tuple, 3, inner)/128 15065575 ns 15064751 ns 46 -Indexed/(tuple, 3, inner)/4 114 ns 114 ns 6131123 -Indexed/(tuple, 3, inner)/8 822 ns 822 ns 851250 -Indexed/(tuple, 3, inner)/16 5973 ns 5973 ns 116809 -Indexed/(tuple, 3, inner)/32 73666 ns 73665 ns 9488 -Indexed/(tuple, 3, inner)/64 560104 ns 560076 ns 1252 -Indexed/(tuple, 3, inner)/128 4342178 ns 4342062 ns 162 -Naive/(vector, 3, inner)/4 547 ns 547 ns 1345453 -Naive/(vector, 3, inner)/8 4089 ns 4089 ns 169797 -Naive/(vector, 3, inner)/16 32296 ns 32293 ns 21554 -Naive/(vector, 3, 
inner)/32 257591 ns 257584 ns 2759 -Naive/(vector, 3, inner)/64 2168135 ns 2168063 ns 331 -Naive/(vector, 3, inner)/128 17159329 ns 17158944 ns 42 -Indexed/(vector, 3, inner)/4 118 ns 118 ns 6018175 -Indexed/(vector, 3, inner)/8 866 ns 866 ns 824553 -Indexed/(vector, 3, inner)/16 6355 ns 6355 ns 112697 -Indexed/(vector, 3, inner)/32 75642 ns 75640 ns 9242 -Indexed/(vector, 3, inner)/64 566600 ns 566578 ns 1236 -Indexed/(vector, 3, inner)/128 4441349 ns 4441226 ns 160 -Naive/(vector_of_variant, 3, inner)/4 2496 ns 2495 ns 284164 -Naive/(vector_of_variant, 3, inner)/8 18593 ns 18593 ns 37745 -Naive/(vector_of_variant, 3, inner)/16 144301 ns 144295 ns 4864 -Naive/(vector_of_variant, 3, inner)/32 1139630 ns 1139605 ns 614 -Naive/(vector_of_variant, 3, inner)/64 9125144 ns 9125012 ns 75 -Naive/(vector_of_variant, 3, inner)/128 72429636 ns 72426568 ns 10 -Indexed/(vector_of_variant, 3, inner)/4 140 ns 140 ns 5002436 -Indexed/(vector_of_variant, 3, inner)/8 864 ns 864 ns 814569 -Indexed/(vector_of_variant, 3, inner)/16 6233 ns 6233 ns 112591 -Indexed/(vector_of_variant, 3, inner)/32 76552 ns 76549 ns 9240 -Indexed/(vector_of_variant, 3, inner)/64 563781 ns 563764 ns 1238 -Indexed/(vector_of_variant, 3, inner)/128 4441317 ns 4441217 ns 160 +{ + "context": { + "date": "2019-05-09 19:20:27", + "executable": "./benchmark/histogram_iteration", + "num_cpus": 4, + "mhz_per_cpu": 3000, + "cpu_scaling_enabled": true, + "caches": [ + { + "type": "Data", + "level": 1, + "size": 32000000, + "num_sharing": 2 + }, + { + "type": "Instruction", + "level": 1, + "size": 32000000, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 2, + "size": 256000000, + "num_sharing": 2 + }, + { + "type": "Unified", + "level": 3, + "size": 4096000000, + "num_sharing": 4 + } + ], + "library_build_type": "release" + }, + "benchmarks": [ + { + "name": "Naive/(tuple, 1, inner)/2", + "iterations": 206731020, + "real_time": 3.3683881934817235e+00, + "cpu_time": 3.3679911268275071e+00, + "time_unit": 
"ns" + }, + { + "name": "Naive/(tuple, 1, inner)/4", + "iterations": 110869776, + "real_time": 6.1177169240587093e+00, + "cpu_time": 6.1172973236637551e+00, + "time_unit": "ns" + }, + { + "name": "Naive/(tuple, 1, inner)/16", + "iterations": 29711808, + "real_time": 2.3479127625078444e+01, + "cpu_time": 2.3477822520931749e+01, + "time_unit": "ns" + }, + { + "name": "Naive/(tuple, 1, inner)/64", + "iterations": 6609753, + "real_time": 1.0470953377551155e+02, + "cpu_time": 1.0470407109009972e+02, + "time_unit": "ns" + }, + { + "name": "Naive/(tuple, 1, inner)/256", + "iterations": 1751513, + "real_time": 3.9532748029827735e+02, + "cpu_time": 3.9529119053070104e+02, + "time_unit": "ns" + }, + { + "name": "Indexed/(tuple, 1, inner)/2", + "iterations": 257875730, + "real_time": 2.7052694800181820e+00, + "cpu_time": 2.7046212026234508e+00, + "time_unit": "ns" + }, + { + "name": "Indexed/(tuple, 1, inner)/4", + "iterations": 158638840, + "real_time": 4.4245859526037705e+00, + "cpu_time": 4.4229814401063443e+00, + "time_unit": "ns" + }, + { + "name": "Indexed/(tuple, 1, inner)/16", + "iterations": 34057734, + "real_time": 2.0140500539427315e+01, + "cpu_time": 2.0132456610295907e+01, + "time_unit": "ns" + }, + { + "name": "Indexed/(tuple, 1, inner)/64", + "iterations": 6310477, + "real_time": 1.1157260140579641e+02, + "cpu_time": 1.1153754747858203e+02, + "time_unit": "ns" + }, + { + "name": "Indexed/(tuple, 1, inner)/256", + "iterations": 1351151, + "real_time": 5.1810916544417455e+02, + "cpu_time": 5.1787196471748859e+02, + "time_unit": "ns" + }, + { + "name": "Naive/(vector, 1, inner)/2", + "iterations": 131255622, + "real_time": 4.7881068210572124e+00, + "cpu_time": 4.7875653966273646e+00, + "time_unit": "ns" + }, + { + "name": "Naive/(vector, 1, inner)/4", + "iterations": 77087015, + "real_time": 9.4653836706091905e+00, + "cpu_time": 9.4610575724069754e+00, + "time_unit": "ns" + }, + { + "name": "Naive/(vector, 1, inner)/16", + "iterations": 19668295, + "real_time": 
3.3413869427824523e+01, + "cpu_time": 3.3412764858367233e+01, + "time_unit": "ns" + }, + { + "name": "Naive/(vector, 1, inner)/64", + "iterations": 4945372, + "real_time": 1.4664591015633584e+02, + "cpu_time": 1.4658447210846816e+02, + "time_unit": "ns" + }, + { + "name": "Naive/(vector, 1, inner)/256", + "iterations": 1217207, + "real_time": 5.5767214697086843e+02, + "cpu_time": 5.5744531620340626e+02, + "time_unit": "ns" + }, + { + "name": "Indexed/(vector, 1, inner)/2", + "iterations": 81441944, + "real_time": 8.3857499521232413e+00, + "cpu_time": 8.3840116954968522e+00, + "time_unit": "ns" + }, + { + "name": "Indexed/(vector, 1, inner)/4", + "iterations": 63037624, + "real_time": 1.1193144795626990e+01, + "cpu_time": 1.1186970244944508e+01, + "time_unit": "ns" + }, + { + "name": "Indexed/(vector, 1, inner)/16", + "iterations": 24552017, + "real_time": 2.8861548523750898e+01, + "cpu_time": 2.8853131333364683e+01, + "time_unit": "ns" + }, + { + "name": "Indexed/(vector, 1, inner)/64", + "iterations": 4602537, + "real_time": 1.5045527064663489e+02, + "cpu_time": 1.5043488384775640e+02, + "time_unit": "ns" + }, + { + "name": "Indexed/(vector, 1, inner)/256", + "iterations": 1215923, + "real_time": 5.5660174123074376e+02, + "cpu_time": 5.5653807190093573e+02, + "time_unit": "ns" + }, + { + "name": "Naive/(vector_of_variant, 1, inner)/2", + "iterations": 69285423, + "real_time": 9.3240263829505370e+00, + "cpu_time": 9.3235503086990885e+00, + "time_unit": "ns" + }, + { + "name": "Naive/(vector_of_variant, 1, inner)/4", + "iterations": 37840489, + "real_time": 1.8756742625701495e+01, + "cpu_time": 1.8752293132364120e+01, + "time_unit": "ns" + }, + { + "name": "Naive/(vector_of_variant, 1, inner)/16", + "iterations": 9131461, + "real_time": 7.3190741657208235e+01, + "cpu_time": 7.3187546658743685e+01, + "time_unit": "ns" + }, + { + "name": "Naive/(vector_of_variant, 1, inner)/64", + "iterations": 2310857, + "real_time": 3.0190301217256041e+02, + "cpu_time": 
3.0187253300399050e+02, + "time_unit": "ns" + }, + { + "name": "Naive/(vector_of_variant, 1, inner)/256", + "iterations": 591166, + "real_time": 1.1632894161693268e+03, + "cpu_time": 1.1632501378631366e+03, + "time_unit": "ns" + }, + { + "name": "Indexed/(vector_of_variant, 1, inner)/2", + "iterations": 92192765, + "real_time": 7.3654489047653451e+00, + "cpu_time": 7.3650777802357892e+00, + "time_unit": "ns" + }, + { + "name": "Indexed/(vector_of_variant, 1, inner)/4", + "iterations": 73084019, + "real_time": 9.4376963587645992e+00, + "cpu_time": 9.4373806809940284e+00, + "time_unit": "ns" + }, + { + "name": "Indexed/(vector_of_variant, 1, inner)/16", + "iterations": 26473700, + "real_time": 2.6664344575716040e+01, + "cpu_time": 2.6661095389008782e+01, + "time_unit": "ns" + }, + { + "name": "Indexed/(vector_of_variant, 1, inner)/64", + "iterations": 4571385, + "real_time": 1.5056884992138143e+02, + "cpu_time": 1.5055476141256980e+02, + "time_unit": "ns" + }, + { + "name": "Indexed/(vector_of_variant, 1, inner)/256", + "iterations": 1217571, + "real_time": 5.5584271718022501e+02, + "cpu_time": 5.5581865123265800e+02, + "time_unit": "ns" + }, + { + "name": "Naive/(tuple, 2, inner)/2", + "iterations": 49462513, + "real_time": 1.4079908637056153e+01, + "cpu_time": 1.4079583926518207e+01, + "time_unit": "ns" + }, + { + "name": "Naive/(tuple, 2, inner)/4", + "iterations": 11875006, + "real_time": 5.5912055623566594e+01, + "cpu_time": 5.5911381013196973e+01, + "time_unit": "ns" + }, + { + "name": "Naive/(tuple, 2, inner)/16", + "iterations": 751019, + "real_time": 9.1227254969594492e+02, + "cpu_time": 9.1223847998519318e+02, + "time_unit": "ns" + }, + { + "name": "Naive/(tuple, 2, inner)/64", + "iterations": 47055, + "real_time": 1.5028270555663295e+04, + "cpu_time": 1.5024706662416380e+04, + "time_unit": "ns" + }, + { + "name": "Naive/(tuple, 2, inner)/256", + "iterations": 2799, + "real_time": 2.4269341693392285e+05, + "cpu_time": 2.4266524508753093e+05, + "time_unit": 
"ns" + }, + { + "name": "Indexed/(tuple, 2, inner)/2", + "iterations": 77370124, + "real_time": 8.6837826057658063e+00, + "cpu_time": 8.6834535511407491e+00, + "time_unit": "ns" + }, + { + "name": "Indexed/(tuple, 2, inner)/4", + "iterations": 25078764, + "real_time": 2.7915058493342677e+01, + "cpu_time": 2.7913483216318031e+01, + "time_unit": "ns" + }, + { + "name": "Indexed/(tuple, 2, inner)/16", + "iterations": 1630537, + "real_time": 4.2734812089440675e+02, + "cpu_time": 4.2732770492175462e+02, + "time_unit": "ns" + }, + { + "name": "Indexed/(tuple, 2, inner)/64", + "iterations": 76847, + "real_time": 9.0104866032538503e+03, + "cpu_time": 9.0101389514229486e+03, + "time_unit": "ns" + }, + { + "name": "Indexed/(tuple, 2, inner)/256", + "iterations": 5060, + "real_time": 1.3753572826036997e+05, + "cpu_time": 1.3752358952569167e+05, + "time_unit": "ns" + }, + { + "name": "Naive/(vector, 2, inner)/2", + "iterations": 38162873, + "real_time": 1.9294241631210902e+01, + "cpu_time": 1.9292749185838318e+01, + "time_unit": "ns" + }, + { + "name": "Naive/(vector, 2, inner)/4", + "iterations": 10112390, + "real_time": 6.9086567369670036e+01, + "cpu_time": 6.9083721949015214e+01, + "time_unit": "ns" + }, + { + "name": "Naive/(vector, 2, inner)/16", + "iterations": 680760, + "real_time": 1.0135387478716416e+03, + "cpu_time": 1.0134921235090197e+03, + "time_unit": "ns" + }, + { + "name": "Naive/(vector, 2, inner)/64", + "iterations": 42671, + "real_time": 1.6325137072055433e+04, + "cpu_time": 1.6324346910079437e+04, + "time_unit": "ns" + }, + { + "name": "Naive/(vector, 2, inner)/256", + "iterations": 2569, + "real_time": 2.7181347722760245e+05, + "cpu_time": 2.7179566407162294e+05, + "time_unit": "ns" + }, + { + "name": "Indexed/(vector, 2, inner)/2", + "iterations": 41678346, + "real_time": 1.6726858498559142e+01, + "cpu_time": 1.6724971091703008e+01, + "time_unit": "ns" + }, + { + "name": "Indexed/(vector, 2, inner)/4", + "iterations": 15728070, + "real_time": 
4.4268851740772050e+01, + "cpu_time": 4.4267395681733163e+01, + "time_unit": "ns" + }, + { + "name": "Indexed/(vector, 2, inner)/16", + "iterations": 1531865, + "real_time": 4.5714002343739037e+02, + "cpu_time": 4.5710810547926553e+02, + "time_unit": "ns" + }, + { + "name": "Indexed/(vector, 2, inner)/64", + "iterations": 73053, + "real_time": 9.2542523236943744e+03, + "cpu_time": 9.2536710059819998e+03, + "time_unit": "ns" + }, + { + "name": "Indexed/(vector, 2, inner)/256", + "iterations": 4994, + "real_time": 1.3891980016153606e+05, + "cpu_time": 1.3891205326391666e+05, + "time_unit": "ns" + }, + { + "name": "Naive/(vector_of_variant, 2, inner)/2", + "iterations": 20104745, + "real_time": 3.4506676309610846e+01, + "cpu_time": 3.4504440369673965e+01, + "time_unit": "ns" + }, + { + "name": "Naive/(vector_of_variant, 2, inner)/4", + "iterations": 5334056, + "real_time": 1.2953831943251666e+02, + "cpu_time": 1.2953181856358586e+02, + "time_unit": "ns" + }, + { + "name": "Naive/(vector_of_variant, 2, inner)/16", + "iterations": 337504, + "real_time": 1.9932967253765871e+03, + "cpu_time": 1.9932290906181800e+03, + "time_unit": "ns" + }, + { + "name": "Naive/(vector_of_variant, 2, inner)/64", + "iterations": 21759, + "real_time": 3.2318591479248385e+04, + "cpu_time": 3.2316641251895864e+04, + "time_unit": "ns" + }, + { + "name": "Naive/(vector_of_variant, 2, inner)/256", + "iterations": 1271, + "real_time": 5.2014565617777588e+05, + "cpu_time": 5.2012665696302510e+05, + "time_unit": "ns" + }, + { + "name": "Indexed/(vector_of_variant, 2, inner)/2", + "iterations": 42461766, + "real_time": 1.6106938510307256e+01, + "cpu_time": 1.6106278834469553e+01, + "time_unit": "ns" + }, + { + "name": "Indexed/(vector_of_variant, 2, inner)/4", + "iterations": 21033073, + "real_time": 3.3283205882599404e+01, + "cpu_time": 3.3281275066177784e+01, + "time_unit": "ns" + }, + { + "name": "Indexed/(vector_of_variant, 2, inner)/16", + "iterations": 1881121, + "real_time": 
3.7411198375871635e+02, + "cpu_time": 3.7408103465965212e+02, + "time_unit": "ns" + }, + { + "name": "Indexed/(vector_of_variant, 2, inner)/64", + "iterations": 70381, + "real_time": 9.9003461587726615e+03, + "cpu_time": 9.8997365766329312e+03, + "time_unit": "ns" + }, + { + "name": "Indexed/(vector_of_variant, 2, inner)/256", + "iterations": 4584, + "real_time": 1.5204522185940639e+05, + "cpu_time": 1.5204021444153567e+05, + "time_unit": "ns" + }, + { + "name": "Naive/(tuple, 3, inner)/2", + "iterations": 16067499, + "real_time": 4.3624799292245591e+01, + "cpu_time": 4.3622996584596294e+01, + "time_unit": "ns" + }, + { + "name": "Naive/(tuple, 3, inner)/4", + "iterations": 1768238, + "real_time": 3.9595480755533742e+02, + "cpu_time": 3.9593423792498453e+02, + "time_unit": "ns" + }, + { + "name": "Naive/(tuple, 3, inner)/16", + "iterations": 25259, + "real_time": 2.6991563403072916e+04, + "cpu_time": 2.6990050833366444e+04, + "time_unit": "ns" + }, + { + "name": "Naive/(tuple, 3, inner)/64", + "iterations": 204, + "real_time": 3.5919159803883051e+06, + "cpu_time": 3.5914769558823537e+06, + "time_unit": "ns" + }, + { + "name": "Naive/(tuple, 3, inner)/256", + "iterations": 1, + "real_time": 1.0654720300008193e+09, + "cpu_time": 1.0653928119999989e+09, + "time_unit": "ns" + }, + { + "name": "Indexed/(tuple, 3, inner)/2", + "iterations": 26814584, + "real_time": 2.6028973785268370e+01, + "cpu_time": 2.6027282168539248e+01, + "time_unit": "ns" + }, + { + "name": "Indexed/(tuple, 3, inner)/4", + "iterations": 4883130, + "real_time": 1.4305504829999745e+02, + "cpu_time": 1.4304884060018975e+02, + "time_unit": "ns" + }, + { + "name": "Indexed/(tuple, 3, inner)/16", + "iterations": 94913, + "real_time": 7.0721960006083600e+03, + "cpu_time": 7.0717863411756216e+03, + "time_unit": "ns" + }, + { + "name": "Indexed/(tuple, 3, inner)/64", + "iterations": 1179, + "real_time": 5.8209385241983552e+05, + "cpu_time": 5.8206476929601643e+05, + "time_unit": "ns" + }, + { + "name": 
"Indexed/(tuple, 3, inner)/256", + "iterations": 20, + "real_time": 3.5935903449717447e+07, + "cpu_time": 3.5934060049999774e+07, + "time_unit": "ns" + }, + { + "name": "Naive/(vector, 3, inner)/2", + "iterations": 13678916, + "real_time": 4.8195136150091649e+01, + "cpu_time": 4.8192346454939027e+01, + "time_unit": "ns" + }, + { + "name": "Naive/(vector, 3, inner)/4", + "iterations": 1937463, + "real_time": 3.5476346644732018e+02, + "cpu_time": 3.5474442660324530e+02, + "time_unit": "ns" + }, + { + "name": "Naive/(vector, 3, inner)/16", + "iterations": 31201, + "real_time": 2.2349923816471412e+04, + "cpu_time": 2.2348826864523620e+04, + "time_unit": "ns" + }, + { + "name": "Naive/(vector, 3, inner)/64", + "iterations": 355, + "real_time": 1.9842437154885617e+06, + "cpu_time": 1.9840622591549244e+06, + "time_unit": "ns" + }, + { + "name": "Naive/(vector, 3, inner)/256", + "iterations": 2, + "real_time": 4.6312754050086367e+08, + "cpu_time": 4.6309484399999690e+08, + "time_unit": "ns" + }, + { + "name": "Indexed/(vector, 3, inner)/2", + "iterations": 21016376, + "real_time": 3.3452103921514173e+01, + "cpu_time": 3.3450528435540441e+01, + "time_unit": "ns" + }, + { + "name": "Indexed/(vector, 3, inner)/4", + "iterations": 4537194, + "real_time": 1.5552969280062862e+02, + "cpu_time": 1.5551612428298361e+02, + "time_unit": "ns" + }, + { + "name": "Indexed/(vector, 3, inner)/16", + "iterations": 93733, + "real_time": 7.2719495481427412e+03, + "cpu_time": 7.2716927976272091e+03, + "time_unit": "ns" + }, + { + "name": "Indexed/(vector, 3, inner)/64", + "iterations": 1151, + "real_time": 5.9254785490932909e+05, + "cpu_time": 5.9250809470026579e+05, + "time_unit": "ns" + }, + { + "name": "Indexed/(vector, 3, inner)/256", + "iterations": 19, + "real_time": 3.5903861368608072e+07, + "cpu_time": 3.5902786000000060e+07, + "time_unit": "ns" + }, + { + "name": "Naive/(vector_of_variant, 3, inner)/2", + "iterations": 7489153, + "real_time": 9.1880777438608661e+01, + "cpu_time": 
9.1875263063793767e+01, + "time_unit": "ns" + }, + { + "name": "Naive/(vector_of_variant, 3, inner)/4", + "iterations": 982358, + "real_time": 6.8480715075247087e+02, + "cpu_time": 6.8475983602720146e+02, + "time_unit": "ns" + }, + { + "name": "Naive/(vector_of_variant, 3, inner)/16", + "iterations": 15612, + "real_time": 4.4376148411612274e+04, + "cpu_time": 4.4373882205995513e+04, + "time_unit": "ns" + }, + { + "name": "Naive/(vector_of_variant, 3, inner)/64", + "iterations": 214, + "real_time": 3.2605690467220335e+06, + "cpu_time": 3.2604443130841018e+06, + "time_unit": "ns" + }, + { + "name": "Naive/(vector_of_variant, 3, inner)/256", + "iterations": 1, + "real_time": 6.7460591900453436e+08, + "cpu_time": 6.7452916799999225e+08, + "time_unit": "ns" + }, + { + "name": "Indexed/(vector_of_variant, 3, inner)/2", + "iterations": 22526633, + "real_time": 3.0708660633189826e+01, + "cpu_time": 3.0706014032367921e+01, + "time_unit": "ns" + }, + { + "name": "Indexed/(vector_of_variant, 3, inner)/4", + "iterations": 5472366, + "real_time": 1.2638860723142412e+02, + "cpu_time": 1.2638097579730733e+02, + "time_unit": "ns" + }, + { + "name": "Indexed/(vector_of_variant, 3, inner)/16", + "iterations": 113927, + "real_time": 5.9392282514248682e+03, + "cpu_time": 5.9388695743765775e+03, + "time_unit": "ns" + }, + { + "name": "Indexed/(vector_of_variant, 3, inner)/64", + "iterations": 1095, + "real_time": 6.3374649588981876e+05, + "cpu_time": 6.3370015890411206e+05, + "time_unit": "ns" + }, + { + "name": "Indexed/(vector_of_variant, 3, inner)/256", + "iterations": 18, + "real_time": 3.9354077277999349e+07, + "cpu_time": 3.9349510444443874e+07, + "time_unit": "ns" + } + ] +} diff --git a/doc/iteration_performance.py b/doc/iteration_performance.py index 11c440b0..e7038280 100644 --- a/doc/iteration_performance.py +++ b/doc/iteration_performance.py @@ -9,17 +9,17 @@ import matplotlib as mpl mpl.rcParams.update(mpl.rcParamsDefault) bench = defaultdict(lambda:[]) -for iline, line in 
enumerate(open(sys.argv[1])): - if iline < 3: - continue +data = json.load(open(sys.argv[1])) + +for benchmark in data["benchmarks"]: # Naive/(tuple, 3, inner)/4 3.44 ns - m = re.match("(\S+)/\((\S+), (\d), (\S+)\)/(\d+)\s*([0-9\.]+) ns", line) + m = re.match("(\S+)/\((\S+), (\d), (\S+)\)/(\d+)", benchmark["name"]) name = m.group(1) hist = m.group(2) dim = int(m.group(3)) cov = m.group(4) nbins = int(m.group(5)) - time = float(m.group(6)) + time = benchmark["cpu_time"] bench[(name, hist, dim, cov)].append((int(nbins) ** dim, time)) fig, ax = plt.subplots(1, 3, figsize=(10, 5), sharex=True, sharey=True) @@ -39,8 +39,8 @@ for iaxis, axis_type in enumerate(("tuple", "vector", "vector_of_variant")): v = np.sort(v, axis=0).T # if "semi_dynamic" in axis: continue name2, col, ls = { - "Naive": ("nested for", "r", "--"), - "Indexed": ("indexed", "b", ":")}.get(name, (name, "k", "-")) + "Naive": ("nested for", "0.5", ":"), + "Indexed": ("indexed", "r", "-")}.get(name, (name, "k", "-")) h = plt.plot(v[0], v[1] / v[0], color=col, ls=ls, lw=dim, label=r"%s: $D=%i$" % (name2, dim))[0] handles.append(h) @@ -50,5 +50,4 @@ plt.sca(ax[0]) plt.ylabel("CPU time in ns per bin") plt.legend(handles=handles, fontsize="x-small", frameon=False, handlelength=4, ncol=2) plt.figtext(0.5, 0.05, "number of bins", ha="center") -plt.savefig("iteration_performance.svg") plt.show() diff --git a/doc/iteration_performance.svg b/doc/iteration_performance.svg index 6c132724..9bcb7168 100644 --- a/doc/iteration_performance.svg +++ b/doc/iteration_performance.svg @@ -32,10 +32,10 @@ z +" id="m6eda4d4cc4" style="stroke:#000000;stroke-width:0.8;"/> - + @@ -76,7 +76,7 @@ Q 19.53125 74.21875 31.78125 74.21875 z " id="DejaVuSans-48"/> - + @@ -86,51 +86,10 @@ z - + - - - - - - - - - - - - - - - - - - - + - - + + - + - - - - - - - - - - - - - - - - - - - + - + - - + + - + - - + + - +" id="DejaVuSansid="m97f4f6a214" style="stroke:#000000;stroke-width:0.8;"/> - + - + - + @@ -673,14 +222,14 @@ L -3.5 0 - + - + - + 
- + @@ -688,130 +237,116 @@ L -3.5 0 - + +" id="m41b55c5ae5" style="stroke:#000000;stroke-width:0.6;"/> - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - - - - - - - - - - - - - - - + - + + - - + + - - + + - - + + - - + + - - + + - + - - + + - - + + - + @@ -1320,15 +849,41 @@ z - - + + - - + + - + + + + @@ -1343,15 +898,15 @@ L 100.852 73.276934 - - + + - - + + - + @@ -1366,13 +921,13 @@ L 100.852 83.463553 - - + + - - + + - + @@ -1435,15 +990,15 @@ z - - + + - - + + - + @@ -1461,15 +1016,15 @@ L 198.706 73.276934 - - + + - - + + - + @@ -1499,635 +1054,235 @@ z " style="fill:#ffffffz - + - + - + diff --git a/include/boost/histogram/indexed.hpp b/include/boost/histogram/indexed.hpp index 28197e07..100a6973 100644 --- a/include/boost/histogram/indexed.hpp +++ b/include/boost/histogram/indexed.hpp @@ -19,8 +19,7 @@ namespace boost { namespace histogram { -/** - Coverage mode of the indexed range generator. +/** Coverage mode of the indexed range generator. Defines options for the iteration strategy. */ @@ -29,50 +28,66 @@ enum class coverage { all, /*!< iterate over all bins, including underflow and overflow */ }; -/** Forward range over histogram bins with multi-dimensional index. +/** Input iterator range over histogram bins with multi-dimensional index. The iterator returned by begin() can only be incremented. begin() may only be called - once, calling it a second time returns the end() iterator. The iterator provided by - end() must not be incremented. + once, calling it a second time returns the end() iterator. If several copies of the + input iterators exist, the other copies become invalid if one of them is incremented. 
*/ template class BOOST_HISTOGRAM_NODISCARD indexed_range { private: - using max_dim = mp11::mp_size_t< - detail::buffer_size::axes_type>::value>; - struct cache_item { - axis::index_type idx, begin, end, extent; - }; + using histogram_type = Histogram; + static constexpr std::size_t buffer_size = detail::buffer_size< + typename detail::remove_cvref_t::axes_type>::value; public: using value_iterator = decltype(std::declval().begin()); using value_reference = typename value_iterator::reference; class range_iterator; - /** - Pointer-like class to access value and index of current cell. +private: + struct state_type { + struct index_data { + axis::index_type idx, begin, end, extent; + }; + + state_type(histogram_type& h) : hist_(h) {} + + histogram_type& hist_; + index_data indices_[buffer_size]; + }; + +public: + class detached_accessor; // forward declaration + + /** Pointer-like class to access value and index of current cell. Its methods allow one to query the current indices and bins. Furthermore, it acts - like a pointer to the cell value. + like a pointer to the cell value. The accessor is coupled to the current + range_iterator. Moving the range_iterator forward invalidates the accessor. Use the + detached_accessor class if you must store accessors for later use, but be aware + that a detached_accessor has a state many times larger than a pointer. */ class accessor { public: /// Array-like view into the current multi-dimensional index. 
class index_view { + using index_pointer = const typename state_type::index_data*; + public: + using reference = const axis::index_type&; + /// implementation detail class const_iterator - : public detail::iterator_adaptor { + : public detail::iterator_adaptor { public: - const axis::index_type& operator*() const noexcept { - return const_iterator::base()->idx; - } + reference operator*() const noexcept { return const_iterator::base()->idx; } private: - explicit const_iterator(const cache_item* i) noexcept + explicit const_iterator(index_pointer i) noexcept : const_iterator::iterator_adaptor_(i) {} - friend class accessor; + friend class index_view; }; const_iterator begin() const noexcept { return const_iterator{begin_}; } @@ -80,128 +95,146 @@ public: std::size_t size() const noexcept { return static_cast(end_ - begin_); } - axis::index_type operator[](unsigned d) const noexcept { return begin_[d].idx; } - axis::index_type at(unsigned d) const { return begin_[d].idx; } + reference operator[](unsigned d) const noexcept { return begin_[d].idx; } + reference at(unsigned d) const { return begin_[d].idx; } private: /// implementation detail - index_view(const cache_item* b, const cache_item* e) : begin_(b), end_(e) {} + index_view(index_pointer b, index_pointer e) : begin_(b), end_(e) {} - const cache_item *begin_, *end_; + index_pointer begin_, end_; friend class accessor; }; /// Returns the cell reference. - value_reference get() const noexcept { return *(iter_.iter_); } + value_reference get() const noexcept { return *iter_; } /// @copydoc get() value_reference operator*() const noexcept { return get(); } /// Access fields and methods of the cell object. - value_iterator operator->() const noexcept { return iter_.iter_; } + value_iterator operator->() const noexcept { return iter_; } /// Access current index. /// @param d axis dimension. 
axis::index_type index(unsigned d = 0) const noexcept { - return iter_.parent_->cache_[d].idx; + return state_.indices_[d].idx; } /// Access indices as an iterable range. index_view indices() const noexcept { - return {iter_.parent_->cache_, iter_.parent_->cache_ + iter_.parent_->hist_.rank()}; + return {state_.indices_, state_.indices_ + state_.hist_.rank()}; } /// Access current bin. /// @tparam N axis dimension. template decltype(auto) bin(std::integral_constant = {}) const { - return iter_.parent_->hist_.axis(std::integral_constant()) - .bin(index(N)); + return state_.hist_.axis(std::integral_constant()).bin(index(N)); } /// Access current bin. /// @param d axis dimension. - decltype(auto) bin(unsigned d) const { - return iter_.parent_->hist_.axis(d).bin(index(d)); - } + decltype(auto) bin(unsigned d) const { return state_.hist_.axis(d).bin(index(d)); } - /** - Computes density in current cell. + /** Computes density in current cell. The density is computed as the cell value divided by the product of bin widths. Axes without bin widths, like axis::category, are treated as having unit bin with. */ double density() const { double x = 1; - auto it = iter_.parent_->cache_; - iter_.parent_->hist_.for_each_axis([&](const auto& a) { - const auto w = axis::traits::width_as(a, it++->idx); + unsigned d = 0; + state_.hist_.for_each_axis([&](const auto& a) { + const auto w = axis::traits::width_as(a, this->index(d++)); x *= w ? w : 1; }); return get() / x; } - private: - /// implementation detail - accessor(range_iterator i) : iter_(i) {} + protected: + accessor(state_type& s, value_iterator i) noexcept : state_(s), iter_(i) {} - range_iterator iter_; + state_type& state_; + value_iterator iter_; friend class range_iterator; + friend class detached_accessor; + }; + + /// Accessor that owns a copy of the iterator state. 
+ class detached_accessor : public accessor { + public: + detached_accessor(const accessor& x) : accessor(state_, x.iter_), state_(x.state_) {} + detached_accessor(const detached_accessor& x) + : detached_accessor(static_cast(x)) {} + detached_accessor& operator=(const detached_accessor& x) { + state_ = x.state_; + accessor::iter_ = x.iter_; + return *this; + } + + private: + state_type state_; }; /// implementation detail class range_iterator { - using detail_arrow_t = detail::operator_arrow_dispatch_t; - public: - using value_type = typename value_iterator::value_type; - using reference = accessor; - using pointer = typename detail_arrow_t::result_type; + using value_type = accessor; + using reference = accessor&; + using pointer = accessor*; using difference_type = void; using iterator_category = std::input_iterator_tag; - accessor operator*() const noexcept { return {*this}; } - pointer operator->() const noexcept { return detail_arrow_t::apply(operator*()); } + private: + struct value_proxy { + detached_accessor operator*() { return ref; } + detached_accessor ref; + }; + + public: + reference operator*() noexcept { return value_; } + pointer operator->() noexcept { return &value_; } range_iterator& operator++() { std::size_t stride = 1; - BOOST_ASSERT(parent_); - auto c = parent_->cache_; + auto c = value_.state_.indices_; ++c->idx; - ++iter_; - while (c->idx == c->end && (c != (parent_->cache_ + parent_->hist_.rank() - 1))) { + ++value_.iter_; + while (c->idx == c->end && + (c != (value_.state_.indices_ + value_.state_.hist_.rank() - 1))) { c->idx = c->begin; - iter_ -= (c->end - c->begin) * stride; + value_.iter_ -= (c->end - c->begin) * stride; stride *= c->extent; ++c; ++c->idx; - iter_ += stride; + value_.iter_ += stride; } return *this; } - range_iterator operator++(int) { - auto tmp = *this; + value_proxy operator++(int) { + value_proxy x{value_}; operator++(); - return tmp; + return x; } - bool operator==(const range_iterator& x) const noexcept { 
return iter_ == x.iter_; } + bool operator==(const range_iterator& x) const noexcept { + return value_.iter_ == x.value_.iter_; + } bool operator!=(const range_iterator& x) const noexcept { return !operator==(x); } private: - range_iterator(indexed_range* p, value_iterator i) noexcept : parent_(p), iter_(i) {} - - mutable indexed_range* parent_; - value_iterator iter_; + range_iterator(state_type& s, value_iterator i) noexcept : value_(s, i) {} + accessor value_; friend class indexed_range; }; indexed_range(Histogram& hist, coverage cov) - : hist_(hist), begin_(hist_.begin()), end_(begin_) { - auto ca = cache_; - const auto clast = ca + hist_.rank() - 1; + : state_(hist), begin_(hist.begin()), end_(begin_) { std::size_t stride = 1; - hist_.for_each_axis([ca, clast, cov, &stride, this](const auto& a) mutable { + auto ca = state_.indices_; + const auto clast = ca + state_.hist_.rank() - 1; + state_.hist_.for_each_axis([ca, clast, cov, &stride, this](const auto& a) mutable { using opt = axis::traits::static_options; constexpr int under = opt::test(axis::option::underflow); constexpr int over = opt::test(axis::option::overflow); @@ -215,10 +248,7 @@ public: ca->idx = ca->begin; begin_ += (ca->begin + under) * stride; - if (ca < clast) - end_ += (ca->begin + under) * stride; - else - end_ += (ca->end + under) * stride; + end_ += ((ca < clast ? ca->begin : ca->end) + under) * stride; stride *= ca->extent; ++ca; @@ -228,18 +258,16 @@ public: range_iterator begin() noexcept { auto begin = begin_; begin_ = end_; - return begin == end_ ? end() : range_iterator{this, begin}; + return {state_, begin}; } - range_iterator end() noexcept { return {nullptr, end_}; } + range_iterator end() noexcept { return {state_, end_}; } private: - Histogram& hist_; + state_type state_; value_iterator begin_, end_; - mutable cache_item cache_[max_dim::value]; }; -/** - Generates a range over the histogram entries. +/** Generates an indexed iterator range over the histogram cells. 
Use this in a range-based for loop: @@ -247,11 +275,14 @@ private: for (auto x : indexed(hist)) { ... } ``` - The iterators dereference to an indexed_range::accessor, which has methods to query the + This highly optimized loop is at least comparable in speed to a hand-written loop over + the histogram cells and often much faster, depending on the histogram configuration. The + iterators dereference to an indexed_range::accessor, which has methods to query the current indices and bins and acts like a pointer to the cell value. Accessors, like - pointers, are cheap to copy but get invalidated when the forward range - iterator is incremented. For example, one cannot store accessors in a container for - later use. + pointers, are cheap to copy but get invalidated when the range iterator is incremented. + Likewise, any copies of a range iterator become invalid if one of them is incremented. + An indexed_range::detached_accessor can be stored for later use, but manually copying the + data of interest from the accessor is usually more efficient. @returns indexed_range diff --git a/test/indexed_test.cpp b/test/indexed_test.cpp index dd6502e4..13890f46 100644 --- a/test/indexed_test.cpp +++ b/test/indexed_test.cpp @@ -35,6 +35,7 @@ void run_1d_tests(mp_list) { // calling begin second time yields end iterator but does not confuse the original range BOOST_TEST(ind.begin() == ind.end()); BOOST_TEST_EQ(it->indices().size(), 1); + BOOST_TEST_EQ(it->indices()[0], Coverage() == coverage::all ? 
-1 : 0); if (Coverage() == coverage::all) { BOOST_TEST_EQ(it->index(0), -1); @@ -50,10 +51,11 @@ void run_1d_tests(mp_list) { BOOST_TEST_EQ(**it, 3); BOOST_TEST_EQ(it->bin(0), h.axis().bin(1)); ++it; - BOOST_TEST_EQ(it->index(0), 2); - BOOST_TEST_EQ(**it, 4); - BOOST_TEST_EQ(it->bin(0), h.axis().bin(2)); - ++it; + // check post-increment + auto prev = *it++; + BOOST_TEST_EQ(prev.index(0), 2); + BOOST_TEST_EQ(*prev, 4); + BOOST_TEST_EQ(prev.bin(0), h.axis().bin(2)); if (Coverage() == coverage::all) { BOOST_TEST_EQ(it->index(0), 3); BOOST_TEST_EQ(**it, 5);