Skip to content

Instantly share code, notes, and snippets.

@brson
Last active January 8, 2019 22:36
Show Gist options
  • Save brson/13586d9f12f3af5c8377628c3d0f12d0 to your computer and use it in GitHub Desktop.
Save brson/13586d9f12f3af5c8377628c3d0f12d0 to your computer and use it in GitHub Desktop.
TiKV Rust upgrade to nightly-2018-12-06 benchmarks
+ cargo benchcmp misc-before.txt misc-after.txt
name misc-before.txt ns/iter misc-after.txt ns/iter diff ns/iter diff % speedup
_bench_check_requirement 0 0 0 NaN% x NaN
channel::bench_channel::bench_crossbeam_channel 39 39 0 0.00% x 1.00
channel::bench_channel::bench_mio_channel 155 148 -7 -4.52% x 1.05
channel::bench_channel::bench_receiver_stream 1,858,467 1,869,328 10,861 0.58% x 0.99
channel::bench_channel::bench_receiver_stream_batch 1,103,490 1,103,264 -226 -0.02% x 1.00
channel::bench_channel::bench_thread_channel 28 27 -1 -3.57% x 1.04
channel::bench_channel::bench_util_channel 40 39 -1 -2.50% x 1.03
channel::bench_channel::bench_util_loose 85 89 4 4.71% x 0.96
coprocessor::codec::bench_record_prefix_bigendian_check 940 941 1 0.11% x 1.00
coprocessor::codec::bench_record_prefix_equal_check 1,100 1,101 1 0.09% x 1.00
coprocessor::codec::bench_record_prefix_littleendian_check 817 818 1 0.12% x 1.00
coprocessor::codec::bench_record_prefix_start_with 817 826 9 1.10% x 0.99
coprocessor::codec::bench_table_prefix_check 816 818 2 0.25% x 1.00
coprocessor::codec::bench_table_prefix_start_with 816 819 3 0.37% x 1.00
coprocessor::codec::chunk::bench_chunk_build_offical 63,145 62,897 -248 -0.39% x 1.00
coprocessor::codec::chunk::bench_chunk_build_tidb 29,878 28,900 -978 -3.27% x 1.03
coprocessor::codec::chunk::bench_chunk_iter_offical 33,841 37,339 3,498 10.34% x 0.91
coprocessor::codec::chunk::bench_chunk_iter_tidb 38,088 38,639 551 1.45% x 0.99
coprocessor::codec::chunk::bench_encode_chunk 18,284 21,912 3,628 19.84% x 0.83
coprocessor::dag::expr::scalar::bench_get_scalar_args_with_map 3,387 3,361 -26 -0.77% x 1.01
coprocessor::dag::expr::scalar::bench_get_scalar_args_with_match 1,137 1,133 -4 -0.35% x 1.00
raftkv::bench_async_snapshot 1,945 1,888 -57 -2.93% x 1.03
raftkv::bench_async_snapshots_noop 164 177 13 7.93% x 0.93
raftkv::bench_async_write 1,257 1,228 -29 -2.31% x 1.02
serialization::bench_serialization::bench_decode_one 494 474 -20 -4.05% x 1.04
serialization::bench_serialization::bench_decode_two 879 864 -15 -1.71% x 1.02
serialization::bench_serialization::bench_encode_one 583 580 -3 -0.51% x 1.01
serialization::bench_serialization::bench_encode_two 970 1,010 40 4.12% x 0.96
writebatch::bench_writebatch::bench_writebatch_1 39,876,213 36,526,210 -3,350,003 -8.40% x 1.09
writebatch::bench_writebatch::bench_writebatch_1024 14,261,664 13,616,758 -644,906 -4.52% x 1.05
writebatch::bench_writebatch::bench_writebatch_128 14,308,605 13,507,428 -801,177 -5.60% x 1.06
writebatch::bench_writebatch::bench_writebatch_16 16,245,834 15,099,810 -1,146,024 -7.05% x 1.08
writebatch::bench_writebatch::bench_writebatch_2 27,658,050 25,670,234 -1,987,816 -7.19% x 1.08
writebatch::bench_writebatch::bench_writebatch_256 13,912,289 12,997,538 -914,751 -6.58% x 1.07
writebatch::bench_writebatch::bench_writebatch_32 14,924,121 13,702,075 -1,222,046 -8.19% x 1.09
writebatch::bench_writebatch::bench_writebatch_4 21,888,783 20,360,655 -1,528,128 -6.98% x 1.08
writebatch::bench_writebatch::bench_writebatch_512 13,762,431 12,134,391 -1,628,040 -11.83% x 1.13
writebatch::bench_writebatch::bench_writebatch_64 14,229,843 12,149,712 -2,080,131 -14.62% x 1.17
writebatch::bench_writebatch::bench_writebatch_8 18,194,036 15,890,786 -2,303,250 -12.66% x 1.14
writebatch::bench_writebatch::bench_writebatch_with_capacity 10,942 5,934 -5,008 -45.77% x 1.84
writebatch::bench_writebatch::bench_writebatch_without_capacity 10,317 6,282 -4,035 -39.11% x 1.64
+ cargo benchcmp tikv-before.txt tikv-after.txt
name tikv-before.txt ns/iter tikv-after.txt ns/iter diff ns/iter diff % speedup
coprocessor::codec::batch::column::benches::bench_batch_decode 27,960 28,260 300 1.07% x 0.99
coprocessor::codec::batch::column::benches::bench_push_datum_int 20,659 20,143 -516 -2.50% x 1.03
coprocessor::codec::batch::column::benches::bench_retain 2,638 2,388 -250 -9.48% x 1.10
coprocessor::codec::batch::rows::benches::bench_lazy_batch_column_by_vec_clone 46,848 47,163 315 0.67% x 0.99
coprocessor::codec::batch::rows::benches::bench_lazy_batch_column_by_vec_push_raw_10bytes 53,429 53,680 251 0.47% x 1.00
coprocessor::codec::batch::rows::benches::bench_lazy_batch_column_clone 16,516 16,154 -362 -2.19% x 1.02
coprocessor::codec::batch::rows::benches::bench_lazy_batch_column_clone_10bytes 53,795 54,003 208 0.39% x 1.00
coprocessor::codec::batch::rows::benches::bench_lazy_batch_column_clone_and_decode 43,104 42,998 -106 -0.25% x 1.00
coprocessor::codec::batch::rows::benches::bench_lazy_batch_column_clone_and_decode_decoded 321 411 90 28.04% x 0.78
coprocessor::codec::batch::rows::benches::bench_lazy_batch_column_clone_decoded 312 411 99 31.73% x 0.76
coprocessor::codec::batch::rows::benches::bench_lazy_batch_column_clone_naive 81,452 79,672 -1,780 -2.19% x 1.02
coprocessor::codec::batch::rows::benches::bench_lazy_batch_column_push_raw_10bytes 55,041 55,829 788 1.43% x 0.99
coprocessor::codec::batch::rows::benches::bench_lazy_batch_column_push_raw_4bytes 17,837 18,226 389 2.18% x 0.98
coprocessor::codec::batch::rows::benches::bench_lazy_batch_column_push_raw_9bytes 17,837 18,202 365 2.05% x 0.98
raftstore::store::fsm::router::tests::bench_send 153 162 9 5.88% x 0.94
storage::txn::store::benches::bench_fixture_batch_get 11,884 9,438 -2,446 -20.58% x 1.26
storage::txn::store::benches::bench_fixture_get 92 104 12 13.04% x 0.88
storage::txn::store::benches::bench_fixture_scanner 448,679 479,242 30,563 6.81% x 0.94
storage::txn::store::benches::bench_fixture_scanner_next 435,325 433,957 -1,368 -0.31% x 1.00
storage::txn::store::benches::bench_fixture_scanner_scan 569,422 531,086 -38,336 -6.73% x 1.07
util::codec::bytes::tests::bench_decode 2,396 3,096 700 29.22% x 0.77
util::codec::bytes::tests::bench_decode_inplace 1,151 1,920 769 66.81% x 0.60
util::codec::bytes::tests::bench_decode_inplace_small 36 40 4 11.11% x 0.90
util::codec::bytes::tests::bench_decode_small 70 69 -1 -1.43% x 1.01
util::codec::bytes::tests::bench_encode 47 46 -1 -2.13% x 1.02
util::rocksdb::properties::tests::bench_mvcc_properties 2,189 1,999 -190 -8.68% x 1.10
@breezewish
Copy link

Bench results for the efficient codec PR:

$ cargo benchcmp base.txt new_version.txt 
 name                                                                              base.txt ns/iter  new_version.txt ns/iter  diff ns/iter   diff %  speedup 
 byte::benches::bench_memcmp_decode_first_asc_large                                90                88                                 -2   -2.22%   x 1.02 
 byte::benches::bench_memcmp_decode_first_asc_large_original                       250               316                                66   26.40%   x 0.79 
 byte::benches::bench_memcmp_decode_first_desc_large                               114               110                                -4   -3.51%   x 1.04 
 byte::benches::bench_memcmp_decode_first_desc_large_original                      258               343                                85   32.95%   x 0.75 
 byte::benches::bench_memcmp_decode_first_in_place_asc_large                       121               168                                47   38.84%   x 0.72 
 byte::benches::bench_memcmp_decode_first_in_place_asc_large_original              166               234                                68   40.96%   x 0.71 
 byte::benches::bench_memcmp_decode_first_in_place_desc_large                      148               185                                37   25.00%   x 0.80 
 byte::benches::bench_memcmp_decode_first_in_place_desc_large_original             188               252                                64   34.04%   x 0.75 
 byte::benches::bench_memcmp_encode_all_asc_large                                  94                89                                 -5   -5.32%   x 1.06 
 byte::benches::bench_memcmp_encode_all_asc_large_naive                            179               160                               -19  -10.61%   x 1.12 
 byte::benches::bench_memcmp_encode_all_asc_large_original                         513               512                                -1   -0.19%   x 1.00 
 byte::benches::bench_memcmp_encode_all_asc_small                                  17                18                                  1    5.88%   x 0.94 
 byte::benches::bench_memcmp_encode_all_asc_small_naive                            26                23                                 -3  -11.54%   x 1.13 
 byte::benches::bench_memcmp_encode_all_desc_large                                 130               134                                 4    3.08%   x 0.97 
 byte::benches::bench_memcmp_encode_all_desc_large_original                        1,275             1,267                              -8   -0.63%   x 1.01 
 byte::benches::bench_memcmp_encode_all_desc_small                                 27                27                                  0    0.00%   x 1.00 
 number::benches::bench_decode_u64_le_buffer_decoder                               1                 1                                   0    0.00%   x 1.00 
 number::benches::bench_decode_u64_le_byteorder                                    2                 2                                   0    0.00%   x 1.00 
 number::benches::bench_decode_u64_le_bytes_buf                                    1                 1                                   0    0.00%   x 1.00 
 number::benches::bench_decode_u64_le_number_codec                                 1                 1                                   0    0.00%   x 1.00 
 number::benches::bench_decode_u64_le_number_codec_over_slice                      2                 2                                   0    0.00%   x 1.00 
 number::benches::bench_decode_u64_le_original                                     2                 2                                   0    0.00%   x 1.00 
 number::benches::bench_decode_varint_normal_number_codec_large_buffer             5                 6                                   1   20.00%   x 0.83 
 number::benches::bench_decode_varint_normal_number_codec_small_buffer             8                 8                                   0    0.00%   x 1.00 
 number::benches::bench_decode_varint_normal_number_codec_small_buffer_with_extra  8                 8                                   0    0.00%   x 1.00 
 number::benches::bench_decode_varint_normal_original_large_buffer                 8                 8                                   0    0.00%   x 1.00 
 number::benches::bench_decode_varint_normal_original_small_buffer                 8                 8                                   0    0.00%   x 1.00 
 number::benches::bench_decode_varint_normal_original_small_buffer_with_extra      8                 8                                   0    0.00%   x 1.00 
 number::benches::bench_decode_varint_small_number_codec_large_buffer              2                 3                                   1   50.00%   x 0.67 
 number::benches::bench_decode_varint_small_number_codec_small_buffer              3                 3                                   0    0.00%   x 1.00 
 number::benches::bench_decode_varint_small_original_small_buffer                  2                 2                                   0    0.00%   x 1.00 
 number::benches::bench_encode_u64_le_buffer_encoder_slice                         1                 1                                   0    0.00%   x 1.00 
 number::benches::bench_encode_u64_le_buffer_encoder_vec                           1                 2                                   1  100.00%   x 0.50 
 number::benches::bench_encode_u64_le_byteorder                                    8                 8                                   0    0.00%   x 1.00 
 number::benches::bench_encode_u64_le_number_codec                                 2                 2                                   0    0.00%   x 1.00 
 number::benches::bench_encode_varint_naive                                        9                 10                                  1   11.11%   x 0.90 
 number::benches::bench_encode_varint_number_codec                                 4                 5                                   1   25.00%   x 0.80 
 number::benches::bench_encode_varint_original                                     9                 10                                  1   11.11%   x 0.90 
 number::benches::bench_encode_varint_protobuf                                     46                88                                 42   91.30%   x 0.52 

With 2019-01-01:

$ cargo benchcmp base.txt new_version_2.txt
 name                                                                              base.txt ns/iter  new_version_2.txt ns/iter  diff ns/iter   diff %  speedup 
 byte::benches::bench_memcmp_decode_first_asc_large                                90                90                                    0    0.00%   x 1.00 
 byte::benches::bench_memcmp_decode_first_asc_large_original                       250               327                                  77   30.80%   x 0.76 
 byte::benches::bench_memcmp_decode_first_desc_large                               114               113                                  -1   -0.88%   x 1.01 
 byte::benches::bench_memcmp_decode_first_desc_large_original                      258               334                                  76   29.46%   x 0.77 
 byte::benches::bench_memcmp_decode_first_in_place_asc_large                       121               159                                  38   31.40%   x 0.76 
 byte::benches::bench_memcmp_decode_first_in_place_asc_large_original              166               225                                  59   35.54%   x 0.74 
 byte::benches::bench_memcmp_decode_first_in_place_desc_large                      148               186                                  38   25.68%   x 0.80 
 byte::benches::bench_memcmp_decode_first_in_place_desc_large_original             188               244                                  56   29.79%   x 0.77 
 byte::benches::bench_memcmp_encode_all_asc_large                                  94                89                                   -5   -5.32%   x 1.06 
 byte::benches::bench_memcmp_encode_all_asc_large_naive                            179               168                                 -11   -6.15%   x 1.07 
 byte::benches::bench_memcmp_encode_all_asc_large_original                         513               523                                  10    1.95%   x 0.98 
 byte::benches::bench_memcmp_encode_all_asc_small                                  17                18                                    1    5.88%   x 0.94 
 byte::benches::bench_memcmp_encode_all_asc_small_naive                            26                23                                   -3  -11.54%   x 1.13 
 byte::benches::bench_memcmp_encode_all_desc_large                                 130               120                                 -10   -7.69%   x 1.08 
 byte::benches::bench_memcmp_encode_all_desc_large_original                        1,275             1,267                                -8   -0.63%   x 1.01 
 byte::benches::bench_memcmp_encode_all_desc_small                                 27                28                                    1    3.70%   x 0.96 
 number::benches::bench_decode_u64_le_buffer_decoder                               1                 1                                     0    0.00%   x 1.00 
 number::benches::bench_decode_u64_le_byteorder                                    2                 2                                     0    0.00%   x 1.00 
 number::benches::bench_decode_u64_le_bytes_buf                                    1                 1                                     0    0.00%   x 1.00 
 number::benches::bench_decode_u64_le_number_codec                                 1                 1                                     0    0.00%   x 1.00 
 number::benches::bench_decode_u64_le_number_codec_over_slice                      2                 2                                     0    0.00%   x 1.00 
 number::benches::bench_decode_u64_le_original                                     2                 2                                     0    0.00%   x 1.00 
 number::benches::bench_decode_varint_normal_number_codec_large_buffer             5                 5                                     0    0.00%   x 1.00 
 number::benches::bench_decode_varint_normal_number_codec_small_buffer             8                 8                                     0    0.00%   x 1.00 
 number::benches::bench_decode_varint_normal_number_codec_small_buffer_with_extra  8                 8                                     0    0.00%   x 1.00 
 number::benches::bench_decode_varint_normal_original_large_buffer                 8                 8                                     0    0.00%   x 1.00 
 number::benches::bench_decode_varint_normal_original_small_buffer                 8                 8                                     0    0.00%   x 1.00 
 number::benches::bench_decode_varint_normal_original_small_buffer_with_extra      8                 8                                     0    0.00%   x 1.00 
 number::benches::bench_decode_varint_small_number_codec_large_buffer              2                 3                                     1   50.00%   x 0.67 
 number::benches::bench_decode_varint_small_number_codec_small_buffer              3                 3                                     0    0.00%   x 1.00 
 number::benches::bench_decode_varint_small_original_small_buffer                  2                 2                                     0    0.00%   x 1.00 
 number::benches::bench_encode_u64_le_buffer_encoder_slice                         1                 1                                     0    0.00%   x 1.00 
 number::benches::bench_encode_u64_le_buffer_encoder_vec                           1                 2                                     1  100.00%   x 0.50 
 number::benches::bench_encode_u64_le_byteorder                                    8                 8                                     0    0.00%   x 1.00 
 number::benches::bench_encode_u64_le_number_codec                                 2                 2                                     0    0.00%   x 1.00 
 number::benches::bench_encode_varint_naive                                        9                 9                                     0    0.00%   x 1.00 
 number::benches::bench_encode_varint_number_codec                                 4                 5                                     1   25.00%   x 0.80 
 number::benches::bench_encode_varint_original                                     9                 10                                    1   11.11%   x 0.90 
 number::benches::bench_encode_varint_protobuf                                     46                84                                   38   82.61%   x 0.55 

@breezewish
Copy link

breezewish commented Jan 8, 2019

I investigated why bench_memcmp_decode_first_in_place_asc_large became slower; it turns out the cause is that Vec clone became slower:

/// Benchmarks cloning a 1000-byte, zero-filled `Vec<u8>`.
///
/// `black_box` wraps both the borrowed input and the freshly cloned output
/// to keep the optimizer from discarding the clone being measured.
#[bench]
fn bench(b: &mut test::Bencher) {
    let source = vec![0u8; 1000];
    b.iter(|| {
        let cloned = test::black_box(&source).clone();
        test::black_box(cloned);
    });
}

In rustc 1.29 it is 34 ns/iter (+/- 6); in rustc 1.33 it is 121 ns/iter (+/- 21).

I checked the assembly generated for try_decode_first_in_place and bench_memcmp_decode_first_in_place_asc_large; it is mostly the same (only some register changes and some instruction reordering). So the slowness comes from the Rust core.

I opened an issue in rust-lang/rust: rust-lang/rust#57437

@brson
Copy link
Author

brson commented Jan 8, 2019

Thank you so much for your help @breezewish 🙏

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment