jdnewmil/deque_test.md

## deque_test.md

      
    Raw
  

              deque_test.md
            
          
    Deque Comparisons

Jeff Newmiller
2022-12-29
Scope

Regarding how to speed up deques discussion on
Reddit…
A vector-based R deque

A vector is allocated with a reserve above and below where the data will
be stored, which is resized as needed when in use.
An environment is returned which contains the object data and associated
methods.
library(microbenchmark)

deque1 <- function( reserve_size ) {
  init_size <- as.integer( reserve_size )
  stopifnot( 1L <= reserve_size )
  high <- low <- reserve_size
  data <- integer( 2 * reserve_size )
  is_empty <- function() {
    high == low
  }
  push_front <- function( item ) {
    if ( length( data ) < high ) {
      if ( 2 * reserve_size < low ) {
        old_indexes <- seq( low, high - 1L )
        new_indexes <- old_indexes - reserve_size
        data[ new_indexes ] <<- data[ old_indexes ]
        low <<- low - reserve_size
        high <<- high - reserve_size
      } else {
        data <<- c( data, integer( reserve_size ) )
      }
    }
    data[[ high ]] <<- item
    high <<- high + 1L
    invisible()
  }
  pop_back <- function() {
    if ( is_empty() ) {
      stop( "empty deque1" )
    }
    result <- data[[ low ]]
    low <<- low + 1
    result
  }
  push_back <- function( item ) {
    if ( 1 == low ) {
      if ( 2 * reserve_size < ( length( data ) - high ) ) {
        old_indexes <- seq( low, high - 1L )
        new_indexes <- old_indexes + reserve_size
        data[ new_indexes ] <<- data[ old_indexes ]
      } else {
        data <<- c( integer( reserve_size ), data )
      }
      low <<- low + reserve_size
      high <<- high + reserve_size
    }
    low <<- low - 1L
    data[[ low ]] <<- item
    invisible()
  }
  pop_front <- function() {
    if ( is_empty() ) {
      stop( "empty deque1" )
    }
    high <<- high - 1
    result <- data[[ high ]]
    result
  }
  peek_front <- function() {
    if ( is_empty() ) {
      stop( "empty deque1" )
    }
    data[[ high ]]
  }
  peek_back <- function() {
    if ( is_empty() ) {
      stop( "empty deque1" )
    }
    data[[ low ]]
  }
  environment()
}
Confirm that deque1 works…
x <- deque1( 2 )
x$push_front( 1L )
x$push_front( 2L )
x$push_front( 3L )
x$push_front( 4L )
x$push_back( 5L )
x$push_back( 6L )
x$data
## [1] 0 6 5 1 2 3 4 0

x$pop_front()
## [1] 4

x$push_front( 7L )
x$data
## [1] 0 6 5 1 2 3 7 0

x$pop_back()
## [1] 6

x$pop_back()
## [1] 5

x$pop_back()
## [1] 1

x$pop_back()
## [1] 2

x$pop_back()
## [1] 3

x$pop_back()
## [1] 7

x$data
## [1] 0 6 5 1 2 3 7 0

x$is_empty()
## [1] TRUE

x$push_front( 8L )
x$push_front( 9L )
x$data
## [1] 0 6 5 1 2 8 9 8

x$push_front( 10L )
x$push_front( 11L )
x$data
## [1]  0  6  5  8  9 10 11 10

A list-based R deque

A list is allocated with a reserve above and below where the data will
be stored, which is resized as needed when in use.
An environment is returned which contains the object data and associated
methods.
deque1l <- function( reserve_size ) {
  init_size <- as.integer( reserve_size )
  stopifnot( 1L <= reserve_size )
  high <- low <- reserve_size
  data <- vector( mode = "list", 2 * reserve_size )
  is_empty <- function() {
    high == low
  }
  push_front <- function( item ) {
    if ( length( data ) < high ) {
      if ( 2 * reserve_size < low ) {
        old_indexes <- seq( low, high - 1L )
        new_indexes <- old_indexes - reserve_size
        data[ new_indexes ] <<- data[ old_indexes ]
        low <<- low - reserve_size
        high <<- high - reserve_size
      } else {
        data <<- c( data, vector( mode = "list", reserve_size ) )
      }
    }
    data[[ high ]] <<- item
    high <<- high + 1L
    invisible()
  }
  pop_back <- function() {
    if ( is_empty() ) {
      stop( "empty deque1l" )
    }
    result <- data[[ low ]]
    low <<- low + 1
    result
  }
  push_back <- function( item ) {
    if ( 1 == low ) {
      if ( 2 * reserve_size < ( length( data ) - high ) ) {
        old_indexes <- seq( low, high - 1L )
        new_indexes <- old_indexes + reserve_size
        data[ new_indexes ] <<- data[ old_indexes ]
      } else {
        data <<- c( vector( mode = "list", reserve_size ), data )
      }
      low <<- low + reserve_size
      high <<- high + reserve_size
    }
    low <<- low - 1L
    data[[ low ]] <<- item
    invisible()
  }
  pop_front <- function() {
    if ( is_empty() ) {
      stop( "empty deque1l" )
    }
    high <<- high - 1
    result <- data[[ high ]]
    result
  }
  peek_front <- function() {
    if ( is_empty() ) {
      stop( "empty deque1l" )
    }
    data[[ high ]]
  }
  peek_back <- function() {
    if ( is_empty() ) {
      stop( "empty deque1;" )
    }
    data[[ low ]]
  }
  environment()
}
Confirm that deque1l works…
x <- deque1l( 2 )
x$push_front( 1L )
x$push_front( 2L )
x$push_front( 3L )
x$push_front( 4L )
x$push_back( 5L )
x$push_back( 6L )
x$data
## [[1]]
## NULL
## 
## [[2]]
## [1] 6
## 
## [[3]]
## [1] 5
## 
## [[4]]
## [1] 1
## 
## [[5]]
## [1] 2
## 
## [[6]]
## [1] 3
## 
## [[7]]
## [1] 4
## 
## [[8]]
## NULL

x$pop_front()
## [1] 4

x$push_front( 7L )
x$data
## [[1]]
## NULL
## 
## [[2]]
## [1] 6
## 
## [[3]]
## [1] 5
## 
## [[4]]
## [1] 1
## 
## [[5]]
## [1] 2
## 
## [[6]]
## [1] 3
## 
## [[7]]
## [1] 7
## 
## [[8]]
## NULL

x$pop_back()
## [1] 6

x$pop_back()
## [1] 5

x$pop_back()
## [1] 1

x$pop_back()
## [1] 2

x$pop_back()
## [1] 3

x$pop_back()
## [1] 7

x$data
## [[1]]
## NULL
## 
## [[2]]
## [1] 6
## 
## [[3]]
## [1] 5
## 
## [[4]]
## [1] 1
## 
## [[5]]
## [1] 2
## 
## [[6]]
## [1] 3
## 
## [[7]]
## [1] 7
## 
## [[8]]
## NULL

x$is_empty()
## [1] TRUE

x$push_front( 8L )
x$push_front( 9L )
x$data
## [[1]]
## NULL
## 
## [[2]]
## [1] 6
## 
## [[3]]
## [1] 5
## 
## [[4]]
## [1] 1
## 
## [[5]]
## [1] 2
## 
## [[6]]
## [1] 8
## 
## [[7]]
## [1] 9
## 
## [[8]]
## [1] 8

x$push_front( 10L )
x$push_front( 11L )
x$data
## [[1]]
## NULL
## 
## [[2]]
## [1] 6
## 
## [[3]]
## [1] 5
## 
## [[4]]
## [1] 8
## 
## [[5]]
## [1] 9
## 
## [[6]]
## [1] 10
## 
## [[7]]
## [1] 11
## 
## [[8]]
## [1] 10

A naive vector concatenation implementation

This implementation asks the R memory allocator for a variety of
distinctly-sized memory blocks as the deque is used.
deque0 <- function() {
  data <- integer( 0 )
  is_empty <- function() {
    0 == length( data )
  }
  push_front <- function( item ) {
    data <<- c( data, item )
    invisible()
  }
  pop_back <- function() {
    if ( is_empty() ) {
      stop( "empty deque0" )
    }
    result <- data[[ 1 ]]
    data <<- data[ -1 ]
    result
  }
  push_back <- function( item ) {
    data <<- c( item, data )
    invisible()
  }
  pop_front <- function() {
    if ( is_empty() ) {
      stop( "empty deque0" )
    }
    result <- data[[ length( data ) ]]
    data <<- data[ -length( data ) ]
    result
  }
  peek_front <- function() {
    if ( is_empty() ) {
      stop( "empty deque0" )
    }
    data[[ length( data ) ]]
  }
  peek_back <- function() {
    if ( is_empty() ) {
      stop( "empty deque0" )
    }
    data[[ 1 ]]
  }
  environment()
}
Same testing as above…
x <- deque0()
x$push_front( 1L )
x$push_front( 2L )
x$push_front( 3L )
x$push_front( 4L )
x$push_back( 5L )
x$push_back( 6L )
x$data
## [1] 6 5 1 2 3 4

x$pop_front()
## [1] 4

x$push_front( 7L )
x$data
## [1] 6 5 1 2 3 7

x$pop_back()
## [1] 6

x$pop_back()
## [1] 5

x$pop_back()
## [1] 1

x$pop_back()
## [1] 2

x$pop_back()
## [1] 3

x$pop_back()
## [1] 7

x$data
## integer(0)

x$is_empty()
## [1] TRUE

x$push_front( 8L )
x$push_front( 9L )
x$data
## [1] 8 9

x$push_front( 10L )
x$push_front( 11L )
x$data
## [1]  8  9 10 11

A dequer-based deque implementation

The dequer package uses a C-based doubly-linked list to reduce
copying.
deque_dequer <- function() {
  data <- dequer::deque()
  size <- 0L
  is_empty <- function() {
    0 == size
  }
  push_front <- function( item ) {
    dequer::push( data, item )
    invisible()
  }
  pop_back <- function() {
    dequer::popback( data )
  }
  push_back <- function( item ) {
    dequer::pushback( data, item )
    invisible()
  }
  pop_front <- function() {
    dequer::pop( data )
  }
  peek_front <- function() {
    NA_integer_  # not implemented
  }
  peek_back <- function() {
    NA_integer_  # not implemented
  }
  environment()
}
A dummy deque implementation

This is used to isolate benchmark scaffolding overhead.
deque_dummy <- function() {
  is_empty <- function() {
    FALSE
  }
  push_front <- function( item ) {
    invisible()
  }
  pop_back <- function() {
    NA_integer_
  }
  push_back <- function( item ) {
    invisible()
  }
  pop_front <- function() {
    NA_integer_
  }
  peek_front <- function() {
    NA_integer_
  }
  peek_back <- function() {
    NA_integer_
  }
  environment()
}
Performance benchmarking

The rando_deque function invokes the specified series of deque
operations on the specified deque object. moves is a series of
integers in the range 0 to 3.
rando_deque <- function( dq, moves ) {
  for ( i in seq_along( moves ) ) {
    if ( 0 == moves[ i ] ) {
      dq$push_front( i )
    } else if ( 1 == moves[ i ] ) {
      dq$push_back( i )
    } else if ( 2 == moves[ i ] ) {
      if ( ! dq$is_empty() ) {
        dq$pop_front()
      }
    } else {
      if ( ! dq$is_empty() ) {
        dq$pop_back()
      }
    }
  }
}
# clear memory ... mostly useful if you re-run the benchmarking
gc()
##           used (Mb) gc trigger (Mb) max used (Mb)
## Ncells  544063 29.1    1205975 64.5  1205975 64.5
## Vcells 1044505  8.0    8388608 64.0  1800479 13.8

set.seed( 42 )
# a slight bias to cause the deque to push more often than pop
# emphasizes the performance discrepancies
moves <- sample(
  0:3
, 10000L
, replace = TRUE
, prob = c( 0.26, 0.26, 0.24, 0.24 )
)
raw <- microbenchmark(
  rando_deque( deque0(), moves )
, rando_deque( deque_dequer(), moves )
, rando_deque( deque1l( 100 ), moves )
, rando_deque( deque1( 100 ), moves )
, rando_deque( deque_dummy(), moves )
, times = 100L
, unit = "milliseconds"
)
raw
## Unit: milliseconds
##                                expr       min        lq      mean    median
##        rando_deque(deque0(), moves) 18.445917 20.277774  54.89155 25.226743
##  rando_deque(deque_dequer(), moves) 15.853435 18.536572 169.05088 25.443787
##    rando_deque(deque1l(100), moves) 10.121858 10.922809  45.98897 12.430132
##     rando_deque(deque1(100), moves) 10.074483 11.116362  32.49120 12.096595
##   rando_deque(deque_dummy(), moves)  4.551602  4.898337  11.17592  5.567538
##         uq       max neval
##  30.378020  918.3399   100
##  34.183066 3123.2980   100
##  17.206857 1314.5680   100
##  16.911341 1406.6940   100
##   9.011526  276.3798   100

raw_summary <- summary( raw )
dummy_row <- nrow( raw_summary )
deque1_row <- dummy_row - 1L
# time excluding rando_deque overhead
dqtime_only <- sweep(
  as.matrix( raw_summary[ -dummy_row, -1 ] )
, 2
, as.matrix( raw_summary[ dummy_row, -1 ] )
)
# ratio of times relative to deque1
dqfactor_only <- sweep(
  dqtime_only[ seq.int( deque1_row - 1L ), ]
, 2
, dqtime_only[ deque1_row, ]
, FUN = '/'
)
rownames( dqtime_only ) <- raw_summary[[ 1 ]][ -dummy_row ]
rownames( dqfactor_only ) <- rownames( dqtime_only )[ -deque1_row ]
dqtime_only
##                                          min        lq      mean    median
## rando_deque(deque0(), moves)       13.894315 15.379437  43.71563 19.659205
## rando_deque(deque_dequer(), moves) 11.301833 13.638235 157.87496 19.876250
## rando_deque(deque1l(100), moves)    5.570256  6.024472  34.81306  6.862595
## rando_deque(deque1(100), moves)     5.522881  6.218025  21.31528  6.529058
##                                           uq       max neval
## rando_deque(deque0(), moves)       21.366494  641.9601     0
## rando_deque(deque_dequer(), moves) 25.171540 2846.9182     0
## rando_deque(deque1l(100), moves)    8.195332 1038.1882     0
## rando_deque(deque1(100), moves)     7.899815 1130.3142     0

dqfactor_only
##                                         min        lq     mean   median
## rando_deque(deque0(), moves)       2.515773 2.4733639 2.050906 3.011033
## rando_deque(deque_dequer(), moves) 2.046365 2.1933388 7.406657 3.044275
## rando_deque(deque1l(100), moves)   1.008578 0.9688723 1.633244 1.051085
##                                          uq       max neval
## rando_deque(deque0(), moves)       2.704683 0.5679484   NaN
## rando_deque(deque_dequer(), moves) 3.186346 2.5186962   NaN
## rando_deque(deque1l(100), moves)   1.037408 0.9184952   NaN

Observations

The dequer-based solution is generally the least performant option by
a factor of 2-to-4. The naive list concatenation deque0 performs
slightly better than the dequer option. The list and integer vector
solutions with reserves perform very similarly.