Last active
August 23, 2018 04:33
-
-
Save rocketnia/bc09c6cb11a11e66b123f64c54550f01 to your computer and use it in GitHub Desktop.
A vector type that automatically switches to a byte string representation based on the use of contracts.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#lang racket | |
; vector-representation-selection.rkt | |
; | |
; A vector type `my-vector` that automatically switches to a byte | |
; string representation if it's only reachable via values the contract | |
; `(my-vectorof byte?)` has been imposed on. Test this at the REPL | |
; like so: | |
; | |
; (require (submod "vector-representation-selection.rkt" demo)) | |
; | |
; The demo should display the message "Representation changed!" | |
; Copyright 2018 Ross Angle | |
; | |
; Licensed under the Apache License, Version 2.0 (the "License"); | |
; you may not use this file except in compliance with the License. | |
; You may obtain a copy of the License at | |
; | |
; http://www.apache.org/licenses/LICENSE-2.0 | |
; | |
; Unless required by applicable law or agreed to in writing, | |
; software distributed under the License is distributed on an | |
; "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, | |
; either express or implied. See the License for the specific | |
; language governing permissions and limitations under the License. | |
(provide | |
(rename-out [-my-vector? my-vector?]) | |
my-vectorof | |
list->my-vector | |
my-vector->list | |
my-vector-length | |
my-vector-ref | |
my-vector-set! | |
announce-representation-change!) | |
; For testing, we allow a user-supplied procedure to be invoked | |
; whenever the representation of one of our vectors changes to a byte | |
; string. Since we access this from a private thread, we use a box | |
; instead of a parameter so that its configuration isn't thread-local. | |
(define/contract announce-representation-change! | |
(box/c (-> any)) | |
(box (lambda () (void)))) | |
; We accompany our vector representation with a bunch of metadata so | |
; that we can switch to a Racket byte string at the right time: | |
; | |
; `wrap-elem`: A function to call to apply contracts to vector | |
; elements as they enter and exit the vector. This allows us to | |
; implement the `my-vectorof` higher-order contract. | |
; | |
; `enforces-bytes`: A boolean keeping track of whether `wrap-elem` | |
; is known to enforce a contract at least as specific as `byte?`. | |
; This helps us determine whether this reference to the vector is | |
; prepared for the vector's representation to change to a Racket | |
; byte string. | |
; | |
; `viewed-internal`: A `my-vector-internal` value. The results of | |
; attaching contracts to a vector will be different `my-vector` | |
; values which refer to the same `my-vector-internal` value. | |
; | |
; `sem`: A semaphore that we use as a mutex to avoid race conditions | |
; when we access or modify `any-refs` or `impl`. | |
; | |
; `any-refs`: An `eq?`-based weak hash table that has an entry for | |
; each `my-vector` value that still exists and still might insert | |
; non-byte values into the vector. Once all of those values have | |
; been garbage-collected, this weak hash table will be empty, at | |
; which point the vector's representation can switch to a Racket | |
; byte string. | |
; | |
; `impl`: A mutable box containing either a Racket vector or a | |
; Racket byte string. This mutable box is how we switch the | |
; representation. | |
; | |
(struct my-vector-internal (sem any-refs impl)) | |
(struct my-vector (wrap-elem enforces-bytes viewed-internal)) | |
; This is where we register the finalizers for our `my-vector` values. | |
(define my-vector-executor (make-will-executor)) | |
; We can just run our finalizers any time, so when this module is | |
; instantiated, we start a thread to do that. | |
(void | |
(thread | |
(lambda () | |
(let next () | |
(will-execute my-vector-executor) | |
(next))))) | |
; We also start a second thread where we run the task that changes the | |
; representation of a vector. This task can be started due to a | |
; finalizer or due to a `my-vector-set!` operation. | |
(define my-vector-representation-change-task-runner-thread | |
(thread | |
(lambda () | |
(let next () | |
((thread-receive)) | |
(next))))) | |
(define/contract | |
(my-vector-change-representation-if-possible! internal) | |
(-> my-vector-internal? void?) | |
(match-define (my-vector-internal sem any-refs impl) internal) | |
(thread-send my-vector-representation-change-task-runner-thread | |
(lambda () | |
(call-with-semaphore sem | |
(lambda () | |
(define v (unbox impl)) | |
; If the vector has already changed representation, or if it | |
; can still be accessed via `my-vector` values that permit | |
; non-byte elements, then we're done. | |
(when (and (vector? v) (hash-empty? any-refs)) | |
; Otherwise, if the vector currently contains any non-byte | |
; references, then we're done for now, but a future | |
; `my-vector-set!` operation might give us another | |
; opportunity. | |
(let ([v-list (vector->list v)]) | |
(when (for/and ([elem v-list]) (byte? elem)) | |
; Otherwise, we're good to go, and we change the | |
; representation from a vector to a byte string. | |
((unbox announce-representation-change!)) | |
(set-box! impl (list->bytes v-list)))))))))) | |
; This does two things: If the given reference to the vector has a | |
; false `enforces-bytes`, it schedules a possible representation | |
; change (from a Racket vector to a Racket byte string) once this | |
; reference has been garbage-collected. But it also prevents such a | |
; change from happening *until* this reference has been | |
; garbage-collected. | |
(define/contract (my-vector-manage-representation-change! v) | |
(-> my-vector? my-vector?) | |
(match-define (my-vector _ enforces-bytes internal) v) | |
(match-define (my-vector-internal _ any-refs _) internal) | |
(unless enforces-bytes | |
(will-register my-vector-executor v | |
(lambda (v) | |
; Readying this will procedure has returned the value to being | |
; reachable, so it's still part of the weak hash table here. | |
; We explicitly remove it. | |
(hash-remove! any-refs v) | |
(my-vector-change-representation-if-possible! internal))) | |
(hash-set! any-refs v 'arbitrary-value)) | |
v) | |
(define/contract (list->my-vector lst) | |
(-> list? my-vector?) | |
; This `my-vector-manage-representation-change!` call will allow the | |
; representation to become a byte string instead of a Racket vector, | |
; but only after this reference has been garbage-collected. | |
(my-vector-manage-representation-change! | |
(my-vector (lambda (elem) elem) #f | |
(my-vector-internal (make-semaphore 1) (make-weak-hasheq) | |
(box (list->vector lst)))))) | |
; We define a contract, `(my-vectorof elem/c)`. When `elem/c` is | |
; `byte?`, the projections of this contract are vectors that do not | |
; hold back the representation from becoming a byte string. | |
(define/contract (my-vectorof elem/c) | |
(-> contract? contract?) | |
(make-contract | |
#:name 'my-vectorof | |
#:first-order | |
(match-lambda | |
[ | |
(my-vector wrap-elem _ (my-vector-internal _ _ (box v))) | |
(for/and ([elem v]) | |
(contract-first-order-passes? elem/c elem))] | |
[_ #f]) | |
#:projection | |
(lambda (blame) | |
(define elem/c-projection | |
((contract-projection elem/c) blame)) | |
(match-lambda | |
[ | |
(my-vector wrap-elem enforces-bytes internal) | |
(match-define (my-vector-internal sem _ _) internal) | |
(call-with-semaphore sem | |
(lambda () | |
; If the result still has a false `enforces-bytes`, this | |
; `my-vector-manage-representation-change!` call will | |
; cause the representation to remain a Racket vector at | |
; least until this reference has been garbage-collected. | |
(my-vector-manage-representation-change! | |
(my-vector | |
; We augment the vector's `wrap-elem` so that it | |
; also projects the value through the `elem/c` | |
; contract. | |
(lambda (elem) | |
(wrap-elem (elem/c-projection elem))) | |
; We augment the vector's `enforces-bytes` so that | |
; it takes into account whether the `elem/c` is | |
; known to be `byte?`. | |
(or enforces-bytes (eq? byte? elem/c)) | |
internal))))] | |
[ | |
v | |
(raise-blame-error blame v | |
'(expected "a my-vector" given: "~e") | |
v)])))) | |
; Besides `list->my-vector`, we define a few more operations that make | |
; these vectors useful. | |
; | |
; The implementation of `my-vector-length` is pretty simple, but the | |
; implementations of `my-vector-ref` and `my-vector->list` must invoke | |
; the vector's `wrap-elem` in the right places. | |
; | |
; The implementation of `my-vector-set!` must use not only `wrap-elem` | |
; but also `my-vector-change-representation-if-possible!` in case it | |
; has set up the right conditions for a representation change. | |
; NOTE: Invocations of `my-vector-set!` may trigger computations that | |
; that take time proportional to the length N of the vector, and this | |
; may happen multiple times (specifically if the task keeps | |
; discovering non-byte elements in the vector which prevent it from | |
; following through with the representation change). We could get | |
; amortized near-constant time instead if we did this: Keep track of | |
; the number of times `my-vector-set!` is called, and only change the | |
; representation once every N times. However, since we intend this | |
; module only to be an example of how `my-vectorof` can be | |
; implemented, we choose to keep the implementation simpler than that. | |
(define/contract (-my-vector? v) | |
(-> any/c boolean?) | |
(my-vector? v)) | |
(define/contract (my-vector-length v) | |
(-> my-vector? exact-nonnegative-integer?) | |
(match-define (my-vector _ _ (my-vector-internal _ _ (box impl))) v) | |
(sequence-length impl)) | |
(define/contract (my-vector-ref v i) | |
(-> my-vector? exact-nonnegative-integer? any/c) | |
(match-define | |
(my-vector wrap-elem _ (my-vector-internal _ _ (box impl))) | |
v) | |
(wrap-elem (sequence-ref impl i))) | |
(define/contract (my-vector-set! v i val) | |
(-> my-vector? exact-nonnegative-integer? any/c void?) | |
(match-define (my-vector wrap-elem _ internal) v) | |
(match-define (my-vector-internal _ _ (box impl)) internal) | |
(if (bytes? impl) | |
(bytes-set! impl i (wrap-elem val)) | |
(begin | |
(vector-set! impl i (wrap-elem val)) | |
; We may have just removed the last non-byte from the vector, so | |
; we schedule a task to change the representation if possible. | |
(my-vector-change-representation-if-possible! internal)))) | |
(define/contract (my-vector->list v) | |
(-> my-vector? list?) | |
(match-define | |
(my-vector wrap-elem _ (my-vector-internal _ _ (box impl))) | |
v) | |
(for/list ([elem impl]) | |
(wrap-elem elem))) | |
(module+ demo | |
(provide impose-my-vectorof-byte) | |
(define/contract (impose-my-vectorof-byte v) | |
(-> (my-vectorof byte?) any/c) | |
v) | |
; For this demo, we just create a vector, impose a | |
; (my-vectorof byte?) contract on it, make the original unreachable, | |
; and run the garbage collector. This causes the original | |
; `my-vector` value to have its will run, which causes the | |
; representation to change. We set up | |
; `announce-representation-change!` so that we can see it happen. | |
(set-box! announce-representation-change! | |
(lambda () | |
(displayln "Representation change!"))) | |
(define example | |
(impose-my-vectorof-byte (list->my-vector (list 1 2 3)))) | |
(collect-garbage) | |
) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment