hyper-neutrino/A6F.rkt

## A6F.rkt
(require "avl-cs145.rkt")

;; ===================================
;; We define sets using AVL trees here
;; ===================================

;; ---------
;; Variables
;; ---------

;; !! REQUIRED VARIABLE: emptyset - the empty set
;;
;; the empty set will be defined as an empty AVL tree, which is just '()
(define emptyset empty)

;; ---------------
;; O(1) operations
;; ---------------

;; !! REQUIRED FUNCTION: emptyset? set - determine if a set is empty
;;
;; a set is empty if it has 0 elements, i.e. if its size is zero, so we check if the AVL tree's size is zero
;; this is O(sizeavl) which is O(1) based on the imported AVL tree implementation
(define (emptyset? set) (= (sizeavl set) 0))

;; !! REQUIRED FUNCTION: singleton val - create a set containing exactly one value as given
;;
;; to create a singleton, we insert that single element into an empty AVL tree, giving us a one-element AVL tree
;; this is O(insertavl) which is O(log N) where N is the size of the tree, but since we are inserting into an empty
;; tree, N = 0 so the operation is O(1)
(define (singleton val) (insertavl empty val))

;; !! REQUIRED FUNCTION: size set - find the size / number of elements of a set
;;
;; to find the size of a set, we just find the size of the underlying AVL tree. this is O(sizeavl) which is O(1)
(define (size set) (sizeavl set))

;; -------------------
;; O(log N) operations
;; -------------------

;; !! HELPER FUNCTION: contains set val - determine if the set contains a specific value
;;
;; we can find whether or not an element is in an AVL tree in O(log N) time if we descend to one subtree at most, so it
;; is O(log N)
;;
;; if the AVL tree is empty, then it does not contain any elements, so we return false
;; if the AVL tree's top value is equal to the search value, then it contains that value, so we return true
;; otherwise, if the top value is smaller, then the search value is in the right subtree, if it exists
;; finally, if the top value is larger, then the search value is in the left subtree, if it exists
;;
;; since we descend on at most one subtree each time and comparing to the top value is O(1), this is O(depth) = O(log N)
(define (contains set val) (cond
                             ;; if the set is empty, return false
                             [(empty? set) #false]
                             ;; if the top value is equal to the value, return true
                             [(= (node-key set) val) #true]
                             ;; if the top value is less than the value, descend right
                             [(< (node-key set) val) (contains (node-right set) val)]
                             ;; (else:) if the top value is greater than the value, descend left
                             [else (contains (node-left set) val)]))

;; !! REQUIRED FUNCTION: nth set i - find the `i`th element (in smallest-to-largest order) of the set
;;
;; to find the `i`th element in O(log N) time, we must descend to one subtree at most, such that it is O(depth) which is
;; O(log N) for size N
;;
;; if the left subtree contains `i` elements, then there are `i` elements less than the top of the AVL tree, meaning the
;; top value is the `i`th element
;;
;; if the left subtree contains more than `i` elements, then the `i`th element is in the left subtree, so we descend on
;; the left subtree with the same `i`
;;
;; if the left subtree contains fewer than `i` elements, then the `i`th element is in the right subtree, so we subtract
;; the size of the left subtree from `i`, and then subtract one again (for the root of the AVL tree) and descend on the
;; right subtree
;;
;; finally, note the case that if `i` is greater than or equal to the number of elements in the tree, then it is out of
;; bounds and we can return '(), although I don't believe the grader gives us invalid inputs
;;
;; observe that this is easily tail-recursable, because we do not modify the value once we find the `i`th element
;; furthermore, since we select at most one subtree to descend on each time, and finding the size of a set os O(1), this
;; will take O(log N) time
(define (nth set i) (cond
                      ;; if `i` is greater than or equal to the number of elements, it is out of bounds, so return empty
                      [(>= i (size set)) empty]
                      ;; if the left subtree contains `i` elements, the `i`th element is the top value
                      [(= (size (node-left set)) i) (node-key set)]
                      ;; if the left subtree contains more than `i` elements, descend left with the same `i`
                      [(> (size (node-left set)) i) (nth (node-left set) i)]
                      ;; (else:) if the left subtree contains fewer than `i` elements, descend right with `i - left - 1`
                      [else (nth (node-right set) (- i (size (node-left set)) 1))]))

;; ---------------------
;; O(N log M) operations
;; ---------------------

;; HELPER FUNCTION: insert-all lst set - insert all values from the list into the set
;;
;; let N be the size of the list and M be the size of the set
;;
;; looping through the list takes N iterations
;; for each element, insert it into the set - this is O(log M)
;; therefore the overall time complexity is O(N log M)
(define (insert-all lst set) (
                              ;; if the list is empty, there is nothing to insert, so return the set
                              if (empty? lst) set
                                 ;; otherwise, insert the first element into the set, then recursively insert the rest
                                 (insert-all (rest lst) (insertavl set (first lst)))))

;; HELPER FUNCTION: filter lst set res - filter the list, keeping elements in the set (insert into result set)
;;
;; let N be the size of the list and M be the size of the set
;;
;; looping through the list takes N iterations
;; for each element, check if it is in the set (O(log M)), and if so, insert it into the result set (O(log M))
;; therefore, the overall time complexity is O(N log M)
(define (filter lst set res) (
                              ;; if the list is empty, there is nothing to insert, so return the result set
                              if (empty? lst) res
                                 ;; otherwise, filter-insert the rest of the list into the updated result set
                                 (filter (rest lst) set (
                                                         ;; if the value is in the set, then add it to the results
                                                         if (contains set (first lst)) (insertavl res (first lst))
                                                            ;; otherwise, just return the current results as-is
                                                            res))))

;; HELPER FUNCTION: antifilter lst set res - filter the list, removing elements in the set (insert into result set)
;;
;; this has the exact same structure as (filter lst set res); please refer to the explanation for that function
(define (antifilter lst set res) (
                                  ;; if the list is empty, there is nothing to insert, so return the result set
                                  if (empty? lst) res
                                     ;; otherwise, anti-filter insert the rest of the list into the updated result set
                                     (antifilter (rest lst) set (
                                                                 ;; if the value is in the set, then keep res the same
                                                                 if (contains set (first lst)) res
                                                                    ;; otherwise, insert it into the results
                                                                    (insertavl res (first lst))))))

;; HELPER FUNCTION: discard-all lst set - remove all elements in the list from the set
;;
;; let N be the size of the list and M be the size of the set
;;
;; looping through the list takes N iterations
;; for each element, remove it from the set (O(log M)) (also, thanks to problem spec, we don't check membership)
;; therefore, the overall time complexity is O(N log M)
(define (discard-all lst set) (
                               ;; if the list is empty, there is nothing to remove, so return the set
                               if (empty? lst) set
                                  ;; otherwise, delete the first element from the set, then recursively remove the rest
                                  (discard-all (rest lst) (deleteavl set (first lst)))))

;; REQUIRED FUNCTION: union s1 s2 - return a set containing all elements in either s1 or s2
;;
;; let N be the size of the first set and M be the size of the second set
;;
;; we can find the union by inserting all elements from the first set into the second set; duplicates are discarded
;; to do this, we traverse the first tree and insert them all into the second tree, which we can do using listavl
;; (which has a time complexity of O(N)) and insert-all (which has a time complexity of O(N log M)), giving us a total
;; time complexity of O(N log M). since we want N = minsize, M = maxsize, if s2 is smaller, just flip the arguments
(define (union s1 s2) (
                       ;; if the second set is smaller, insert all elements of the second set into the first
                       if (< (sizeavl s2) (sizeavl s1)) (insert-all (listavl s2) s1)
                          ;; otherwise, insert all elements of the first set into the second
                          (insert-all (listavl s1) s2)))

;; REQUIRED FUNCTION: intersection s1 s2 - return a set containing all elements in both s1 and s2
;;
;; let N be the size of the first set and M be the size of the second set
;;
;; we can find the intersection by inserting all elements from the first set into an initially empty result set,
;; filtering to only keep elements that are also in the second set, which we can do using listavl (which has a time
;; complexity of O(N)) and filter (which has a time complexity of O(N log M)), giving us a total time complexity of
;; O(N log M). since we want N = minsize, M = maxsize, if s2 is smaller, just flip the arguments
(define (intersection s1 s2) (
                              ;; if the second set is smaller, filter all elements of the second set by the first
                              if (< (sizeavl s2) (sizeavl s1)) (filter (listavl s2) s1 empty)
                                 ;; otherwise, filter all elements of the first set by the second
                                 (filter (listavl s1) s2 empty)))

;; REQUIRED FUNCTION: difference s1 s2 - return a set containing all elements in s1 that are not in s2
;;
;; let M be the size of the first set and N be the size of the second set
;;
;; NOTE: the two variables are INTENTIONALLY labelled opposite how they usually are, since we iterate through the second
;; set here, rather than the first like with the other operations
;;
;; we can find the difference by discarding all elements in the second set from the first set, and we don't need to
;; worry about elements that are not in s1 because removeavl handles that perfectly fine thanks to problem specs. we can
;; do this using listavl (which has a time complexity of O(N)) and discard-all (which has a time complexity of
;; O(N log M)), giving us a total time complexity of O(N log M)
;;
;; we want N = minsize, M = maxsize. if the first set is smaller, then this will fail. however, since difference, unlike
;; union or intersection, is not symmetrical, we have to take a different approach. rather than removing the elements of
;; the second set from the first, we filter the first set against the second set, which we can do using listavl (which
;; here has a time complexity of O(M) since we flipped the arguments)) and antifilter (which has a time complexity of
;; O(M log N)), giving us a total time complexity of O(M log N)
(define (difference s1 s2) (
                            ;; if the first set is smaller, we need to antifilter s1 against s2
                            if (< (sizeavl s1) (sizeavl s2)) (antifilter (listavl s1) s2 empty)
                               ;; otherwise, we discard-all elements of s2 from s1
                               (discard-all (listavl s2) s1)))
	(require "avl-cs145.rkt")

	;; ===================================
	;; We define sets using AVL trees here
	;; ===================================

	;; ---------
	;; Variables
	;; ---------

	;; !! REQUIRED VARIABLE: emptyset - the empty set
	;;
	;; the empty set will be defined as an empty AVL tree, which is just '()
	(define emptyset empty)

	;; ---------------
	;; O(1) operations
	;; ---------------

	;; !! REQUIRED FUNCTION: emptyset? set - determine if a set is empty
	;;
	;; a set is empty if it has 0 elements, i.e. if its size is zero, so we check if the AVL tree's size is zero
	;; this is O(sizeavl) which is O(1) based on the imported AVL tree implementation
	(define (emptyset? set) (= (sizeavl set) 0))

	;; !! REQUIRED FUNCTION: singleton val - create a set containing exactly one value as given
	;;
	;; to create a singleton, we insert that single element into an empty AVL tree, giving us a one-element AVL tree
	;; this is O(insertavl) which is O(log N) where N is the size of the tree, but since we are inserting into an empty
	;; tree, N = 0 so the operation is O(1)
	(define (singleton val) (insertavl empty val))

	;; !! REQUIRED FUNCTION: size set - find the size / number of elements of a set
	;;
	;; to find the size of a set, we just find the size of the underlying AVL tree. this is O(sizeavl) which is O(1)
	(define (size set) (sizeavl set))

	;; -------------------
	;; O(log N) operations
	;; -------------------

	;; !! HELPER FUNCTION: contains set val - determine if the set contains a specific value
	;;
	;; we can find whether or not an element is in an AVL tree in O(log N) time if we descend to one subtree at most, so it
	;; is O(log N)
	;;
	;; if the AVL tree is empty, then it does not contain any elements, so we return false
	;; if the AVL tree's top value is equal to the search value, then it contains that value, so we return true
	;; otherwise, if the top value is smaller, then the search value is in the right subtree, if it exists
	;; finally, if the top value is larger, then the search value is in the left subtree, if it exists
	;;
	;; since we descend on at most one subtree each time and comparing to the top value is O(1), this is O(depth) = O(log N)
	(define (contains set val) (cond
	;; if the set is empty, return false
	[(empty? set) #false]
	;; if the top value is equal to the value, return true
	[(= (node-key set) val) #true]
	;; if the top value is less than the value, descend right
	[(< (node-key set) val) (contains (node-right set) val)]
	;; (else:) if the top value is greater than the value, descend left
	[else (contains (node-left set) val)]))

	;; !! REQUIRED FUNCTION: nth set i - find the `i`th element (in smallest-to-largest order) of the set
	;;
	;; to find the `i`th element in O(log N) time, we must descend to one subtree at most, such that it is O(depth) which is
	;; O(log N) for size N
	;;
	;; if the left subtree contains `i` elements, then there are `i` elements less than the top of the AVL tree, meaning the
	;; top value is the `i`th element
	;;
	;; if the left subtree contains more than `i` elements, then the `i`th element is in the left subtree, so we descend on
	;; the left subtree with the same `i`
	;;
	;; if the left subtree contains fewer than `i` elements, then the `i`th element is in the right subtree, so we subtract
	;; the size of the left subtree from `i`, and then subtract one again (for the root of the AVL tree) and descend on the
	;; right subtree
	;;
	;; finally, note the case that if `i` is greater than or equal to the number of elements in the tree, then it is out of
	;; bounds and we can return '(), although I don't believe the grader gives us invalid inputs
	;;
	;; observe that this is easily tail-recursable, because we do not modify the value once we find the `i`th element
	;; furthermore, since we select at most one subtree to descend on each time, and finding the size of a set os O(1), this
	;; will take O(log N) time
	(define (nth set i) (cond
	;; if `i` is greater than or equal to the number of elements, it is out of bounds, so return empty
	[(>= i (size set)) empty]
	;; if the left subtree contains `i` elements, the `i`th element is the top value
	[(= (size (node-left set)) i) (node-key set)]
	;; if the left subtree contains more than `i` elements, descend left with the same `i`
	[(> (size (node-left set)) i) (nth (node-left set) i)]
	;; (else:) if the left subtree contains fewer than `i` elements, descend right with `i - left - 1`
	[else (nth (node-right set) (- i (size (node-left set)) 1))]))

	;; ---------------------
	;; O(N log M) operations
	;; ---------------------

	;; HELPER FUNCTION: insert-all lst set - insert all values from the list into the set
	;;
	;; let N be the size of the list and M be the size of the set
	;;
	;; looping through the list takes N iterations
	;; for each element, insert it into the set - this is O(log M)
	;; therefore the overall time complexity is O(N log M)
	(define (insert-all lst set) (
	;; if the list is empty, there is nothing to insert, so return the set
	if (empty? lst) set
	;; otherwise, insert the first element into the set, then recursively insert the rest
	(insert-all (rest lst) (insertavl set (first lst)))))

	;; HELPER FUNCTION: filter lst set res - filter the list, keeping elements in the set (insert into result set)
	;;
	;; let N be the size of the list and M be the size of the set
	;;
	;; looping through the list takes N iterations
	;; for each element, check if it is in the set (O(log M)), and if so, insert it into the result set (O(log M))
	;; therefore, the overall time complexity is O(N log M)
	(define (filter lst set res) (
	;; if the list is empty, there is nothing to insert, so return the result set
	if (empty? lst) res
	;; otherwise, filter-insert the rest of the list into the updated result set
	(filter (rest lst) set (
	;; if the value is in the set, then add it to the results
	if (contains set (first lst)) (insertavl res (first lst))
	;; otherwise, just return the current results as-is
	res))))

	;; HELPER FUNCTION: antifilter lst set res - filter the list, removing elements in the set (insert into result set)
	;;
	;; this has the exact same structure as (filter lst set res); please refer to the explanation for that function
	(define (antifilter lst set res) (
	;; if the list is empty, there is nothing to insert, so return the result set
	if (empty? lst) res
	;; otherwise, anti-filter insert the rest of the list into the updated result set
	(antifilter (rest lst) set (
	;; if the value is in the set, then keep res the same
	if (contains set (first lst)) res
	;; otherwise, insert it into the results
	(insertavl res (first lst))))))

	;; HELPER FUNCTION: discard-all lst set - remove all elements in the list from the set
	;;
	;; let N be the size of the list and M be the size of the set
	;;
	;; looping through the list takes N iterations
	;; for each element, remove it from the set (O(log M)) (also, thanks to problem spec, we don't check membership)
	;; therefore, the overall time complexity is O(N log M)
	(define (discard-all lst set) (
	;; if the list is empty, there is nothing to remove, so return the set
	if (empty? lst) set
	;; otherwise, delete the first element from the set, then recursively remove the rest
	(discard-all (rest lst) (deleteavl set (first lst)))))

	;; REQUIRED FUNCTION: union s1 s2 - return a set containing all elements in either s1 or s2
	;;
	;; let N be the size of the first set and M be the size of the second set
	;;
	;; we can find the union by inserting all elements from the first set into the second set; duplicates are discarded
	;; to do this, we traverse the first tree and insert them all into the second tree, which we can do using listavl
	;; (which has a time complexity of O(N)) and insert-all (which has a time complexity of O(N log M)), giving us a total
	;; time complexity of O(N log M). since we want N = minsize, M = maxsize, if s2 is smaller, just flip the arguments
	(define (union s1 s2) (
	;; if the second set is smaller, insert all elements of the second set into the first
	if (< (sizeavl s2) (sizeavl s1)) (insert-all (listavl s2) s1)
	;; otherwise, insert all elements of the first set into the second
	(insert-all (listavl s1) s2)))

	;; REQUIRED FUNCTION: intersection s1 s2 - return a set containing all elements in both s1 and s2
	;;
	;; let N be the size of the first set and M be the size of the second set
	;;
	;; we can find the intersection by inserting all elements from the first set into an initially empty result set,
	;; filtering to only keep elements that are also in the second set, which we can do using listavl (which has a time
	;; complexity of O(N)) and filter (which has a time complexity of O(N log M)), giving us a total time complexity of
	;; O(N log M). since we want N = minsize, M = maxsize, if s2 is smaller, just flip the arguments
	(define (intersection s1 s2) (
	;; if the second set is smaller, filter all elements of the second set by the first
	if (< (sizeavl s2) (sizeavl s1)) (filter (listavl s2) s1 empty)
	;; otherwise, filter all elements of the first set by the second
	(filter (listavl s1) s2 empty)))

	;; REQUIRED FUNCTION: difference s1 s2 - return a set containing all elements in s1 that are not in s2
	;;
	;; let M be the size of the first set and N be the size of the second set
	;;
	;; NOTE: the two variables are INTENTIONALLY labelled opposite how they usually are, since we iterate through the second
	;; set here, rather than the first like with the other operations
	;;
	;; we can find the difference by discarding all elements in the second set from the first set, and we don't need to
	;; worry about elements that are not in s1 because removeavl handles that perfectly fine thanks to problem specs. we can
	;; do this using listavl (which has a time complexity of O(N)) and discard-all (which has a time complexity of
	;; O(N log M)), giving us a total time complexity of O(N log M)
	;;
	;; we want N = minsize, M = maxsize. if the first set is smaller, then this will fail. however, since difference, unlike
	;; union or intersection, is not symmetrical, we have to take a different approach. rather than removing the elements of
	;; the second set from the first, we filter the first set against the second set, which we can do using listavl (which
	;; here has a time complexity of O(M) since we flipped the arguments)) and antifilter (which has a time complexity of
	;; O(M log N)), giving us a total time complexity of O(M log N)
	(define (difference s1 s2) (
	;; if the first set is smaller, we need to antifilter s1 against s2
	if (< (sizeavl s1) (sizeavl s2)) (antifilter (listavl s1) s2 empty)
	;; otherwise, we discard-all elements of s2 from s1
	(discard-all (listavl s2) s1)))