Skip to content

Instantly share code, notes, and snippets.

@yangchenyun
Last active August 29, 2015 14:14
Show Gist options
  • Save yangchenyun/4942386a91b203bea117 to your computer and use it in GitHub Desktop.
Save yangchenyun/4942386a91b203bea117 to your computer and use it in GitHub Desktop.
full sm-15 implementation
(require racket/flonum)
;; TODO: how to calculate the rep category instead of rep number?
;; repetition category - number that represents the repetition number
;; corresponding to a given interval in the theoretical learning process
;; in which the current status of the OF matrix is used.
;; SuperMemo will look at the optimum interval and
;; see how many times an item would need to be repeated to reach that
;; interval. That number is called the repetition category.
;; Grade Range
(define MAX-GRADE 5) ; best possible grade on the 0-5 scale
(define MIN-GRADE 0) ; worst possible grade (complete blackout)
;; AF Range
(define MIN-AF 1.2) ; A-Factor of the most difficult items (see section 9 below)
(define AF-STEP-LENGTH 0.3) ; spacing between consecutive A-Factor columns
(define AF-STEP 20) ; number of A-Factor columns in the matrices
(define MAX-AF (+ MIN-AF (* AF-STEP-LENGTH (- AF-STEP 1)))) ; 1.2 + 0.3*19 = 6.9
(define REQUESTED-FI 0.1) ; requested forgetting index (10%)
(define MAX-REP 20) ; number of repetition-category rows tracked
;; General Abstraction
;; item abstraction
;; (define (get-memory-lapse item))
;; (define (get-repetition item))
;; TODO: Note that the term first grade refers to the grade issued at first repetition (i.e. Repetitions=2), not at memorizing (i.e. Repetitions=1). The first issued grade does not affect the learning process except for determining which items enter the final drill
;; (define (get-first-grade item))
;; (define (get-last-grade item))
;; U-Factor - a number associated with each memorized element. It equals
;; to the ratio of the current interval and the previously used interval.
;; If the element has not been repeated yet, i.e. the previous interval
;; is not defined, U-Factor equals the first interval. The greater the
;; U-Factor the greater the increase of the interval between repetitions.
;; U-Factor of ITEM: the ratio of the current interval to the previously
;; used interval (or the first interval when no previous interval exists).
;; TODO: requires the item abstraction (interval history) sketched above.
;; BUG FIX: the original define had an empty body, which is a syntax
;; error in Racket; fail loudly until the item abstraction exists.
(define (get-ufactor item)
  (error "get-ufactor: not implemented - item abstraction is missing"))
;; A point is 2-dimensional; it holds a relation/coordinate pair.
(define (make-point x y)
  (cons x y))

;; Coordinate accessors.
(define (x-of p) (car p))
(define (y-of p) (cdr p))
;; graph is 2-dimensioned map of *points*.
;; the points contained inside the graph's closure are the only mutable
;; data structure in the whole program
;; it could be used with a regression model to derive estimated function
(define (make-graph)
  ;; Message-dispatching closure over a private, mutable list of points.
  ;; Supported commands:
  ;;   'apply-regression-model -> (lambda (model) (model points))
  ;;   'register-point         -> (lambda (point) ...) prepends a point
  ;;   'set-points             -> (lambda (new-points) ...) replaces them all
  ;;   'get-points             -> the current point list
  (let ((points '()))
    (lambda (cmd)
      (case cmd
        ((apply-regression-model)
         (lambda (regression-model)
           (regression-model points)))
        ((register-point)
         (lambda (point)
           (set! points (cons point points))
           'success))
        ((set-points)
         (lambda (new-points)
           (set! points new-points)
           'success))
        ((get-points) points)
        (else (error "graph: command not supported"))))))
(define (make-graph-with-points points)
  ;; Convenience constructor: a fresh graph pre-loaded with POINTS.
  (define g (make-graph))
  ((g 'set-points) points)
  g)
;; Forgetting curve is a graph of ufactor to retention
;; it is a closure which holds all the points
;; It will take some time before your first forgetting curves are
;; plotted. For that reason, the initial value of the RF matrix is taken
;; from the model of a less-than-average student. The model of average
;; student is not used because the convergence from poorer student
;; parameters upwards is faster than the convergence in the opposite
;; direction.
;; TODO: INIT-CURVE, used to calculate initial RF, open up SM-15 and copy and paste
;; by hand?
;; NOTE(review): 'ufactor-list and 'af-list are quoted symbols, not
;; lists, so this `map` fails at runtime -- the initial RF data from
;; the less-than-average-student model (see TODO above) still has to
;; be filled in before this can be used.
(define (init-curve)
(make-graph-with-points (map make-point 'ufactor-list 'af-list)))
;; Group of curves are a 2d-vector indexed by repetition category and AF
;; Group of forgetting curves: an association list indexed first by
;; repetition category (1 .. MAX-REP-1) and then by A-Factor column
;; value.  Each leaf is its own mutable graph.
;; BUG FIX: the original stored the quoted symbol 'init-curve as every
;; leaf, so curves-of returned a symbol instead of a graph closure and
;; add-record-to-curve would fail.  Each cell now holds a real graph.
;; TODO(review): this should become (init-curve) once the initial RF
;; data for init-curve is available; init-curve is currently a broken
;; placeholder, so an empty graph is used instead.
;; (The original's (if (empty? _curves) ...) guard was dead code --
;; _curves always started empty -- and has been dropped.)
(define curves
  (map (lambda (rep)
         (cons rep (map (lambda (af)
                          (cons af (make-graph)))
                        (range MIN-AF MAX-AF AF-STEP-LENGTH))))
       (range 1 MAX-REP)))
;; Look up the forgetting curve stored for repetition category REP and
;; A-Factor column AFINDEX.  Keys are matched with a small tolerance
;; because the A-Factor columns are inexact flonums.
;; FIXES vs. the original: the `comp` binding (flonum?/fl=/eqv?) was
;; computed but never used -- removed; a missing key now raises a
;; descriptive error instead of `cdar` failing on an empty list.
(define (curves-of rep afIndex)
  (define (close-enough? a b) (> 0.0000001 (abs (- a b))))
  (define (list-lookup alist key)
    (let ((hits (filter (lambda (elem)
                          (close-enough? (car elem) key))
                        alist)))
      (if (null? hits)
          (error "curves-of: no entry for key" key)
          (cdar hits))))
  (list-lookup (list-lookup curves rep) afIndex))
(define RECALL-THRESHOLD 3) ; grades above this count as successful recall

;; Record one repetition outcome on CURVE: x is the u-factor, y is 100
;; when the item was recalled (grade above threshold) and 0 otherwise.
(define (add-record-to-curve curve ufactor grade)
  (define remembered
    (if (> grade RECALL-THRESHOLD) 100 0))
  ((curve 'register-point) (make-point ufactor remembered))
  'success)
;; TODO: each point is stored as remembered-or-forgotten, but the
;; actual graph is computed in terms of retention, so this graph needs
;; to be processed before running the regression model on it
;; Average retention for a given forgetting index:
;;   R = -FI / ln(1 - FI)
(define (fi-to-retention FI)
  (- (/ FI (log (- 1 FI)))))
;; an approximate (numerical) inverse function?
;; Numerical inverse of fi-to-retention: find the FI in (0, 1) whose
;; retention R = -FI/ln(1-FI) equals RETENTION.
;; R is monotonically decreasing in FI (R -> 1 as FI -> 0, R -> 0 as
;; FI -> 1), so plain bisection converges.
;; BUG FIX: the original define had an empty body (a syntax error);
;; there is no closed-form inverse, hence the numerical approximation
;; hinted at by the comment above.
(define (retention-to-fi retention)
  (define (r-of fi) (- (/ fi (log (- 1 fi)))))
  (let loop ((lo 1e-12) (hi (- 1 1e-12)) (iter 0))
    (let* ((mid (/ (+ lo hi) 2))
           (r (r-of mid)))
      (cond ((or (> iter 200) (< (- hi lo) 1e-12)) mid)
            ;; retention still too high -> true FI is larger than mid
            ((> r retention) (loop mid hi (+ iter 1)))
            (else (loop lo mid (+ iter 1)))))))
;; fit y = A * e^(Bx)
;; Least-squares fit of y = A * e^(B x), done as a linear regression of
;; log(y) on x.  Returns a dispatcher exposing the coefficients and both
;; directions of the fitted mapping ('x-to-y, 'y-to-x).
(define (exponential-model points)
  (let* ((n (length points))
         (X (map x-of points))
         (logY (map log (map y-of points)))
         (sumX (reduce + X))
         (sumlogY (reduce + logY))
         (sumsqX (reduce + (map sqr X)))
         (sumXlogY (reduce + (map * X logY)))
         ;; common denominator of the normal equations
         (denom (- (* n sumsqX) (sqr sumX)))
         ;; intercept and slope of the line fitted in log space
         (a (/ (- (* sumlogY sumsqX) (* sumX sumXlogY)) denom))
         (b (/ (- (* n sumXlogY) (* sumX sumlogY)) denom))
         (A (exp a))
         (B b))
    (lambda (cmd)
      (case cmd
        ((A) A)
        ((B) B)
        ((x-to-y)
         (lambda (x) (* A (exp (* B x)))))
        ((y-to-x)
         (lambda (y) (/ (- (log y) (log A)) B)))
        (else (error "exponential-model: cmd is not supported"))))))
;; fit y = a + bx
;; Least-squares fit of y = a + b x.
;; BUG FIX: the original referenced sumY in the formulas for both a and
;; b without ever binding it (unbound-variable error); the
;; (reduce + Y) binding is now present.
(define (linear-model points)
  (let* ((n (length points))
         (X (map x-of points))
         (Y (map y-of points))
         (sumX (reduce + X))
         (sumY (reduce + Y)) ; was missing in the original
         (sumsqX (reduce + (map sqr X)))
         (sumXY (reduce + (map * X Y)))
         ;; common denominator of the normal equations
         (denom (- (* n sumsqX) (sqr sumX)))
         (a (/ (- (* sumY sumsqX) (* sumX sumXY)) denom))
         (b (/ (- (* n sumXY) (* sumX sumY)) denom)))
    (lambda (cmd)
      (case cmd
        ((a) a)
        ((b) b)
        ((x-to-y) (lambda (x) (+ a (* b x))))
        ((y-to-x) (lambda (y) (/ (- y a) b)))
        (else (error "linear-model: cmd is not supported"))))))
;; fit y = bx
;; Least-squares fit of y = b x (regression forced through the origin):
;;   b = sum(x*y) / sum(x^2)
(define (linear-model-through-origin points)
  (define X (map x-of points))
  (define Y (map y-of points))
  (define b
    (/ (reduce + (map * X Y))
       (reduce + (map sqr X))))
  (lambda (cmd)
    (case cmd
      ((b) b)
      ((x-to-y) (lambda (x) (* b x)))
      ((y-to-x) (lambda (y) (/ y b)))
      (else (error "linear-model-through-origin: cmd is not supported")))))
;; TODO: abstraction for matrix
;; 7 RF matrix and Forgetting curves:
;; RFactor: the number which says at which U-Factor, the measured
;; forgetting index is approximated to be at REQUESTED-FI.
;; Individual entries of the RF matrix are computed from forgetting
;; curves approximated for each entry individually.
;; Each forgetting curve corresponds with a different value of the
;; repetition number and a different value of A-Factor (or memory lapses
;; in the case of the first repetition).
;; The value of the RF matrix entry corresponds to the moment in time
;; where the forgetting curve passes the knowledge retention point
;; derived from the requested forgetting index.
;; R-Factor for (rep, afIndex): the u-factor at which the fitted
;; forgetting curve crosses the retention level implied by REQUESTED-FI.
;; BUG FIXES vs. the original: it passed the quoted symbol
;; 'forgetting-model where apply-regression-model expects a procedure,
;; and then sent the unsupported 'get-ufactor message to the model.
;; The exponential model is used here, and its 'y-to-x direction maps
;; retention back to a u-factor.
;; TODO(review): curve points store retention as 0/100 while
;; (- 1 REQUESTED-FI) is on a 0..1 scale -- confirm which scale the
;; processed graph uses (see the processing TODO above fi-to-retention).
(define (RF rep afIndex)
  (let* ((forgetting-curve (curves-of rep afIndex))
         (estimated-forgetting-func
          ((forgetting-curve 'apply-regression-model) exponential-model)))
    ((estimated-forgetting-func 'y-to-x) (- 1 REQUESTED-FI))))
;; 8 TODO: Deriving OF matrix from the forgetting curves:
;; 1 fixed-point power approximation of the R-Factor decline along the RF matrix columns (the fixed point corresponds to second repetition at which the approximation curve passes through the A-Factor value),
;; 2 for all columns, computing D-Factor which expresses the decay constant of the power approximation,
;; 3 linear regression of D-Factor change across the RF matrix columns, and
;; 4 deriving the entire OF matrix from the slope and intercept of the straight line that makes up the best fit in the D-Factor graph.
;; Note that the first row of the OF matrix is computed in a different
;; way. It corresponds to the best-fit exponential curve obtained from
;; the first row of the RF matrix.
;; All the above steps are passed after each repetition. In other
;; words, the theoretically optimum value of the OF matrix is updated as
;; soon as *new forgetting curve data is collected*.
;; TODO: derive the OF matrix entry for (rep, afIndex) from the
;; forgetting curves via steps 1-4 described above.
;; BUG FIX: the original lambda had an empty body (a syntax error in
;; Racket); fail loudly until implemented.
(define compute-OF
  (lambda (rep afIndex)
    (error "compute-OF: not implemented")))
;; TODO: map an OF-matrix column index back to its A-Factor value.
;; BUG FIX: the original define had an empty body (syntax error).
(define (get-AF-for-OF-column col)
  (error "get-AF-for-OF-column: not implemented"))
;; TODO: scan the OF-matrix columns (high to low) for the first column
;; whose estimated O-Factor is exceeded by TARGET-INTERVAL, per the
;; GetNewAFactorFromEstimatedFI pseudo-code below.
;; BUG FIX: the original define had an empty body (syntax error).
(define (look-for-col-match rep target-interval)
  (error "look-for-col-match: not implemented"))
;; 1 Optimum interval: Inter-repetition intervals are computed using the following formula:
;; I(1)=OF[1,L+1]
;; I(n)=I(n-1)*OF[n,AF]
;; where:
;; * OF - matrix of optimal factors, which is modified in the course of repetitions
;; * OF[1,L+1] - value of the OF matrix entry taken from the first row and the L+1 column
;; * OF[n,AF] - value of the OF matrix entry that corresponds with the n-th repetition, and with item difficulty AF
;; * L - number of times a given item has been forgotten (from "memory Lapses")
;; * AF - number that reflects absolute difficulty of a given item (from "Absolute difficulty Factor")
;; * I(n) - n-th inter-repetition interval for a given item
;; Note the first repetition here means the first repetition after forgetting
;; TODO: implement advancement spacing effect algorithm
;; 1. The actual interval is irrelevant to determine the optimal interval
;; 2. start with rep-1, it recursively multiplies ufactors to find the absolute days
;; Optimum inter-repetition interval for ITEM:
;;   I(1) = OF[1, L+1]
;;   I(n) = I(n-1) * OF[n, AF]
;; BUG FIXES vs. the original:
;;  * compute-OF takes (rep afIndex) but was called with no arguments;
;;    the function itself is what should be bound as OF.
;;  * the recursion used (I (- 1 n)), which for n > 1 moves away from
;;    the base case and never terminates; it must be (I (- n 1)).
;;  * calc-AF is defined as (calc-AF item used-interval), but was called
;;    as (calc-AF item n optimal-factor-matrix) with an unbound
;;    optimal-factor-matrix.
;; TODO(review): passing n as calc-AF's used-interval is a placeholder
;; to match the defined arity -- confirm the intended interval argument.
(define (nth-optimal-interval item)
  (let* ((L (get-memory-lapse item))
         (n (get-repetition item))
         (OF compute-OF)
         (AF (calc-AF item n)))
    (define (I n)
      (if (= n 1)
          (OF 1 (+ 1 L))
          (* (I (- n 1))
             (OF n AF))))
    (I n)))
;; 9 Item difficulty:
;; ..the higher it is, the easier the item.
;; The most difficult items have A-Factors equal to 1.2.
;; It is worth noting that AF=OF[2,AF]. In other words, AF denotes the
;; optimum interval increase factor after the second repetition. This is
;; also equivalent with the highest interval increase factor for a given
;; item.
;; The initial value of A-Factor is derived from the first grade obtained
;; by the item, and the correlation graph of the first grade and A-Factor
;; (G-AF graph).
;; TODO: This graph is updated after each repetition in which a new A-Factor
;; value is estimated and correlated with the item's first grade, and old
;; A-Factor estimation is removed from the graph.
;; Subsequent approximations of the real A-Factor value are done after
;; each repetition by using grades, OF matrix, and a correlation graph
;; that shows the correspondence of the grade with the expected
;; forgetting index (FI-G graph).
;; The grade used to compute the initial A-Factor is normalized,
;; i.e. adjusted for the difference between the actually used interval
;; and the optimum interval for the forgetting index equal 10%
;; Initial: Assume a linear relationship between grade and AF
;; Initial G-AF correlation data: assume a linear relationship between
;; the first grade and A-Factor (best grade -> easiest item, MAX-AF;
;; worst grade -> hardest item, MIN-AF).
(define INIT-G-AF-points (list (make-point MAX-GRADE MAX-AF)
(make-point MIN-GRADE MIN-AF)))
;; Mutable correlation graph of first grade vs. estimated A-Factor.
(define G-AF-graph (make-graph-with-points INIT-G-AF-points))
;; Register one (grade, A-Factor) observation on the G-AF graph.
;; BUG FIX: the original wrote ((G-AF-graph) 'register-point), calling
;; the graph closure with zero arguments; the closure expects the
;; command symbol directly.
(define (add-entry-to-G-AF-graph grade AF)
  ((G-AF-graph 'register-point)
   (make-point grade AF)))
;; Update the G-AF graph after a repetition with the newly estimated
;; A-Factor correlated against the item's first grade.
;; TODO(review): per the note above, the item's previous A-Factor
;; estimation should also be removed from the graph -- not done yet.
(define (update-G-AF-graph first-grade new-AF)
(add-entry-to-G-AF-graph first-grade new-AF))
;; Initial A-Factor for an item: read the fitted G-AF curve at the
;; item's first grade.
;; BUG FIX: the original wrapped estimated-func in a bare application
;; instead of a let binding, so estimated-func was never bound and the
;; form was evaluated as a procedure call.  Structured like
;; calc-estimated-FI below for consistency.
(define (calc-initial-AF first-grade)
  (let ((estimated-func
         (((G-AF-graph 'apply-regression-model) exponential-model)
          'x-to-y)))
    (estimated-func first-grade)))
;; The FI-G graph is updated after each repetition by using the expected forgetting index and actual grade scores.
;; The expected forgetting index can easily be derived from the interval used between repetitions and the optimum interval computed from the OF matrix.
;; The higher the value of the expected forgetting index, the lower the grade.
;; From the grade and the FI-G graph, we can compute the estimated forgetting index
;; Notes:
;; Expected forgetting index is the expected probability of failing to recall a given element if the
;; repetition takes place at a given moment.
;; Estimated forgetting index is value of the forgetting index derived
;; from the scored grade on the basis of the FI-G graph.
;; Initial FI-G correlation data: a forgetting index of 0% corresponds
;; to the best grade and 100% to the worst grade.  Note the FI axis is
;; on a 0..100 (percent) scale.
(define INIT-FI-G-points (list (make-point 0 MAX-GRADE)
(make-point 100 MIN-GRADE)))
;; Mutable correlation graph of expected forgetting index vs. grade.
(define FI-G-graph (make-graph-with-points INIT-FI-G-points))
;; Register one (forgetting-index, grade) observation on the FI-G graph.
;; BUG FIX: the original wrote ((FI-G-graph) 'register-point), calling
;; the graph closure with zero arguments; the closure expects the
;; command symbol directly.
(define (add-entry-to-FI-G-graph forgetting-index grade)
  ((FI-G-graph 'register-point)
   (make-point forgetting-index grade)))
;; Update the FI-G graph with the expected forgetting index for the
;; repetition just scored.  From the pseudo-code
;;   OptimumInterval := UsedInterval*ln(0.9)/ln(1-EstimatedFI/100)
;; solving for FI gives
;;   FI = 100 * (1 - exp(UsedInterval * ln(0.9) / OptimumInterval))
;; BUG FIXES vs. the original:
;;  * used-interval must be MULTIPLIED by (log 0.9), not divided by it;
;;  * the result is scaled to 0..100 to match INIT-FI-G-points;
;;  * the point belongs on the FI-G graph -- the original registered it
;;    on the G-AF graph.
(define (update-FI-G-graph grade used-interval optimum-interval)
  (let ((expected-FI (* 100 (- 1 (exp (/ (* used-interval (log 0.9))
                                         optimum-interval))))))
    (add-entry-to-FI-G-graph expected-FI grade)))
;; Estimated forgetting index for GRADE: read the fitted FI-G curve in
;; the inverse ('y-to-x) direction, grade -> forgetting index.
(define (calc-estimated-FI grade)
  (define estimated-func
    (((FI-G-graph 'apply-regression-model) exponential-model)
     'y-to-x))
  (estimated-func grade))
;; 11 Computing A-Factors:
;; From (1) the estimated forgetting index, (2) length of the interval and (3) the OF matrix, we can easily compute the most accurate value of A-Factor.
;; Note that A-Factor serves as an index to the OF matrix, while the
;; estimated forgetting index allows one to find the column of the OF
;; matrix for which the optimum interval corresponds with the actually
;; used interval corrected for the deviation of the estimated forgetting
;; index from the requested forgetting index.
;; function GetNewAFactorFromEstimatedFI(EstimatedFI);
;; OptimumInterval:=(UsedInterval*ln(0.9)/ln(1-EstimatedFI/100));
;; OFactor:=OptimumInterval/UsedInterval*UFactor;
;; for col:=20 downto 1 do {scan columns of the OF matrix}
;; if OFactor>EstimatedOFactor(col,RepetitionsCategory) then
;; break;
;; Result:=AFactorFromOFColumn(col); {new value of A-Factor}
;; At each repetition, a weighted average is taken of the old A-Factor
;; and the new estimated value of the A-Factor. The newly obtained
;; A-Factor is used in indexing the OF matrix when computing the new
;; optimum inter-repetition interval
;; Compute the item's new A-Factor after a repetition.
;; First repetition (n = 1): read the initial A-Factor off the G-AF
;; graph from the grade.  Later repetitions follow the pseudo-code:
;;   OptimumInterval := UsedInterval*ln(0.9)/ln(1-EstimatedFI/100)
;;   OFactor := OptimumInterval/UsedInterval*UFactor
;; then the OF-matrix column matching that O-Factor yields the A-Factor.
;; NOTE(review): get-grade is not defined in this file -- the item
;; abstraction above only sketches get-first-grade/get-last-grade;
;; confirm which grade is intended.
(define (calc-AF item used-interval)
(let ((n (get-repetition item))
(grade (get-grade item))
(ufactor (get-ufactor item)))
(if (= n 1)
(calc-initial-AF grade)
;; NOTE(review): (log (- 1 estimated-FI)) assumes a 0..1 forgetting
;; index, while the FI-G graph axis runs 0..100 and the pseudo-code
;; divides by 100 -- confirm the scale before trusting this value.
(let* ((estimated-FI (calc-estimated-FI grade))
;; this is OptimumInterval from the pseudo-code
(normalized-interval (/ (* used-interval (log 0.9))
(log (- 1 estimated-FI))))
;; despite the name, this is the OFactor of the pseudo-code:
;; OptimumInterval/UsedInterval * UFactor
(estimated-optimum-interval (* normalized-interval
(/ 1 used-interval)
ufactor)))
(get-AF-for-OF-column
(look-for-col-match n estimated-optimum-interval))))))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment